Use custom checked math functions and fix type of multiplications

The previous implementation did not return `signed` for `signed - unsigned` or multiplications that involve `signed`. Furthermore, it seems to have bug in the definition of `subtract`. Since I did not find a library that does what we want, I ultimately decided to implement this myself. Hopefully it doesn't contain too many bugs. In any case, this can be improved later.
tenzir · Jun 28, 2024 · a139809 · a139809
1 parent cae8e09
commit a139809
Show file tree

Hide file tree

Showing 5 changed files with 306 additions and 167 deletions.
diff --git a/libtenzir/include/tenzir/checked_math.hpp b/libtenzir/include/tenzir/checked_math.hpp
@@ -0,0 +1,148 @@
+//    _   _____   __________
+//   | | / / _ | / __/_  __/     Visibility
+//   | |/ / __ |_\ \  / /          Across
+//   |___/_/ |_/___/ /_/       Space and Time
+//
+// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+#pragma once
+
+#include <concepts>
+#include <limits>
+#include <optional>
+
+namespace tenzir {
+
+template <std::integral T>
+constexpr auto max = std::numeric_limits<T>::max();
+
+template <std::integral T>
+constexpr auto min = std::numeric_limits<T>::lowest();
+
+#define CHECK(x)                                                               \
+  if (not(x)) {                                                                \
+    return std::nullopt;                                                       \
+  }
+
+template <std::integral X, std::integral Y>
+constexpr auto checked_add(X x, Y y)
+  -> std::optional<std::common_type_t<X, Y>> {
+  static_assert(sizeof(x) == sizeof(y));
+  using R = std::common_type_t<X, Y>;
+  if constexpr (std::is_signed_v<X> && std::is_signed_v<Y>) {
+    static_assert(std::is_signed_v<R>);
+    if (x > 0 && y > 0) {
+      // -> x + y <= max<R>
+      CHECK(x <= max<R> - y);
+    } else if (x < 0 && y < 0) {
+      // -> min<R> <= x + y
+      CHECK(min<R> - y <= x);
+    } else {
+      // Cannot overflow due to mixed signs.
+    }
+    return x + y;
+  } else if constexpr (std::is_signed_v<X>) {
+    // Make it so that the first argument is the unsigned one.
+    return checked_add(y, x);
+  } else {
+    static_assert(std::is_unsigned_v<X>);
+    // -> 0 <= x + y
+    if constexpr (std::is_signed_v<Y>) {
+      if (y < 0) {
+        // -> -y <= x
+        auto minus_y = y == min<Y> ? R(max<Y>) + 1 : R(-y);
+        CHECK(minus_y <= x);
+      }
+    }
+    // -> x + y <= max<R>
+    if (y >= 0) {
+      // -> y <= max<R> - x
+      CHECK(R(y) <= max<R> - x);
+    }
+    return x + R(y);
+  }
+}
+
+template <std::integral X, std::integral Y>
+constexpr auto checked_sub(X x, Y y) -> std::optional<X> {
+  static_assert(sizeof(x) == sizeof(y));
+  // -> min<X> <= x - y && x - y <= max<X>
+  if (y >= 0) {
+    // -> min<X> <= x - y
+    // -> min<X> + y <= x
+    // Check whether we can convert `y` to `X`.
+    if (y <= Y(max<X>)) {
+      CHECK(min<X> + X(y) <= x);
+      return x - X(y);
+    }
+    // We know `y > max<X>` and thus `min<X> + y >= 0`.
+    CHECK(x >= 0);
+    CHECK(Y(min<X>) + y <= Y(x));
+    return X(Y(x) - Y(y));
+  }
+  // We know `y < 0` (which implies that Y is signed).
+  // -> x - y <= max<X>
+  if (y == min<Y>) {
+    // -> x <= max<X> + min<Y>
+    if constexpr (std::is_signed_v<X>) {
+      CHECK(x <= -1);
+      return x - y;
+    } else {
+      // -> x <= max<U> + min<S>
+      CHECK(x <= max<X> - max<Y> - 1);
+      return X(Y(x) - Y(y));
+    }
+  }
+  CHECK(x <= max<X> - X(-y));
+  return x + X(-y);
+}
+
+template <std::integral X, std::integral Y>
+constexpr auto checked_mul(X x, Y y)
+  -> std::optional<std::conditional_t<std::is_signed_v<X>, X, Y>> {
+  static_assert(sizeof(x) == sizeof(y));
+  if (x == 0 || y == 0) {
+    return 0;
+  }
+  if constexpr (std::is_unsigned_v<X> && std::is_signed_v<Y>) {
+    // Make signed the first argument if possible.
+    return checked_mul(y, x);
+  } else if constexpr (std::is_unsigned_v<X>) {
+    static_assert(std::is_unsigned_v<Y>);
+    // -> x * y <= max<X>;
+    CHECK(x <= max<X> / y);
+    return x * y;
+  } else if constexpr (std::is_unsigned_v<Y>) {
+    if (x == -1 && y == Y(max<X>) + 1) {
+      return min<X>;
+    }
+    CHECK(y <= Y(max<X>));
+    return checked_mul(x, X(y));
+  } else {
+    static_assert(std::same_as<X, Y>);
+    // -> x * y <= max<X>
+    if (y > 0) {
+      CHECK(x <= max<X> / y);
+    } else {
+      CHECK(x >= max<X> / y);
+    }
+    // -> min<X> <= x * y
+    if (y == -1) {
+      if (x == -1) {
+        return 1;
+      }
+      std::swap(x, y);
+    }
+    if (y > 0) {
+      CHECK(min<X> / y <= x);
+    } else {
+      CHECK(min<X> / y >= x);
+    }
+    return x * y;
+  }
+}
+
+#undef CHECK
+
+} // namespace tenzir
diff --git a/libtenzir/include/tenzir/detail/checked_math.hpp b/libtenzir/include/tenzir/detail/checked_math.hpp
diff --git a/libtenzir/src/tql2/eval_binary.cpp b/libtenzir/src/tql2/eval_binary.cpp
@@ -8,18 +8,14 @@
 
 #include "tenzir/fwd.hpp"
 
+#include "tenzir/checked_math.hpp"
 #include "tenzir/detail/assert.hpp"
-#include "tenzir/detail/checked_math.hpp"
 #include "tenzir/series.hpp"
 #include "tenzir/table_slice_builder.hpp"
 #include "tenzir/tql2/arrow_utils.hpp"
 #include "tenzir/tql2/ast.hpp"
 #include "tenzir/tql2/eval_impl.hpp"
 
-#include <arrow/compute/api_scalar.h>
-
-#include <type_traits>
-
 // TODO: This file takes very long to compile. Consider splitting it up even more.
 
 namespace tenzir {
@@ -100,25 +96,36 @@ struct BinOpKernel;
 template <ast::binary_op Op, integral_type L, integral_type R>
   requires(is_arithmetic(Op))
 struct BinOpKernel<Op, L, R> {
-  using result = std::common_type_t<type_to_data_t<L>, type_to_data_t<R>>;
-
-  static auto evaluate(type_to_data_t<L> l, type_to_data_t<R> r)
-    -> std::variant<result, const char*> {
+  // TODO: This is just so that we can have the return type be inferred. This
+  // would not be necessary if the template itself would be friendly to type
+  // inference.
+  static auto inner(type_to_data_t<L> l, type_to_data_t<R> r) {
     using enum ast::binary_op;
     if constexpr (Op == add) {
-      return checked_math::add(l, r);
+      return checked_add(l, r);
     } else if constexpr (Op == sub) {
-      return checked_math::subtract(l, r);
+      return checked_sub(l, r);
     } else if constexpr (Op == mul) {
-      return checked_math::multiply(l, r);
+      return checked_mul(l, r);
     } else {
       static_assert(detail::always_false_v<L>,
                     "division is handled by its own specialization");
     }
   }
+
+  using result = decltype(inner(std::declval<type_to_data_t<L>>(),
+                                std::declval<type_to_data_t<R>>()))::value_type;
+
+  static auto evaluate(type_to_data_t<L> l, type_to_data_t<R> r)
+    -> std::variant<result, const char*> {
+    auto result = inner(l, r);
+    if (not result) {
+      return "integer overflow";
+    }
+    return *result;
+  }
 };
 
-// if any of the operands is double, the result is a double
 template <ast::binary_op Op, numeric_type L, numeric_type R>
   requires(is_arithmetic(Op)
            and (std::same_as<double_type, L> or std::same_as<double_type, R>))
@@ -141,28 +148,6 @@ struct BinOpKernel<Op, L, R> {
   }
 };
 
-template <>
-struct BinOpKernel<ast::binary_op::sub, int64_type, uint64_type> {
-  using result = int64_t;
-
-  static auto evaluate(int64_t l, uint64_t r)
-    -> std::variant<result, const char*> {
-    // r > int_max
-    if (r > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
-      return "subtraction overflow";
-    }
-    // l < 0 and |l| + r > int_max -> l - r < int_min
-    if (l < 0
-        and static_cast<uint64_t>(abs(l)) + r
-              > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())
-                  + 1) {
-      return "subtraction underflow";
-    }
-    return l - static_cast<int64_t>(r);
-  }
-};
-
-// division always yields doubles
 template <numeric_type L, numeric_type R>
 struct BinOpKernel<ast::binary_op::div, L, R> {
   using result = double;