Skip to content

Commit

Permalink
Use custom checked math functions and fix type of multiplications
Browse files Browse the repository at this point in the history
The previous implementation did not return `signed` for `signed - unsigned` or
multiplications that involve `signed`. Furthermore, it seems to have bug in the
definition of `subtract`. Since I did not find a library that does what we want,
I ultimately decided to implement this myself. Hopefully it doesn't contain too
many bugs. In any case, this can be improved later.
  • Loading branch information
jachris committed Jun 28, 2024
1 parent cae8e09 commit a139809
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 167 deletions.
148 changes: 148 additions & 0 deletions libtenzir/include/tenzir/checked_math.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// _ _____ __________
// | | / / _ | / __/_ __/ Visibility
// | |/ / __ |_\ \ / / Across
// |___/_/ |_/___/ /_/ Space and Time
//
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <concepts>
#include <limits>
#include <optional>

namespace tenzir {

template <std::integral T>
constexpr auto max = std::numeric_limits<T>::max();

template <std::integral T>
constexpr auto min = std::numeric_limits<T>::lowest();

#define CHECK(x) \
if (not(x)) { \
return std::nullopt; \
}

template <std::integral X, std::integral Y>
constexpr auto checked_add(X x, Y y)
-> std::optional<std::common_type_t<X, Y>> {
static_assert(sizeof(x) == sizeof(y));
using R = std::common_type_t<X, Y>;
if constexpr (std::is_signed_v<X> && std::is_signed_v<Y>) {
static_assert(std::is_signed_v<R>);
if (x > 0 && y > 0) {
// -> x + y <= max<R>
CHECK(x <= max<R> - y);
} else if (x < 0 && y < 0) {
// -> min<R> <= x + y
CHECK(min<R> - y <= x);
} else {
// Cannot overflow due to mixed signs.
}
return x + y;
} else if constexpr (std::is_signed_v<X>) {
// Make it so that the first argument is the unsigned one.
return checked_add(y, x);
} else {
static_assert(std::is_unsigned_v<X>);
// -> 0 <= x + y
if constexpr (std::is_signed_v<Y>) {
if (y < 0) {
// -> -y <= x
auto minus_y = y == min<Y> ? R(max<Y>) + 1 : R(-y);
CHECK(minus_y <= x);
}
}
// -> x + y <= max<R>
if (y >= 0) {
// -> y <= max<R> - x
CHECK(R(y) <= max<R> - x);
}
return x + R(y);
}
}

template <std::integral X, std::integral Y>
constexpr auto checked_sub(X x, Y y) -> std::optional<X> {
static_assert(sizeof(x) == sizeof(y));
// -> min<X> <= x - y && x - y <= max<X>
if (y >= 0) {
// -> min<X> <= x - y
// -> min<X> + y <= x
// Check whether we can convert `y` to `X`.
if (y <= Y(max<X>)) {
CHECK(min<X> + X(y) <= x);
return x - X(y);
}
// We know `y > max<X>` and thus `min<X> + y >= 0`.
CHECK(x >= 0);
CHECK(Y(min<X>) + y <= Y(x));
return X(Y(x) - Y(y));
}
// We know `y < 0` (which implies that Y is signed).
// -> x - y <= max<X>
if (y == min<Y>) {
// -> x <= max<X> + min<Y>
if constexpr (std::is_signed_v<X>) {
CHECK(x <= -1);
return x - y;
} else {
// -> x <= max<U> + min<S>
CHECK(x <= max<X> - max<Y> - 1);
return X(Y(x) - Y(y));
}
}
CHECK(x <= max<X> - X(-y));
return x + X(-y);
}

template <std::integral X, std::integral Y>
constexpr auto checked_mul(X x, Y y)
-> std::optional<std::conditional_t<std::is_signed_v<X>, X, Y>> {
static_assert(sizeof(x) == sizeof(y));
if (x == 0 || y == 0) {
return 0;
}
if constexpr (std::is_unsigned_v<X> && std::is_signed_v<Y>) {
// Make signed the first argument if possible.
return checked_mul(y, x);
} else if constexpr (std::is_unsigned_v<X>) {
static_assert(std::is_unsigned_v<Y>);
// -> x * y <= max<X>;
CHECK(x <= max<X> / y);
return x * y;
} else if constexpr (std::is_unsigned_v<Y>) {
if (x == -1 && y == Y(max<X>) + 1) {
return min<X>;
}
CHECK(y <= Y(max<X>));
return checked_mul(x, X(y));
} else {
static_assert(std::same_as<X, Y>);
// -> x * y <= max<X>
if (y > 0) {
CHECK(x <= max<X> / y);
} else {
CHECK(x >= max<X> / y);
}
// -> min<X> <= x * y
if (y == -1) {
if (x == -1) {
return 1;
}
std::swap(x, y);
}
if (y > 0) {
CHECK(min<X> / y <= x);
} else {
CHECK(min<X> / y >= x);
}
return x * y;
}
}

#undef CHECK

} // namespace tenzir
121 changes: 0 additions & 121 deletions libtenzir/include/tenzir/detail/checked_math.hpp

This file was deleted.

55 changes: 20 additions & 35 deletions libtenzir/src/tql2/eval_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@

#include "tenzir/fwd.hpp"

#include "tenzir/checked_math.hpp"
#include "tenzir/detail/assert.hpp"
#include "tenzir/detail/checked_math.hpp"
#include "tenzir/series.hpp"
#include "tenzir/table_slice_builder.hpp"
#include "tenzir/tql2/arrow_utils.hpp"
#include "tenzir/tql2/ast.hpp"
#include "tenzir/tql2/eval_impl.hpp"

#include <arrow/compute/api_scalar.h>

#include <type_traits>

// TODO: This file takes very long to compile. Consider splitting it up even more.

namespace tenzir {
Expand Down Expand Up @@ -100,25 +96,36 @@ struct BinOpKernel;
template <ast::binary_op Op, integral_type L, integral_type R>
requires(is_arithmetic(Op))
struct BinOpKernel<Op, L, R> {
using result = std::common_type_t<type_to_data_t<L>, type_to_data_t<R>>;

static auto evaluate(type_to_data_t<L> l, type_to_data_t<R> r)
-> std::variant<result, const char*> {
// TODO: This is just so that we can have the return type be inferred. This
// would not be necessary if the template itself would be friendly to type
// inference.
static auto inner(type_to_data_t<L> l, type_to_data_t<R> r) {
using enum ast::binary_op;
if constexpr (Op == add) {
return checked_math::add(l, r);
return checked_add(l, r);
} else if constexpr (Op == sub) {
return checked_math::subtract(l, r);
return checked_sub(l, r);
} else if constexpr (Op == mul) {
return checked_math::multiply(l, r);
return checked_mul(l, r);
} else {
static_assert(detail::always_false_v<L>,
"division is handled by its own specialization");
}
}

using result = decltype(inner(std::declval<type_to_data_t<L>>(),
std::declval<type_to_data_t<R>>()))::value_type;

static auto evaluate(type_to_data_t<L> l, type_to_data_t<R> r)
-> std::variant<result, const char*> {
auto result = inner(l, r);
if (not result) {
return "integer overflow";
}
return *result;
}
};

// if any of the operands is double, the result is a double
template <ast::binary_op Op, numeric_type L, numeric_type R>
requires(is_arithmetic(Op)
and (std::same_as<double_type, L> or std::same_as<double_type, R>))
Expand All @@ -141,28 +148,6 @@ struct BinOpKernel<Op, L, R> {
}
};

template <>
struct BinOpKernel<ast::binary_op::sub, int64_type, uint64_type> {
using result = int64_t;

static auto evaluate(int64_t l, uint64_t r)
-> std::variant<result, const char*> {
// r > int_max
if (r > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return "subtraction overflow";
}
// l < 0 and |l| + r > int_max -> l - r < int_min
if (l < 0
and static_cast<uint64_t>(abs(l)) + r
> static_cast<uint64_t>(std::numeric_limits<int64_t>::max())
+ 1) {
return "subtraction underflow";
}
return l - static_cast<int64_t>(r);
}
};

// division always yields doubles
template <numeric_type L, numeric_type R>
struct BinOpKernel<ast::binary_op::div, L, R> {
using result = double;
Expand Down
Loading

0 comments on commit a139809

Please sign in to comment.