Skip to content

Commit

Permalink
util: Add consteval ""_hex[_v][_u8] literals
Browse files Browse the repository at this point in the history
""_hex is a compile-time user-defined literal returning std::array<std::byte>, equivalent of ParseHex.

Variants:
- ""_hex_v returns std::vector<std::byte>
- ""_hex_u8 returns std::array<uint8_t>
- ""_hex_v_u8 returns std::vector<uint8_t> - Directly serializable as a size-prefixed OP_PUSH CScript payload using operator<<.

Also extracts from_hex into shared util::ConstevalHexDigit function.

Co-Authored-By: hodlinator <172445034+hodlinator@users.noreply.github.com>
Co-Authored-By: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz>
Co-Authored-By: Ryan Ofsky <ryan@ofsky.org>
Co-Authored-By: stickies-v <stickies-v@protonmail.com>
  • Loading branch information
5 people committed Aug 28, 2024
1 parent dc5f6f6 commit 5b74a84
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 10 deletions.
29 changes: 29 additions & 0 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <hash.h> // For Hash()
#include <key.h> // For CKey
#include <script/parsing.h>
#include <span.h>
#include <sync.h>
#include <test/util/random.h>
#include <test/util/setup_common.h>
Expand Down Expand Up @@ -45,6 +46,8 @@
#include <boost/test/unit_test.hpp>

using namespace std::literals;
using namespace util::hex_literals;
using util::ConstevalHexDigit;
using util::Join;
using util::RemovePrefix;
using util::RemovePrefixView;
Expand Down Expand Up @@ -151,6 +154,20 @@ BOOST_AUTO_TEST_CASE(parse_hex)

// Basic test vector
std::vector<unsigned char> expected(std::begin(HEX_PARSE_OUTPUT), std::end(HEX_PARSE_OUTPUT));
constexpr std::array<std::byte, 65> hex_literal_array{operator""_hex<util::detail::Hex(HEX_PARSE_INPUT)>()};
auto hex_literal_span{MakeUCharSpan(hex_literal_array)};
BOOST_CHECK_EQUAL_COLLECTIONS(hex_literal_span.begin(), hex_literal_span.end(), expected.begin(), expected.end());

const std::vector<std::byte> hex_literal_vector{operator""_hex_v<util::detail::Hex(HEX_PARSE_INPUT)>()};
hex_literal_span = MakeUCharSpan(hex_literal_vector);
BOOST_CHECK_EQUAL_COLLECTIONS(hex_literal_span.begin(), hex_literal_span.end(), expected.begin(), expected.end());

constexpr std::array<uint8_t, 65> hex_literal_array_uint8{operator""_hex_u8<util::detail::Hex(HEX_PARSE_INPUT)>()};
BOOST_CHECK_EQUAL_COLLECTIONS(hex_literal_array_uint8.begin(), hex_literal_array_uint8.end(), expected.begin(), expected.end());

result = operator""_hex_v_u8<util::detail::Hex(HEX_PARSE_INPUT)>();
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());

result = ParseHex(HEX_PARSE_INPUT);
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());

Expand Down Expand Up @@ -179,6 +196,10 @@ BOOST_AUTO_TEST_CASE(parse_hex)
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());

// Empty string is supported
static_assert(""_hex.empty());
static_assert(""_hex_u8.empty());
BOOST_CHECK_EQUAL(""_hex_v.size(), 0);
BOOST_CHECK_EQUAL(""_hex_v_u8.size(), 0);
BOOST_CHECK_EQUAL(ParseHex("").size(), 0);
BOOST_CHECK_EQUAL(TryParseHex<uint8_t>("").value().size(), 0);

Expand All @@ -203,6 +224,14 @@ BOOST_AUTO_TEST_CASE(parse_hex)
BOOST_CHECK(!TryParseHex("12 3").has_value());
}

// Checks ConstevalHexDigit() on the first and last character of each range it
// accepts: decimal digits '0'..'9' and lowercase letters 'a'..'f'.
BOOST_AUTO_TEST_CASE(consteval_hex_digit)
{
// Decimal digits map to their numeric value.
BOOST_CHECK_EQUAL(ConstevalHexDigit('0'), 0);
BOOST_CHECK_EQUAL(ConstevalHexDigit('9'), 9);
// Lowercase letters map to 0xa..0xf.
BOOST_CHECK_EQUAL(ConstevalHexDigit('a'), 0xa);
BOOST_CHECK_EQUAL(ConstevalHexDigit('f'), 0xf);
}

BOOST_AUTO_TEST_CASE(util_HexStr)
{
BOOST_CHECK_EQUAL(HexStr(HEX_PARSE_OUTPUT), HEX_PARSE_INPUT);
Expand Down
12 changes: 2 additions & 10 deletions src/uint256.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,19 +127,11 @@ class base_blob
// Compile-time constructor that fills m_data from a hex string.
// The string must contain exactly two hex digits per element of m_data,
// otherwise a string literal is thrown. The string is consumed from its last
// character towards its first while m_data is filled from its first element.
template <unsigned int BITS>
consteval base_blob<BITS>::base_blob(std::string_view hex_str)
{
// Non-lookup table version of HexDigit().
auto from_hex = [](const char c) -> int8_t {
if (c >= '0' && c <= '9') return c - '0';
if (c >= 'a' && c <= 'f') return c - 'a' + 0xa;

throw "Only lowercase hex digits are allowed, for consistency";
};

// Reject strings that are shorter or longer than the blob requires.
if (hex_str.length() != m_data.size() * 2) throw "Hex string must fit exactly";
// Reverse iterator: the first character read is the last one in hex_str.
auto str_it = hex_str.rbegin();
for (auto& elem : m_data) {
// Walking backwards, the low nibble of each element is read before its
// high nibble.
auto lo = from_hex(*(str_it++));
elem = (from_hex(*(str_it++)) << 4) | lo;
auto lo = util::ConstevalHexDigit(*(str_it++));
elem = (util::ConstevalHexDigit(*(str_it++)) << 4) | lo;
}
}

Expand Down
77 changes: 77 additions & 0 deletions src/util/strencodings.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <span.h>
#include <util/string.h>

#include <array>
#include <bit>
#include <charconv>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -365,4 +367,79 @@ std::string Capitalize(std::string str);
*/
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);

namespace util {
/**
 * Compile-time counterpart of HexDigit() that does not use a lookup table.
 *
 * @param c  A single character, expected to be '0'..'9' or 'a'..'f'.
 * @return   The numeric value (0..15) of the hex digit.
 *
 * Any other character, including uppercase 'A'..'F', reaches the throw
 * statement below.
 */
consteval uint8_t ConstevalHexDigit(const char c)
{
    const bool is_decimal{c >= '0' && c <= '9'};
    const bool is_lower_alpha{c >= 'a' && c <= 'f'};

    if (!is_decimal && !is_lower_alpha) {
        throw "Only lowercase hex digits are allowed, for consistency";
    }

    return is_decimal ? c - '0' : c - 'a' + 0xa;
}

/**
* ""_hex is a compile-time user-defined literal returning a
* `std::array<std::byte>`, equivalent to ParseHex(). Variants provided:
*
* - ""_hex_v: Returns `std::vector<std::byte>`, useful for heap allocation or
* variable-length serialization.
*
* - ""_hex_u8: Returns `std::array<uint8_t>`, for cases where `std::byte` is
* incompatible.
*
* - ""_hex_v_u8: Returns `std::vector<uint8_t>`, combining heap allocation with
* `uint8_t`.
*
* @warning It could be necessary to use vector instead of array variants when
* serializing, or vice versa, because vectors are assumed to be variable-
* length and serialized with a size prefix, while arrays are considered fixed
* length and serialized with no prefix.
*
* @warning It may be preferable to use vector variants to save stack space when
* declaring local variables if hex strings are large. Alternatively variables
* could be declared constexpr to avoid using stack space.
*
* @warning Avoid `uint8_t` variants when not necessary, as the codebase
* migrates to use `std::byte` instead of `unsigned char` and `uint8_t`.
*
* @note One reason ""_hex uses `std::array` instead of `std::vector` like
* ParseHex() does is because heap-based containers cannot cross the compile-
* time/runtime barrier.
*/
inline namespace hex_literals {
namespace detail {

/**
 * Holder for the bytes decoded from a hex string literal at compile time.
 *
 * N is the size of the character array including its null terminator, so the
 * decoded array holds N / 2 bytes.
 */
template <size_t N>
struct Hex {
    std::array<std::byte, N / 2> bytes{};
    consteval Hex(const char (&hex_str)[N])
        // 2 hex digits required per byte + implicit null terminator
        requires(N % 2 == 1)
    {
        if (hex_str[N - 1] != '\0') throw "null terminator required";

        // Fill each output byte from the next two characters: high nibble
        // first, then low nibble.
        std::size_t pos{0};
        for (std::byte& out : bytes) {
            const auto hi{ConstevalHexDigit(hex_str[pos++])};
            const auto lo{ConstevalHexDigit(hex_str[pos++])};
            out = static_cast<std::byte>((hi << 4) | lo);
        }
    }
};

} // namespace detail

/** Returns the decoded bytes of the literal as a `std::array` of `std::byte`. */
template <util::detail::Hex str>
constexpr auto operator""_hex()
{
    return str.bytes;
}

template <util::detail::Hex str>
constexpr auto operator""_hex_u8() { return std::bit_cast<std::array<uint8_t, str.bytes.size()>>(str.bytes); }

/** Returns the decoded bytes of the literal copied into a `std::vector<std::byte>`. */
template <util::detail::Hex str>
constexpr auto operator""_hex_v()
{
    const auto first{str.bytes.begin()};
    const auto last{str.bytes.end()};
    return std::vector<std::byte>{first, last};
}

/** Returns the decoded bytes of the literal copied into a `std::vector<uint8_t>`. */
template <util::detail::Hex str>
inline auto operator""_hex_v_u8()
{
    // Both ends of the byte range are passed through UCharCast before the copy.
    const auto first{UCharCast(str.bytes.data())};
    const auto last{UCharCast(str.bytes.data() + str.bytes.size())};
    return std::vector<uint8_t>{first, last};
}

} // inline namespace hex_literals
} // namespace util

#endif // BITCOIN_UTIL_STRENCODINGS_H

0 comments on commit 5b74a84

Please sign in to comment.