Skip to content

Commit

Permalink
Merge pull request #10343 from Maxxen/feat/digit-separator
Browse files Browse the repository at this point in the history
Feature: Digit separators in numeric literals
  • Loading branch information
Mytherin authored Jan 28, 2024
2 parents 7a166b2 + 975d642 commit 8de2d29
Show file tree
Hide file tree
Showing 6 changed files with 917 additions and 448 deletions.
43 changes: 34 additions & 9 deletions src/common/operator/cast_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,14 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
return false;
}
pos++;

if (pos != len && buf[pos] == '_') {
// Skip one underscore if it is not the last character and followed by a digit
pos++;
if (pos == len || !StringUtil::CharacterIsDigit(buf[pos])) {
return false;
}
}
}
// make sure there is either (1) one number after the period, or (2) one number before the period
// i.e. we accept "1." and ".1" as valid numbers, but not "."
Expand Down Expand Up @@ -1194,6 +1202,14 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
if (!OP::template HandleDigit<T, NEGATIVE>(result, digit)) {
return false;
}

if (pos != len && buf[pos] == '_') {
// Skip one underscore if it is not the last character and followed by a digit
pos++;
if (pos == len || !StringUtil::CharacterIsDigit(buf[pos])) {
return false;
}
}
}
if (!OP::template Finalize<T, NEGATIVE>(result)) {
return false;
Expand Down Expand Up @@ -1221,6 +1237,15 @@ static bool IntegerHexCastLoop(const char *buf, idx_t len, T &result, bool stric
digit = current_char - '0';
}
pos++;

if (pos != len && buf[pos] == '_') {
// Skip one underscore if it is not the last character and is followed by a hex digit
pos++;
if (pos == len || !StringUtil::CharacterIsHex(buf[pos])) {
return false;
}
}

if (!OP::template HandleHexDigit<T, NEGATIVE>(result, digit)) {
return false;
}
Expand All @@ -1242,22 +1267,22 @@ static bool IntegerBinaryCastLoop(const char *buf, idx_t len, T &result, bool st
char current_char;
while (pos < len) {
current_char = buf[pos];
if (current_char == '_' && pos > start_pos) {
// skip underscore, if it is not the first character
pos++;
if (pos == len) {
// we can't end on an underscore either
return false;
}
continue;
} else if (current_char == '0') {
if (current_char == '0') {
digit = 0;
} else if (current_char == '1') {
digit = 1;
} else {
return false;
}
pos++;
if (pos != len && buf[pos] == '_') {
// Skip one underscore if it is not the last character and followed by a digit
pos++;
if (pos == len || (buf[pos] != '0' && buf[pos] != '1')) {
return false;
}
}

if (!OP::template HandleBinaryDigit<T, NEGATIVE>(result, digit)) {
return false;
}
Expand Down
14 changes: 11 additions & 3 deletions src/parser/transform/expression/transform_constant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
bool try_cast_as_integer = true;
bool try_cast_as_decimal = true;
int decimal_position = -1;
int num_underscores = 0;
int num_integer_underscores = 0;
for (idx_t i = 0; i < str_val.GetSize(); i++) {
if (val.val.str[i] == '.') {
// decimal point: cast as either decimal or double
Expand All @@ -33,6 +35,12 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
try_cast_as_integer = false;
try_cast_as_decimal = false;
}
if (val.val.str[i] == '_') {
num_underscores++;
if (decimal_position < 0) {
num_integer_underscores++;
}
}
}
if (try_cast_as_integer) {
int64_t bigint_value;
Expand All @@ -50,10 +58,10 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
}
idx_t decimal_offset = val.val.str[0] == '-' ? 3 : 2;
if (try_cast_as_decimal && decimal_position >= 0 &&
str_val.GetSize() < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
str_val.GetSize() - num_underscores < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
// figure out the width/scale based on the decimal position
auto width = uint8_t(str_val.GetSize() - 1);
auto scale = uint8_t(width - decimal_position);
auto width = uint8_t(str_val.GetSize() - 1 - num_underscores);
auto scale = uint8_t(width - decimal_position + num_integer_underscores);
if (val.val.str[0] == '-') {
width--;
}
Expand Down
Loading

0 comments on commit 8de2d29

Please sign in to comment.