Merge pull request #10343 from Maxxen/feat/digit-separator

Feature: Digit separators in numeric literals
duckdb · Jan 28, 2024 · 8de2d29 · 8de2d29
2 parents 7a166b2 + 975d642
commit 8de2d29
Show file tree

Hide file tree

Showing 6 changed files with 917 additions and 448 deletions.
diff --git a/src/common/operator/cast_operators.cpp b/src/common/operator/cast_operators.cpp
@@ -1143,6 +1143,14 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
 						return false;
 					}
 					pos++;
+
+					if (pos != len && buf[pos] == '_') {
+						// Skip one underscore if it is not the last character and followed by a digit
+						pos++;
+						if (pos == len || !StringUtil::CharacterIsDigit(buf[pos])) {
+							return false;
+						}
+					}
 				}
 				// make sure there is either (1) one number after the period, or (2) one number before the period
 				// i.e. we accept "1." and ".1" as valid numbers, but not "."
@@ -1194,6 +1202,14 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
 		if (!OP::template HandleDigit<T, NEGATIVE>(result, digit)) {
 			return false;
 		}
+
+		if (pos != len && buf[pos] == '_') {
+			// Skip one underscore if it is not the last character and followed by a digit
+			pos++;
+			if (pos == len || !StringUtil::CharacterIsDigit(buf[pos])) {
+				return false;
+			}
+		}
 	}
 	if (!OP::template Finalize<T, NEGATIVE>(result)) {
 		return false;
@@ -1221,6 +1237,15 @@ static bool IntegerHexCastLoop(const char *buf, idx_t len, T &result, bool stric
 			digit = current_char - '0';
 		}
 		pos++;
+
+		if (pos != len && buf[pos] == '_') {
+			// Skip one underscore if it is not the last character and is followed by a hex digit
+			pos++;
+			if (pos == len || !StringUtil::CharacterIsHex(buf[pos])) {
+				return false;
+			}
+		}
+
 		if (!OP::template HandleHexDigit<T, NEGATIVE>(result, digit)) {
 			return false;
 		}
@@ -1242,22 +1267,22 @@ static bool IntegerBinaryCastLoop(const char *buf, idx_t len, T &result, bool st
 	char current_char;
 	while (pos < len) {
 		current_char = buf[pos];
-		if (current_char == '_' && pos > start_pos) {
-			// skip underscore, if it is not the first character
-			pos++;
-			if (pos == len) {
-				// we cant end on an underscore either
-				return false;
-			}
-			continue;
-		} else if (current_char == '0') {
+		if (current_char == '0') {
 			digit = 0;
 		} else if (current_char == '1') {
 			digit = 1;
 		} else {
 			return false;
 		}
 		pos++;
+		if (pos != len && buf[pos] == '_') {
+			// Skip one underscore if it is not the last character and is followed by a binary digit (0 or 1)
+			pos++;
+			if (pos == len || (buf[pos] != '0' && buf[pos] != '1')) {
+				return false;
+			}
+		}
+
 		if (!OP::template HandleBinaryDigit<T, NEGATIVE>(result, digit)) {
 			return false;
 		}

diff --git a/src/parser/transform/expression/transform_constant.cpp b/src/parser/transform/expression/transform_constant.cpp
@@ -22,6 +22,8 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
 		bool try_cast_as_integer = true;
 		bool try_cast_as_decimal = true;
 		int decimal_position = -1;
+		int num_underscores = 0;
+		int num_integer_underscores = 0;
 		for (idx_t i = 0; i < str_val.GetSize(); i++) {
 			if (val.val.str[i] == '.') {
 				// decimal point: cast as either decimal or double
@@ -33,6 +35,12 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
 				try_cast_as_integer = false;
 				try_cast_as_decimal = false;
 			}
+			if (val.val.str[i] == '_') {
+				num_underscores++;
+				if (decimal_position < 0) {
+					num_integer_underscores++;
+				}
+			}
 		}
 		if (try_cast_as_integer) {
 			int64_t bigint_value;
@@ -50,10 +58,10 @@ unique_ptr<ConstantExpression> Transformer::TransformValue(duckdb_libpgquery::PG
 		}
 		idx_t decimal_offset = val.val.str[0] == '-' ? 3 : 2;
 		if (try_cast_as_decimal && decimal_position >= 0 &&
-		    str_val.GetSize() < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
+		    str_val.GetSize() - num_underscores < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
 			// figure out the width/scale based on the decimal position
-			auto width = uint8_t(str_val.GetSize() - 1);
-			auto scale = uint8_t(width - decimal_position);
+			auto width = uint8_t(str_val.GetSize() - 1 - num_underscores);
+			auto scale = uint8_t(width - decimal_position + num_integer_underscores);
 			if (val.val.str[0] == '-') {
 				width--;
 			}