diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc-1.toml b/internal/toml-test/tests/invalid/string/bad-hex-esc-1.toml new file mode 100644 index 00000000..199c9615 --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc-1.toml @@ -0,0 +1 @@ +bad-hex-esc-1 = "\x0g" diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc-2.toml b/internal/toml-test/tests/invalid/string/bad-hex-esc-2.toml new file mode 100644 index 00000000..3ff07653 --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc-2.toml @@ -0,0 +1 @@ +bad-hex-esc-2 = "\xG0" diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc-3.toml b/internal/toml-test/tests/invalid/string/bad-hex-esc-3.toml new file mode 100644 index 00000000..5a1df546 --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc-3.toml @@ -0,0 +1 @@ +bad-hex-esc-3 = "\x" diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc-4.toml b/internal/toml-test/tests/invalid/string/bad-hex-esc-4.toml new file mode 100644 index 00000000..4df871b6 --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc-4.toml @@ -0,0 +1 @@ +bad-hex-esc-4 = "\x 50" diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc-5.toml b/internal/toml-test/tests/invalid/string/bad-hex-esc-5.toml new file mode 100644 index 00000000..379922fd --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc-5.toml @@ -0,0 +1 @@ +bad-hex-esc-5 = "\x 50" diff --git a/internal/toml-test/tests/invalid/string/bad-hex-esc.multi b/internal/toml-test/tests/invalid/string/bad-hex-esc.multi new file mode 100644 index 00000000..5216ce00 --- /dev/null +++ b/internal/toml-test/tests/invalid/string/bad-hex-esc.multi @@ -0,0 +1,4 @@ +bad-hex-esc-1 = "\x0g" +bad-hex-esc-2 = "\xG0" +bad-hex-esc-3 = "\x" +bad-hex-esc-4 = "\x 50" diff --git a/internal/toml-test/tests/valid/string/hex-escape.json b/internal/toml-test/tests/valid/string/hex-escape.json new file mode 100644 index 00000000..0eae986c --- /dev/null +++ b/internal/toml-test/tests/valid/string/hex-escape.json @@ -0,0 +1,34 @@ +{ + "bs": { + "type": "string", + "value": "" + }, + "hello": { + "type": "string", + "value": "hello\n" + }, + "higher-than-127": { + "type": "string", + "value": "Sørmirbæren" + }, + "literal": { + "type": "string", + "value": "\\x20 \\x09 \\x0d\\x0a" + }, + "multiline": { + "type": "string", + "value": " \t \u001b \r\n\n\n\u0000\nhello\n\nSørmirbæren\n" + }, + "multiline-literal": { + "type": "string", + "value": "\\x20 \\x09 \\x0d\\x0a\n" + }, + "nul": { + "type": "string", + "value": "\u0000" + }, + "whitespace": { + "type": "string", + "value": " \t \u001b \r\n" + } +} diff --git a/internal/toml-test/tests/valid/string/hex-escape.toml b/internal/toml-test/tests/valid/string/hex-escape.toml new file mode 100644 index 00000000..26d1668f --- /dev/null +++ b/internal/toml-test/tests/valid/string/hex-escape.toml @@ -0,0 +1,21 @@ +# \x for the first 255 codepoints + +whitespace = "\x20 \x09 \x1b \x0d\x0a" +bs = "\x7f" +nul = "\x00" +hello = "\x68\x65\x6c\x6c\x6f\x0a" +higher-than-127 = "S\xf8rmirb\xe6ren" + +multiline = """ +\x20 \x09 \x1b \x0d\x0a +\x7f +\x00 +\x68\x65\x6c\x6c\x6f\x0a +\x53\xF8\x72\x6D\x69\x72\x62\xE6\x72\x65\x6E +""" + +# Not inside literals. +literal = '\x20 \x09 \x0d\x0a' +multiline-literal = ''' +\x20 \x09 \x0d\x0a +''' diff --git a/internal/toml-test/version.go b/internal/toml-test/version.go index 50e0e2a0..7c3cfd55 100644 --- a/internal/toml-test/version.go +++ b/internal/toml-test/version.go @@ -11,13 +11,15 @@ type versionSpec struct { var versions = map[string]versionSpec{ "next": versionSpec{ exclude: []string{ - "invalid/datetime/no-secs", // Times without seconds is no longer invalid. + "invalid/datetime/no-secs", // Times without seconds is no longer invalid. + "invalid/string/basic-byte-escapes", // \x is now valid. }, }, "1.0.0": versionSpec{ exclude: []string{ - "valid/string/escape-esc", // \e + "valid/string/escape-esc", // \e + "valid/string/hex-escape", "invalid/string/bad-hex-esc", // \x.. "valid/datetime/no-seconds", // Times without seconds }, }, diff --git a/lex.go b/lex.go index bf2f6ae8..000ccca0 100644 --- a/lex.go +++ b/lex.go @@ -851,6 +851,11 @@ func lexStringEscape(lx *lexer) stateFn { fallthrough case '\\': return lx.pop() + case 'x': + if !tomlNext { + return lx.error(errLexEscape{r}) + } + return lexHexEscape case 'u': return lexShortUnicodeEscape case 'U': @@ -859,6 +864,19 @@ func lexStringEscape(lx *lexer) stateFn { return lx.error(errLexEscape{r}) } +func lexHexEscape(lx *lexer) stateFn { + var r rune + for i := 0; i < 2; i++ { + r = lx.next() + if !isHexadecimal(r) { + return lx.errorf( + `expected two hexadecimal digits after '\x', but got %q instead`, + lx.current()) + } + } + return lx.pop() +} + func lexShortUnicodeEscape(lx *lexer) stateFn { var r rune for i := 0; i < 4; i++ { diff --git a/parse.go b/parse.go index 2ee22970..f9b98214 100644 --- a/parse.go +++ b/parse.go @@ -766,6 +766,12 @@ func (p *parser) replaceEscapes(it item, str string) string { case '\\': replaced = append(replaced, rune(0x005C)) r += 1 + case 'x': + if tomlNext { + escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3]) + replaced = append(replaced, escaped) + r += 3 + } case 'u': // At this point, we know we have a Unicode escape of the form // `uXXXX` at [r, r+5). (Because the lexer guarantees this diff --git a/toml_test.go b/toml_test.go index 7664c4ad..a61b911f 100644 --- a/toml_test.go +++ b/toml_test.go @@ -255,7 +255,8 @@ func TestTomlNext(t *testing.T) { func TestTomlNextFails(t *testing.T) { runTomlTest(t, true, "valid/string/escape-esc", - "valid/datetime/no-seconds") + "valid/datetime/no-seconds", + "valid/string/hex-escape") } func runTomlTest(t *testing.T, includeNext bool, wantFail ...string) {