Skip to content

Commit

Permalink
Merge pull request #377 from BurntSushi/hex-esc
Browse files Browse the repository at this point in the history
Support \x.. escapes
  • Loading branch information
arp242 authored Jan 15, 2023
2 parents 98e0a36 + b711272 commit 69d7903
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 3 deletions.
1 change: 1 addition & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc-1.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bad-hex-esc-1 = "\x0g"
1 change: 1 addition & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc-2.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bad-hex-esc-2 = "\xG0"
1 change: 1 addition & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc-3.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bad-hex-esc-3 = "\x"
1 change: 1 addition & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc-4.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bad-hex-esc-4 = "\x 50"
1 change: 1 addition & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc-5.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bad-hex-esc-5 = "\x 50"
4 changes: 4 additions & 0 deletions internal/toml-test/tests/invalid/string/bad-hex-esc.multi
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
bad-hex-esc-1 = "\x0g"
bad-hex-esc-2 = "\xG0"
bad-hex-esc-3 = "\x"
bad-hex-esc-4 = "\x 50"
34 changes: 34 additions & 0 deletions internal/toml-test/tests/valid/string/hex-escape.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"bs": {
"type": "string",
"value": "\u007f"
},
"hello": {
"type": "string",
"value": "hello\n"
},
"higher-than-127": {
"type": "string",
"value": "Sørmirbæren"
},
"literal": {
"type": "string",
"value": "\\x20 \\x09 \\x0d\\x0a"
},
"multiline": {
"type": "string",
"value": " \t \u001b \r\n\n\u007f\n\u0000\nhello\n\nSørmirbæren\n"
},
"multiline-literal": {
"type": "string",
"value": "\\x20 \\x09 \\x0d\\x0a\n"
},
"nul": {
"type": "string",
"value": "\u0000"
},
"whitespace": {
"type": "string",
"value": " \t \u001b \r\n"
}
}
21 changes: 21 additions & 0 deletions internal/toml-test/tests/valid/string/hex-escape.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# \x for the first 256 codepoints (U+0000 through U+00FF)

whitespace = "\x20 \x09 \x1b \x0d\x0a"
bs = "\x7f"
nul = "\x00"
hello = "\x68\x65\x6c\x6c\x6f\x0a"
higher-than-127 = "S\xf8rmirb\xe6ren"

multiline = """
\x20 \x09 \x1b \x0d\x0a
\x7f
\x00
\x68\x65\x6c\x6c\x6f\x0a
\x53\xF8\x72\x6D\x69\x72\x62\xE6\x72\x65\x6E
"""

# Not inside literals.
literal = '\x20 \x09 \x0d\x0a'
multiline-literal = '''
\x20 \x09 \x0d\x0a
'''
6 changes: 4 additions & 2 deletions internal/toml-test/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ type versionSpec struct {
var versions = map[string]versionSpec{
"next": versionSpec{
exclude: []string{
"invalid/datetime/no-secs", // Times without seconds is no longer invalid.
"invalid/datetime/no-secs", // Times without seconds is no longer invalid.
"invalid/string/basic-byte-escapes", // \x is now valid.
},
},

"1.0.0": versionSpec{
exclude: []string{
"valid/string/escape-esc", // \e
"valid/string/escape-esc", // \e
"valid/string/hex-escape", "invalid/string/bad-hex-esc", // \x..
"valid/datetime/no-seconds", // Times without seconds
},
},
Expand Down
18 changes: 18 additions & 0 deletions lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,11 @@ func lexStringEscape(lx *lexer) stateFn {
fallthrough
case '\\':
return lx.pop()
case 'x':
if !tomlNext {
return lx.error(errLexEscape{r})
}
return lexHexEscape
case 'u':
return lexShortUnicodeEscape
case 'U':
Expand All @@ -859,6 +864,19 @@ func lexStringEscape(lx *lexer) stateFn {
return lx.error(errLexEscape{r})
}

// lexHexEscape lexes the two characters that follow a `\x` escape inside a
// string. Each character is read with lx.next and must satisfy
// isHexadecimal; the first one that does not yields a lexer error whose
// message includes lx.current(). When both characters are accepted, the
// function returns whatever lx.pop() yields.
func lexHexEscape(lx *lexer) stateFn {
	const wantDigits = 2
	for seen := 0; seen != wantDigits; seen++ {
		if digit := lx.next(); !isHexadecimal(digit) {
			return lx.errorf(
				`expected two hexadecimal digits after '\x', but got %q instead`,
				lx.current())
		}
	}
	return lx.pop()
}

func lexShortUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 4; i++ {
Expand Down
6 changes: 6 additions & 0 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,12 @@ func (p *parser) replaceEscapes(it item, str string) string {
case '\\':
replaced = append(replaced, rune(0x005C))
r += 1
case 'x':
if tomlNext {
escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
replaced = append(replaced, escaped)
r += 3
}
case 'u':
// At this point, we know we have a Unicode escape of the form
// `uXXXX` at [r, r+5). (Because the lexer guarantees this
Expand Down
3 changes: 2 additions & 1 deletion toml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ func TestTomlNext(t *testing.T) {
// TestTomlNextFails calls runTomlTest with includeNext set to true and passes
// the listed test names as its wantFail arguments.
// NOTE(review): presumably these are the "next" tests that must fail while the
// TOML-next features are not enabled — confirm against runTomlTest.
func TestTomlNextFails(t *testing.T) {
runTomlTest(t, true,
"valid/string/escape-esc",
"valid/datetime/no-seconds")
"valid/datetime/no-seconds",
"valid/string/hex-escape")
}

func runTomlTest(t *testing.T, includeNext bool, wantFail ...string) {
Expand Down

0 comments on commit 69d7903

Please sign in to comment.