Skip to content

Commit

Permalink
Support escape sequences in strings and add raw strings.
Browse files Browse the repository at this point in the history
This adapts some functionality from the Go standard library for string
literal lexing and unquoting/unescaping.

The following string types are now supported:

Double- or single-quoted strings:

  These support all escape sequences that Go supports in double-quoted
  string literals. The difference is that Prometheus also has
  single-quoted strings (instead of single-quoted runes in Go). Raw
  newlines are not allowed.

Backtick-quoted raw strings:

  Strings quoted in backticks are treated as raw strings just like in Go
  and may contain raw newlines and other special characters directly.

Fixes prometheus#1122
Fixes prometheus#1121
  • Loading branch information
juliusv committed Oct 8, 2015
1 parent 7e86cd1 commit 46c5260
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 30 deletions.
93 changes: 89 additions & 4 deletions promql/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package promql
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)

Expand Down Expand Up @@ -465,6 +466,9 @@ func lexStatements(l *lexer) stateFn {
case r == '"' || r == '\'':
l.stringOpen = r
return lexString
case r == '`':
l.stringOpen = r
return lexRawString
case isAlpha(r) || r == ':':
l.backup()
return lexKeywordOrIdentifier
Expand Down Expand Up @@ -523,6 +527,9 @@ func lexInsideBraces(l *lexer) stateFn {
case r == '"' || r == '\'':
l.stringOpen = r
return lexString
case r == '`':
l.stringOpen = r
return lexRawString
case r == '=':
if l.next() == '~' {
l.emit(itemEQLRegex)
Expand Down Expand Up @@ -583,16 +590,79 @@ func lexValueSequence(l *lexer) stateFn {
return lexValueSequence
}

// lexEscape scans a string escape sequence. The initial escaping character (\)
// has already been seen.
//
// On return the lexer has consumed exactly the runes that belong to the escape
// sequence, so the caller's next l.next() yields the rune following it (for
// example the closing quote of the string). Malformed sequences are reported
// through l.errorf, and scanning of the sequence stops at the first problem so
// that only one error is reported per escape.
//
// NOTE: This function as well as the helper function digitVal() and associated
// tests have been adapted from the corresponding functions in the "go/scanner"
// package of the Go standard library to work for Prometheus-style strings.
// Because l.next() consumes the rune it returns, the digit loop below must not
// advance past the final digit of the sequence.
func lexEscape(l *lexer) {
	var n int
	var base, max uint32

	ch := l.next()
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
		// Single-character escape: nothing further to validate.
		return
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// Octal escape: ch already holds the first of three digits.
		n, base, max = 3, 8, 255
	case 'x':
		ch = l.next()
		n, base, max = 2, 16, 255
	case 'u':
		ch = l.next()
		n, base, max = 4, 16, unicode.MaxRune
	case 'U':
		ch = l.next()
		n, base, max = 8, 16, unicode.MaxRune
	case eof:
		l.errorf("escape sequence not terminated")
		return
	default:
		l.errorf("unknown escape sequence %#U", ch)
		return
	}

	// Accumulate the n digits of the numeric escape; ch always holds the
	// digit that is about to be validated.
	var x uint32
	for ; n > 0; n-- {
		d := uint32(digitVal(ch))
		if d >= base {
			if ch == eof {
				l.errorf("escape sequence not terminated")
			} else {
				l.errorf("illegal character %#U in escape sequence", ch)
			}
			return
		}
		x = x*base + d

		// Only fetch another rune while more digits are expected; reading
		// after the last digit would swallow the rune that follows the
		// escape sequence (e.g. the closing quote).
		if n > 1 {
			ch = l.next()
		}
	}

	// Reject values above the allowed maximum and UTF-16 surrogate halves.
	if x > max || 0xD800 <= x && x < 0xE000 {
		l.errorf("escape sequence is an invalid Unicode code point")
	}
}

// digitVal maps a rune to its numeric value when interpreted as a decimal or
// hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F'). Any other rune yields 16,
// which is larger than every legal digit value in the bases used by callers.
func digitVal(ch rune) int {
	const notADigit = 16

	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return 10 + int(ch-'a')
	}
	if ch >= 'A' && ch <= 'F' {
		return 10 + int(ch-'A')
	}
	return notADigit
}

// lexString scans a quoted string. The initial quote has already been seen.
func lexString(l *lexer) stateFn {
Loop:
for {
switch l.next() {
case '\\':
if r := l.next(); r != eof && r != '\n' {
break
}
fallthrough
lexEscape(l)
case eof, '\n':
return l.errorf("unterminated quoted string")
case l.stringOpen:
Expand All @@ -603,6 +673,21 @@ Loop:
return lexStatements
}

// lexRawString scans a backtick-style raw string whose opening quote has
// already been consumed. Runes are read verbatim (no escape processing) until
// the rune stored in l.stringOpen is seen again, at which point an itemString
// is emitted. Hitting the end of input first is reported as an error.
func lexRawString(l *lexer) stateFn {
	for {
		r := l.next()
		if r == eof {
			return l.errorf("unterminated raw string")
		}
		if r == l.stringOpen {
			l.emit(itemString)
			return lexStatements
		}
	}
}

// lexSpace scans a run of space characters. One space has already been seen.
func lexSpace(l *lexer) stateFn {
for isSpace(l.peek()) {
Expand Down
34 changes: 15 additions & 19 deletions promql/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ type ParseErr struct {

func (e *ParseErr) Error() string {
if e.Line == 0 {
return fmt.Sprintf("Parse error at char %d: %s", e.Pos, e.Err)
return fmt.Sprintf("parse error at char %d: %s", e.Pos, e.Err)
}
return fmt.Sprintf("Parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err)
return fmt.Sprintf("parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err)
}

// ParseStmts parses the input and returns the resulting statements or any occurring error.
Expand Down Expand Up @@ -401,21 +401,21 @@ Loop:
p.errorf("summary must not be defined twice")
}
hasSum = true
sum = trimOne(p.expect(itemString, ctx).val)
sum = p.unquoteString(p.expect(itemString, ctx).val)

case itemDescription:
if hasDesc {
p.errorf("description must not be defined twice")
}
hasDesc = true
desc = trimOne(p.expect(itemString, ctx).val)
desc = p.unquoteString(p.expect(itemString, ctx).val)

case itemRunbook:
if hasRunbook {
p.errorf("runbook must not be defined twice")
}
hasRunbook = true
runbook = trimOne(p.expect(itemString, ctx).val)
runbook = p.unquoteString(p.expect(itemString, ctx).val)

default:
p.backup()
Expand Down Expand Up @@ -654,8 +654,7 @@ func (p *parser) primaryExpr() Expr {
return &NumberLiteral{model.SampleValue(f)}

case t.typ == itemString:
s := t.val[1 : len(t.val)-1]
return &StringLiteral{s}
return &StringLiteral{p.unquoteString(t.val)}

case t.typ == itemLeftBrace:
// Metric selector without metric name.
Expand Down Expand Up @@ -843,7 +842,7 @@ func (p *parser) labelMatchers(operators ...itemType) metric.LabelMatchers {
p.errorf("operator must be one of %q, is %q", operators, op)
}

val := trimOne(p.expect(itemString, ctx).val)
val := p.unquoteString(p.expect(itemString, ctx).val)

// Map the item to the respective match type.
var matchType metric.MatchType
Expand Down Expand Up @@ -1104,6 +1103,14 @@ func (p *parser) checkType(node Node) (typ model.ValueType) {
return
}

// unquoteString converts the quoted string literal s into its unquoted form by
// delegating to strutil.Unquote. If unquoting fails, the failure is reported
// through p.errorf; whatever value Unquote produced is returned in either case.
func (p *parser) unquoteString(s string) string {
	result, err := strutil.Unquote(s)
	if err == nil {
		return result
	}
	p.errorf("error unquoting string %q: %s", s, err)
	return result
}

func parseDuration(ds string) (time.Duration, error) {
dur, err := strutil.StringToDuration(ds)
if err != nil {
Expand All @@ -1114,14 +1121,3 @@ func parseDuration(ds string) (time.Duration, error) {
}
return dur, nil
}

// trimOne drops the first and the last byte of s. Strings shorter than two
// bytes collapse to the empty string.
func trimOne(s string) string {
	switch len(s) {
	case 0, 1:
		return ""
	}
	return s[1 : len(s)-1]
}
48 changes: 48 additions & 0 deletions promql/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,54 @@ var testExpr = []struct {
fail: true,
errMsg: `no valid expression found`,
},
// String quoting and escape sequence interpretation tests.
{
input: `"double-quoted string \" with escaped quote"`,
expected: &StringLiteral{
Val: "double-quoted string \" with escaped quote",
},
}, {
input: `'single-quoted string \' with escaped quote'`,
expected: &StringLiteral{
Val: "single-quoted string ' with escaped quote",
},
}, {
input: "`backtick-quoted string`",
expected: &StringLiteral{
Val: "backtick-quoted string",
},
}, {
input: `"\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺"`,
expected: &StringLiteral{
Val: "\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺",
},
}, {
input: `'\a\b\f\n\r\t\v\\\' - \xFF\377\u1234\U00010111\U0001011111☺'`,
expected: &StringLiteral{
Val: "\a\b\f\n\r\t\v\\' - \xFF\377\u1234\U00010111\U0001011111☺",
},
}, {
input: "`" + `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺` + "`",
expected: &StringLiteral{
Val: `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺`,
},
}, {
input: "`\\``",
fail: true,
errMsg: "could not parse remaining input",
}, {
input: `"\`,
fail: true,
errMsg: "escape sequence not terminated",
}, {
input: `"\c"`,
fail: true,
errMsg: "unknown escape sequence U+0063 'c'",
}, {
input: `"\x."`,
fail: true,
errMsg: "illegal character U+002E '.' in escape sequence",
},
}

func TestParseExpressions(t *testing.T) {
Expand Down
10 changes: 6 additions & 4 deletions util/strutil/quote.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ var ErrSyntax = errors.New("invalid syntax")
// NOTE: This function as well as the necessary helper functions below
// (unquoteChar, contains, unhex) and associated tests have been adapted from
// the corresponding functions in the "strconv" package of the Go standard
// library to work for Prometheus-style strings.
// library to work for Prometheus-style strings. Go's special-casing for single
// quotes was removed and single quoted strings are now treated the same as
// double quoted ones.
func Unquote(s string) (t string, err error) {
n := len(s)
if n < 2 {
Expand Down Expand Up @@ -103,7 +105,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
return rune(s[0]), false, s[1:], nil
}

// hard case: c is backslash
// Hard case: c is backslash.
if len(s) <= 1 {
err = ErrSyntax
return
Expand Down Expand Up @@ -151,7 +153,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
}
s = s[n:]
if c == 'x' {
// single-byte string, possibly not UTF-8
// Single-byte string, possibly not UTF-8.
value = v
break
}
Expand All @@ -167,7 +169,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
err = ErrSyntax
return
}
for j := 0; j < 2; j++ { // one digit already; two more
for j := 0; j < 2; j++ { // One digit already; two more.
x := rune(s[j]) - '0'
if x < 0 || x > 7 {
err = ErrSyntax
Expand Down
2 changes: 1 addition & 1 deletion util/strutil/quote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func TestUnquote(t *testing.T) {
}
}

// run the quote tests too, backward
// Run the quote tests too, backward.
for _, tt := range quotetests {
if in, err := Unquote(tt.out); in != tt.in {
t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in)
Expand Down
4 changes: 2 additions & 2 deletions web/api/legacy/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestQuery(t *testing.T) {
{
queryStr: "",
status: http.StatusOK,
bodyRe: `{"type":"error","value":"Parse error at char 1: no expression found in input","version":1}`,
bodyRe: `{"type":"error","value":"parse error at char 1: no expression found in input","version":1}`,
},
{
queryStr: "expr=1.4",
Expand Down Expand Up @@ -83,7 +83,7 @@ func TestQuery(t *testing.T) {
{
queryStr: "expr=(badexpression",
status: http.StatusOK,
bodyRe: `{"type":"error","value":"Parse error at char 15: unclosed left parenthesis","version":1}`,
bodyRe: `{"type":"error","value":"parse error at char 15: unclosed left parenthesis","version":1}`,
},
}

Expand Down

0 comments on commit 46c5260

Please sign in to comment.