Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ProcessWord support UTF-8 #17055

Merged
merged 1 commit into from
Oct 21, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
ProcessWord support UTF-8
Modified ProcessWord to work correctly with UTF-8 strings and added test cases.

Signed-off-by: Daehyeok Mun <daehyeok@gmail.com>
  • Loading branch information
daehyeok committed Oct 21, 2015
commit bb79b7eb9e1db83fd80121b088aa2e5a4c084ace
94 changes: 44 additions & 50 deletions builder/dockerfile/shell_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ package dockerfile
import (
"fmt"
"strings"
"text/scanner"
"unicode"
)

type shellWord struct {
word string
envs []string
pos int
word string
scanner scanner.Scanner
envs []string
pos int
}

// ProcessWord will use the 'env' list of environment variables,
Expand All @@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) {
envs: env,
pos: 0,
}
sw.scanner.Init(strings.NewReader(word))
return sw.process()
}

func (sw *shellWord) process() (string, error) {
return sw.processStopOn('\000')
return sw.processStopOn(scanner.EOF)
}

// Process the word, starting at 'pos', and stop when we get to the
Expand All @@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
'$': sw.processDollar,
}

for sw.pos < len(sw.word) {
ch := sw.peek()
if stopChar != '\000' && ch == stopChar {
sw.next()
for sw.scanner.Peek() != scanner.EOF {
ch := sw.scanner.Peek()

if stopChar != scanner.EOF && ch == stopChar {
sw.scanner.Next()
break
}
if fn, ok := charFuncMapping[ch]; ok {
Expand All @@ -58,51 +62,41 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
result += tmp
} else {
// Not special, just add it to the result
ch = sw.next()
ch = sw.scanner.Next()

if ch == '\\' {
// '\' escapes, except end of line
ch = sw.next()
if ch == '\000' {
continue

ch = sw.scanner.Next()

if ch == scanner.EOF {
break
}

}

result += string(ch)
}
}

return result, nil
}

func (sw *shellWord) peek() rune {
if sw.pos == len(sw.word) {
return '\000'
}
return rune(sw.word[sw.pos])
}

func (sw *shellWord) next() rune {
if sw.pos == len(sw.word) {
return '\000'
}
ch := rune(sw.word[sw.pos])
sw.pos++
return ch
}

func (sw *shellWord) processSingleQuote() (string, error) {
// All chars between single quotes are taken as-is
// Note, you can't escape '
var result string

sw.next()
sw.scanner.Next()

for {
ch := sw.next()
if ch == '\000' || ch == '\'' {
ch := sw.scanner.Next()
if ch == '\'' || ch == scanner.EOF {
break
}
result += string(ch)
}

return result, nil
}

Expand All @@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
// But you can escape " with a \
var result string

sw.next()
sw.scanner.Next()

for sw.pos < len(sw.word) {
ch := sw.peek()
for sw.scanner.Peek() != scanner.EOF {
ch := sw.scanner.Peek()
if ch == '"' {
sw.next()
sw.scanner.Next()
break
}
if ch == '$' {
Expand All @@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
}
result += tmp
} else {
ch = sw.next()
ch = sw.scanner.Next()
if ch == '\\' {
chNext := sw.peek()
chNext := sw.scanner.Peek()

if chNext == '\000' {
if chNext == scanner.EOF {
// Ignore \ at end of word
continue
}

if chNext == '"' || chNext == '$' {
// \" and \$ can be escaped, all other \'s are left as-is
ch = sw.next()
ch = sw.scanner.Next()
}
}
result += string(ch)
Expand All @@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
}

func (sw *shellWord) processDollar() (string, error) {
sw.next()
ch := sw.peek()
sw.scanner.Next()
ch := sw.scanner.Peek()
if ch == '{' {
sw.next()
sw.scanner.Next()
name := sw.processName()
ch = sw.peek()
ch = sw.scanner.Peek()
if ch == '}' {
// Normal ${xx} case
sw.next()
sw.scanner.Next()
return sw.getEnv(name), nil
}
if ch == ':' {
// Special ${xx:...} format processing
// Yes it allows for recursive $'s in the ... spot

sw.next() // skip over :
modifier := sw.next()
sw.scanner.Next() // skip over :
modifier := sw.scanner.Next()

word, err := sw.processStopOn('}')
if err != nil {
Expand Down Expand Up @@ -207,16 +201,16 @@ func (sw *shellWord) processName() string {
// If it starts with a numeric then just return $#
var name string

for sw.pos < len(sw.word) {
ch := sw.peek()
for sw.scanner.Peek() != scanner.EOF {
ch := sw.scanner.Peek()
if len(name) == 0 && unicode.IsDigit(ch) {
ch = sw.next()
ch = sw.scanner.Next()
return string(ch)
}
if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
break
}
ch = sw.next()
ch = sw.scanner.Next()
name += string(ch)
}

Expand Down
2 changes: 1 addition & 1 deletion builder/dockerfile/shell_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) {
defer file.Close()

scanner := bufio.NewScanner(file)
envs := []string{"PWD=/home", "SHELL=bash"}
envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"}
for scanner.Scan() {
line := scanner.Text()

Expand Down
54 changes: 54 additions & 0 deletions builder/dockerfile/words
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,57 @@ he${PWD:=000}xx | error
he${PWD:+${PWD}:}xx | he/home:xx
he${XXX:-\$PWD:}xx | he$PWD:xx
he${XXX:-\${PWD}z}xx | he${PWDz}xx
안녕하세요 | 안녕하세요
안'녕'하세요 | 안녕하세요
안'녕하세요 | 안녕하세요
안녕\'하세요 | 안녕'하세요
안\\'녕하세요 | 안\녕하세요
안녕\t하세요 | 안녕t하세요
"안녕\t하세요" | 안녕\t하세요
'안녕\t하세요 | 안녕\t하세요
안녕하세요\ | 안녕하세요
안녕하세요\\ | 안녕하세요\
"안녕하세요 | 안녕하세요
"안녕하세요\" | 안녕하세요"
"안녕'하세요" | 안녕'하세요
'안녕하세요 | 안녕하세요
'안녕하세요\' | 안녕하세요\
안녕$1x | 안녕x
안녕$.x | 안녕$.x
안녕$pwd. | 안녕.
안녕$PWD | 안녕/home
안녕\$PWD | 안녕$PWD
안녕\\$PWD | 안녕\/home
안녕\${} | 안녕${}
안녕\${}xx | 안녕${}xx
안녕${} | 안녕
안녕${}xx | 안녕xx
안녕${hi} | 안녕
안녕${hi}xx | 안녕xx
안녕${PWD} | 안녕/home
안녕${.} | error
안녕${XXX:-000}xx | 안녕000xx
안녕${PWD:-000}xx | 안녕/homexx
안녕${XXX:-$PWD}xx | 안녕/homexx
안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx
안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx
안녕${XXX:YYY} | error
안녕${XXX:+${PWD}}xx | 안녕xx
안녕${PWD:+${XXX}}xx | 안녕xx
안녕${PWD:+${SHELL}}xx | 안녕bashxx
안녕${XXX:+000}xx | 안녕xx
안녕${PWD:+000}xx | 안녕000xx
'안녕${XX}' | 안녕${XX}
"안녕${PWD}" | 안녕/home
"안녕'$PWD'" | 안녕'/home'
'"안녕"' | "안녕"
안녕\$PWD | 안녕$PWD
"안녕\$PWD" | 안녕$PWD
'안녕\$PWD' | 안녕\$PWD
안녕${PWD | error
안녕${PWD:=000}xx | error
안녕${PWD:+${PWD}:}xx | 안녕/home:xx
안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx
안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx
$KOREAN | 한국어
안녕$KOREAN | 안녕한국어