From 63514ce549149d96bff7e63c6aed3235961a8f0b Mon Sep 17 00:00:00 2001 From: Jason Lyle Date: Thu, 14 Mar 2024 19:49:45 -0400 Subject: [PATCH 1/4] Update grammar and associated tests --- .../util/grammar/SimpleSqlGrammar.jj | 29 +++++++++---------- .../util/grammar/SimpleSqlGrammarTest.groovy | 8 ++++- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj b/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj index c6aedda354c..c9b87db61eb 100644 --- a/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj +++ b/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj @@ -57,36 +57,34 @@ TOKEN: TOKEN : /* Numeric Constants */ { < S_DOUBLE: (()? "." ( ["e","E"] (["+", "-"])? )? - | - "." (["e","E"] (["+", "-"])? )? - | - ["e","E"] (["+", "-"])? - )> - | < S_LONG: ( )+ > - | < #DIGIT: ["0" - "9"] > + | + "." (["e","E"] (["+", "-"])? )? + | + ["e","E"] (["+", "-"])? + )> + | < S_LONG: ( )+ > + | < #DIGIT: ["0" - "9"] > } TOKEN: { < COMPLEX_IDENTIFIER: ( | ) ((["\r","\n"," "])* "." 
(["\r","\n"," "])* ( | ))+ > -| < S_IDENTIFIER: ( | )+ ( | | | )* > -| < #LETTER: ["a"-"z", "A"-"Z", "_", "$"] > +| < S_IDENTIFIER: ( | | "$" | "_" ) ( | | | )* > +| < #LETTER: ["a"-"z", "A"-"Z"] > | < #SPECIAL_CHARS: "$" | "_" | "#" | "@" > | < #ESC_S_QUOTE_A: ( "''" ) > /* probably the closest to a universal standard */ -| < #ESC_S_QUOTE_B: ( "\\'" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */ | < #ESC_D_QUOTE_A: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */ -| < #ESC_D_QUOTE_B: ( "\\\"" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */ -| < #ESC_NON_QUOTE: "\\" ["n","t","b","r","f","\\","0"] > /* SQL-standard is that string literals are delimited only by single-quote, and double-quotes are only for identifiers... */ -| < #S_QUOTED_STRING_HYBRID: ( "'" ( | | | | ~["\\","'"] )* ("'" | "\\'")) > +| < #S_QUOTED_STRING_HYBRID: ( "'" ( | | ~["\\","'"] )* ("'" | "\\'")) > /* ... but many DBs tolerate double-quotes around string literals, including MySQL (unless you enable ANSI SQL mode), and MSSQL (if you disable SET QUOTED_IDENTIFIER) */ -| < #D_QUOTED_STRING_HYBRID: ( "\"" ( | | | | ~["\\","\""] )* ("\"" | "\\\"")) > +| < #D_QUOTED_STRING_HYBRID: ( "\"" ( | | ~["\\","\""] )* ("\"" | "\\\"")) > +| < #ESC_ANY_CHAR: "\\" ~[] > // Matches any character following '\' /* Finally... */ | < S_CHAR_LITERAL: (["U","E","N","R","B"]|"RB"|"_utf8")? 
( | ) > | < S_QUOTED_IDENTIFIER: "\"" (~["\n","\r","\""])+ "\"" | ("`" (~["\n","\r","`"])+ "`") | ( "[" ~["0"-"9","]"] (~["\n","\r","]"])* "]" ) > -| < EMPTY_QUOTE: "\"" "\""> +| < EMPTY_QUOTE: "\"" "\""> /* Built list from http://stackoverflow.com/a/37668315/45756 @@ -514,4 +512,3 @@ TOKEN: /* symbols */ "\u00A1"-"\uFF65" /* everything else */ ] > } - diff --git a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy index 99b8f131968..be66ee0f6de 100644 --- a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy +++ b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy @@ -37,7 +37,7 @@ class SimpleSqlGrammarTest extends Specification { "mysql escaped quotes '\\''" | ["mysql", " ", "escaped", " ", "quotes", " ", "'\\''"] "invalid ' sql" | ["invalid", " ", "'", " ", "sql"] "'invalid' ' sql" | ["'invalid'", " ", "'", " ", "sql"] - "utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"] + "utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"] "single '\\' works" | ["single", " ", "'\\'", " ", "works"] "double '\\\\' works" | ["double", " ", "'\\\\'", " ", "works"] "unquoted \\\\ works" | ["unquoted", " ", "\\", "\\", " ", "works"] @@ -52,5 +52,11 @@ class SimpleSqlGrammarTest extends Specification { "This has a \\ and symbol ≤ (u2264)" | ["This", " ", "has", " ", "a", " ", "\\", " ", "and", " ", "symbol", " ", "≤", " ", "(", "u2264", ")"] "This ≤ (u2264) is before the \\" | ["This", " ", "≤", " ", "(", "u2264", ")", " ", "is", " ", "before", " ", "the", " ", "\\"] "This has an unicode char ÀÀÀÀÀÀ+++ãããioú≤₢" | ["This", " ", "has", " ", "an", " ", "unicode"," ", "char", " ", "ÀÀÀÀÀÀ", "+", "+", "+", "ãããioú", "≤", "₢"] + "select 'foo\\_bar' from sys.dual;" | ["select", " ", "'foo\\_bar'", " ", "from", " ", "sys.dual", ";"] + "select \"foo\\_bar\" from sys.dual;" | 
["select", " ", "\"foo\\_bar\"", " ", "from", " ", "sys.dual", ";"] + "select 'foo\\sbar' from sys.dual;" | ["select", " ", "'foo\\sbar'", " ", "from", " ", "sys.dual", ";"] + "select \"foo\\sbar\" from sys.dual;" | ["select", " ", "\"foo\\sbar\"", " ", "from", " ", "sys.dual", ";"] + "select '' from sys.dual;" | ["select", " ", "''", " ", "from", " ", "sys.dual", ";"] + "select \"\" from sys.dual;" | ["select", " ", "\"\"", " ", "from", " ", "sys.dual", ";"] } } From d8f03852f40813743ffecfbd784706beb620a012 Mon Sep 17 00:00:00 2001 From: Jason Lyle Date: Fri, 15 Mar 2024 10:18:24 -0400 Subject: [PATCH 2/4] Grammar cleanup and commenting --- .../util/grammar/SimpleSqlGrammar.jj | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj b/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj index c9b87db61eb..c6becfd5820 100644 --- a/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj +++ b/liquibase-standard/src/main/javacc/liquibase/util/grammar/SimpleSqlGrammar.jj @@ -69,17 +69,22 @@ TOKEN : /* Numeric Constants */ TOKEN: { < COMPLEX_IDENTIFIER: ( | ) ((["\r","\n"," "])* "." 
(["\r","\n"," "])* ( | ))+ > -| < S_IDENTIFIER: ( | | "$" | "_" ) ( | | | )* > +| < S_IDENTIFIER: ( | | ) ( | | | )* > | < #LETTER: ["a"-"z", "A"-"Z"] > -| < #SPECIAL_CHARS: "$" | "_" | "#" | "@" > +| < #FIRST_CHAR_SPECIAL_CHARS: "$" | "_" > +| < #SPECIAL_CHARS: | "#" | "@" > -| < #ESC_S_QUOTE_A: ( "''" ) > /* probably the closest to a universal standard */ -| < #ESC_D_QUOTE_A: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */ +| < #ESC_S_QUOTE: ( "''" ) > /* probably the closest to a universal standard */ +| < #ESC_D_QUOTE: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */ +| < #ESC_ANY_CHAR: "\\" ~[] > // Matches any character following '\' /* SQL-standard is that string literals are delimited only by single-quote, and double-quotes are only for identifiers... */ -| < #S_QUOTED_STRING_HYBRID: ( "'" ( | | ~["\\","'"] )* ("'" | "\\'")) > + /* + Negative match in hybrid string tokens looks for a single backslash (i.e. "\\") so that backslashes are matched + with the ESC_ANY_CHAR token rather than prematurely ending a match + */ +| < #S_QUOTED_STRING_HYBRID: ( "'" ( | | ~["\\","'"] )* ("'" | "\\'")) > /* ... but many DBs tolerate double-quotes around string literals, including MySQL (unless you enable ANSI SQL mode), and MSSQL (if you disable SET QUOTED_IDENTIFIER) */ -| < #D_QUOTED_STRING_HYBRID: ( "\"" ( | | ~["\\","\""] )* ("\"" | "\\\"")) > -| < #ESC_ANY_CHAR: "\\" ~[] > // Matches any character following '\' +| < #D_QUOTED_STRING_HYBRID: ( "\"" ( | | ~["\\","\""] )* ("\"" | "\\\"")) > /* Finally... */ | < S_CHAR_LITERAL: (["U","E","N","R","B"]|"RB"|"_utf8")? 
( | ) > From c40ac8704a41215810e6cccd5e39f95975129a1c Mon Sep 17 00:00:00 2001 From: Jason Lyle Date: Sat, 16 Mar 2024 11:05:41 -0400 Subject: [PATCH 3/4] Add test output to help with future debugging --- .../liquibase/util/grammar/SimpleSqlGrammarTest.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy index be66ee0f6de..3f21305a339 100644 --- a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy +++ b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy @@ -8,12 +8,15 @@ class SimpleSqlGrammarTest extends Specification { @Unroll def test() { when: - def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input))); + def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input))) def grammar = new SimpleSqlGrammar(tokenManager) def tokens = new ArrayList() Token token + System.out.println("----------------------------------------------------------------") + System.out.println("'" + input + "'") while ((token = grammar.getNextToken()).kind != SimpleSqlGrammarConstants.EOF) { + System.out.println(" " + String.format('%1$-32s', SimpleSqlGrammarConstants.tokenImage[token.kind]) + ": '" + token.toString() + "'") tokens.add(token.toString()) } From ba175bf138762ff320ad6841a4d52b096aa59a98 Mon Sep 17 00:00:00 2001 From: Daniel Mallorga Date: Mon, 18 Mar 2024 15:49:26 -0300 Subject: [PATCH 4/4] Two more test scenarios added, plus a formatting change. 
--- .../liquibase/util/grammar/SimpleSqlGrammarTest.groovy | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy index be66ee0f6de..0db95a5c5de 100644 --- a/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy +++ b/liquibase-standard/src/test/groovy/liquibase/util/grammar/SimpleSqlGrammarTest.groovy @@ -53,10 +53,12 @@ class SimpleSqlGrammarTest extends Specification { "This ≤ (u2264) is before the \\" | ["This", " ", "≤", " ", "(", "u2264", ")", " ", "is", " ", "before", " ", "the", " ", "\\"] "This has an unicode char ÀÀÀÀÀÀ+++ãããioú≤₢" | ["This", " ", "has", " ", "an", " ", "unicode"," ", "char", " ", "ÀÀÀÀÀÀ", "+", "+", "+", "ãããioú", "≤", "₢"] "select 'foo\\_bar' from sys.dual;" | ["select", " ", "'foo\\_bar'", " ", "from", " ", "sys.dual", ";"] - "select \"foo\\_bar\" from sys.dual;" | ["select", " ", "\"foo\\_bar\"", " ", "from", " ", "sys.dual", ";"] + "select \"foo\\_bar\" from sys.dual;" | ["select", " ", "\"foo\\_bar\"", " ", "from", " ", "sys.dual", ";"] "select 'foo\\sbar' from sys.dual;" | ["select", " ", "'foo\\sbar'", " ", "from", " ", "sys.dual", ";"] - "select \"foo\\sbar\" from sys.dual;" | ["select", " ", "\"foo\\sbar\"", " ", "from", " ", "sys.dual", ";"] + "select \"foo\\sbar\" from sys.dual;" | ["select", " ", "\"foo\\sbar\"", " ", "from", " ", "sys.dual", ";"] "select '' from sys.dual;" | ["select", " ", "''", " ", "from", " ", "sys.dual", ";"] "select \"\" from sys.dual;" | ["select", " ", "\"\"", " ", "from", " ", "sys.dual", ";"] + "select q'~;\\~' from sys.dual;" | ["select", " ", "q", "'~;\\~'", " ", "from", " ", "sys.dual", ";"] + "select q'{\\\n;\n\\}' from sys.dual;" | ["select", " ", "q", "'{\\\n;\n\\}'", " ", "from", " ", "sys.dual", ";"] } }