Added DELIMITER support to batch input

shinroo · Mar 17, 2021 · ff7bee1 · ff7bee1
1 parent 0983c60
commit ff7bee1
Showing 4 changed files with 103 additions and 23 deletions.
diff --git a/.github/workflows/ci-check-repo.yaml b/.github/workflows/ci-check-repo.yaml
@@ -6,7 +6,7 @@ on:
 
 jobs:
   verify:
-    name: Verify format and commiters
+    name: Verify format and committers
     runs-on: ubuntu-18.04
     steps:
       - name: Setup Go 1.x

diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go
@@ -94,6 +94,8 @@ const (
 # "exit" or "quit" (or Ctrl-D) to exit.`
 )
 
+var delimiterRegex = regexp.MustCompile(`(?i)^\s*DELIMITER\s+(\S+)\s*(\s+\S+\s*)?$`)
+
 type SqlCmd struct {
 	VersionStr string
 }
@@ -557,12 +559,20 @@ func runBatchMode(ctx *sql.Context, se *sqlEngine, input io.Reader) error {
 		if len(query) == 0 || query == "\n" {
 			continue
 		}
-		if err := processBatchQuery(ctx, query, se); err != nil {
-			// TODO: this line number will not be accurate for errors that occur when flushing a batch of inserts (as opposed
-			//  to processing the query)
-			verr := formatQueryError(fmt.Sprintf("error on line %d for query %s", scanner.statementStartLine, query), err)
-			cli.PrintErrln(verr.Verbose())
-			return err
+		shouldProcessQuery := true
+		if matches := delimiterRegex.FindStringSubmatch(query); len(matches) == 3 {
+			// If we don't match from anything, then we just pass to the SQL engine and let it complain.
+			scanner.Delimiter = matches[1]
+			shouldProcessQuery = false
+		}
+		if shouldProcessQuery {
+			if err := processBatchQuery(ctx, query, se); err != nil {
+				// TODO: this line number will not be accurate for errors that occur when flushing a batch of inserts (as opposed
+				//  to processing the query)
+				verr := formatQueryError(fmt.Sprintf("error on line %d for query %s", scanner.statementStartLine, query), err)
+				cli.PrintErrln(verr.Verbose())
+				return err
+			}
 		}
 		query = ""
 	}
@@ -628,7 +638,6 @@ func runShell(ctx *sql.Context, se *sqlEngine, mrEnv env.MultiRepoEnv, initialRo
 		}
 	})
 
-	delimiterRegex := regexp.MustCompile(`(?i)^DELIMITER\s+(\S+)\s+\S+\s*$`)
 	var returnedVerr errhand.VerboseError = nil // Verr that cannot be just printed but needs to be returned.
 	shell.Uninterpreted(func(c *ishell.Context) {
 		query := c.Args[0]
@@ -649,7 +658,7 @@ func runShell(ctx *sql.Context, se *sqlEngine, mrEnv env.MultiRepoEnv, initialRo
 
 		shouldProcessQuery := true
 		//TODO: Handle comments and enforce the current line terminator
-		if matches := delimiterRegex.FindStringSubmatch(query); len(matches) == 2 {
+		if matches := delimiterRegex.FindStringSubmatch(query); len(matches) == 3 {
 			// If we don't match from anything, then we just pass to the SQL engine and let it complain.
 			shell.SetLineTerminator(matches[1])
 			shouldProcessQuery = false

diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go
@@ -25,6 +25,7 @@ type statementScanner struct {
 	statementStartLine int // the line number of the first line of the last parsed statement
 	startLineNum       int // the line number we began parsing the most recent token at
 	lineNum            int // the current line number being parsed
+	Delimiter          string
 }
 
 const maxStatementBufferBytes = 100 * 1024 * 1024
@@ -36,8 +37,9 @@ func NewSqlStatementScanner(input io.Reader) *statementScanner {
 	scanner.Buffer(buf, maxStatementBufferBytes)
 
 	s := &statementScanner{
-		Scanner: scanner,
-		lineNum: 1,
+		Scanner:   scanner,
+		lineNum:   1,
+		Delimiter: ";",
 	}
 	scanner.Split(s.scanStatements)
 
@@ -58,33 +60,42 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int,
 	}
 
 	var (
-		quoteChar                 byte // the opening quote character of the current quote being parsed, or 0 if the current parse location isn't inside a quoted string
-		lastChar                  byte // the last character parsed
-		ignoreNextChar            bool // whether to ignore the next character
-		numConsecutiveBackslashes int  // the number of consecutive backslashes encountered
-		seenNonWhitespaceChar     bool // whether we have encountered a non-whitespace character since we returned the last token
+		quoteChar                      byte // the opening quote character of the current quote being parsed, or 0 if the current parse location isn't inside a quoted string
+		lastChar                       byte // the last character parsed
+		ignoreNextChar                 bool // whether to ignore the next character
+		numConsecutiveBackslashes      int  // the number of consecutive backslashes encountered
+		seenNonWhitespaceChar          bool // whether we have encountered a non-whitespace character since we returned the last token
+		numConsecutiveDelimiterMatches int  // the consecutive number of characters that have been matched to the delimiter
 	)
 
 	s.startLineNum = s.lineNum
 
-	for i := range data {
+	for i := 0; i < len(data); i++ {
 		if !ignoreNextChar {
 			// this doesn't handle unicode characters correctly and will break on some things, but it's only used for line
 			// number reporting.
 			if !seenNonWhitespaceChar && !unicode.IsSpace(rune(data[i])) {
 				seenNonWhitespaceChar = true
 				s.statementStartLine = s.lineNum
 			}
+			// check if we've matched the delimiter string
+			if quoteChar == 0 && data[i] == s.Delimiter[numConsecutiveDelimiterMatches] {
+				numConsecutiveDelimiterMatches++
+				if numConsecutiveDelimiterMatches == len(s.Delimiter) {
+					s.startLineNum = s.lineNum
+					_, _, _ = s.resetState()
+					removalLength := len(s.Delimiter) - 1 // We remove the delimiter so it depends on the length
+					return i + 1, data[0 : i-removalLength], nil
+				}
+				lastChar = data[i]
+				continue
+			} else {
+				numConsecutiveDelimiterMatches = 0
+			}
 
 			switch data[i] {
 			case '\n':
 				s.lineNum++
-			case ';':
-				if quoteChar == 0 {
-					s.startLineNum = s.lineNum
-					_, _, _ = s.resetState()
-					return i + 1, data[0:i], nil
-				}
 			case backslash:
 				numConsecutiveBackslashes++
 			case sQuote, dQuote, backtick:

diff --git a/integration-tests/bats/sql.bats b/integration-tests/bats/sql.bats
@@ -792,6 +792,66 @@ SQL
     rm -rf doltsql
 }
 
+@test "sql: batch delimiter" {
+    dolt sql <<SQL
+DELIMITER // ;
+CREATE TABLE test (
+  pk BIGINT PRIMARY KEY,
+  v1 BIGINT,
+  v2 BIGINT
+)//
+INSERT INTO test VALUES (1, 1, 1) //
+DELIMITER $ //
+INSERT INTO test VALUES (2, 2, 2)$ $
+CREATE PROCEDURE p1(x BIGINT)
+BEGIN
+  IF x < 10 THEN
+    SET x = 10;
+  END IF;
+  SELECT pk+x, v1+x, v2+x FROM test ORDER BY 1;
+END$
+DELIMITER ;   $
+INSERT INTO test VALUES (3, 3, 3);
+DELIMITER ********** ;
+INSERT INTO test VALUES (4, 4, 4)**********
+INSERT INTO test VALUES (5, 5, 5)
+SQL
+    run dolt sql -q "CALL p1(3)" -r=csv
+    [ "$status" -eq "0" ]
+    [[ "$output" =~ "test.pk + x,test.v1 + x,test.v2 + x" ]] || false
+    [[ "$output" =~ "11,11,11" ]] || false
+    [[ "$output" =~ "12,12,12" ]] || false
+    [[ "$output" =~ "13,13,13" ]] || false
+    [[ "$output" =~ "14,14,14" ]] || false
+    [[ "$output" =~ "15,15,15" ]] || false
+    [[ "${#lines[@]}" = "6" ]] || false
+
+    run dolt sql -q "CALL p1(20)" -r=csv
+    [ "$status" -eq "0" ]
+    [[ "$output" =~ "test.pk + x,test.v1 + x,test.v2 + x" ]] || false
+    [[ "$output" =~ "21,21,21" ]] || false
+    [[ "$output" =~ "22,22,22" ]] || false
+    [[ "$output" =~ "23,23,23" ]] || false
+    [[ "$output" =~ "24,24,24" ]] || false
+    [[ "$output" =~ "25,25,25" ]] || false
+    [[ "${#lines[@]}" = "6" ]] || false
+
+    dolt sql <<SQL
+DELIMITER // ;
+CREATE TABLE test2(
+  pk BIGINT PRIMARY KEY,
+  v1 VARCHAR(20)
+)//
+INSERT INTO test2 VALUES (1, '//'), (2, "//")//
+SQL
+    run dolt sql -q "SELECT * FROM test2" -r=csv
+    [ "$status" -eq "0" ]
+    [[ "$output" =~ "pk,v1" ]] || false
+    [[ "$output" =~ "1,//" ]] || false
+    [[ "$output" =~ "2,//" ]] || false
+    [[ "${#lines[@]}" = "3" ]] || false
+}
+
 @test "sql: insert on duplicate key inserts data by column" {
     run dolt sql -q "CREATE TABLE test (col_a varchar(2) not null, col_b varchar(2), col_c varchar(2), primary key(col_a));"
     [ $status -eq 0 ]