Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make formatting in the CLI use the same SQL formatter as BigQuery #1741

Merged
merged 4 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/api/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ ts_library(
"@npm//deepmerge",
"@npm//fs-extra",
"@npm//glob",
"@npm//google-sql-syntax-ts",
"@npm//js-beautify",
"@npm//js-yaml",
"@npm//promise-pool-executor",
"@npm//protobufjs",
"@npm//semver",
"@npm//tmp",
"@npm//sql-formatter",
],
)
11 changes: 11 additions & 0 deletions contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ bazel build cli

The projects folder here is not built as it requires an environment file, which can be provided from the team.

### Add New NPM Dependencies

Global yarn installations will throw errors when installing packages, instead you should use:

```bash
$ bazel run @nodejs//:yarn add ...
```

Additionally, installed NPM dependencies need to be added to the `deps` of `ts_library` rules by
prefixing them with `@npm//...`.

## The Contribution Process

1. Decide on what you'd like to contribute. The majority of open-source contributions come from:
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"estraverse": "^5.1.0",
"fs-extra": "^9.0.0",
"glob": "^10.3.3",
"google-sql-syntax-ts": "^1.0.3",
"grpc": "^1.24.0",
"js-beautify": "^1.10.2",
"js-yaml": "^4.1.0",
Expand All @@ -56,7 +57,6 @@
"rollup": "^2.7.3",
"rollup-plugin-dts": "^1.4.0",
"semver": "^7.5.2",
"sql-formatter": "^12.2.1",
"tarjan-graph": "^2.0.0",
"tmp": "^0.2.0",
"ts-loader": "^5.3.1",
Expand Down
2 changes: 1 addition & 1 deletion packages/@dataform/cli/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ externals = [
"deepmerge",
"fs-extra",
"glob",
"google-sql-syntax-ts",
"js-beautify",
"js-yaml",
"moo",
Expand All @@ -39,7 +40,6 @@ externals = [
"protobufjs",
"readline-sync",
"semver",
"sql-formatter",
"tarjan-graph",
"tmp",
"typeid-js",
Expand Down
2 changes: 1 addition & 1 deletion sqlx/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ ts_library(
"//common/errors",
"@npm//@types/js-beautify",
"@npm//@types/node",
"@npm//google-sql-syntax-ts",
"@npm//js-beautify",
"@npm//sql-formatter",
"@npm//typeid-js",
],
)
Expand Down
25 changes: 5 additions & 20 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as fs from "fs";
import { GoogleSqlDefinition, QueryFormatter } from "google-sql-syntax-ts";
import * as jsBeautify from "js-beautify";
import * as sqlFormatter from "sql-formatter";
import { promisify } from "util";

import { ErrorWithCause } from "df/common/errors/errors";
Expand All @@ -13,7 +13,7 @@ const JS_BEAUTIFY_OPTIONS = {
max_preserve_newlines: 2
};

const MAX_SQL_FORMAT_ATTEMPTS = 5;
const queryFormatter = new QueryFormatter(new GoogleSqlDefinition());

export function format(text: string, fileExtension: string) {
try {
Expand Down Expand Up @@ -73,7 +73,7 @@ function formatSqlx(node: SyntaxTreeNode, indent = "") {
[placeholderId: string]: SyntaxTreeNode | string;
} = {};
const unformattedPlaceholderSql = stripUnformattableText(sqlxStatement, placeholders).join("");
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql);
const formattedPlaceholderSql = queryFormatter.formatQuery(unformattedPlaceholderSql);
return formatEveryLine(
replacePlaceholders(formattedPlaceholderSql, placeholders),
line => `${indent}${line}`
Expand Down Expand Up @@ -161,8 +161,8 @@ function stripUnformattableText(
return placeholderId;
}
case SyntaxTreeNodeType.SQL_COMMENT: {
// sql-formatter knows how to format comments (as long as they keep to a single line);
// give it a hint.
// google-sql-syntax-ts knows how to format comments (as long as they keep to a single
// line); give it a hint.
const commentPlaceholderId = part.concatenate().startsWith("--")
? `--${placeholderId}`
: `/*${placeholderId}*/`;
Expand Down Expand Up @@ -207,21 +207,6 @@ function formatJavaScript(text: string) {
return jsBeautify.js(text, JS_BEAUTIFY_OPTIONS);
}

function formatSql(text: string) {
let formatted = sqlFormatter.format(text, { language: "bigquery" }) as string;
// Unfortunately sql-formatter does not always produce final formatted output (even on plain SQL) in a single pass.
for (let attempts = 0; attempts < MAX_SQL_FORMAT_ATTEMPTS; attempts++) {
const newFormatted = sqlFormatter.format(formatted, { language: "bigquery" }) as string;
if (newFormatted === formatted) {
return newFormatted;
}
formatted = newFormatted;
}
throw new Error(
`SQL formatter was unable to determine final formatted form within ${MAX_SQL_FORMAT_ATTEMPTS} attempts. Original text: ${text}`
);
}

function formatPlaceholderInSqlx(
placeholderId: string,
placeholderSyntaxNode: SyntaxTreeNode,
Expand Down
70 changes: 32 additions & 38 deletions sqlx/format_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ js {
jsFunction("table");
}

select
SELECT
1
from
FROM
\${
ref({
schema: "df_integration_test",
Expand Down Expand Up @@ -78,17 +78,19 @@ SELECT
let finalTableName = 'dkaodihwada';
}

drop something
DROP something

---

alter table \${tempTable}
rename to \${finalTableName}
ALTER TABLE
\${tempTable} RENAME TO \${finalTableName}

---

SELECT
SUM(IF(session_start_event, 1, 0)) AS session_index
SUM(
IF
( session_start_event, 1, 0 ) ) AS session_index
`);
});

Expand All @@ -111,21 +113,14 @@ where sample = 100`;
tags: ["tag1", "tag2"]
}

select
CAST(
REGEXP_EXTRACT("", r'^/([0-9]+)\\'\\"/.*') AS INT64
) AS id,
CAST(
REGEXP_EXTRACT("", r"^/([0-9]+)\\"\\'/.*") AS INT64
) AS id2,
IFNULL(
regexp_extract('', r'\\a?query=([^&]+)&*'),
regexp_extract('', r'\\a?q=([^&]+)&*')
) AS id3,
regexp_extract('bar', r'bar') as ID4
from
SELECT
CAST(REGEXP_EXTRACT("", r'^/([0-9]+)\\'\\"/.*') AS INT64) AS id,
CAST(REGEXP_EXTRACT("", r"^/([0-9]+)\\"\\'/.*") AS INT64) AS id2,
IFNULL ( REGEXP_EXTRACT('', r'\\a?query=([^&]+)&*'), REGEXP_EXTRACT('', r'\\a?q=([^&]+)&*') ) AS id3,
REGEXP_EXTRACT('bar', r'bar') AS ID4
FROM
\${ref("dab")}
where
WHERE
sample = 100
`);
});
Expand Down Expand Up @@ -171,21 +166,20 @@ as test
}

SELECT
MAX(
(
SELECT
SUM(IF(track.event = "event_viewed_project_with_connection", 1, 0))
FROM
UNNEST (records)
)
) > 0 as created_project,
MAX((
SELECT
SUM(
IF
(track.event="event_viewed_project_with_connection",1,0))
FROM
UNNEST(records)))>0 AS created_project,
/* multi line
comment */
2 as foo
2 AS foo

input "something" {
select
1 as test
SELECT
1 AS test
/* something */
/* something
else */
Expand All @@ -195,8 +189,8 @@ input "something" {
});
test("Backslashes within regex don't cause 'r' prefix to separate.", async () => {
const fileContents = `select regexp_extract("", r'abc\\de\\'fg select * from self()'), 'bar'`;
expect(format(fileContents, "sqlx")).equal(`select
regexp_extract("", r'abc\\de\\'fg select * from self()'),
expect(format(fileContents, "sqlx")).equal(`SELECT
REGEXP_EXTRACT("", r'abc\\de\\'fg select * from self()'),
'bar'
`);
});
Expand All @@ -218,7 +212,7 @@ SELECT MAKE_INTERVAL(1, day=>2, minute => 3)`;
}

SELECT
MAKE_INTERVAL(1, day => 2, minute => 3)
MAKE_INTERVAL(1, day=>2, minute => 3)
`);
});

Expand All @@ -236,7 +230,7 @@ QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0`;
SELECT
*
FROM
UNNEST ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) AS n
UNNEST([0,1,2,3,4,5,6,7,8,9]) AS n
WHERE
n < 8
QUALIFY
Expand Down Expand Up @@ -279,7 +273,7 @@ SELECT
line
string
with indent""" AS multi_line,
REGEXP_CONTAINS("\\n abc\\n ", r'''
REGEXP_CONTAINS( "\\n abc\\n ", r'''
abc
''') AS multi_line_regex,
"""
Expand All @@ -288,8 +282,8 @@ This project is ...
""" AS with_js

post_operations {
select
"""1""" as inner_sql
SELECT
"""1""" AS inner_sql
}
`);
});
Expand Down
2 changes: 1 addition & 1 deletion version.bzl
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# NOTE: If you change the format of this line, you must change the bash command
# in /scripts/publish to extract the version string correctly.
DF_VERSION = "3.0.0-beta.5"
DF_VERSION = "3.0.0-beta.6"
Loading
Loading