Skip to content

Commit

Permalink
CI: Add lint workflow and fix linting issues
Browse files: browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Jun 15, 2023
1 parent 8a5225c commit 6e925a0
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 29 deletions.
27 changes: 13 additions & 14 deletions bquest/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import os
from typing import Any, Callable, Dict, List, Optional

from google.cloud import bigquery as bq
import pandas
from google.cloud import bigquery as bq

from bquest.tables import BQTable, BQTableDefinition, BQTableDefinitionBuilder

Expand Down Expand Up @@ -130,8 +130,7 @@ def run_config(
# run config with substituted table identifiers
self._bq_executor_func(test_bq_config, templating_vars)

result_df = result_table.to_df()
return result_df
return result_table.to_df()


class BQConfigFileRunner:
Expand All @@ -155,8 +154,8 @@ def run_config(
with open(os.path.join(self._config_base_path, path_to_config), "r", encoding="UTF-8") as f:
try:
config = ast.literal_eval(f.read())
except:
raise ValueError("Could not read the configuration.")
except ValueError as e:
raise ValueError("Could not read the configuration.") from e
return self._bq_config_runner.run_config(
start_date,
end_date,
Expand Down Expand Up @@ -197,8 +196,8 @@ def run(
if string_replacements is None:
string_replacements = {}

source_tables = self._create_source_tables(source_table_definitions)
result_table = (
_ = self._create_source_tables(source_table_definitions)
_ = (
self._create_result_table_from_def(result_table_definition)
if result_table_definition
else self._create_empty_result_table("result")
Expand All @@ -212,8 +211,7 @@ def run(
query_job = self._bq_client.query(sql_with_substitutions, location="EU", job_config=job_config)
query_job.result()

result_df = query_job.result().to_dataframe()
return result_df
return query_job.result().to_dataframe()


class SQLFileRunner:
Expand All @@ -238,9 +236,10 @@ def run(
if string_replacements is None:
string_replacements = {}

with open(os.path.join(self._base_path, path_to_sql), "r", encoding="UTF-8") as f:
try:
file = os.path.join(self._base_path, path_to_sql)
try:
with open(file, "r", encoding="UTF-8") as f:
sql = f.read()
except:
raise ValueError("Could not read the SQL file.")
return self._sql_runner.run(sql, source_table_definitions, substitutions, string_replacements)
except IOError as e:
raise ValueError(f"Could not read the SQL file {file}.") from e
return self._sql_runner.run(sql, source_table_definitions, substitutions, string_replacements)
25 changes: 14 additions & 11 deletions bquest/tables.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
"""Module for dealing with BigQueryTables"""

# mypy: allow-untyped-calls

import json
import uuid
from io import BytesIO
from typing import Any, Dict, List, Optional

import pandas
import google.cloud.bigquery
import pandas as pd
from google.api_core import exceptions

from bquest.util import is_sql


class BQTable:
"""
Represents a BigQuery table.
"""

def __init__(self, original_table_id: str, test_table_id: str, bq_client: google.cloud.bigquery.Client) -> None:
assert original_table_id != test_table_id
if original_table_id == test_table_id:
raise ValueError("'original_table_id' and 'test_table_id' can't be the same.")

if is_sql(test_table_id):
raise ValueError("'test_table_id' contains sql syntax.")

self._original_table_id = original_table_id
self._test_table_id = test_table_id
Expand All @@ -42,15 +45,15 @@ def remove_require_partition_filter(self, table_id: str) -> None:
table.require_partition_filter = False
self._bq_client.update_table(table, ["require_partition_filter"])

def to_df(self) -> pandas.DataFrame:
def to_df(self) -> pd.DataFrame:
"""Loads the table into a dataframe
Returns:
Loaded table as pandas dataframe
"""
self.remove_require_partition_filter(self._test_table_id)

sql = f"SELECT * FROM `{self._test_table_id}`"
sql = f"SELECT * FROM `{self._test_table_id}`" # noqa: S608, SQL injection prevented in init

return self._bq_client.query(sql).to_dataframe()

Expand Down Expand Up @@ -106,7 +109,7 @@ class BQTableDataframeDefinition(BQTableDefinition):
Defines BigQuery tables based on a pandas dataframe.
"""

def __init__(self, name: str, df: pandas.DataFrame, project: str, dataset: str, location: str) -> None:
def __init__(self, name: str, df: pd.DataFrame, project: str, dataset: str, location: str) -> None:
BQTableDefinition.__init__(self, name, project, dataset, location)
self._dataframe = df

Expand Down Expand Up @@ -183,9 +186,9 @@ def load_to_bq(self, bq_client: google.cloud.bigquery.Client) -> BQTable:
)
try:
job.result()
except exceptions.BadRequest:
except exceptions.BadRequest as e:
# same error but with full error msg
raise exceptions.BadRequest(job.errors)
raise exceptions.BadRequest(str(job.errors)) from e

return BQTable(self._original_table_name, test_table_id, bq_client)

Expand All @@ -206,7 +209,7 @@ def from_json(
) -> BQTableJsonDefinition:
return BQTableJsonDefinition(name, rows, schema, self._project, self._dataset, self._location)

def from_df(self, name: str, df: pandas.DataFrame) -> BQTableDataframeDefinition:
def from_df(self, name: str, df: pd.DataFrame) -> BQTableDataframeDefinition:
return BQTableDataframeDefinition(name, df, self._project, self._dataset, self._location)

def create_empty(self, name: str) -> BQTableDefinition:
Expand Down
14 changes: 14 additions & 0 deletions bquest/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Utility functions for bquest"""
import sqlvalidator


def is_sql(string: str) -> bool:
    """Report whether sqlvalidator accepts ``string`` as valid SQL.

    Args:
        string: text that may contain SQL syntax

    Returns:
        The outcome of ``is_valid()`` on the query object that
        ``sqlvalidator.parse`` builds from ``string``.
    """
    parsed_query = sqlvalidator.parse(string)
    return parsed_query.is_valid()
16 changes: 15 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ python = "^3.10"
google-cloud-bigquery = { version = "^3.8", extras = ["bqstorage", "pandas"] }
pandas = "^2.0.2"
pandas-gbq = "^0.18"
sqlvalidator = "^0.0.20"

[tool.poetry.group.dev]
optional = true
Expand All @@ -67,9 +68,6 @@ ruff = "^0.0.271"
twine = "^4.0.1"
types-mock = "^5.0.0.6"
urllib3 = "^2.0.2"
mkdocs = "^1.4.3"
mkdocs-material = "^9.1.15"
mkdocstrings-python = "^1.1.2"

[tool.poetry.group.docs]
optional = true
Expand Down

0 comments on commit 6e925a0

Please sign in to comment.