Skip to content

Commit

Permalink
Added working test cases for duckdb
Browse files (browse the repository at this point in the history)
  • Loading branch information
canimus committed Nov 26, 2022
1 parent 5dcb2a9 commit fc2da4e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cuallee/duckdb_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def is_unique(self, rule: Rule) -> str:
return f"COUNT(DISTINCT({rule.column}))"

def are_unique(self, rule: Rule) -> str:
    """Build the DuckDB expression used to check several columns for uniqueness.

    The expression sums ``approx_count_distinct`` over every column named in
    ``rule.column`` and divides that total by the number of columns.

    Args:
        rule: Rule whose ``column`` attribute is a sequence (tuple or list)
            of column names.

    Returns:
        The SQL expression as a string, e.g. for columns ``id`` and ``id2``:
        ``( approx_count_distinct(id) + approx_count_distinct(id2)) / cast(2.0 AS FLOAT)``.
    """
    # There must be a single return here. A preceding
    # ``COUNT(DISTINCT{rule.column})`` return interpolated the whole column
    # collection into one DISTINCT and made the per-column form unreachable.
    distinct_counts = " + ".join(
        f"approx_count_distinct({column})" for column in rule.column
    )
    # NOTE(review): approx_count_distinct is documented by DuckDB as an
    # approximate aggregate -- confirm that is acceptable for a uniqueness check.
    return f"( {distinct_counts}) / cast({float(len(rule.column))} AS FLOAT)"

def is_greater_than(self, rule: Rule) -> str:
    """Return the SQL expression for a "column greater than value" rule.

    The comparison ``<rule.column> > <rule.value>`` is wrapped in a
    ``CAST(... AS INTEGER)``.

    Args:
        rule: Rule providing the ``column`` and ``value`` to compare.

    Returns:
        The SQL expression as a string.
    """
    comparison = f"{rule.column} > {rule.value}"
    return f"CAST({comparison} AS INTEGER)"
Expand Down
8 changes: 4 additions & 4 deletions test/unit/duckdb_dataframe/test_are_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@
import pytest
import duckdb

@pytest.mark.skip(reason="Not implemented")
def test_positive(check: Check, db: duckdb.DuckDBPyConnection):
    """are_unique over two columns with no repeated values reports PASS."""
    check.are_unique(("id", "id2"))
    # `df` is never read in Python; it is referenced only through
    # table_name = "df" -- presumably DuckDB resolves the frame by that
    # variable name (confirm), so the name must not change.
    df = pd.DataFrame(data={"id": [10, 20], "id2": [300, 500]})
    check.table_name = "df"
    outcome = check.validate(db)
    assert outcome.status.str.match("PASS").all()


@pytest.mark.skip(reason="Not implemented")
def test_negative(check: Check, db: duckdb.DuckDBPyConnection):
    """are_unique over two columns that each repeat a value reports FAIL."""
    check.are_unique(("id", "id2"))
    # `df` is never read in Python; it is referenced only through
    # table_name = "df" -- presumably DuckDB resolves the frame by that
    # variable name (confirm), so the name must not change.
    df = pd.DataFrame(data={"id": [10, 10], "id2": [300, 300]})
    check.table_name = "df"
    outcome = check.validate(db)
    assert outcome.status.str.match("FAIL").all()

@pytest.mark.skip(reason="Not implemented")

@pytest.mark.parametrize(
"rule_column", [tuple(["id", "id2"]), list(["id", "id2"])], ids=("tuple", "list")
)
Expand All @@ -29,11 +27,13 @@ def test_parameters(check: Check, db: duckdb.DuckDBPyConnection, rule_column):
result = check.validate(db)
assert result.status.str.match("FAIL").all()

@pytest.mark.skip(reason="Not implemented")

def test_coverage(check: Check, db: duckdb.DuckDBPyConnection):
    """are_unique with a 0.75 argument passes when one column holds a None.

    The second positional argument (0.75) is presumably the required pass
    rate -- confirm against ``Check.are_unique``. The test also pins the
    highest reported ``pass_rate`` to exactly 0.75.
    """
    check.are_unique(("id", "id2"), 0.75)
    # `df` is never read in Python; it is referenced only through
    # table_name = "df" -- presumably DuckDB resolves the frame by that
    # variable name (confirm), so the name must not change.
    df = pd.DataFrame(data={"id": [10, None], "id2": [300, 500]})
    check.table_name = "df"
    outcome = check.validate(db)
    assert outcome.status.str.match("PASS").all()
    # Computed after the status assertion so the failure order is unchanged.
    best_rate = outcome.pass_rate.max()
    assert best_rate == 0.75


0 comments on commit fc2da4e

Please sign in to comment.