Skip to content

Commit

Permalink
fix mypy extra unit tests, pin pandas-stubs for dev env (unionai-oss#1056)
Browse files Browse the repository at this point in the history

* fix mypy extra unit tests, pin pandas-stubs for dev env

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* ignore pandas-stubs version in nox requirements target

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* fix unit tests

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* mypy test uses config

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* update modin tests

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* update ci to use concurrency groups instead of fail-fast

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* update concurrency in workflow

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* fixes

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>
  • Loading branch information
cosmicBboy authored Dec 15, 2022
1 parent f03e35e commit 8ccede9
Show file tree
Hide file tree
Showing 17 changed files with 83 additions and 60 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ env:
# Increase this value to reset cache if environment.yml has not changed
CACHE_VERSION: 6

concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true

jobs:

lint:
Expand Down Expand Up @@ -93,7 +97,7 @@ jobs:
PYTEST_FLAGS: --cov=pandera --cov-report=term-missing --cov-report=xml --cov-append
HYPOTHESIS_FLAGS: -n=auto -q --hypothesis-profile=ci
strategy:
fail-fast: true
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
Expand Down
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ repos:
entry: mypy
language: python
types: [python]
files: (^pandera/|^tests/|^scripts/)
pass_filenames: false
exclude: (^docs/|^tests/mypy/modules/)
require_serial: true
args: ["pandera", "tests", "scripts"]
verbose: true
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- pydantic

# mypy extra
- pandas-stubs
- pandas-stubs <= 1.5.2.221213

# pyspark extra
- pyspark >= 3.2.0
Expand Down
8 changes: 8 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[mypy]
ignore_missing_imports = True
follow_imports = skip
allow_redefinition = True
warn_return_any = False
warn_unused_configs = True
show_error_codes = True
exclude = tests/mypy/modules
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def requirements(session: Session) -> None: # pylint:disable=unused-argument
print(f"{REQUIREMENT_PATH} has been re-generated ✨ 🍰 ✨")
raise err

ignored_pkgs = {"black", "pandas"}
ignored_pkgs = {"black", "pandas", "pandas-stubs"}
mismatched = []
# only compare package versions, not python version markers.
str_dev_reqs = [str(x) for x in DEV_REQUIREMENTS]
Expand Down
2 changes: 1 addition & 1 deletion pandera/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _prepare_series_input(
if check_utils.is_field(df_or_series):
return df_or_series # type: ignore[return-value]
elif self.groupby is None:
return df_or_series[column] # type: ignore[index]
return df_or_series[column] # type: ignore
elif isinstance(self.groupby, list):
return self._format_groupby_input( # type: ignore[return-value]
df_or_series.groupby(self.groupby)[column], # type: ignore[index]
Expand Down
14 changes: 7 additions & 7 deletions pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def coerce_value(self, value: Any) -> decimal.Decimal:
return dec.quantize(self._exp, context=self._ctx)

def coerce(self, data_container: PandasObject) -> PandasObject:
return data_container.apply(self.coerce_value)
return data_container.apply(self.coerce_value) # type: ignore

def check( # type: ignore
self,
Expand Down Expand Up @@ -577,7 +577,7 @@ def __init__( # pylint:disable=super-init-not-called
object.__setattr__(
self,
"type",
pd.CategoricalDtype(self.categories, self.ordered),
pd.CategoricalDtype(self.categories, self.ordered), # type: ignore
)

def coerce(self, data_container: PandasObject) -> PandasObject:
Expand Down Expand Up @@ -639,13 +639,13 @@ def __str__(self) -> str:
else:

@Engine.register_dtype(
equivalents=["string", pd.StringDtype, pd.StringDtype()]
) # type: ignore
equivalents=["string", pd.StringDtype, pd.StringDtype()] # type: ignore
)
@immutable
class STRING(DataType, dtypes.String): # type: ignore
"""Semantic representation of a :class:`pandas.StringDtype`."""

type = pd.StringDtype()
type = pd.StringDtype() # type: ignore


@Engine.register_dtype(
Expand Down Expand Up @@ -984,8 +984,8 @@ def __post_init__(self):
def from_parametrized_dtype(cls, pd_dtype: pd.SparseDtype):
"""Convert a :class:`pandas.SparseDtype` to
a Pandera :class:`pandera.engines.pandas_engine.Sparse`."""
return cls( # type: ignore
dtype=pd_dtype.subtype, fill_value=pd_dtype.fill_value
return cls(
dtype=pd_dtype.subtype, fill_value=pd_dtype.fill_value # type: ignore
)


Expand Down
6 changes: 3 additions & 3 deletions pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def get_dtypes(self, dataframe: pd.DataFrame) -> Dict[str, DataType]:
)
return {
**{n: c.dtype for n, c in self.columns.items() if not c.regex},
**regex_dtype,
**regex_dtype, # type: ignore
}

@property
Expand Down Expand Up @@ -595,7 +595,7 @@ def _validate(
is_schema_col = column in expanded_column_names
if (self.strict is True) and not is_schema_col:
msg = (
f"column '{column}' not in {self.__class__.__name__}"
f"column {column!r} not in {self.__class__.__name__}"
f" {self.columns}"
)
error_handler.collect_error(
Expand All @@ -621,7 +621,7 @@ def _validate(
errors.SchemaError(
self,
check_obj,
message=f"column '{column}' out-of-order",
message=f"column {column!r} out-of-order",
failure_cases=scalar_failure_case(column),
check="column_ordered",
),
Expand Down
2 changes: 1 addition & 1 deletion pandera/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _mask(
) -> Union[pd.Series, pd.Index]:
if pd.api.types.is_timedelta64_dtype(val): # type: ignore [arg-type]
return val.mask(null_mask, pd.NaT) # type: ignore [union-attr,arg-type]
elif val.dtype == pd.StringDtype():
elif val.dtype == pd.StringDtype(): # type: ignore [call-arg]
return val.mask(null_mask, pd.NA) # type: ignore [union-attr,arg-type]
return val.mask(null_mask) # type: ignore [union-attr]

Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ typing_extensions >= 3.7.4.3
frictionless
pyarrow
pydantic
pandas-stubs
pandas-stubs <= 1.5.2.221213
pyspark >= 3.2.0
modin
protobuf <= 3.20.3
Expand Down
8 changes: 0 additions & 8 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
[isort]
float_to_top = true
profile = black

[mypy]
ignore_missing_imports = True
allow_redefinition = True
warn_return_any = False
warn_unused_configs = True
show_error_codes = True
exclude = tests/mypy/modules
14 changes: 7 additions & 7 deletions tests/core/test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,13 +900,13 @@ def validate_union(
) -> typing.Union[DataFrame[OnlyZeroesSchema], DataFrame[OnlyOnesSchema]]:
return df

validate_union(pd.DataFrame({"a": [0, 0]}))
validate_union(pd.DataFrame({"a": [1, 1]}))
validate_union(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]
validate_union(pd.DataFrame({"a": [1, 1]})) # type: ignore [arg-type]

with pytest.raises(errors.SchemaErrors):
validate_union(pd.DataFrame({"a": [0, 1]}))
validate_union(pd.DataFrame({"a": [0, 1]})) # type: ignore [arg-type]
with pytest.raises(errors.SchemaErrors):
validate_union(pd.DataFrame({"a": [2, 2]}))
validate_union(pd.DataFrame({"a": [2, 2]})) # type: ignore [arg-type]

@check_types
def validate_union_wrong_outputs(
Expand All @@ -916,10 +916,10 @@ def validate_union_wrong_outputs(
) -> typing.Union[DataFrame[OnlyZeroesSchema], DataFrame[OnlyOnesSchema]]:
new_df = df.copy()
new_df["a"] = [0, 1]
return new_df
return new_df # type: ignore [return-value]

with pytest.raises(errors.SchemaErrors):
validate_union_wrong_outputs(pd.DataFrame({"a": [0, 0]}))
validate_union_wrong_outputs(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]


def test_check_types_non_dataframes() -> None:
Expand Down Expand Up @@ -947,7 +947,7 @@ def union_df_int_types_pydantic_check(
) -> typing.Union[DataFrame[OnlyZeroesSchema], int]:
return val

union_df_int_types_pydantic_check(pd.DataFrame({"a": [0, 0]}))
union_df_int_types_pydantic_check(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]
int_val_pydantic = union_df_int_types_pydantic_check(5)
str_val_pydantic = union_df_int_types_pydantic_check("5") # type: ignore[arg-type]
assert isinstance(int_val_pydantic, int)
Expand Down
13 changes: 7 additions & 6 deletions tests/core/test_logical_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ def test_logical_datatype_check(
"data, expected_datatype, failure_cases",
[
(
[Decimal("1.2"), Decimal("12.3")],
[Decimal("1.2"), Decimal("12.3")] * 100,
pandas_engine.Decimal(2, 1),
[Decimal("12.3")],
[Decimal("12.3")] * 100,
),
(
[Decimal("1.2"), None, pd.NA, np.nan],
[Decimal("1.2"), None, pd.NA, np.nan] * 100,
pandas_engine.Decimal(19, 5),
[],
),
Expand All @@ -129,14 +129,15 @@ def test_logical_datatype_check(
pd.NA,
np.nan,
pd.NaT,
],
]
* 100,
pandas_engine.Date(),
[],
),
(
["2022-01-01", "01/01/2022"],
["2022-01-01", "01/01/2022"] * 100,
pandas_engine.Date(to_datetime_kwargs={"format": "%Y-%m-%d"}),
["01/01/2022"],
["01/01/2022"] * 100,
),
],
)
Expand Down
12 changes: 6 additions & 6 deletions tests/modin/test_schemas_on_modin.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,16 +335,16 @@ class Schema(pa.SchemaModel):

valid_df = mpd.DataFrame(
{
"int_field": [1, 2, 3],
"float_field": [-1.1, -2.1, -3.1],
"str_field": ["a", "b", "c"],
"int_field": [1, 2, 3] * 10,
"float_field": [-1.1, -2.1, -3.1] * 10,
"str_field": ["a", "b", "c"] * 10,
}
)
invalid_df = mpd.DataFrame(
{
"int_field": [-1],
"field_field": [1],
"str_field": ["d"],
"int_field": [-1] * 100,
"field_field": [1] * 100,
"str_field": ["d"] * 100,
}
)

Expand Down
2 changes: 1 addition & 1 deletion tests/mypy/modules/pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def fn_mutate_inplace(df: DataFrame[Schema]) -> DataFrame[SchemaOut]:

@pa.check_types
def fn_assign_and_get_index(df: DataFrame[Schema]) -> DataFrame[SchemaOut]:
return df.assign(foo=30).iloc[:3]
return df.assign(foo=30).iloc[:3] # mypy error
# error: Incompatible return value type (got "pandas.core.frame.DataFrame",
# expected "pandera.typing.pandas.DataFrame[SchemaOut]") [return-value]

Expand Down
10 changes: 5 additions & 5 deletions tests/mypy/modules/pandas_time.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# pylint: skip-file
import pandas as pd

pd.Timestamp.now() + pd.tseries.offsets.YearEnd(1) # false positive
pd.Timestamp.now() + pd.tseries.offsets.YearEnd(1)

pd.Timedelta(minutes=2) # false positive
pd.Timedelta(2, unit="minutes") # false positive
pd.Timedelta(minutes=2)
pd.Timedelta(2, unit="minutes")

pd.Timedelta(minutes=2, seconds=30) # false positive
pd.Timedelta(2.5, unit="minutes") # false positive
pd.Timedelta(minutes=2, seconds=30)
pd.Timedelta(2.5, unit="minutes") # mypy error
pd.Timedelta(2, unit="minutes") + pd.Timedelta(30, unit="seconds")
36 changes: 26 additions & 10 deletions tests/mypy/test_static_type_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@
test_module_dir = Path(os.path.dirname(__file__))


def _get_mypy_errors(stdout) -> typing.List[typing.Dict[str, str]]:
def _get_mypy_errors(
module_name: str,
stdout,
) -> typing.List[typing.Dict[str, str]]:
"""Parse line number and error message."""
errors: typing.List[typing.Dict[str, str]] = []
# last line is summary of errors
for error in [x for x in stdout.split("\n") if x != ""][:-1]:
matches = re.match(
r".+\.py:(?P<lineno>\d+): error: (?P<msg>.+) \[(?P<errcode>.+)\]",
error,
regex = (
r".+{}:".format(module_name.replace(".", r"\."))
+ r"(?P<lineno>\d+): error: (?P<msg>.+) \[(?P<errcode>.+)\]"
)
matches = re.match(regex, error)
if matches is not None:
match_dict = matches.groupdict()
errors.append(
Expand All @@ -53,16 +57,21 @@ def _get_mypy_errors(stdout) -> typing.List[typing.Dict[str, str]]:
def test_mypy_pandas_dataframe(capfd) -> None:
"""Test that mypy raises expected errors on pandera-decorated functions."""
# pylint: disable=subprocess-run-check
cache_dir = str(test_module_dir / ".mypy_cache" / "test-mypy-default")
subprocess.run(
[
sys.executable,
"-m",
"mypy",
str(test_module_dir / "modules" / "pandas_dataframe.py"),
"--cache-dir",
cache_dir,
"--config-file",
str(test_module_dir / "config" / "no_plugin.ini"),
],
text=True,
)
errors = _get_mypy_errors(capfd.readouterr().out)
errors = _get_mypy_errors("pandas_dataframe.py", capfd.readouterr().out)
assert len(PANDAS_DATAFRAME_ERRORS) == len(errors)
for expected, error in zip(PANDAS_DATAFRAME_ERRORS, errors):
assert error["errcode"] == expected["errcode"]
Expand Down Expand Up @@ -97,6 +106,13 @@ def test_pandera_runtime_errors(fn) -> None:
{"msg": 'Argument 1 to "fn" has incompatible type', "errcode": "arg-type"},
] * 2

PANDAS_TIME_ERRORS = [
{
"msg": 'Argument 1 to "Timedelta" has incompatible type "float"',
"errcode": "arg-type",
},
]

PYTHON_SLICE_ERRORS = [
{"msg": "Slice index must be an integer or None", "errcode": "misc"},
]
Expand Down Expand Up @@ -129,12 +145,12 @@ def test_pandera_runtime_errors(fn) -> None:
["pandera_types.py", "plugin_mypy.ini", PANDERA_TYPES_ERRORS],
["pandas_concat.py", "no_plugin.ini", []],
["pandas_concat.py", "plugin_mypy.ini", []],
["pandas_time.py", "no_plugin.ini", []],
["pandas_time.py", "plugin_mypy.ini", []],
["pandas_time.py", "no_plugin.ini", PANDAS_TIME_ERRORS],
["pandas_time.py", "plugin_mypy.ini", PANDAS_TIME_ERRORS],
["python_slice.py", "no_plugin.ini", PYTHON_SLICE_ERRORS],
["python_slice.py", "plugin_mypy.ini", PYTHON_SLICE_ERRORS],
["pandas_index.py", "no_plugin.ini", PANDAS_INDEX_ERRORS],
["pandas_index.py", "plugin_mypy.ini", PANDAS_INDEX_ERRORS],
["pandas_index.py", "no_plugin.ini", []],
["pandas_index.py", "plugin_mypy.ini", []],
["pandas_series.py", "no_plugin.ini", PANDAS_SERIES_ERRORS],
["pandas_series.py", "plugin_mypy.ini", PANDAS_SERIES_ERRORS],
],
Expand All @@ -160,7 +176,7 @@ def test_pandas_stubs_false_positives(
]
# pylint: disable=subprocess-run-check
subprocess.run(commands, text=True)
resulting_errors = _get_mypy_errors(capfd.readouterr().out)
resulting_errors = _get_mypy_errors(module, capfd.readouterr().out)
assert len(expected_errors) == len(resulting_errors)
for expected, error in zip(expected_errors, resulting_errors):
assert error["errcode"] == expected["errcode"]
Expand Down

0 comments on commit 8ccede9

Please sign in to comment.