Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds Null Issue Manager #856

Merged
merged 32 commits into master from abhijitpal1247-null
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
baa5021
changes for NullIssueManager
abhijitpal1247 Oct 3, 2023
10a8bfe
changing null_scores to average_null_score
abhijitpal1247 Oct 3, 2023
46b471d
adding comments for readability
abhijitpal1247 Oct 3, 2023
65fe3f8
removing unnecessary dependencies
abhijitpal1247 Oct 3, 2023
c3a53f4
autocorrected formatting with black
abhijitpal1247 Oct 3, 2023
aef2c60
correcting some datatypes
abhijitpal1247 Oct 3, 2023
143e289
autocorrecting the format with black-2
abhijitpal1247 Oct 3, 2023
e6ca06d
Merge branch 'master' into abhijitpal1247-null
abhijitpal1247 Oct 7, 2023
2a8347a
added test and some minor changes
abhijitpal1247 Oct 8, 2023
b028779
added test for both null and non-null values
abhijitpal1247 Oct 8, 2023
0fa0710
reformatted with black
abhijitpal1247 Oct 8, 2023
4f55765
Merge branch 'cleanlab:master' into abhijitpal1247-null
abhijitpal1247 Oct 14, 2023
afa34e8
adding some more metrics to track and test them
abhijitpal1247 Oct 14, 2023
1c96a11
removing unused imports
abhijitpal1247 Oct 14, 2023
348bcad
adding some type declarations
abhijitpal1247 Oct 14, 2023
4efc50c
adding some type declarations - 2
abhijitpal1247 Oct 14, 2023
9791913
adding some type declarations - 3
abhijitpal1247 Oct 14, 2023
9643483
reformatted using black - 2
abhijitpal1247 Oct 14, 2023
931952d
typing changes - 3
abhijitpal1247 Oct 14, 2023
20da838
removing unused imports - 2
abhijitpal1247 Oct 14, 2023
9622342
typing changes - 4
abhijitpal1247 Oct 14, 2023
683ba72
typing changes - 5
abhijitpal1247 Oct 14, 2023
c678f1c
add numpy extra for hypothesis dev dependency
elisno Oct 26, 2023
2b10067
fix quality score for null issue manager
elisno Oct 26, 2023
29e1555
format import
elisno Oct 26, 2023
18de7a5
remove useless super() delegation in __init__ method
elisno Oct 26, 2023
766ff5c
add property based test for scoring null issues
elisno Oct 26, 2023
a87ac46
turn info helper methods into internal methods
elisno Oct 28, 2023
f617653
add basic docstring for method containing main logic of issue manager
elisno Oct 28, 2023
e701df0
update formatting
elisno Oct 28, 2023
a483952
extract variable in test
elisno Oct 28, 2023
cfa26aa
add temporary docs page for null issue manager
elisno Oct 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
update formatting
  • Loading branch information
elisno committed Oct 28, 2023
commit e701df0387ace859d4d2155f0b22d0eb07792480
5 changes: 2 additions & 3 deletions cleanlab/datalab/internal/issue_manager/null.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

class NullIssueManager(IssueManager):
"""Manages issues related to null/missing values in the rows of features.

Parameters
----------
datalab :
Expand All @@ -28,9 +29,7 @@ class NullIssueManager(IssueManager):
issue_name: ClassVar[str] = "null"
verbosity_levels = {
0: ["average_null_score"],
1: [
"most_common_issue",
],
1: ["most_common_issue"],
2: [],
}

Expand Down
13 changes: 7 additions & 6 deletions tests/datalab/issue_manager/test_null.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_find_issues_with_null(self, issue_manager, embeddings_with_null):
issues_sort["is_null_issue"] == expected_sorted_issue_mask
), "Issue mask should be correct"
assert summary_sort["issue_type"][0] == "null"
assert summary_sort["score"][0] == pytest.approx(expected=8/12, abs=1e-7)
assert summary_sort["score"][0] == pytest.approx(expected=8 / 12, abs=1e-7)
assert (
info_sort.get("average_null_score", None) is not None
), "Should have average null score"
Expand Down Expand Up @@ -133,7 +133,7 @@ def test_collect_info_with_nulls(self, issue_manager, embeddings_with_null):
"""Test some values in the info dict."""
issue_manager.find_issues(features=embeddings_with_null)
info = issue_manager.info
assert info["average_null_score"] == pytest.approx(expected=8/12, abs=1e-7)
assert info["average_null_score"] == pytest.approx(expected=8 / 12, abs=1e-7)
assert info["most_common_issue"]["pattern"] == "100"
assert info["most_common_issue"]["count"] == 1
assert info["most_common_issue"]["rows_affected"] == [0]
Expand All @@ -152,11 +152,12 @@ def test_collect_info_with_nulls(self, issue_manager, embeddings_with_null):
elements=float_with_nan,
fill=nan_strategy,
)

@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) # No need to reset state of issue_manager fixture

@settings(
suppress_health_check=[HealthCheck.function_scoped_fixture]
) # No need to reset state of issue_manager fixture
@given(embeddings=features_with_nan_strategy)
def test_quality_scores_and_full_null_row_identification(self, issue_manager, embeddings):

# Run the find_issues method
issue_manager.find_issues(features=embeddings)
issues_sort, _, _ = (
Expand All @@ -173,4 +174,4 @@ def test_quality_scores_and_full_null_row_identification(self, issue_manager, em

# 2. The rows that are marked as is_null_issue should ONLY be those rows which are 100% null values.
all_rows_are_null = np.all(np.isnan(embeddings), axis=1)
assert np.all(issues_sort["is_null_issue"] == all_rows_are_null)
assert np.all(issues_sort["is_null_issue"] == all_rows_are_null)