Skip to content

Commit

Permalink
TST Enable numpydoc validation default (#20304)
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasjpfan authored Jun 20, 2021
1 parent 617ff6e commit bb6117b
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 44 deletions.
4 changes: 4 additions & 0 deletions build_tools/azure/posix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ jobs:
- script: |
build_tools/azure/test_docs.sh
displayName: 'Test Docs'
- script: |
build_tools/azure/test_docstring.sh
displayName: "Numpydoc validation"
condition: eq(variables['TEST_DOCSTRINGS'], 'true')
- script: |
build_tools/azure/test_pytest_soft_dependency.sh
displayName: 'Test Soft Dependency'
Expand Down
15 changes: 15 additions & 0 deletions build_tools/azure/test_docstring.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Run the numpydoc docstring validation tests (maint_tools/test_docstrings.py).
#
# Environment variables read by this script:
#   DISTRIB         selects how the Python environment is activated
#                   ("conda*" or "ubuntu"; any other value activates nothing).
#   VIRTUALENV      name (conda) or path (ubuntu) of the environment to activate.
#   BUILD_WITH_ICC  when "true", the Intel oneAPI environment is sourced first.

# Abort on the first failing command so a failed activation is not ignored.
set -e

# Quote $VIRTUALENV so a value containing spaces or glob characters is passed
# through as a single word instead of being split or expanded by the shell.
if [[ "$DISTRIB" =~ ^conda.* ]]; then
    source activate "$VIRTUALENV"
elif [[ "$DISTRIB" == "ubuntu" ]]; then
    source "$VIRTUALENV/bin/activate"
fi

if [[ "$BUILD_WITH_ICC" == "true" ]]; then
    source /opt/intel/oneapi/setvars.sh
fi

pytest maint_tools/test_docstrings.py
256 changes: 212 additions & 44 deletions maint_tools/test_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,214 @@
from typing import Optional

import pytest
from sklearn.utils._testing import all_estimators
from sklearn.utils import all_estimators

numpydoc_validation = pytest.importorskip("numpydoc.validate")

# List of whitelisted modules and methods; regexp are supported.
DOCSTRING_WHITELIST = [
"LogisticRegression$",
"LogisticRegression.fit",
"LogisticRegression.decision_function",
"Birch.predict",
"Birch.transform",
# List of estimator names ignored when checking for numpydoc validation;
# each entry is matched as a regular expression against the import path.
DOCSTRING_IGNORE_LIST = [
"ARDRegression",
"AdaBoostClassifier",
"AdaBoostRegressor",
"AdditiveChi2Sampler",
"AffinityPropagation",
"AgglomerativeClustering",
"BaggingClassifier",
"BaggingRegressor",
"BayesianGaussianMixture",
"BayesianRidge",
"BernoulliNB",
"BernoulliRBM",
"Binarizer",
"Birch",
"CCA",
"CalibratedClassifierCV",
"CategoricalNB",
"ClassifierChain",
"ColumnTransformer",
"ComplementNB",
"CountVectorizer",
"DBSCAN",
"DecisionTreeClassifier",
"DecisionTreeRegressor",
"DictVectorizer",
"DictionaryLearning",
"DummyClassifier",
"DummyRegressor",
"ElasticNet",
"ElasticNetCV",
"EllipticEnvelope",
"EmpiricalCovariance",
"ExtraTreeClassifier",
"ExtraTreeRegressor",
"ExtraTreesClassifier",
"ExtraTreesRegressor",
"FactorAnalysis",
"FastICA",
"FeatureAgglomeration",
"FeatureHasher",
"FeatureUnion",
"FunctionTransformer",
"GammaRegressor",
"GaussianMixture",
"GaussianNB",
"GaussianProcessClassifier",
"GaussianProcessRegressor",
"GaussianRandomProjection",
"GenericUnivariateSelect",
"GradientBoostingClassifier",
"GradientBoostingRegressor",
"LinearDiscriminantAnalysis.decision_function",
"LinearSVC.decision_function",
"LogisticRegressionCV.decision_function",
"OPTICS",
"OPTICS.fit",
"PassiveAggressiveClassifier.decision_function",
"Perceptron.decision_function",
"RidgeClassifier.decision_function",
"RidgeClassifier.fit",
"RidgeClassifierCV.decision_function",
"GraphicalLasso",
"GraphicalLassoCV",
"GridSearchCV",
"HalvingGridSearchCV",
"HalvingRandomSearchCV",
"HashingVectorizer",
"HistGradientBoostingClassifier",
"HistGradientBoostingRegressor",
"HuberRegressor",
"IncrementalPCA",
"IsolationForest",
"Isomap",
"IsotonicRegression",
"IterativeImputer",
"KBinsDiscretizer",
"KMeans",
"KNNImputer",
"KNeighborsClassifier",
"KNeighborsRegressor",
"KNeighborsTransformer",
"KernelCenterer",
"KernelDensity",
"KernelDensity.fit",
"KernelDensity.score",
"DecisionTreeClassifier",
"DecisionTreeRegressor",
"LinearRegression$",
"SGDClassifier.decision_function",
"SGDClassifier.set_params",
"SGDClassifier.get_params",
"SGDClassifier.fit",
"SGDClassifier.partial_fit",
"SGDClassifier.predict$", # $ to avoid match w/ predict_proba (regex)
"SGDClassifier.score",
"SGDClassifier.sparsify",
"SGDClassifier.densify",
"VotingClassifier.fit",
"VotingClassifier.transform",
"VotingClassifier.predict",
"VotingClassifier.score",
"VotingClassifier.predict_proba",
"VotingClassifier.set_params",
"VotingClassifier.get_params",
"VotingClassifier.named_estimators",
"VotingClassifier$",
"KernelPCA",
"KernelRidge",
"LabelBinarizer",
"LabelEncoder",
"LabelPropagation",
"LabelSpreading",
"Lars",
"LarsCV",
"Lasso",
"LassoCV",
"LassoLars",
"LassoLarsCV",
"LassoLarsIC",
"LatentDirichletAllocation",
"LedoitWolf",
"LinearDiscriminantAnalysis",
"LinearRegression",
"LinearSVC",
"LinearSVR",
"LocalOutlierFactor",
"LocallyLinearEmbedding",
"LogisticRegression",
"LogisticRegressionCV",
"MDS",
"MLPClassifier",
"MLPRegressor",
"MaxAbsScaler",
"MeanShift",
"MinCovDet",
"MinMaxScaler",
"MiniBatchDictionaryLearning",
"MiniBatchKMeans",
"MiniBatchSparsePCA",
"MissingIndicator",
"MultiLabelBinarizer",
"MultiOutputClassifier",
"MultiOutputRegressor",
"MultiTaskElasticNet",
"MultiTaskElasticNetCV",
"MultiTaskLasso",
"MultiTaskLassoCV",
"MultinomialNB",
"NMF",
"NearestCentroid",
"NearestNeighbors",
"NeighborhoodComponentsAnalysis",
"Normalizer",
"NuSVC",
"NuSVR",
"Nystroem",
"OAS",
"OPTICS",
"OneClassSVM",
"OneHotEncoder",
"OneVsOneClassifier",
"OneVsRestClassifier",
"OrdinalEncoder",
"OrthogonalMatchingPursuit",
"OrthogonalMatchingPursuitCV",
"OutputCodeClassifier",
"PCA",
"PLSCanonical",
"PLSRegression",
"PLSSVD",
"PassiveAggressiveClassifier",
"PassiveAggressiveRegressor",
"PatchExtractor",
"Perceptron",
"Pipeline",
"PoissonRegressor",
"PolynomialCountSketch",
"PolynomialFeatures",
"PowerTransformer",
"QuadraticDiscriminantAnalysis",
"QuantileRegressor",
"QuantileTransformer",
"RANSACRegressor",
"RBFSampler",
"RFE",
"RFECV",
"RadiusNeighborsClassifier",
"RadiusNeighborsRegressor",
"RadiusNeighborsTransformer",
"RandomForestClassifier",
"RandomForestRegressor",
"RandomTreesEmbedding",
"RandomizedSearchCV",
"RegressorChain",
"Ridge",
"RidgeCV",
"RidgeClassifier",
"RidgeClassifierCV",
"RobustScaler",
"SGDOneClassSVM",
"SGDRegressor",
"SVC",
"SVR",
"SelectFdr",
"SelectFpr",
"SelectFromModel",
"SelectFwe",
"SelectKBest",
"SelectPercentile",
"SelfTrainingClassifier",
"SequentialFeatureSelector",
"ShrunkCovariance",
"SimpleImputer",
"SkewedChi2Sampler",
"SparseCoder",
"SparsePCA",
"SparseRandomProjection",
"SpectralBiclustering",
"SpectralClustering",
"SpectralCoclustering",
"SpectralEmbedding",
"SplineTransformer",
"StackingClassifier",
"StackingRegressor",
"StandardScaler",
"TSNE",
"TfidfTransformer",
"TfidfVectorizer",
"TheilSenRegressor",
"TransformedTargetRegressor",
"TruncatedSVD",
"TweedieRegressor",
"VarianceThreshold",
"VotingClassifier",
"VotingRegressor",
]


Expand All @@ -72,7 +233,7 @@ def get_all_methods():
yield Estimator, method


def filter_errors(errors, method):
def filter_errors(errors, method, Estimator=None):
"""
Ignore some errors based on the method type.
Expand All @@ -90,6 +251,13 @@ def filter_errors(errors, method):
if code in ["RT02", "GL01"]:
continue

# Ignore PR02: Unknown parameters for properties. We sometimes use
# properties for ducktyping, e.g. SGDClassifier.predict_proba
if code == "PR02" and Estimator is not None and method is not None:
method_obj = getattr(Estimator, method)
if isinstance(method_obj, property):
continue

# Following codes are only taken into account for the
# top level class docstrings:
# - ES01: No extended summary found
Expand Down Expand Up @@ -165,14 +333,14 @@ def test_docstring(Estimator, method, request):

import_path = ".".join(import_path)

if not any(re.search(regex, import_path) for regex in DOCSTRING_WHITELIST):
if any(re.search(regex, import_path) for regex in DOCSTRING_IGNORE_LIST):
request.applymarker(
pytest.mark.xfail(run=False, reason="TODO pass numpydoc validation")
)

res = numpydoc_validation.validate(import_path)

res["errors"] = list(filter_errors(res["errors"], method))
res["errors"] = list(filter_errors(res["errors"], method, Estimator=Estimator))

if res["errors"]:
msg = repr_errors(res, Estimator, method)
Expand Down

0 comments on commit bb6117b

Please sign in to comment.