Cleaning unittest (#129)
* moving to tests/util folder

* Organizing test examples. Creating setup routines for test examples

* Changing test example variables to fixtures (see the sketch after the change summary below)

* PEP8 formatting

* PEP8 formatting

* PEP8 fix (line length)

* PEP8 fixes

* fixing errors

* updating unittests

* Adding a test to check whether the meta-estimator implements the predict_proba function

* Removing unused test case

* Fixing a bug in the error raised when the classifier was not fitted yet
Menelau authored Dec 6, 2018
1 parent 5e3f6a0 commit ff20a1a
Showing 34 changed files with 1,197 additions and 1,194 deletions.
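
The central refactor, before the per-file diffs: module-level test data that used to be star-imported from examples_test.py becomes pytest fixtures in conftest.py, injected into tests by parameter name. A minimal sketch of the pattern, with invented names (toy_X_y is hypothetical; it only mirrors the create_X_y fixture added below), not code taken from the diff:

import numpy as np
import pytest


# Before: a module-level variable in examples_test.py, pulled in everywhere
# via `from deslib.tests.examples_test import *`.
#
# X_dsel_ex1 = np.array(...)

# After: a fixture in conftest.py; pytest discovers it automatically and
# passes it to any test that lists the fixture name as an argument.
@pytest.fixture
def toy_X_y():  # hypothetical stand-in for create_X_y
    X = np.array([[-1.0, 1.0], [1.0, 1.0]])
    y = np.array([0, 1])
    return X, y


def test_shapes_match(toy_X_y):
    X, y = toy_X_y
    assert len(X) == len(y)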
6 changes: 0 additions & 6 deletions deslib/static/base.py
@@ -94,12 +94,6 @@ class labels of each example in X.

         return self
 
-    def _check_is_fitted(self):
-        """Verify if the estimator algorithm was fitted. Raises an error if it
-        is not fitted.
-        """
-        check_is_fitted(self, "estimator_")
-
     def _setup_label_encoder(self, y):
         """
         Setup the label encoder
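The deleted _check_is_fitted wrapper added nothing over calling sklearn's check_is_fitted at the point of use, which raises NotFittedError on its own. A hedged sketch of the resulting idiom; the class and attribute names here are illustrative, not deslib code:

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted


class ToyStaticEnsemble(BaseEstimator, ClassifierMixin):
    """Illustrative only -- not a deslib class."""

    def fit(self, X, y):
        # Trailing-underscore attributes mark a fitted estimator; this is
        # what check_is_fitted inspects.
        self.estimator_ = object()
        return self

    def predict(self, X):
        # Raises sklearn.exceptions.NotFittedError if fit was never called,
        # with no private wrapper method needed.
        check_is_fitted(self, "estimator_")
        return [0] * len(X)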
5 changes: 3 additions & 2 deletions deslib/static/stacked.py
@@ -114,12 +114,13 @@ def predict_proba(self, X):
         Predicted class for each sample in X.
         """
         X = check_array(X)
-        check_is_fitted(self, "meta_classifier_")
+
         # Check if the meta-classifier can output probabilities
-        if not hasattr(self, "predict_proba"):
+        if not hasattr(self.meta_classifier_, "predict_proba"):
             raise ValueError("Meta-classifier does not implement the"
                              " predict_proba method.")
+        check_is_fitted(self, "meta_classifier_")
 
         base_preds = self._predict_proba_base(X)
         return self.meta_classifier_.predict_proba(base_preds)

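Per the commit notes, a test now covers a meta-estimator that lacks predict_proba. A minimal sketch of how such a guard can be exercised; predict_proba_via_meta is an invented helper standing in for the guarded call in stacked.py above, while Perceptron genuinely has no predict_proba:

import numpy as np
import pytest
from sklearn.linear_model import Perceptron


def predict_proba_via_meta(meta_clf, meta_features):
    # Hypothetical stand-in for the guarded call above (not deslib code).
    if not hasattr(meta_clf, "predict_proba"):
        raise ValueError("Meta-classifier does not implement the"
                         " predict_proba method.")
    return meta_clf.predict_proba(meta_features)


def test_guard_rejects_non_probabilistic_meta():
    X = np.array([[0.0, 0.0], [1.0, 1.0]] * 5)
    y = np.array([0, 1] * 5)
    meta = Perceptron().fit(X, y)  # Perceptron exposes no predict_proba
    with pytest.raises(ValueError):
        predict_proba_via_meta(meta, X)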
2 changes: 1 addition & 1 deletion deslib/tests/__init__.py
@@ -1 +1 @@
-from .examples_test import *
+from .conftest import *
161 changes: 161 additions & 0 deletions deslib/tests/conftest.py
@@ -0,0 +1,161 @@
from unittest.mock import MagicMock
import numpy as np
import pytest


@pytest.fixture
def example_estimate_competence(create_X_y):
    X, y = create_X_y

    # Pre-processed results on DSEL. This information is used by the majority
    # of DS techniques to estimate the classifier competence.
    dsel_processed = np.array(
        [[1, 1, 1], [0, 0, 1], [0, 0, 1], [1, 1, 0], [1, 1, 1], [0, 0, 1],
         [0, 0, 0], [1, 1, 1],
         [1, 1, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [1, 1, 1],
         [1, 1, 1]])

    # pre-calculated indices of 7 Nearest neighbors for competence estimation.
    neighbors = np.array([[8, 11, 4, 7, 13, 10, 1],
                          [7, 1, 11, 13, 0, 8, 4],
                          [5, 3, 4, 8, 10, 11, 7]])

    # Scores obtained for the two classes. This information is used by the
    # techniques based on posterior probabilities
    dsel_scores = np.tile(np.array([[1.0, 0.0], [0.5, 0.5], [0.8, 0.2]]),
                          (15, 1, 1))

    # Distance information is used by the probabilistic techniques
    # (des.probabilistic) as well as the MLA, A Priori and A Posteriori methods
    distances = np.array([[0.35355339, 0.35355339, 0.55901699, 0.79056942,
                           0.79056942, 0.90138782, 1.03077641],
                          [0.3, 0.39051248, 0.53851648, 0.86023253, 0.94339811,
                           1.04403065, 1.28549601],
                          [0.70710678, 1.41421356, 1.95256242, 2.12132034,
                           2.79508497, 2.82842712, 2.91547595]])

    return X, y, neighbors, distances, dsel_processed, dsel_scores


@pytest.fixture
def create_X_y():
    # ex1: The distribution of samples of a test example.
    X = np.array([[-1, 1], [-0.75, 0.5], [-1.5, 1.5],
                  [1, 1], [0.75, 0.5], [1.5, 1.5],
                  [1, -1], [-0.5, 0.5], [0.5, 0.5],
                  [0, -1], [0.75, -0.5], [0.0, 0.0],
                  [-1, -1], [0, -0.5], [1, -1]])
    # Labels associated with the samples. This information is used
    # by techniques based on a posteriori information.
    y = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0])
    return X, y


# ----- Test Example all ones ------
@pytest.fixture
def example_all_ones(example_estimate_competence):
    X, y, neighbors = example_estimate_competence[0:3]
    dsel_processed = np.ones((15, 3))
    dsel_scores = np.ones((15, 3, 2))
    distances = np.ones((3, 7))

    return X, y, neighbors, distances, dsel_processed, dsel_scores


# ----- Test Example from Combining pattern classifiers ------
# This example is used to test the results of the A priori,
# A posteriori and MLA techniques
@pytest.fixture
def example_kuncheva():
    distances = np.linspace(1, 15, num=15)

    # 10 neighbors used in the example
    neighbors = np.linspace(0, 14, num=15, dtype=int)

    # target class of each example. independent means that it should be
    # used by the a priori technique
    y_independent = np.array([2, 1, 2, 2, 3, 1, 2, 1, 3, 3, 2, 1, 2, 2, 1]) - 1

    # dependent means that it should be used by the a posteriori technique
    y_dependent = np.array([1, 0, 1, 1, 2, 0, 1, 0, 0, 2, 1, 2, 1, 1, 0])

    # Predictions of the base classifier ci. Used to estimate its competence
    # level for the A Posteriori
    classifier_pred = np.array(
        [2, 3, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 2, 2, 1]) - 1

    # whether or not the base classifier made the correct prediction for each
    # sample in dsel
    dsel_processed = np.transpose(
        np.array([[1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1]]))

    # In this example we consider that the posteriori is always 1 for the
    # predicted class (15 samples, 1 classifier, 3 classes)
    dsel_scores = np.array([[[0.0, 1.0, 0.0],
                             [0.0, 0.0, 1.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [1.0, 0.0, 0.0],
                             [1.0, 0.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 0.0, 1.0],
                             [0.0, 0.0, 1.0],
                             [1.0, 0.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [1.0, 0.0, 0.0]]]).reshape(15, 1, 3)

    k = 15
    n_classes = 3
    dict_return = {"dsel_processed": dsel_processed,
                   "dsel_scores": dsel_scores,
                   "distances": distances,
                   "neighbors": neighbors,
                   "classifier_pred": classifier_pred,
                   "y_dependent": y_dependent,
                   "y_independent": y_independent,
                   "n_classes": n_classes,
                   "k": k}

    return dict_return


# ----- Routines to generate a pool of classifiers using MagicMock ------
def create_base_classifier(return_value, return_prob=None):
    classifier = MagicMock()
    classifier.predict.return_value = [return_value]
    classifier.predict_proba.return_value = return_prob
    return classifier


@pytest.fixture
def create_pool_classifiers():
    clf_0 = create_base_classifier(return_value=0,
                                   return_prob=np.atleast_2d([0.5, 0.5]))
    clf_1 = create_base_classifier(return_value=1,
                                   return_prob=np.atleast_2d([1.0, 0.0]))
    clf_2 = create_base_classifier(return_value=0,
                                   return_prob=np.atleast_2d([0.33, 0.67]))
    pool_classifiers = [clf_0, clf_1, clf_2]
    return pool_classifiers


@pytest.fixture
def create_pool_all_agree():
    return [create_base_classifier(return_value=0)] * 100


@pytest.fixture
def example_static_selection(create_X_y):
    X, y = create_X_y
    pool1 = [create_base_classifier(return_value=0)] * 50
    pool2 = [create_base_classifier(return_value=1)] * 50
    for clf in pool1:
        clf.score = MagicMock(return_value=0.5)
    for clf in pool2:
        clf.score = MagicMock(return_value=1.0)

    pool = pool1 + pool2
    return X, y, pool
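
A note on the mechanics, since the star import is gone: fixtures defined in conftest.py are discovered by pytest automatically, so test modules simply name them as parameters. A hedged usage sketch against the fixtures above (the test names are invented):

import numpy as np


def test_create_X_y_shapes(create_X_y):
    # pytest resolves create_X_y from conftest.py by name; no import needed.
    X, y = create_X_y
    assert X.shape == (15, 2)
    assert y.shape == (15,)


def test_all_agree_pool_predicts_zero(create_pool_all_agree):
    pool = create_pool_all_agree
    assert len(pool) == 100
    # every mocked classifier answers [0] regardless of the input
    assert all(clf.predict(np.zeros((1, 2)))[0] == 0 for clf in pool)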
114 changes: 54 additions & 60 deletions deslib/tests/dcs/test_a_posteriori.py
@@ -1,9 +1,9 @@
 import numpy as np
 import pytest
 from sklearn.linear_model import Perceptron
+from sklearn.utils.estimator_checks import check_estimator
 
 from deslib.dcs.a_posteriori import APosteriori
-from deslib.tests.examples_test import *
-from sklearn.utils.estimator_checks import check_estimator
 
 
 def test_check_estimator():
@@ -12,22 +12,22 @@ def test_check_estimator():

 # Should always be 1.0 since the supports for the correct class is always 1.
 @pytest.mark.parametrize('index', [0, 1, 2])
-def test_estimate_competence_all_ones(index):
+def test_estimate_competence_all_ones(index, example_all_ones):
+    _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
 
     query = np.atleast_2d([1, 1])
 
-    a_posteriori_test = APosteriori(create_pool_classifiers())
-    a_posteriori_test.fit(X_dsel_ex1, y_dsel_ex1)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_ex1
-    a_posteriori_test.dsel_scores_ = dsel_scores_all_ones
+    a_posteriori_test = APosteriori()
+    a_posteriori_test.n_classifiers_ = 3
+    a_posteriori_test.DSEL_processed_ = dsel_processed
+    a_posteriori_test.dsel_scores_ = dsel_scores
+    a_posteriori_test.DSEL_target_ = y
 
-    neighbors = neighbors_ex1[index, :].reshape(1, -1)
-    distances = distances_all_ones[index, :].reshape(1, -1)
+    neighbors = neighbors[index, :].reshape(1, -1)
+    distances = distances[index, :].reshape(1, -1)
 
     expected = [1.0, 1.0, 1.0]
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([0, 1, 0])
 
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
@@ -37,24 +37,22 @@ def test_estimate_competence_all_ones(index):


 # Testing example from kuncheva's book (combining pattern classifiers)
-def test_estimate_competence_kuncheva_ex():
+def test_estimate_competence_kuncheva_ex(example_kuncheva):
     query = np.atleast_2d([1, 1])
 
-    a_posteriori_test = APosteriori([create_base_classifier(return_value=1)],
-                                    k=k_ex_kuncheva)
+    a_posteriori_test = APosteriori(k=example_kuncheva['k'])
+    a_posteriori_test.n_classifiers_ = 1
 
-    a_posteriori_test.DSEL_processed_ = dsel_processed_kuncheva
-    a_posteriori_test.dsel_scores_ = dsel_scores_ex_kuncheva
-    a_posteriori_test.DSEL_target_ = y_dsel_ex_kuncheva_dependent
-    a_posteriori_test.n_classes_ = n_classes_ex_kuncheva
+    a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
+    a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
+    a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent']
+    a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 
-    neighbors = neighbors_ex_kuncheva.reshape(1, -1)
-    distances = distances_ex_kuncheva.reshape(1, -1)
+    neighbors = example_kuncheva['neighbors'].reshape(1, -1)
+    distances = example_kuncheva['distances'].reshape(1, -1)
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([[1]])
+
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
@@ -63,54 +61,51 @@ def test_estimate_competence_kuncheva_ex():


 # Testing example from kuncheva's book (combining pattern classifiers)
-def test_estimate_competence_kuncheva_ex_batch():
+def test_estimate_competence_kuncheva_ex_batch(example_kuncheva):
     # considering a batch composed of 10 samples
     query = np.ones((10, 2))
 
-    a_posteriori_test = APosteriori([create_base_classifier(return_value=1)],
-                                    k=k_ex_kuncheva)
-    a_posteriori_test.fit(dsel_processed_kuncheva,
-                          y_dsel_ex_kuncheva_dependent)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_kuncheva
-    a_posteriori_test.dsel_scores_ = dsel_scores_ex_kuncheva
+    a_posteriori_test = APosteriori(k=example_kuncheva['k'])
+    a_posteriori_test.fit(example_kuncheva['dsel_processed'],
+                          example_kuncheva['y_dependent'])
+    a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
+    a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
 
-    a_posteriori_test.n_classes_ = n_classes_ex_kuncheva
+    a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 
     # repeating the same matrix in a new axis to simulate a batch input.
-    neighbors = np.tile(neighbors_ex_kuncheva, (10, 1))
-    distances = np.tile(distances_ex_kuncheva, (10, 1))
+    neighbors = np.tile(example_kuncheva['neighbors'], (10, 1))
+    distances = np.tile(example_kuncheva['distances'], (10, 1))
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.ones((1, 10))
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
                                                             predictions))
     assert np.allclose(competences, 0.95, atol=0.01)


-# in this test case, the target of the neighbors is always different than the
-# predicted. So the estimation of competence should always be zero
+# in this test case, the target of the neighbors is always different
+# than the predicted. So
+# the estimation of competence should always be zero
 @pytest.mark.parametrize('index', [0, 1, 2])
-def test_estimate_competence_diff_target(index):
+def test_estimate_competence_diff_target(index, example_all_ones):
+    _, _, neighbors, distances, dsel_processed, _ = example_all_ones
 
     query = np.atleast_2d([1, 1])
-    pool_classifiers = create_pool_classifiers()
-    a_posteriori_test = APosteriori(pool_classifiers=pool_classifiers)
-    a_posteriori_test.n_classifiers_ = len(pool_classifiers)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_ex1
+    a_posteriori_test = APosteriori()
+    a_posteriori_test.n_classifiers_ = 3
+    a_posteriori_test.DSEL_processed_ = dsel_processed
     a_posteriori_test.dsel_scores_ = np.ones((15, 3, 3))
     a_posteriori_test.DSEL_target_ = np.ones(15, dtype=int) * 2
     a_posteriori_test.n_classes_ = 2
 
-    neighbors = neighbors_ex1[index, :].reshape(1, -1)
-    distances = distances_all_ones[index, :].reshape(1, -1)
+    neighbors = neighbors[index, :].reshape(1, -1)
+    distances = distances[index, :].reshape(1, -1)
 
     expected = [0.0, 0.0, 0.0]
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([0, 1, 0])
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
@@ -119,22 +114,21 @@ def test_estimate_competence_diff_target(index):


 # Check if the fit method is pre-calculating the classifier scores correctly
-def test_fit():
-    a_posteriori_test = APosteriori(create_pool_classifiers())
-    a_posteriori_test.fit(X_dsel_ex1, y_dsel_ex1)
+def test_fit(create_X_y, create_pool_classifiers):
+    X, y = create_X_y
+    a_posteriori_test = APosteriori(create_pool_classifiers)
+    a_posteriori_test.fit(X, y)
     expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
     expected = np.tile(expected, (15, 1, 1))
     assert np.array_equal(a_posteriori_test.dsel_scores_, expected)
 
 
 # Test if the class is raising an error when the base classifiers do not
-# implements the predict_proba method.
-# Should raise an exception when the base classifier cannot estimate posterior
-# probabilities (predict_proba). Using Perceptron classifier as it does not
-# implements the predict_proba method.
-def test_not_predict_proba():
-    X = X_dsel_ex1
-    y = y_dsel_ex1
+# implements the predict_proba method. Should raise an exception when the
+# base classifier cannot estimate posterior probabilities (predict_proba)
+# Using Perceptron classifier as it does not implements predict_proba.
+def test_not_predict_proba(create_X_y):
+    X, y = create_X_y
     clf1 = Perceptron()
     clf1.fit(X, y)
     with pytest.raises(ValueError):
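The first test in this file, test_check_estimator, is collapsed in the diff above; it runs sklearn's estimator-compliance suite. A hedged sketch of the usual shape of such a test (the exact deslib body may differ, and check_estimator's accepted argument type varies across sklearn versions):

from sklearn.utils.estimator_checks import check_estimator

from deslib.dcs.a_posteriori import APosteriori


def test_check_estimator():
    # Runs sklearn's battery of API checks (fit/predict contracts, input
    # validation, cloning) against the estimator.
    check_estimator(APosteriori)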