Cleaning unittest (#129)
* moving to tests/util folder

* Organizing test examples. Creating setup routines for test examples

* Changing test example variables to fixtures (see the sketch after the change summary below)

* PEP8 formatting

* PEP8 formatting

* PEP8 fix (line length)

* PEP8 fixes

* fixing errors

* updating unittests

* Adding a test to check whether the meta-estimator implements the predict_proba function

* Removing unused test case

* Fixing a bug in the error raised when the classifier was not fitted yet
Menelau authored Dec 6, 2018
1 parent 5e3f6a0 commit ff20a1a
Showing 34 changed files with 1,197 additions and 1,194 deletions.
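
The central refactor, before the per-file diffs: module-level test data that used to be star-imported from examples_test.py becomes pytest fixtures in conftest.py, injected into tests by parameter name. A minimal sketch of the pattern, with invented names (toy_X_y is hypothetical; it only mirrors the create_X_y fixture added below), not code taken from the diff:

import numpy as np
import pytest


# Before: a module-level variable in examples_test.py, pulled in everywhere
# via `from deslib.tests.examples_test import *`.
#
# X_dsel_ex1 = np.array(...)

# After: a fixture in conftest.py; pytest discovers it automatically and
# passes it to any test that lists the fixture name as an argument.
@pytest.fixture
def toy_X_y():  # hypothetical stand-in for create_X_y
    X = np.array([[-1.0, 1.0], [1.0, 1.0]])
    y = np.array([0, 1])
    return X, y


def test_shapes_match(toy_X_y):
    X, y = toy_X_y
    assert len(X) == len(y)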
6 changes: 0 additions & 6 deletions deslib/static/base.py
@@ -94,12 +94,6 @@ class labels of each example in X.

         return self
 
-    def _check_is_fitted(self):
-        """Verify if the estimator algorithm was fitted. Raises an error if it
-        is not fitted.
-        """
-        check_is_fitted(self, "estimator_")
-
     def _setup_label_encoder(self, y):
         """
         Setup the label encoder
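The deleted _check_is_fitted wrapper added nothing over calling sklearn's check_is_fitted at the point of use, which raises NotFittedError on its own. A hedged sketch of the resulting idiom; the class and attribute names here are illustrative, not deslib code:

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted


class ToyStaticEnsemble(BaseEstimator, ClassifierMixin):
    """Illustrative only -- not a deslib class."""

    def fit(self, X, y):
        # Trailing-underscore attributes mark a fitted estimator; this is
        # what check_is_fitted inspects.
        self.estimator_ = object()
        return self

    def predict(self, X):
        # Raises sklearn.exceptions.NotFittedError if fit was never called,
        # with no private wrapper method needed.
        check_is_fitted(self, "estimator_")
        return [0] * len(X)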
5 changes: 3 additions & 2 deletions deslib/static/stacked.py
@@ -114,12 +114,13 @@ def predict_proba(self, X):
         Predicted class for each sample in X.
         """
         X = check_array(X)
-        check_is_fitted(self, "meta_classifier_")
+
         # Check if the meta-classifier can output probabilities
-        if not hasattr(self, "predict_proba"):
+        if not hasattr(self.meta_classifier_, "predict_proba"):
             raise ValueError("Meta-classifier does not implement the"
                              " predict_proba method.")
+        check_is_fitted(self, "meta_classifier_")
 
         base_preds = self._predict_proba_base(X)
         return self.meta_classifier_.predict_proba(base_preds)

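Per the commit notes, a test now covers a meta-estimator that lacks predict_proba. A minimal sketch of how such a guard can be exercised; predict_proba_via_meta is an invented helper standing in for the guarded call in stacked.py above, while Perceptron genuinely has no predict_proba:

import numpy as np
import pytest
from sklearn.linear_model import Perceptron


def predict_proba_via_meta(meta_clf, meta_features):
    # Hypothetical stand-in for the guarded call above (not deslib code).
    if not hasattr(meta_clf, "predict_proba"):
        raise ValueError("Meta-classifier does not implement the"
                         " predict_proba method.")
    return meta_clf.predict_proba(meta_features)


def test_guard_rejects_non_probabilistic_meta():
    X = np.array([[0.0, 0.0], [1.0, 1.0]] * 5)
    y = np.array([0, 1] * 5)
    meta = Perceptron().fit(X, y)  # Perceptron exposes no predict_proba
    with pytest.raises(ValueError):
        predict_proba_via_meta(meta, X)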
2 changes: 1 addition & 1 deletion deslib/tests/__init__.py
@@ -1 +1 @@
-from .examples_test import *
+from .conftest import *
161 changes: 161 additions & 0 deletions deslib/tests/conftest.py
@@ -0,0 +1,161 @@
from unittest.mock import MagicMock
import numpy as np
import pytest


@pytest.fixture
def example_estimate_competence(create_X_y):
    X, y = create_X_y

    # Pre-processed results on DSEL. This information is used by the majority
    # of DS techniques to estimate the classifier competence.
    dsel_processed = np.array(
        [[1, 1, 1], [0, 0, 1], [0, 0, 1], [1, 1, 0], [1, 1, 1], [0, 0, 1],
         [0, 0, 0], [1, 1, 1],
         [1, 1, 0], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [1, 1, 1],
         [1, 1, 1]])

    # pre-calculated indices of 7 Nearest neighbors for competence estimation.
    neighbors = np.array([[8, 11, 4, 7, 13, 10, 1],
                          [7, 1, 11, 13, 0, 8, 4],
                          [5, 3, 4, 8, 10, 11, 7]])

    # Scores obtained for the two classes. This information is used by the
    # techniques based on posterior probabilities
    dsel_scores = np.tile(np.array([[1.0, 0.0], [0.5, 0.5], [0.8, 0.2]]),
                          (15, 1, 1))

    # Distance information is used by the probabilistic techniques
    # (des.probabilistic) as well as the MLA, A Priori and A Posteriori methods
    distances = np.array([[0.35355339, 0.35355339, 0.55901699, 0.79056942,
                           0.79056942, 0.90138782, 1.03077641],
                          [0.3, 0.39051248, 0.53851648, 0.86023253, 0.94339811,
                           1.04403065, 1.28549601],
                          [0.70710678, 1.41421356, 1.95256242, 2.12132034,
                           2.79508497, 2.82842712, 2.91547595]])

    return X, y, neighbors, distances, dsel_processed, dsel_scores


@pytest.fixture
def create_X_y():
    # ex1: The distribution of samples of a test example.
    X = np.array([[-1, 1], [-0.75, 0.5], [-1.5, 1.5],
                  [1, 1], [0.75, 0.5], [1.5, 1.5],
                  [1, -1], [-0.5, 0.5], [0.5, 0.5],
                  [0, -1], [0.75, -0.5], [0.0, 0.0],
                  [-1, -1], [0, -0.5], [1, -1]])
    # Labels associated with the samples. This information is used
    # by techniques based on a posteriori information.
    y = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0])
    return X, y


# ----- Test Example all ones ------
@pytest.fixture
def example_all_ones(example_estimate_competence):
    X, y, neighbors = example_estimate_competence[0:3]
    dsel_processed = np.ones((15, 3))
    dsel_scores = np.ones((15, 3, 2))
    distances = np.ones((3, 7))

    return X, y, neighbors, distances, dsel_processed, dsel_scores


# ----- Test Example from Combining pattern classifiers ------
# This example is used to test the results of the A priori,
# A posteriori and MLA techniques
@pytest.fixture
def example_kuncheva():
    distances = np.linspace(1, 15, num=15)

    # 10 neighbors used in the example
    neighbors = np.linspace(0, 14, num=15, dtype=int)

    # target class of each example. independent means that it should be
    # used by the a priori technique
    y_independent = np.array([2, 1, 2, 2, 3, 1, 2, 1, 3, 3, 2, 1, 2, 2, 1]) - 1

    # dependent means that it should be used by the a posteriori technique
    y_dependent = np.array([1, 0, 1, 1, 2, 0, 1, 0, 0, 2, 1, 2, 1, 1, 0])

    # Predictions of the base classifier ci. Used to estimate its competence
    # level for the A Posteriori
    classifier_pred = np.array(
        [2, 3, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 2, 2, 1]) - 1

    # whether or not the base classifier made the correct prediction for each
    # sample in dsel
    dsel_processed = np.transpose(
        np.array([[1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1]]))

    # In this example we consider that the posteriori is always 1 for the
    # predicted class (15 samples, 1 classifier, 3 classes)
    dsel_scores = np.array([[[0.0, 1.0, 0.0],
                             [0.0, 0.0, 1.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [1.0, 0.0, 0.0],
                             [1.0, 0.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 0.0, 1.0],
                             [0.0, 0.0, 1.0],
                             [1.0, 0.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [1.0, 0.0, 0.0]]]).reshape(15, 1, 3)

    k = 15
    n_classes = 3
    dict_return = {"dsel_processed": dsel_processed,
                   "dsel_scores": dsel_scores,
                   "distances": distances,
                   "neighbors": neighbors,
                   "classifier_pred": classifier_pred,
                   "y_dependent": y_dependent,
                   "y_independent": y_independent,
                   "n_classes": n_classes,
                   "k": k}

    return dict_return


# ----- Routines to generate a pool of classifiers using MagicMock ------
def create_base_classifier(return_value, return_prob=None):
    classifier = MagicMock()
    classifier.predict.return_value = [return_value]
    classifier.predict_proba.return_value = return_prob
    return classifier


@pytest.fixture
def create_pool_classifiers():
    clf_0 = create_base_classifier(return_value=0,
                                   return_prob=np.atleast_2d([0.5, 0.5]))
    clf_1 = create_base_classifier(return_value=1,
                                   return_prob=np.atleast_2d([1.0, 0.0]))
    clf_2 = create_base_classifier(return_value=0,
                                   return_prob=np.atleast_2d([0.33, 0.67]))
    pool_classifiers = [clf_0, clf_1, clf_2]
    return pool_classifiers


@pytest.fixture
def create_pool_all_agree():
    return [create_base_classifier(return_value=0)] * 100


@pytest.fixture
def example_static_selection(create_X_y):
    X, y = create_X_y
    pool1 = [create_base_classifier(return_value=0)] * 50
    pool2 = [create_base_classifier(return_value=1)] * 50
    for clf in pool1:
        clf.score = MagicMock(return_value=0.5)
    for clf in pool2:
        clf.score = MagicMock(return_value=1.0)

    pool = pool1 + pool2
    return X, y, pool
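
A note on the mechanics, since the star import is gone: fixtures defined in conftest.py are discovered by pytest automatically, so test modules simply name them as parameters. A hedged usage sketch against the fixtures above (the test names are invented):

import numpy as np


def test_create_X_y_shapes(create_X_y):
    # pytest resolves create_X_y from conftest.py by name; no import needed.
    X, y = create_X_y
    assert X.shape == (15, 2)
    assert y.shape == (15,)


def test_all_agree_pool_predicts_zero(create_pool_all_agree):
    pool = create_pool_all_agree
    assert len(pool) == 100
    # every mocked classifier answers [0] regardless of the input
    assert all(clf.predict(np.zeros((1, 2)))[0] == 0 for clf in pool)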
114 changes: 54 additions & 60 deletions deslib/tests/dcs/test_a_posteriori.py
@@ -1,9 +1,9 @@
 import numpy as np
 import pytest
 from sklearn.linear_model import Perceptron
+from sklearn.utils.estimator_checks import check_estimator
 
 from deslib.dcs.a_posteriori import APosteriori
-from deslib.tests.examples_test import *
-from sklearn.utils.estimator_checks import check_estimator
 
 
 def test_check_estimator():
@@ -12,22 +12,22 @@ def test_check_estimator():

 # Should always be 1.0 since the supports for the correct class is always 1.
 @pytest.mark.parametrize('index', [0, 1, 2])
-def test_estimate_competence_all_ones(index):
+def test_estimate_competence_all_ones(index, example_all_ones):
+    _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
 
     query = np.atleast_2d([1, 1])
 
-    a_posteriori_test = APosteriori(create_pool_classifiers())
-    a_posteriori_test.fit(X_dsel_ex1, y_dsel_ex1)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_ex1
-    a_posteriori_test.dsel_scores_ = dsel_scores_all_ones
+    a_posteriori_test = APosteriori()
+    a_posteriori_test.n_classifiers_ = 3
+    a_posteriori_test.DSEL_processed_ = dsel_processed
+    a_posteriori_test.dsel_scores_ = dsel_scores
+    a_posteriori_test.DSEL_target_ = y
 
-    neighbors = neighbors_ex1[index, :].reshape(1, -1)
-    distances = distances_all_ones[index, :].reshape(1, -1)
+    neighbors = neighbors[index, :].reshape(1, -1)
+    distances = distances[index, :].reshape(1, -1)
 
     expected = [1.0, 1.0, 1.0]
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([0, 1, 0])
 
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
@@ -37,24 +37,22 @@ def test_estimate_competence_all_ones(index):


 # Testing example from kuncheva's book (combining pattern classifiers)
-def test_estimate_competence_kuncheva_ex():
+def test_estimate_competence_kuncheva_ex(example_kuncheva):
     query = np.atleast_2d([1, 1])
 
-    a_posteriori_test = APosteriori([create_base_classifier(return_value=1)],
-                                    k=k_ex_kuncheva)
+    a_posteriori_test = APosteriori(k=example_kuncheva['k'])
+    a_posteriori_test.n_classifiers_ = 1
 
-    a_posteriori_test.DSEL_processed_ = dsel_processed_kuncheva
-    a_posteriori_test.dsel_scores_ = dsel_scores_ex_kuncheva
-    a_posteriori_test.DSEL_target_ = y_dsel_ex_kuncheva_dependent
-    a_posteriori_test.n_classes_ = n_classes_ex_kuncheva
+    a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
+    a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
+    a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent']
+    a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 
-    neighbors = neighbors_ex_kuncheva.reshape(1, -1)
-    distances = distances_ex_kuncheva.reshape(1, -1)
+    neighbors = example_kuncheva['neighbors'].reshape(1, -1)
+    distances = example_kuncheva['distances'].reshape(1, -1)
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([[1]])
+
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
@@ -63,54 +61,51 @@ def test_estimate_competence_kuncheva_ex():


 # Testing example from kuncheva's book (combining pattern classifiers)
-def test_estimate_competence_kuncheva_ex_batch():
+def test_estimate_competence_kuncheva_ex_batch(example_kuncheva):
     # considering a batch composed of 10 samples
     query = np.ones((10, 2))
 
-    a_posteriori_test = APosteriori([create_base_classifier(return_value=1)],
-                                    k=k_ex_kuncheva)
-    a_posteriori_test.fit(dsel_processed_kuncheva,
-                          y_dsel_ex_kuncheva_dependent)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_kuncheva
-    a_posteriori_test.dsel_scores_ = dsel_scores_ex_kuncheva
+    a_posteriori_test = APosteriori(k=example_kuncheva['k'])
+    a_posteriori_test.fit(example_kuncheva['dsel_processed'],
+                          example_kuncheva['y_dependent'])
+    a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
+    a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
 
-    a_posteriori_test.n_classes_ = n_classes_ex_kuncheva
+    a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 
     # repeating the same matrix in a new axis to simulate a batch input.
-    neighbors = np.tile(neighbors_ex_kuncheva, (10, 1))
-    distances = np.tile(distances_ex_kuncheva, (10, 1))
+    neighbors = np.tile(example_kuncheva['neighbors'], (10, 1))
+    distances = np.tile(example_kuncheva['distances'], (10, 1))
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.ones((1, 10))
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
                                                             predictions))
     assert np.allclose(competences, 0.95, atol=0.01)


-# in this test case, the target of the neighbors is always different than the
-# predicted. So the estimation of competence should always be zero
+# in this test case, the target of the neighbors is always different
+# than the predicted. So
+# the estimation of competence should always be zero
 @pytest.mark.parametrize('index', [0, 1, 2])
-def test_estimate_competence_diff_target(index):
+def test_estimate_competence_diff_target(index, example_all_ones):
+    _, _, neighbors, distances, dsel_processed, _ = example_all_ones
 
     query = np.atleast_2d([1, 1])
-    pool_classifiers = create_pool_classifiers()
-    a_posteriori_test = APosteriori(pool_classifiers=pool_classifiers)
-    a_posteriori_test.n_classifiers_ = len(pool_classifiers)
-    a_posteriori_test.DSEL_processed_ = dsel_processed_ex1
+    a_posteriori_test = APosteriori()
+    a_posteriori_test.n_classifiers_ = 3
+    a_posteriori_test.DSEL_processed_ = dsel_processed
     a_posteriori_test.dsel_scores_ = np.ones((15, 3, 3))
     a_posteriori_test.DSEL_target_ = np.ones(15, dtype=int) * 2
     a_posteriori_test.n_classes_ = 2
 
-    neighbors = neighbors_ex1[index, :].reshape(1, -1)
-    distances = distances_all_ones[index, :].reshape(1, -1)
+    neighbors = neighbors[index, :].reshape(1, -1)
+    distances = distances[index, :].reshape(1, -1)
 
     expected = [0.0, 0.0, 0.0]
 
-    predictions = []
-    for clf in a_posteriori_test.pool_classifiers:
-        predictions.append(clf.predict(query)[0])
+    predictions = np.array([0, 1, 0])
     competences = a_posteriori_test.estimate_competence(query, neighbors,
                                                         distances,
                                                         predictions=np.array(
@@ -119,22 +114,21 @@ def test_estimate_competence_diff_target(index):


 # Check if the fit method is pre-calculating the classifier scores correctly
-def test_fit():
-    a_posteriori_test = APosteriori(create_pool_classifiers())
-    a_posteriori_test.fit(X_dsel_ex1, y_dsel_ex1)
+def test_fit(create_X_y, create_pool_classifiers):
+    X, y = create_X_y
+    a_posteriori_test = APosteriori(create_pool_classifiers)
+    a_posteriori_test.fit(X, y)
     expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
     expected = np.tile(expected, (15, 1, 1))
     assert np.array_equal(a_posteriori_test.dsel_scores_, expected)
 
 
 # Test if the class is raising an error when the base classifiers do not
-# implements the predict_proba method.
-# Should raise an exception when the base classifier cannot estimate posterior
-# probabilities (predict_proba). Using Perceptron classifier as it does not
-# implements the predict_proba method.
-def test_not_predict_proba():
-    X = X_dsel_ex1
-    y = y_dsel_ex1
+# implements the predict_proba method. Should raise an exception when the
+# base classifier cannot estimate posterior probabilities (predict_proba)
+# Using Perceptron classifier as it does not implements predict_proba.
+def test_not_predict_proba(create_X_y):
+    X, y = create_X_y
     clf1 = Perceptron()
     clf1.fit(X, y)
     with pytest.raises(ValueError):
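The first test in this file, test_check_estimator, is collapsed in the diff above; it runs sklearn's estimator-compliance suite. A hedged sketch of the usual shape of such a test (the exact deslib body may differ, and check_estimator's accepted argument type varies across sklearn versions):

from sklearn.utils.estimator_checks import check_estimator

from deslib.dcs.a_posteriori import APosteriori


def test_check_estimator():
    # Runs sklearn's battery of API checks (fit/predict contracts, input
    # validation, cloning) against the estimator.
    check_estimator(APosteriori)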