diff --git a/dice_ml/explainer_interfaces/dice_KD.py b/dice_ml/explainer_interfaces/dice_KD.py
index c2487dbd..a3783379 100644
--- a/dice_ml/explainer_interfaces/dice_KD.py
+++ b/dice_ml/explainer_interfaces/dice_KD.py
@@ -77,7 +77,7 @@ def _generate_counterfactuals(self, query_instance, total_CFs, desired_range=Non
         test_pred = self.predict_fn(query_instance)[0]
         query_instance[self.data_interface.outcome_name] = test_pred
-        self.misc_init(stopping_threshold, desired_class, desired_range, test_pred)
+        desired_class = self.misc_init(stopping_threshold, desired_class, desired_range, test_pred)
         if desired_range != None:
             if desired_range[0] > desired_range[1]:
                 raise ValueError("Invalid Range!")
@@ -106,6 +106,7 @@ def _generate_counterfactuals(self, query_instance, total_CFs, desired_range=Non
                                                                stopping_threshold,
                                                                posthoc_sparsity_param,
                                                                posthoc_sparsity_algorithm, verbose)
+        self.cfs_preds = cfs_preds
 
         return exp.CounterfactualExamples(data_interface=self.data_interface,
                                           final_cfs_df=self.final_cfs_df,
diff --git a/dice_ml/explainer_interfaces/dice_genetic.py b/dice_ml/explainer_interfaces/dice_genetic.py
index a84d1cf8..246ab908 100644
--- a/dice_ml/explainer_interfaces/dice_genetic.py
+++ b/dice_ml/explainer_interfaces/dice_genetic.py
@@ -4,7 +4,6 @@
 """
 from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
-import math
 import numpy as np
 import pandas as pd
 import random
@@ -151,7 +150,7 @@ def do_KD_init(self, features_to_vary, query_instance, cfs, desired_class, desir
             remaining_cfs = self.do_random_init(self.population_size - len(uniques), features_to_vary, query_instance, desired_class, desired_range)
             self.cfs = np.concatenate([uniques, remaining_cfs])
 
-    def do_cf_initializations(self, total_CFs, initialization, algorithm, features_to_vary, permitted_range, desired_range, desired_class, query_instance, query_instance_df_dummies, verbose):
+    def do_cf_initializations(self, total_CFs, initialization, algorithm, features_to_vary, desired_range, desired_class, query_instance, query_instance_df_dummies, verbose):
         """Intializes CFs and other related variables."""
         self.cf_init_weights = [total_CFs, algorithm, features_to_vary]
@@ -195,7 +194,9 @@ def do_param_initializations(self, total_CFs, initialization, desired_range, des
         self.feature_range = self.get_valid_feature_range(normalized=False)
 
         if len(self.cfs) != total_CFs:
-            self.do_cf_initializations(total_CFs, initialization, algorithm, features_to_vary, permitted_range, desired_range, desired_class, query_instance, query_instance_df_dummies, verbose)
+            self.do_cf_initializations(total_CFs, initialization, algorithm, features_to_vary, desired_range, desired_class, query_instance, query_instance_df_dummies, verbose)
+        else:
+            self.total_CFs = total_CFs
 
         self.do_loss_initializations(yloss_type, diversity_loss_type, feature_weights, encoding='label')
         self.update_hyperparameters(proximity_weight, sparsity_weight, diversity_weight, categorical_penalty)
@@ -247,7 +248,7 @@ def _generate_counterfactuals(self, query_instance, total_CFs, initialization="k
         test_pred = self.predict_fn(query_instance)
         self.test_pred = test_pred
-        self.misc_init(stopping_threshold, desired_class, desired_range, test_pred)
+        desired_class = self.misc_init(stopping_threshold, desired_class, desired_range, test_pred)
 
         query_instance_df_dummies = pd.get_dummies(query_instance_orig)
         for col in pd.get_dummies(self.data_interface.data_df[self.data_interface.feature_names]).columns:
@@ -256,7 +257,7 @@ def _generate_counterfactuals(self, query_instance, total_CFs, initialization="k
         self.do_param_initializations(total_CFs, initialization, desired_range, desired_class, query_instance, query_instance_df_dummies, algorithm, features_to_vary, permitted_range, yloss_type, diversity_loss_type, feature_weights, proximity_weight, sparsity_weight, diversity_weight, categorical_penalty, verbose)
 
-        query_instance_df = self.find_counterfactuals(query_instance, desired_range, desired_class, features_to_vary, stopping_threshold, posthoc_sparsity_param, posthoc_sparsity_algorithm, maxiterations, thresh, verbose)
+        query_instance_df = self.find_counterfactuals(query_instance, desired_range, desired_class, features_to_vary, maxiterations, thresh, verbose)
 
         return exp.CounterfactualExamples(data_interface=self.data_interface,
                                           test_instance_df=query_instance_df,
@@ -357,7 +358,7 @@ def mate(self, k1, k2, features_to_vary, query_instance):
                 one_init[j] = query_instance[j]
         return one_init
 
-    def find_counterfactuals(self, query_instance, desired_range, desired_class, features_to_vary, stopping_threshold, posthoc_sparsity_param, posthoc_sparsity_algorithm, maxiterations, thresh, verbose):
+    def find_counterfactuals(self, query_instance, desired_range, desired_class, features_to_vary, maxiterations, thresh, verbose):
         """Finds counterfactuals by generating cfs through the genetic algorithm"""
         population = self.cfs.copy()
         iterations = 0
@@ -367,7 +368,7 @@ def find_counterfactuals(self, query_instance, desired_range, desired_class, fea
         cfs_preds = [np.inf]*self.total_CFs
         to_pred = None
 
-        while iterations < maxiterations or len(population) == self.total_CFs:
+        while iterations < maxiterations and self.total_CFs > 0:
             if abs(previous_best_loss - current_best_loss) <= thresh and (self.model.model_type == 'classifier' and all(i == desired_class for i in cfs_preds) or (self.model.model_type == 'regressor' and all(desired_range[0] <= i <= desired_range[1] for i in cfs_preds))):
                 stop_cnt += 1
             else:
diff --git a/dice_ml/explainer_interfaces/explainer_base.py b/dice_ml/explainer_interfaces/explainer_base.py
index 89bf9012..dba992e7 100644
--- a/dice_ml/explainer_interfaces/explainer_base.py
+++ b/dice_ml/explainer_interfaces/explainer_base.py
@@ -4,13 +4,8 @@
 import numpy as np
 import pandas as pd
-import random
-import timeit
-import copy
 from collections.abc import Iterable
 from sklearn.neighbors import KDTree
-
-import dice_ml.diverse_counterfactuals as exp
 from dice_ml.counterfactual_explanations import CounterfactualExplanations
 from dice_ml.utils.exception import UserConfigValidationException
@@ -444,6 +439,7 @@ def misc_init(self, stopping_threshold, desired_class, desired_range, test_pred)
         elif self.model.model_type == 'regressor':
             self.target_cf_range = self.infer_target_cfs_range(desired_range)
+        return desired_class
 
     def infer_target_cfs_class(self, desired_class_input, original_pred,
                                num_output_nodes):
@@ -465,7 +461,7 @@ def infer_target_cfs_class(self, desired_class_input, original_pred,
             if desired_class_input >= 0 and desired_class_input < num_output_nodes:
                 target_class = desired_class_input
             else:
-                raise ValueError("Desired class should be within 0 and num_classes-1.")
+                raise ValueError("Desired class not present in training data!")
             return target_class
 
     def infer_target_cfs_range(self, desired_range_input):
diff --git a/tests/test_dice_interface/test_dice_KD.py b/tests/test_dice_interface/test_dice_KD.py
index 210dfa95..4f51121f 100644
--- a/tests/test_dice_interface/test_dice_KD.py
+++ b/tests/test_dice_interface/test_dice_KD.py
@@ -1,9 +1,7 @@
 import pytest
 import numpy as np
-import pandas as pd
 import dice_ml
 from dice_ml.utils import helpers
-from sklearn.neighbors import KDTree
 
 
 @pytest.fixture
@@ -44,36 +42,15 @@ def _initiate_exp_object(self, KD_binary_classification_exp_object):
         self.exp = KD_binary_classification_exp_object  # explainer object
         self.data_df_copy = self.exp.data_interface.data_df.copy()
 
-    # When no elements in the desired_class are present
-    @pytest.mark.parametrize("desired_range, desired_class, features_to_vary, total_CFs", [(None, 7, "all", 3)])
-    def test_empty_KD(self, desired_range, desired_class, features_to_vary, sample_custom_query_1, total_CFs):
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_1.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_1)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_1,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range=None,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    # When no elements in the desired_class are present in the training data
+    @pytest.mark.parametrize("desired_class, total_CFs", [(1, 3)])
+    def test_empty_KD(self, desired_class, sample_custom_query_1, total_CFs):
+        try:
+            self.exp._generate_counterfactuals(query_instance=sample_custom_query_1, total_CFs=total_CFs,
+                                               desired_class=desired_class)
+            assert False
+        except ValueError:
+            assert True
 
     # When a query's feature value is not within the permitted range and the feature is not allowed to vary
     @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
@@ -91,48 +68,10 @@ def test_invalid_query_instance(self, desired_range, desired_class, sample_custo
             assert True
 
     # Verifying the output of the KD tree
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 1, "all", None)])
-    def test_KD_tree_output(self, desired_range, desired_class, sample_custom_query_1, total_CFs, features_to_vary,
-                            permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
-        self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_1, feature_ranges_orig)
-
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_1.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_1)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_1,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    @pytest.mark.parametrize("desired_class, total_CFs", [(0, 1)])
+    def test_KD_tree_output(self, desired_class, sample_custom_query_1, total_CFs):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_1, desired_class=desired_class,
+                                           total_CFs=total_CFs)
 
         self.exp.final_cfs_df.Numerical = self.exp.final_cfs_df.Numerical.astype(int)
         expected_output = self.exp.data_interface.data_df
@@ -140,45 +79,10 @@ def test_KD_tree_output(self, desired_range, desired_class, sample_custom_query_
                all(self.exp.final_cfs_df.Categorical == expected_output.Categorical[0])
 
     # Testing that the features_to_vary argument actually varies only the features that you wish to vary
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 1, ["Numerical"], None)])
-    def test_features_to_vary(self, desired_range, desired_class, sample_custom_query_2, total_CFs, features_to_vary,
-                              permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
-        self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_2, feature_ranges_orig)
-
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_2.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_2)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_2,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    @pytest.mark.parametrize("desired_class, total_CFs, features_to_vary", [(0, 1, ["Numerical"])])
+    def test_features_to_vary(self, desired_class, sample_custom_query_2, total_CFs, features_to_vary):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_2, desired_class=desired_class,
+                                           total_CFs=total_CFs, features_to_vary=features_to_vary)
 
         self.exp.final_cfs_df.Numerical = self.exp.final_cfs_df.Numerical.astype(int)
         expected_output = self.exp.data_interface.data_df
@@ -186,169 +90,37 @@ def test_features_to_vary(self, desired_range, desired_class, sample_custom_quer
                all(self.exp.final_cfs_df.Categorical == expected_output.Categorical[1])
 
     # Testing that the permitted_range argument actually varies the features only within the permitted_range
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 1, "all", {'Numerical': [1000, 10000]})])
-    def test_permitted_range(self, desired_range, desired_class, sample_custom_query_2, total_CFs, features_to_vary,
-                             permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
-        self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_2, feature_ranges_orig)
-
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_2.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_2)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_2,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    @pytest.mark.parametrize("desired_class, total_CFs, permitted_range", [(0, 1, {'Numerical': [1000, 10000]})])
+    def test_permitted_range(self, desired_class, sample_custom_query_2, total_CFs, permitted_range):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_2, desired_class=desired_class,
+                                           total_CFs=total_CFs, permitted_range=permitted_range)
 
         self.exp.final_cfs_df.Numerical = self.exp.final_cfs_df.Numerical.astype(int)
         expected_output = self.exp.data_interface.data_df
-
         assert all(self.exp.final_cfs_df.Numerical == expected_output.Numerical[1]) and \
                all(self.exp.final_cfs_df.Categorical == expected_output.Categorical[1])
 
     # Testing if you can provide permitted_range for categorical variables
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 4, "all", {'Categorical': ['b', 'c']})])
-    def test_permitted_range_categorical(self, desired_range, desired_class, sample_custom_query_2, total_CFs,
-                                         features_to_vary,
-                                         permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
-        self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_2, feature_ranges_orig)
-
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_2.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_2)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_2,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    @pytest.mark.parametrize("desired_class, total_CFs, permitted_range", [(0, 4, {'Categorical': ['b', 'c']})])
+    def test_permitted_range_categorical(self, desired_class, sample_custom_query_2, total_CFs, permitted_range):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_2, desired_class=desired_class,
+                                           total_CFs=total_CFs, permitted_range=permitted_range)
 
         assert all(i in permitted_range["Categorical"] for i in self.exp.final_cfs_df.Categorical.values)
 
     # Testing if an error is thrown when the query instance has an unknown categorical variable
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 1, "all", None)])
-    def test_query_instance_outside_bounds(self, desired_range, desired_class, sample_custom_query_3, total_CFs,
-                                           features_to_vary,
-                                           permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
+    @pytest.mark.parametrize("desired_class, total_CFs", [(0, 1)])
+    def test_query_instance_outside_bounds(self, desired_class, sample_custom_query_3, total_CFs):
         try:
-            self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_3, feature_ranges_orig)
+            self.exp._generate_counterfactuals(query_instance=sample_custom_query_3, total_CFs=total_CFs,
+                                               desired_class=desired_class)
             assert False
         except ValueError:
             assert True
 
     # Ensuring that there are no duplicates in the resulting counterfactuals even if the dataset has duplicates
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 0, 2, "all", None)])
-    def test_duplicates(self, desired_range, desired_class, sample_custom_query_4, total_CFs, features_to_vary,
-                        permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp.feature_range = self.exp.data_interface.permitted_range
-            feature_ranges_orig = self.exp.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp.feature_range, feature_ranges_orig = self.exp.data_interface.get_features_range(permitted_range)
-
-        self.exp.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_4, feature_ranges_orig)
-
-        self.exp.dataset_with_predictions, self.exp.KD_tree, self.exp.predictions = \
-            self.exp.build_KD_tree(self.data_df_copy, desired_range, desired_class, self.exp.predicted_outcome_name)
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_4.copy()
-        query_instance = self.exp.data_interface.prepare_query_instance(query_instance=sample_custom_query_4)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp.predict_fn(query_instance)[0]
-
-        query_instance[self.exp.data_interface.outcome_name] = test_pred
-
-        # find the predicted value of query_instance
-        self.exp.misc_init(stopping_threshold=0.5, desired_class=desired_class, desired_range=desired_range, test_pred=test_pred)
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp.find_counterfactuals(self.data_df_copy,
-                                                                   sample_custom_query_4,
-                                                                   sample_custom_query_orig,
-                                                                   desired_range,
-                                                                   desired_class,
-                                                                   total_CFs, features_to_vary,
-                                                                   permitted_range,
-                                                                   sparsity_weight=1,
-                                                                   stopping_threshold=0.5,
-                                                                   posthoc_sparsity_param=0.1,
-                                                                   posthoc_sparsity_algorithm='binary',
-                                                                   verbose=False)
+    @pytest.mark.parametrize("desired_class, total_CFs", [(0, 2)])
+    def test_duplicates(self, desired_class, sample_custom_query_4, total_CFs):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_4, total_CFs=total_CFs,
+                                           desired_class=desired_class)
 
         self.exp.final_cfs_df.Numerical = self.exp.final_cfs_df.Numerical.astype(int)
         self.exp.final_cfs_df = self.exp.final_cfs_df.reset_index(drop=True)
@@ -358,6 +130,12 @@ def test_duplicates(self, desired_range, desired_class, sample_custom_query_4, t
 
         assert all(self.exp.final_cfs_df == expected_output)
 
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_class, total_CFs", [(0, 0)])
+    def test_zero_cfs(self, desired_class, sample_custom_query_4, total_CFs):
+        self.exp._generate_counterfactuals(query_instance=sample_custom_query_4, total_CFs=total_CFs,
+                                           desired_class=desired_class)
+
 
 class TestDiceKDMultiClassificationMethods:
     @pytest.fixture(autouse=True)
@@ -366,68 +144,17 @@ def _initiate_exp_object(self, KD_multi_classification_exp_object):
         self.data_df_copy = self.exp_multi.data_interface.data_df.copy()
 
     # Testing that the output of multiclass classification lies in the desired_class
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [(None, 2, 3, "all", None)])
-    def test_KD_tree_output(self, desired_range, desired_class, sample_custom_query_2, total_CFs, features_to_vary,
-                            permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp_multi.data_interface.feature_names
+    @pytest.mark.parametrize("desired_class, total_CFs", [(2, 3)])
+    def test_KD_tree_output(self, desired_class, sample_custom_query_2, total_CFs):
+        self.exp_multi._generate_counterfactuals(query_instance=sample_custom_query_2, total_CFs=total_CFs,
+                                                 desired_class=desired_class)
+        assert all(i == desired_class for i in self.exp_multi.cfs_preds)
 
-        if permitted_range is None:  # use the precomputed default
-            self.exp_multi.feature_range = self.exp_multi.data_interface.permitted_range
-            feature_ranges_orig = self.exp_multi.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp_multi.feature_range, feature_ranges_orig = self.exp_multi.data_interface.get_features_range(permitted_range)
-
-        self.exp_multi.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_2, feature_ranges_orig)
-        predictions = [0, 2, 1, 0, 2]
-        predicted_outcome_name = self.exp_multi.data_interface.outcome_name + '_pred'
-        self.data_df_copy[predicted_outcome_name] = predictions
-
-        # segmenting the dataset according to outcome
-        dataset_with_predictions = None
-        if self.exp_multi.model.model_type == 'classifier':
-            dataset_with_predictions = self.data_df_copy.loc[[i == desired_class for i in predictions]].copy()
-
-        elif self.exp_multi.model.model_type == 'regressor':
-            dataset_with_predictions = self.data_df_copy.loc[
-                [desired_range[0] <= pred <= desired_range[1] for pred in predictions]].copy()
-
-        KD_tree = None
-        # Prepares the KD trees for DiCE
-        if len(dataset_with_predictions) > 0:
-            dummies = pd.get_dummies(dataset_with_predictions[self.exp_multi.data_interface.feature_names])
-            KD_tree = KDTree(dummies)
-
-        self.exp_multi.dataset_with_predictions = dataset_with_predictions
-        self.exp_multi.KD_tree = KD_tree
-        self.exp_multi.predictions = predictions
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_2.copy()
-        query_instance = self.exp_multi.data_interface.prepare_query_instance(query_instance=sample_custom_query_2)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp_multi.predict_fn(query_instance)[0]
-
-        query_instance[self.exp_multi.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp_multi.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp_multi.find_counterfactuals(self.data_df_copy,
-                                                                        sample_custom_query_2,
-                                                                        sample_custom_query_orig,
-                                                                        desired_range,
-                                                                        desired_class,
-                                                                        total_CFs, features_to_vary,
-                                                                        permitted_range,
-                                                                        sparsity_weight=1,
-                                                                        stopping_threshold=0.5,
-                                                                        posthoc_sparsity_param=0.1,
-                                                                        posthoc_sparsity_algorithm='binary',
-                                                                        verbose=False)
-        assert all(i == desired_class for i in cfs_preds)
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_class, total_CFs", [(0, 0)])
+    def test_zero_cfs(self, desired_class, sample_custom_query_4, total_CFs):
+        self.exp_multi._generate_counterfactuals(query_instance=sample_custom_query_4, total_CFs=total_CFs,
+                                                 desired_class=desired_class)
 
 
 class TestDiceKDRegressionMethods:
     @pytest.fixture(autouse=True)
@@ -437,72 +164,14 @@ def _initiate_exp_object(self, KD_regression_exp_object):
         self.data_df_copy = self.exp_regr.data_interface.data_df.copy()
 
     # Testing that the output of regression lies in the desired_range
-    @pytest.mark.parametrize("desired_range, desired_class, total_CFs, features_to_vary, permitted_range",
-                             [([1, 2.8], "opposite", 6, "all", None)])
-    def test_KD_tree_output(self, desired_range, desired_class, sample_custom_query_2, total_CFs, features_to_vary,
-                            permitted_range):
-        if features_to_vary == 'all':
-            features_to_vary = self.exp_regr.data_interface.feature_names
-
-        if permitted_range is None:  # use the precomputed default
-            self.exp_regr.feature_range = self.exp_regr.data_interface.permitted_range
-            feature_ranges_orig = self.exp_regr.feature_range
-        else:  # compute the new ranges based on user input
-            self.exp_regr.feature_range, feature_ranges_orig = self.exp_regr.data_interface.get_features_range(permitted_range)
-
-        self.exp_regr.check_query_instance_validity(features_to_vary, permitted_range, sample_custom_query_2, feature_ranges_orig)
-
-        # Stores the predictions on the training data
-        dataset_instance = self.exp_regr.data_interface.prepare_query_instance(
-            query_instance=self.data_df_copy[self.exp_regr.data_interface.feature_names])
-
-        predictions = [1, 2.8, 0.8, 22, 1.7]
-        predicted_outcome_name = self.exp_regr.data_interface.outcome_name + '_pred'
-        self.data_df_copy[predicted_outcome_name] = predictions
-
-        # segmenting the dataset according to outcome
-        dataset_with_predictions = None
-        if self.exp_regr.model.model_type == 'classifier':
-            dataset_with_predictions = self.data_df_copy.loc[[i == desired_class for i in predictions]].copy()
-
-        elif self.exp_regr.model.model_type == 'regressor':
-            dataset_with_predictions = self.data_df_copy.loc[
-                [desired_range[0] <= pred <= desired_range[1] for pred in predictions]].copy()
-
-        KD_tree = None
-        # Prepares the KD trees for DiCE
-        if len(dataset_with_predictions) > 0:
-            dummies = pd.get_dummies(dataset_with_predictions[self.exp_regr.data_interface.feature_names])
-            KD_tree = KDTree(dummies)
-
-        self.exp_regr.dataset_with_predictions = dataset_with_predictions
-        self.exp_regr.KD_tree = KD_tree
-        self.exp_regr.predictions = predictions
-
-        # Prepares user defined query_instance for DiCE.
-        sample_custom_query_orig = sample_custom_query_2.copy()
-        query_instance = self.exp_regr.data_interface.prepare_query_instance(query_instance=sample_custom_query_2)
-
-        # find the predicted value of query_instance
-        test_pred = self.exp_regr.predict_fn(query_instance)[0]
-
-        query_instance[self.exp_regr.data_interface.outcome_name] = test_pred
-
-        if features_to_vary == 'all':
-            features_to_vary = self.exp_regr.data_interface.feature_names
-
-        query_instance, cfs_preds = self.exp_regr.find_counterfactuals(self.data_df_copy,
-                                                                       sample_custom_query_2,
-                                                                       sample_custom_query_orig,
-                                                                       desired_range,
-                                                                       desired_class,
-                                                                       total_CFs, features_to_vary,
-                                                                       permitted_range,
-                                                                       sparsity_weight=1,
-                                                                       stopping_threshold=0.5,
-                                                                       posthoc_sparsity_param=0.1,
-                                                                       posthoc_sparsity_algorithm='binary',
-                                                                       verbose=False)
-        assert all(desired_range[0] <= i <= desired_range[1] for i in cfs_preds)
-
-    # TODO: Test for number of cfs = 0
\ No newline at end of file
+    @pytest.mark.parametrize("desired_range, total_CFs", [([1, 2.8], 6)])
+    def test_KD_tree_output(self, desired_range, sample_custom_query_2, total_CFs):
+        self.exp_regr._generate_counterfactuals(query_instance=sample_custom_query_2, total_CFs=total_CFs,
+                                                desired_range=desired_range)
+        assert all(desired_range[0] <= i <= desired_range[1] for i in self.exp_regr.cfs_preds)
+
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_class, desired_range, total_CFs", [(0, [1, 2.8], 0)])
+    def test_zero_cfs(self, desired_class, desired_range, sample_custom_query_4, total_CFs):
+        self.exp_regr._generate_counterfactuals(query_instance=sample_custom_query_4, total_CFs=total_CFs,
+                                                desired_range=desired_range)
diff --git a/tests/test_dice_interface/test_dice_genetic.py b/tests/test_dice_interface/test_dice_genetic.py
index 4f596770..3f370e56 100644
--- a/tests/test_dice_interface/test_dice_genetic.py
+++ b/tests/test_dice_interface/test_dice_genetic.py
@@ -140,6 +140,15 @@ def test_maxiter(self, desired_class, sample_custom_query_2, total_CFs, initiali
         for i in ans.final_cfs_df[self.exp.data_interface.outcome_name].values:
             assert i == desired_class
 
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_class, total_CFs, initialization",
+                             [(0, 0, "kdtree"), (0, 0, "random")])
+    def test_zero_cfs(self, desired_class, sample_custom_query_2, total_CFs, initialization):
+        features_to_vary = self.exp.setup("all", None, sample_custom_query_2, "inverse_mad")
+        ans = self.exp._generate_counterfactuals(query_instance=sample_custom_query_2,
+                                                 total_CFs=total_CFs, desired_class=desired_class,
+                                                 initialization=initialization)
+
 
 class TestDiceGeneticMultiClassificationMethods:
     @pytest.fixture(autouse=True)
@@ -157,7 +166,7 @@ def test_desired_class(self, desired_class, sample_custom_query_2, total_CFs, in
 
     # Testing if only valid cfs are found after maxiterations
     @pytest.mark.parametrize("desired_class, total_CFs, initialization, maxiterations",
-                             [(0, 7, "kdtree", 0), (0, 7, "random", 0)])
+                             [(2, 7, "kdtree", 0), (2, 7, "random", 0)])
     def test_maxiter(self, desired_class, sample_custom_query_2, total_CFs, initialization, maxiterations):
         features_to_vary = self.exp.setup("all", None, sample_custom_query_2, "inverse_mad")
         ans = self.exp._generate_counterfactuals(query_instance=sample_custom_query_2,
@@ -166,6 +175,15 @@ def test_maxiter(self, desired_class, sample_custom_query_2, total_CFs, initiali
         for i in ans.final_cfs_df[self.exp.data_interface.outcome_name].values:
             assert i == desired_class
 
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_class, total_CFs, initialization",
+                             [(2, 0, "kdtree"), (2, 0, "random")])
+    def test_zero_cfs(self, desired_class, sample_custom_query_2, total_CFs, initialization):
+        features_to_vary = self.exp.setup("all", None, sample_custom_query_2, "inverse_mad")
+        ans = self.exp._generate_counterfactuals(query_instance=sample_custom_query_2,
+                                                 total_CFs=total_CFs, desired_class=desired_class,
+                                                 initialization=initialization)
+
 
 class TestDiceGeneticRegressionMethods:
     @pytest.fixture(autouse=True)
@@ -195,4 +213,11 @@ def test_maxiter(self, desired_range, sample_custom_query_2, total_CFs, initiali
         for i in ans.final_cfs_df[self.exp.data_interface.outcome_name].values:
             assert desired_range[0] <= i <= desired_range[1]
 
-    # TODO: Test for number of cfs = 0
\ No newline at end of file
+    # Testing for 0 CFs needed
+    @pytest.mark.parametrize("desired_range, total_CFs, initialization",
+                             [([1, 2.8], 0, "kdtree"), ([1, 2.8], 0, "random")])
+    def test_zero_cfs(self, desired_range, sample_custom_query_2, total_CFs, initialization):
+        features_to_vary = self.exp.setup("all", None, sample_custom_query_2, "inverse_mad")
+        ans = self.exp._generate_counterfactuals(query_instance=sample_custom_query_2,
+                                                 total_CFs=total_CFs, desired_range=desired_range,
+                                                 initialization=initialization)
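
For orientation, the zero-CF behaviour exercised by the new test_zero_cfs cases can also be reached through the public DiCE API. The sketch below is illustrative only: the toy numeric dataset, the sklearn classifier, and the assumption that generate_counterfactuals forwards total_CFs unchanged to the explainer are not part of this patch.

# Illustrative sketch (not part of the patch): a minimal call that reaches the
# zero-CF path guarded by the new "self.total_CFs > 0" loop condition.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

import dice_ml

rng = np.random.default_rng(0)
df = pd.DataFrame({'x1': rng.normal(size=200), 'x2': rng.normal(size=200)})
df['y'] = (df['x1'] + df['x2'] > 0).astype(int)

clf = RandomForestClassifier(n_estimators=50).fit(df[['x1', 'x2']], df['y'])

d = dice_ml.Data(dataframe=df, continuous_features=['x1', 'x2'], outcome_name='y')
m = dice_ml.Model(model=clf, backend='sklearn')
exp = dice_ml.Dice(d, m, method='genetic')

# Mirrors the new test_zero_cfs tests: with total_CFs=0 the genetic loop is
# skipped, so the request is expected to return without hanging or erroring.
cf = exp.generate_counterfactuals(df[['x1', 'x2']].iloc[[0]],
                                  total_CFs=0, desired_class='opposite')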