Fixed more incorrect code in metrics and evaluator

peastman committed May 24, 2018
1 parent 1fadab6 commit dc4eba9
Showing 4 changed files with 58 additions and 115 deletions.
12 changes: 6 additions & 6 deletions deepchem/metrics/__init__.py
@@ -222,15 +222,11 @@ def compute_metric(self,
     else:
       n_tasks = y_pred.shape[1]
     if w is None or len(w) == 0:
-      w = np.ones_like(y_true)
-    assert y_true.shape[0] == y_pred.shape[0] == w.shape[0]
+      w = np.ones((n_samples, n_tasks))
     computed_metrics = []
     for task in range(n_tasks):
       y_task = y_true[:, task]
-      if self.mode == "regression":
-        y_pred_task = y_pred[:, task]
-      else:
-        y_pred_task = y_pred[:, task]
+      y_pred_task = y_pred[:, task]
       w_task = w[:, task]
 
       metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
@@ -280,6 +276,10 @@ def compute_singletask_metric(self, y_true, y_pred, w):
       return np.nan
     if self.threshold is not None:
       y_pred = np.greater(y_pred, threshold)
+    if len(y_true.shape) == 0:
+      y_true = np.expand_dims(y_true, 0)
+    if len(y_pred.shape) == 0:
+      y_pred = np.expand_dims(y_pred, 0)
     try:
       metric_value = self.metric(y_true, y_pred)
     except (AssertionError, ValueError) as e:
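Note: a minimal sketch of what these two fixes accomplish. The shapes and values below are illustrative, not taken from DeepChem:

```python
import numpy as np

# Fix 1: defaulting weights as an explicit (n_samples, n_tasks) matrix
# guarantees that the per-task slice w[:, task] in compute_metric always
# yields a 1-d vector of per-sample weights.
n_samples, n_tasks = 4, 2
w = np.ones((n_samples, n_tasks))
for task in range(n_tasks):
    w_task = w[:, task]            # always shape (n_samples,)

# Fix 2: compute_singletask_metric now promotes 0-d (scalar) inputs to 1-d
# arrays, since metric functions such as sklearn's expect arrays.
y_true = np.array(1.0)             # 0-d array
if len(y_true.shape) == 0:
    y_true = np.expand_dims(y_true, 0)
print(y_true.shape)                # (1,)
```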
25 changes: 7 additions & 18 deletions deepchem/utils/evaluate.py
@@ -178,6 +178,8 @@ def __init__(self,
     self.weights = weights
     if len(self.label_keys) != len(self.output_keys):
       raise ValueError("Must have same number of labels and outputs")
+    if len(self.label_keys) != 1:
+      raise ValueError("GeneratorEvaluator currently only supports one label")
 
   def compute_model_performance(self, metrics, per_task_metrics=False):
     """
@@ -196,30 +198,17 @@ def compute_model_performance(self, metrics, per_task_metrics=False):
 
     def generator_closure():
       for feed_dict in self.generator:
-        labels = []
-        for layer in self.label_keys:
-          labels.append(feed_dict[layer])
-          del feed_dict[layer]
-        for weight in self.weights:
-          w.append(feed_dict[weight])
-          del feed_dict[weight]
-        y.append(np.array(labels))
+        y.append(feed_dict[self.label_keys[0]])
+        if len(self.weights) > 0:
+          w.append(feed_dict[self.weights[0]])
         yield feed_dict
 
     if not len(metrics):
       return {}
     else:
       mode = metrics[0].mode
-    if mode == "classification":
-      y_pred = self.model.predict_on_generator(generator_closure())
-      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
-      y = np.reshape(y, newshape=(-1, self.n_tasks, self.n_classes))
-      y = from_one_hot(y, axis=-1)
-    else:
-      y_pred = self.model.predict_on_generator(generator_closure())
-      y = np.transpose(np.array(y), axes=[0, 2, 1, 3])
-      y = np.reshape(y, newshape=(-1, self.n_tasks))
-      y_pred = np.reshape(y_pred, newshape=(-1, self.n_tasks))
+    y_pred = self.model.predict_on_generator(generator_closure())
+    y = np.concatenate(y, axis=0)
     multitask_scores = {}
     all_task_scores = {}
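A sketch of the simplified bookkeeping, assuming each feed_dict maps the single label key to a batch of labels shaped (batch_size, n_tasks); the arrays here are placeholders:

```python
import numpy as np

# Placeholder batches, standing in for feed_dict[self.label_keys[0]]
# as collected by generator_closure().
batches = [np.zeros((8, 2)), np.ones((8, 2)), np.full((4, 2), 0.5)]

y = []
for batch in batches:
    y.append(batch)                # one append per yielded feed_dict

# After prediction, the batches are joined along the sample axis; this
# replaces the old mode-dependent transpose/reshape logic.
y = np.concatenate(y, axis=0)
print(y.shape)                     # (20, 2): (n_samples, n_tasks)
```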
134 changes: 44 additions & 90 deletions deepchem/utils/test/test_generator_evaluator.py
@@ -5,7 +5,7 @@
 from deepchem.data import NumpyDataset
 from deepchem.data.datasets import Databag
 from deepchem.models.tensorgraph.layers import Dense, ReduceMean, SoftMax, SoftMaxCrossEntropy
-from deepchem.models.tensorgraph.layers import Feature, Label
+from deepchem.models.tensorgraph.layers import Feature, Label, Reshape
 from deepchem.models.tensorgraph.layers import ReduceSquareDifference
 from nose.tools import assert_true
 from flaky import flaky
@@ -17,6 +17,8 @@ class TestGeneratorEvaluator(TestCase):
   def test_compute_model_performance_multitask_classifier(self):
     n_data_points = 20
     n_features = 1
+    n_tasks = 2
+    n_classes = 2
 
     X = np.ones(shape=(n_data_points // 2, n_features)) * -1
     X1 = np.ones(shape=(n_data_points // 2, n_features))
@@ -25,43 +27,29 @@ def test_compute_model_performance_multitask_classifier(self):
     class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
     y1 = np.concatenate((class_0, class_1))
     y2 = np.concatenate((class_1, class_0))
-    X = NumpyDataset(X)
-    ys = [NumpyDataset(y1), NumpyDataset(y2)]
-
-    databag = Databag()
+    y = np.stack([y1, y2], axis=1)
+    dataset = NumpyDataset(X, y)
 
     features = Feature(shape=(None, n_features))
-    databag.add_dataset(features, X)
-
-    outputs = []
-    entropies = []
-    labels = []
-    for i in range(2):
-      label = Label(shape=(None, 2))
-      labels.append(label)
-      dense = Dense(out_channels=2, in_layers=[features])
-      output = SoftMax(in_layers=[dense])
-      smce = SoftMaxCrossEntropy(in_layers=[label, dense])
-
-      entropies.append(smce)
-      outputs.append(output)
-      databag.add_dataset(label, ys[i])
-
-    total_loss = ReduceMean(in_layers=entropies)
+    label = Label(shape=(None, n_tasks, n_classes))
+    dense = Dense(out_channels=n_tasks * n_classes, in_layers=[features])
+    logits = Reshape(shape=(None, n_tasks, n_classes), in_layers=dense)
+    output = SoftMax(in_layers=[logits])
+    smce = SoftMaxCrossEntropy(in_layers=[label, logits])
+    total_loss = ReduceMean(in_layers=smce)
 
     tg = dc.models.TensorGraph(learning_rate=0.01, batch_size=n_data_points)
-    for output in outputs:
-      tg.add_output(output)
+    tg.add_output(output)
     tg.set_loss(total_loss)
 
-    tg.fit_generator(
-        databag.iterbatches(
-            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
+    tg.fit(dataset, nb_epoch=1000)
     metric = dc.metrics.Metric(
         dc.metrics.roc_auc_score, np.mean, mode="classification")
 
     scores = tg.evaluate_generator(
-        databag.iterbatches(), [metric], labels=labels, per_task_metrics=True)
+        tg.default_generator(dataset), [metric],
+        labels=[label],
+        per_task_metrics=True)
     scores = list(scores[1].values())
     # Loosening atol to see if tests stop failing sporadically
     assert_true(np.all(np.isclose(scores, [1.0, 1.0], atol=0.50)))
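The rewritten test packs both tasks into one Label tensor instead of one Label layer per task. A quick shape check of that packing, using the test's own values:

```python
import numpy as np

n_data_points = 20
class_1 = np.array([[0.0, 1.0] for x in range(n_data_points // 2)])
class_0 = np.array([[1.0, 0.0] for x in range(n_data_points // 2)])
y1 = np.concatenate((class_0, class_1))   # task 0, one-hot, shape (20, 2)
y2 = np.concatenate((class_1, class_0))   # task 1, one-hot, shape (20, 2)

y = np.stack([y1, y2], axis=1)
print(y.shape)                 # (20, 2, 2): samples x tasks x classes
```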
@@ -75,97 +63,63 @@ def test_compute_model_performance_singletask_classifier(self):
     X = np.concatenate((X, X1))
     class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))])
     class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
-    y1 = np.concatenate((class_0, class_1))
-    X = NumpyDataset(X)
-    ys = [NumpyDataset(y1)]
-
-    databag = Databag()
+    y = np.concatenate((class_0, class_1))
+    dataset = NumpyDataset(X, y)
 
     features = Feature(shape=(None, n_features))
-    databag.add_dataset(features, X)
-
-    outputs = []
-    entropies = []
-    labels = []
-    for i in range(1):
-      label = Label(shape=(None, 2))
-      labels.append(label)
-      dense = Dense(out_channels=2, in_layers=[features])
-      output = SoftMax(in_layers=[dense])
-      smce = SoftMaxCrossEntropy(in_layers=[label, dense])
-
-      entropies.append(smce)
-      outputs.append(output)
-      databag.add_dataset(label, ys[i])
-
-    total_loss = ReduceMean(in_layers=entropies)
+    label = Label(shape=(None, 2))
+    dense = Dense(out_channels=2, in_layers=[features])
+    output = SoftMax(in_layers=[dense])
+    smce = SoftMaxCrossEntropy(in_layers=[label, dense])
+    total_loss = ReduceMean(in_layers=smce)
 
     tg = dc.models.TensorGraph(learning_rate=0.1)
-    for output in outputs:
-      tg.add_output(output)
+    tg.add_output(output)
     tg.set_loss(total_loss)
 
-    tg.fit_generator(
-        databag.iterbatches(
-            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
+    tg.fit(dataset, nb_epoch=1000)
     metric = dc.metrics.Metric(
         dc.metrics.roc_auc_score, np.mean, mode="classification")
 
     scores = tg.evaluate_generator(
-        databag.iterbatches(), [metric], labels=labels, per_task_metrics=True)
+        tg.default_generator(dataset), [metric],
+        labels=[label],
+        per_task_metrics=True)
     scores = list(scores[1].values())
     assert_true(np.isclose(scores, [1.0], atol=0.05))
 
   def test_compute_model_performance_multitask_regressor(self):
     random_seed = 42
     n_data_points = 20
     n_features = 2
+    n_tasks = 2
     np.random.seed(seed=random_seed)
 
     X = np.random.rand(n_data_points, n_features)
-    y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1)
-    y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1)
-    X = NumpyDataset(X)
-    ys = [NumpyDataset(y1), NumpyDataset(y2)]
-
-    databag = Databag()
+    y1 = np.array([0.5 for x in range(n_data_points)])
+    y2 = np.array([-0.5 for x in range(n_data_points)])
+    y = np.stack([y1, y2], axis=1)
+    dataset = NumpyDataset(X, y)
 
     features = Feature(shape=(None, n_features))
-    databag.add_dataset(features, X)
-
-    outputs = []
-    losses = []
-    labels = []
-    for i in range(2):
-      label = Label(shape=(None, 1))
-      dense = Dense(out_channels=1, in_layers=[features])
-      loss = ReduceSquareDifference(in_layers=[dense, label])
-
-      outputs.append(dense)
-      losses.append(loss)
-      labels.append(label)
-      databag.add_dataset(label, ys[i])
-
-    total_loss = ReduceMean(in_layers=losses)
+    label = Label(shape=(None, n_tasks))
+    dense = Dense(out_channels=n_tasks, in_layers=[features])
+    loss = ReduceSquareDifference(in_layers=[dense, label])
 
-    tg = dc.models.TensorGraph(
-        mode="regression",
-        batch_size=20,
-        random_seed=random_seed,
-        learning_rate=0.1)
-    for output in outputs:
-      tg.add_output(output)
-    tg.set_loss(total_loss)
+    tg = dc.models.TensorGraph(random_seed=random_seed, learning_rate=0.1)
+    tg.add_output(dense)
+    tg.set_loss(loss)
 
-    tg.fit_generator(
-        databag.iterbatches(
-            epochs=1000, batch_size=tg.batch_size, pad_batches=True))
+    tg.fit(dataset, nb_epoch=1000)
     metric = [
         dc.metrics.Metric(
             dc.metrics.mean_absolute_error, np.mean, mode="regression"),
     ]
     scores = tg.evaluate_generator(
-        databag.iterbatches(), metric, labels=labels, per_task_metrics=True)
+        tg.default_generator(dataset),
+        metric,
+        labels=[label],
+        per_task_metrics=True)
     scores = list(scores[1].values())
     assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))
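The regression test gets the same consolidation: a single Dense head with out_channels=n_tasks replaces the per-task loop, and the stacked targets line up with its output. Shape check, using the test's own values:

```python
import numpy as np

n_data_points = 20
y1 = np.array([0.5 for x in range(n_data_points)])    # task 0 targets, (20,)
y2 = np.array([-0.5 for x in range(n_data_points)])   # task 1 targets, (20,)

y = np.stack([y1, y2], axis=1)
print(y.shape)                 # (20, 2), matching Dense(out_channels=2)
```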

2 changes: 1 addition & 1 deletion examples/low_data/toxcast_maml.py
@@ -108,7 +108,7 @@ def compute_scores(optimize):
   print()
   print('Cross entropy loss:', np.mean(losses))
   print('Prediction accuracy:', accuracy_score(y_true, y_pred > 0.5))
-  print('ROC AUC:', dc.metrics.compute_roc_auc_scores(y_true, y_pred))
+  print('ROC AUC:', dc.metrics.roc_auc_scores(y_true, y_pred))
   print()


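For readers without this DeepChem version at hand, a hedged sketch of what a mean per-task ROC AUC helper computes, written directly against scikit-learn; the (n_samples, n_tasks) shapes are an assumption, not taken from the script:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

def mean_roc_auc(y_true, y_pred):
    """Mean per-task ROC AUC; expects (n_samples, n_tasks) arrays of
    binary labels and predicted probabilities."""
    scores = [roc_auc_score(y_true[:, t], y_pred[:, t])
              for t in range(y_true.shape[1])]
    return float(np.mean(scores))
```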

