"""
Evaluation functions from OGB.
https://github.com/snap-stanford/ogb/blob/master/ogb/graphproppred/evaluate.py
"""
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

def eval_rocauc(y_true, y_pred):
    '''
    Compute ROC-AUC averaged across tasks.
    '''
    rocauc_list = []

    for i in range(y_true.shape[1]):
        # AUC is only defined when the task has at least one positive and
        # one negative label.
        if np.sum(y_true[:, i] == 1) > 0 and np.sum(y_true[:, i] == 0) > 0:
            # Ignore NaN values (NaN != NaN), i.e. unlabeled entries.
            is_labeled = y_true[:, i] == y_true[:, i]
            rocauc_list.append(
                roc_auc_score(y_true[is_labeled, i], y_pred[is_labeled, i]))

    if len(rocauc_list) == 0:
        raise RuntimeError(
            'No positively labeled data available. Cannot compute ROC-AUC.')

    return {'rocauc': sum(rocauc_list) / len(rocauc_list)}
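
# Note on input format (an assumption inferred from the checks above and in
# the array-based metrics below, not documented in the original OGB file):
# y_true and y_pred are expected to be NumPy arrays of shape
# (num_samples, num_tasks), with NaN entries in y_true marking unlabeled
# samples; those entries are skipped via the NaN != NaN comparison.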

def eval_ap(y_true, y_pred):
    '''
    Compute Average Precision (AP) averaged across tasks.
    '''
    ap_list = []

    for i in range(y_true.shape[1]):
        # AP is only defined when the task has at least one positive and
        # one negative label.
        if np.sum(y_true[:, i] == 1) > 0 and np.sum(y_true[:, i] == 0) > 0:
            # Ignore NaN values, i.e. unlabeled entries.
            is_labeled = y_true[:, i] == y_true[:, i]
            ap = average_precision_score(y_true[is_labeled, i],
                                         y_pred[is_labeled, i])
            ap_list.append(ap)

    if len(ap_list) == 0:
        raise RuntimeError(
            'No positively labeled data available. '
            'Cannot compute Average Precision.')

    return {'ap': sum(ap_list) / len(ap_list)}

def eval_rmse(y_true, y_pred):
    '''
    Compute RMSE averaged across tasks.
    '''
    rmse_list = []

    for i in range(y_true.shape[1]):
        # Ignore NaN values, i.e. unlabeled entries.
        is_labeled = y_true[:, i] == y_true[:, i]
        rmse_list.append(np.sqrt(
            ((y_true[is_labeled, i] - y_pred[is_labeled, i]) ** 2).mean()))

    return {'rmse': sum(rmse_list) / len(rmse_list)}

def eval_acc(y_true, y_pred):
    '''
    Compute accuracy averaged across tasks.
    '''
    acc_list = []

    for i in range(y_true.shape[1]):
        # Ignore NaN values, i.e. unlabeled entries.
        is_labeled = y_true[:, i] == y_true[:, i]
        correct = y_true[is_labeled, i] == y_pred[is_labeled, i]
        acc_list.append(float(np.sum(correct)) / len(correct))

    return {'acc': sum(acc_list) / len(acc_list)}

def eval_F1(seq_ref, seq_pred):
    '''
    Compute precision, recall, and F1 averaged over samples, where each
    reference/predicted sequence pair is compared as a set of items.
    '''
    precision_list = []
    recall_list = []
    f1_list = []

    for l, p in zip(seq_ref, seq_pred):
        label = set(l)
        prediction = set(p)
        true_positive = len(label.intersection(prediction))
        false_positive = len(prediction - label)
        false_negative = len(label - prediction)

        if true_positive + false_positive > 0:
            precision = true_positive / (true_positive + false_positive)
        else:
            precision = 0

        if true_positive + false_negative > 0:
            recall = true_positive / (true_positive + false_negative)
        else:
            recall = 0

        if precision + recall > 0:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0

        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    return {'precision': np.average(precision_list),
            'recall': np.average(recall_list),
            'F1': np.average(f1_list)}
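
# Minimal smoke test (a hedged sketch, not part of the original OGB file):
# exercises each metric on small synthetic inputs. The array shapes and the
# use of NaN for unlabeled entries are assumptions based on the functions
# above, and the example values are made up for illustration only.
if __name__ == '__main__':
    y_true = np.array([[1.0, 0.0], [0.0, np.nan], [1.0, 1.0], [0.0, 0.0]])
    y_pred = np.array([[0.9, 0.2], [0.1, 0.3], [0.8, 0.7], [0.2, 0.1]])

    print(eval_rocauc(y_true, y_pred))  # multi-task ROC-AUC
    print(eval_ap(y_true, y_pred))      # multi-task Average Precision
    print(eval_rmse(y_true, y_pred))    # same arrays treated as regression targets
    # Accuracy expects discrete predictions, so threshold the scores first.
    print(eval_acc(y_true, (y_pred > 0.5).astype(float)))

    # eval_F1 compares reference/predicted sequences as sets of items.
    seq_ref = [['a', 'b', 'c'], ['d']]
    seq_pred = [['a', 'b'], ['d', 'e']]
    print(eval_F1(seq_ref, seq_pred))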