-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_correlation.py
82 lines (64 loc) · 2.02 KB
/
plot_correlation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import math
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import seaborn as sns
import scipy.stats
# Input table read by this script: one row per line, five whitespace-separated
# fields. The first field is ignored; the other four are the scores plotted below.
SCORES_FILE = "../correlation_margin_msmarco"


def load_scores(path):
    """Parse the score table at *path* into four parallel lists of floats.

    Every non-blank line must contain exactly five whitespace-separated
    fields. The first field is not used by any plot and is discarded; the
    remaining four are converted with ``float`` and collected column-wise.

    Args:
        path: location of the text file to read.

    Returns:
        A 4-tuple of lists ``(val, test_ms, test_2019, test_2020)``, each
        holding one float per data row, in file order.

    Raises:
        ValueError: if a non-blank line does not have five fields, or one of
            the four score fields is not a valid float.
        OSError: if the file cannot be opened.
    """
    columns = ([], [], [], [])
    with open(path, encoding="utf-8") as handle:
        for lineno, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. an extra newline at end of file)
                # instead of failing on the tuple unpack.
                continue
            if len(fields) != 5:
                raise ValueError(
                    f"{path}:{lineno}: expected 5 fields, got {len(fields)}"
                )
            for column, raw in zip(columns, fields[1:]):
                column.append(float(raw))
    return columns


def linear_fit(x, y):
    """Fit a straight line to the points and measure their correlation.

    Args:
        x: sequence of x values.
        y: sequence of y values, same length as *x*.

    Returns:
        ``(xs, fitted, r)`` where ``xs`` is the sorted array of distinct x
        values, ``fitted`` is the degree-1 least-squares polynomial evaluated
        at ``xs``, and ``r`` is the Pearson correlation coefficient of
        *x* and *y*.
    """
    # np.unique sorts and de-duplicates, so the line is drawn left to right.
    xs = np.unique(x)
    fitted = np.poly1d(np.polyfit(x, y, 1))(xs)
    # Index 0 is the coefficient; index 1 (the p-value) is not used here.
    r = scipy.stats.pearsonr(x, y)[0]
    return xs, fitted, r


def plot_correlation(x, y, ylabel, text_xy, out_path):
    """Draw one validation-vs-test scatter plot and save it to *out_path*.

    The figure contains the raw points, the red least-squares line from
    ``linear_fit`` and a text label with the Pearson coefficient.

    Args:
        x: validation scores (x axis).
        y: test scores (y axis).
        ylabel: label for the y axis.
        text_xy: ``(x, y)`` position passed to ``plt.text`` for the
            coefficient label.
        out_path: file name handed to ``plt.savefig``.
    """
    xs, fitted, r = linear_fit(x, y)

    plt.figure()
    plt.scatter(x, y)
    plt.plot(xs, fitted, color='red')

    plt.xlabel('ndcg@10 validation', size=16)
    plt.ylabel(ylabel, size=16)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # The width-10 field is kept from the original script: it pads the number
    # with leading spaces, and the text positions were presumably chosen with
    # that padding in place.
    plt.text(text_xy[0], text_xy[1], f"coefficient={r:10.4f}", fontsize=15)

    plt.tight_layout()
    plt.savefig(out_path)
    # Close the current figure so the next call starts from a clean state.
    plt.close()


def main():
    """Read ``SCORES_FILE`` and write the three correlation PDFs.

    Raises:
        ValueError: if the file is malformed or holds fewer than two rows
            (a line fit and a correlation both need at least two points).
    """
    val_l, test_ms_l, test_2019_l, test_2020_l = load_scores(SCORES_FILE)
    if len(val_l) < 2:
        raise ValueError(
            f"{SCORES_FILE}: need at least 2 data rows, got {len(val_l)}"
        )

    plot_correlation(val_l, test_ms_l, 'ndcg@10 MS MARCO dev',
                     (0.47, 0.385), "msmarco_dev_correlation.pdf")
    plot_correlation(val_l, test_2019_l, 'ndcg@10 TREC DL 2019',
                     (0.465, 0.665), "dl2019_correlation.pdf")
    plot_correlation(val_l, test_2020_l, 'ndcg@10 TREC DL 2020',
                     (0.47, 0.66), "dl2020_correlation.pdf")


if __name__ == "__main__":
    main()