forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_plot_ward.py
48 lines (40 loc) · 1.24 KB
/
bench_plot_ward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
Benchmark scikit-learn's Ward implement compared to SciPy's
"""
import time
import numpy as np
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
ward = AgglomerativeClustering(n_clusters=3, linkage="ward")
n_samples = np.logspace(0.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples, n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)
for i, n in enumerate(n_samples):
for j, p in enumerate(n_features):
X = np.random.normal(size=(n, p))
t0 = time.time()
ward.fit(X)
scikits_time[j, i] = time.time() - t0
t0 = time.time()
hierarchy.ward(X)
scipy_time[j, i] = time.time() - t0
ratio = scikits_time / scipy_time
plt.figure("scikit-learn Ward's method benchmark results")
plt.imshow(np.log(ratio), aspect="auto", origin="lower")
plt.colorbar()
plt.contour(
ratio,
levels=[
1,
],
colors="k",
)
plt.yticks(range(len(n_features)), n_features.astype(int))
plt.ylabel("N features")
plt.xticks(range(len(n_samples)), n_samples.astype(int))
plt.xlabel("N samples")
plt.title("Scikit's time, in units of scipy time (log)")
plt.show()