forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_feature_expansions.py
57 lines (51 loc) · 1.73 KB
/
bench_feature_expansions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from time import perf_counter, time

import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as sparse

from sklearn.preprocessing import PolynomialFeatures
# --- Benchmark configuration ------------------------------------------------
degree = 2  # degree handed to PolynomialFeatures
trials = 3  # repetitions whose timings are summed, then averaged when plotted
num_rows = 1000  # rows in every generated matrix
dimensionalities = np.array([1, 2, 8, 16, 32, 64])  # column counts to sweep
densities = np.array([0.01, 0.1, 1.0])  # fraction of non-zero entries

# One accumulator per density, with one slot per dimensionality. Each key gets
# its own freshly allocated array so the running sums never alias each other.
csr_times = {
    density: np.zeros(dimensionalities.shape, dtype=float) for density in densities
}
dense_times = {
    density: np.zeros(dimensionalities.shape, dtype=float) for density in densities
}
# Polynomial expansion up to `degree`, keeping pure powers as well as
# interaction terms and omitting the constant bias column.
transform = PolynomialFeatures(
    degree=degree,
    interaction_only=False,
    include_bias=False,
)
# Accumulate, over `trials` repetitions, how long `transform.fit_transform`
# takes on the same random matrix stored as CSR and as a dense array, for
# every (density, dimensionality) combination.
for trial in range(trials):
    for density in densities:
        for dim_index, dim in enumerate(dimensionalities):
            print(trial, density, dim)
            # The dense array is derived from the sparse one, so both timed
            # calls see identical values and differ only in storage format.
            X_csr = sparse.random(num_rows, dim, density=density, format="csr")
            X_dense = X_csr.toarray()

            # CSR
            # perf_counter() is a monotonic, high-resolution clock meant for
            # measuring short durations; time() follows the wall clock, which
            # can be adjusted mid-run and may be too coarse for short runs.
            t0 = perf_counter()
            transform.fit_transform(X_csr)
            csr_times[density][dim_index] += perf_counter() - t0

            # Dense
            t0 = perf_counter()
            transform.fit_transform(X_dense)
            dense_times[density][dim_index] += perf_counter() - t0
# Plot the mean time per trial against dimensionality, one subplot per density,
# with the CSR and dense curves drawn in different line styles.
csr_linestyle = (0, (3, 1, 1, 1, 1, 1))  # densely dashdotdotted
dense_linestyle = (0, ())  # solid

# squeeze=False makes plt.subplots always return a 2-D array of Axes. Without
# it a single-row grid yields a bare Axes object, and zipping over it fails
# when `densities` holds just one value. ravel() restores the flat layout.
fig, axes = plt.subplots(
    nrows=len(densities), ncols=1, figsize=(8, 10), squeeze=False
)
axes = axes.ravel()

for density, ax in zip(densities, axes):
    ax.plot(
        dimensionalities,
        csr_times[density] / trials,  # accumulated sum -> mean over trials
        label="csr",
        linestyle=csr_linestyle,
    )
    ax.plot(
        dimensionalities,
        dense_times[density] / trials,
        label="dense",
        linestyle=dense_linestyle,
    )
    ax.set_title(
        "density %0.2f, degree=%d, n_samples=%d" % (density, degree, num_rows)
    )
    ax.legend()
    ax.set_xlabel("Dimensionality")
    ax.set_ylabel("Time (seconds)")

plt.tight_layout()
plt.show()