forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_plot_lasso_path.py
117 lines (95 loc) · 3.91 KB
/
bench_plot_lasso_path.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Benchmarks of Lasso regularization path computation using Lars and CD
The input data is mostly low rank but is a fat infinite tail.
"""
from __future__ import print_function
from collections import defaultdict
import gc
import sys
from time import time
import numpy as np
from sklearn.linear_model import lars_path
from sklearn.linear_model import lasso_path
from sklearn.datasets.samples_generator import make_regression
def compute_bench(samples_range, features_range):
it = 0
results = defaultdict(lambda: [])
max_it = len(samples_range) * len(features_range)
for n_samples in samples_range:
for n_features in features_range:
it += 1
print('====================')
print('Iteration %03d of %03d' % (it, max_it))
print('====================')
dataset_kwargs = {
'n_samples': n_samples,
'n_features': n_features,
'n_informative': n_features / 10,
'effective_rank': min(n_samples, n_features) / 10,
#'effective_rank': None,
'bias': 0.0,
}
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
X, y = make_regression(**dataset_kwargs)
gc.collect()
print("benchmarking lars_path (with Gram):", end='')
sys.stdout.flush()
tstart = time()
G = np.dot(X.T, X) # precomputed Gram matrix
Xy = np.dot(X.T, y)
lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
delta = time() - tstart
print("%0.3fs" % delta)
results['lars_path (with Gram)'].append(delta)
gc.collect()
print("benchmarking lars_path (without Gram):", end='')
sys.stdout.flush()
tstart = time()
lars_path(X, y, method='lasso')
delta = time() - tstart
print("%0.3fs" % delta)
results['lars_path (without Gram)'].append(delta)
gc.collect()
print("benchmarking lasso_path (with Gram):", end='')
sys.stdout.flush()
tstart = time()
lasso_path(X, y, precompute=True)
delta = time() - tstart
print("%0.3fs" % delta)
results['lasso_path (with Gram)'].append(delta)
gc.collect()
print("benchmarking lasso_path (without Gram):", end='')
sys.stdout.flush()
tstart = time()
lasso_path(X, y, precompute=False)
delta = time() - tstart
print("%0.3fs" % delta)
results['lasso_path (without Gram)'].append(delta)
return results
if __name__ == '__main__':
from mpl_toolkits.mplot3d import axes3d # register the 3d projection
import matplotlib.pyplot as plt
samples_range = np.linspace(10, 2000, 5).astype(np.int)
features_range = np.linspace(10, 2000, 5).astype(np.int)
results = compute_bench(samples_range, features_range)
max_time = max(max(t) for t in results.values())
fig = plt.figure('scikit-learn Lasso path benchmark results')
i = 1
for c, (label, timings) in zip('bcry', sorted(results.items())):
ax = fig.add_subplot(2, 2, i, projection='3d')
X, Y = np.meshgrid(samples_range, features_range)
Z = np.asarray(timings).reshape(samples_range.shape[0],
features_range.shape[0])
# plot the actual surface
ax.plot_surface(X, Y, Z.T, cstride=1, rstride=1, color=c, alpha=0.8)
# dummy point plot to stick the legend to since surface plot do not
# support legends (yet?)
# ax.plot([1], [1], [1], color=c, label=label)
ax.set_xlabel('n_samples')
ax.set_ylabel('n_features')
ax.set_zlabel('Time (s)')
ax.set_zlim3d(0.0, max_time * 1.1)
ax.set_title(label)
# ax.legend()
i += 1
plt.show()