Skip to content

Commit 6aa7b93

Browse files
committed
BENCHMARKS updated saga benchmark to account for new dtype implementation
1 parent c9f3a5c commit 6aa7b93

File tree

1 file changed

+60
-32
lines changed

1 file changed

+60
-32
lines changed

benchmarks/bench_saga.py

Lines changed: 60 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,13 @@
55
"""
66
import json
77
import time
8-
from os.path import expanduser
98

109
import matplotlib.pyplot as plt
1110
import numpy as np
1211

1312
from sklearn.datasets import fetch_rcv1, load_iris, load_digits, \
1413
fetch_20newsgroups_vectorized
15-
from sklearn.externals.joblib import delayed, Parallel, Memory
14+
from sklearn.externals.joblib import delayed, Parallel
1615
from sklearn.linear_model import LogisticRegression
1716
from sklearn.metrics import log_loss
1817
from sklearn.model_selection import train_test_split
@@ -70,11 +69,15 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
7069
multi_class=multi_class,
7170
C=C,
7271
penalty=penalty,
73-
fit_intercept=False, tol=1e-24,
72+
fit_intercept=False, tol=0,
7473
max_iter=this_max_iter,
7574
random_state=42,
7675
)
76+
77+
# Touch X_train before starting the timer so the CPU cache state is comparable across all fit calls
78+
X_train.max()
7779
t0 = time.clock()
80+
7881
lr.fit(X_train, y_train)
7982
train_time = time.clock() - t0
8083

@@ -98,6 +101,7 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
98101
accuracies.append(accuracy)
99102
times.append(train_time)
100103
return lr, times, train_scores, test_scores, accuracies
104+
return True
101105

102106

103107
def _predict_proba(lr, X):
@@ -107,14 +111,13 @@ def _predict_proba(lr, X):
107111
return softmax(pred)
108112

109113

110-
def exp(solvers, penalties, single_target, dtypes=[np.float64, np.float32],
114+
def exp(solvers, penalties, single_target,
111115
n_samples=30000, max_iter=20,
112116
dataset='rcv1', n_jobs=1, skip_slow=False):
113-
mem = Memory(cachedir=expanduser('~/cache'), verbose=0)
114-
dtypes_mapping = {"float32": np.float32,
115-
"float64": np.float64}
116-
if np.float32 in dtypes and "lightning" in solvers:
117-
raise ValueError("Lightning does not support np.float32")
117+
dtypes_mapping = {
118+
"float64": np.float64,
119+
"float32": np.float32,
120+
}
118121

119122
if dataset == 'rcv1':
120123
rcv1 = fetch_rcv1()
@@ -154,12 +157,12 @@ def exp(solvers, penalties, single_target, dtypes=[np.float64, np.float32],
154157
y_n[y <= 16] = 0
155158
y = y_n
156159

157-
#X = X[:n_samples]
158-
#y = y[:n_samples]
160+
X = X[:n_samples]
161+
y = y[:n_samples]
159162

160-
cached_fit = mem.cache(fit_single)
163+
# cached_fit = mem.cache(fit_single)
161164
out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
162-
delayed(cached_fit)(solver, X, y,
165+
delayed(fit_single)(solver, X, y,
163166
penalty=penalty, single_target=single_target,
164167
dtype=dtype,
165168
C=1, max_iter=max_iter, skip_slow=skip_slow)
@@ -197,54 +200,62 @@ def plot():
197200

198201
grouped = res.groupby(level=['single_target', 'penalty'])
199202

200-
colors = {'saga': 'blue', 'liblinear': 'orange', 'lightning': 'green'}
203+
colors = {'saga': 'C0', 'liblinear': 'C1', 'lightning': 'C2'}
201204
linestyles = {"float32": "--", "float64": "-"}
205+
alpha = {"float64": 0.5, "float32": 1}
202206

203207
for idx, group in grouped:
204208
single_target, penalty = idx
205-
fig = plt.figure(figsize=(12, 4))
206-
ax = fig.add_subplot(131)
207-
208-
train_scores = group['train_scores'].values
209-
ref = np.min(np.concatenate(train_scores)) * 0.999
209+
fig, axes = plt.subplots(figsize=(12, 4), ncols=4)
210+
ax = axes[0]
210211

211212
for scores, times, solver, dtype in zip(group['train_scores'],
212213
group['times'],
213214
group['solver'],
214215
group["dtype"]):
215-
scores = scores / ref - 1
216216
ax.plot(times, scores, label="%s - %s" % (solver, dtype),
217217
color=colors[solver],
218+
alpha=alpha[dtype],
219+
marker=".",
218220
linestyle=linestyles[dtype])
221+
ax.axvline(times[-1], color=colors[solver],
222+
alpha=alpha[dtype],
223+
linestyle=linestyles[dtype])
219224
ax.set_xlabel('Time (s)')
220225
ax.set_ylabel('Training objective (relative to min)')
221226
ax.set_yscale('log')
222227

223-
ax = fig.add_subplot(132)
224-
225-
test_scores = group['test_scores'].values
226-
ref = np.min(np.concatenate(test_scores)) * 0.999
228+
ax = axes[1]
227229

228230
for scores, times, solver, dtype in zip(group['test_scores'],
229231
group['times'],
230232
group['solver'],
231233
group["dtype"]):
232-
scores = scores / ref - 1
233234
ax.plot(times, scores, label=solver, color=colors[solver],
234-
linestyle=linestyles[dtype])
235+
linestyle=linestyles[dtype],
236+
marker=".",
237+
alpha=alpha[dtype])
238+
ax.axvline(times[-1], color=colors[solver],
239+
alpha=alpha[dtype],
240+
linestyle=linestyles[dtype])
235241

236242
ax.set_xlabel('Time (s)')
237243
ax.set_ylabel('Test objective (relative to min)')
238244
ax.set_yscale('log')
239245

240-
ax = fig.add_subplot(133)
241-
246+
ax = axes[2]
242247
for accuracy, times, solver, dtype in zip(group['accuracies'],
243248
group['times'],
244249
group['solver'],
245250
group["dtype"]):
246251
ax.plot(times, accuracy, label="%s - %s" % (solver, dtype),
252+
alpha=alpha[dtype],
253+
marker=".",
247254
color=colors[solver], linestyle=linestyles[dtype])
255+
ax.axvline(times[-1], color=colors[solver],
256+
alpha=alpha[dtype],
257+
linestyle=linestyles[dtype])
258+
248259
ax.set_xlabel('Time (s)')
249260
ax.set_ylabel('Test accuracy')
250261
ax.legend()
@@ -254,14 +265,31 @@ def plot():
254265
name += '.png'
255266
fig.tight_layout()
256267
fig.subplots_adjust(top=0.9)
268+
269+
ax = axes[3]
270+
for scores, times, solver, dtype in zip(group['train_scores'],
271+
group['times'],
272+
group['solver'],
273+
group["dtype"]):
274+
ax.plot(np.arange(len(scores)),
275+
scores, label="%s - %s" % (solver, dtype),
276+
marker=".",
277+
alpha=alpha[dtype],
278+
color=colors[solver], linestyle=linestyles[dtype])
279+
280+
ax.set_yscale("log")
281+
ax.set_xlabel('# iterations')
282+
ax.set_ylabel('Objective function')
283+
ax.legend()
284+
257285
plt.savefig(name)
258-
plt.close(fig)
259286

260287

261288
if __name__ == '__main__':
262-
solvers = ['saga', 'liblinear'] #'lightning']
289+
solvers = ['saga', 'liblinear']
263290
penalties = ['l1', 'l2']
264291
single_target = True
265-
exp(solvers, penalties, single_target, n_samples=None, n_jobs=1,
266-
dataset='rcv1', max_iter=20)
292+
exp(solvers, penalties, single_target,
293+
n_samples=None, n_jobs=1,
294+
dataset='rcv1', max_iter=10)
267295
plot()

0 commit comments

Comments
 (0)