Commit 2bb42c8
Added benchmarks to compare SAGA 32b and 64b
1 parent bbd3c35 commit 2bb42c8

File tree

1 file changed

benchmarks/bench_saga.py

Lines changed: 97 additions & 47 deletions
@@ -5,14 +5,13 @@
 """
 import json
 import time
-from os.path import expanduser

 import matplotlib.pyplot as plt
 import numpy as np

 from sklearn.datasets import fetch_rcv1, load_iris, load_digits, \
     fetch_20newsgroups_vectorized
-from sklearn.externals.joblib import delayed, Parallel, Memory
+from sklearn.externals.joblib import delayed, Parallel
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
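
A note on the import touched here: `sklearn.externals.joblib` was scikit-learn's vendored copy of joblib, which has since been deprecated and removed. On current versions the equivalent imports come straight from the standalone package; a minimal sketch, assuming joblib is installed separately:

    from joblib import Parallel, delayed  # modern replacement for sklearn.externals.joblib
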
@@ -21,7 +20,7 @@


 def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
-               max_iter=10, skip_slow=False):
+               max_iter=10, skip_slow=False, dtype=np.float64):
     if skip_slow and solver == 'lightning' and penalty == 'l1':
         print('skip_slowping l1 logistic regression with solver lightning.')
         return
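
The new `dtype` keyword defaults to `np.float64`, so existing callers keep their current behaviour and a 32-bit run has to be requested explicitly. A hypothetical direct call exercising the updated signature (the `X` and `y` arrays are placeholders, not from this file):

    # hypothetical invocation: benchmark one solver in single precision
    fit_single('saga', X, y, penalty='l2', single_target=True,
               max_iter=10, dtype=np.float32)
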
@@ -37,7 +36,8 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
         multi_class = 'ovr'
     else:
         multi_class = 'multinomial'
-
+    X = X.astype(dtype)
+    y = y.astype(dtype)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42,
                                                         stratify=y)
     n_samples = X_train.shape[0]
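
Casting with `astype` returns a fresh array in the requested precision, so the float32 runs fit on data occupying half the memory of the float64 runs. A self-contained sketch of the effect:

    import numpy as np

    X = np.random.rand(1000, 100)   # float64 by default: 8 bytes per value
    X32 = X.astype(np.float32)      # new copy: 4 bytes per value
    print(X.nbytes, X32.nbytes)     # 800000 400000
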
@@ -69,11 +69,15 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
                                 multi_class=multi_class,
                                 C=C,
                                 penalty=penalty,
-                                fit_intercept=False, tol=1e-24,
+                                fit_intercept=False, tol=0,
                                 max_iter=this_max_iter,
                                 random_state=42,
                                 )
+
+        # Makes cpu cache even for all fit calls
+        X_train.max()
         t0 = time.clock()
+
         lr.fit(X_train, y_train)
         train_time = time.clock() - t0

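The added `X_train.max()` forces a full pass over the training data, so every solver starts its timed `fit` with the data already in the CPU cache instead of penalising whichever combination runs first. Note also that `time.clock()` was deprecated in Python 3.3 and removed in 3.8; a sketch of the same timing pattern with the modern clock, reusing the `lr` and `X_train` names from the hunk above:

    import time

    X_train.max()                    # touch every element: warm the cache
    t0 = time.perf_counter()         # monotonic clock, replaces time.clock()
    lr.fit(X_train, y_train)
    train_time = time.perf_counter() - t0
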
@@ -106,9 +110,13 @@ def _predict_proba(lr, X):
     return softmax(pred)


-def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20,
+def exp(solvers, penalties, single_target,
+        n_samples=30000, max_iter=20,
         dataset='rcv1', n_jobs=1, skip_slow=False):
-    mem = Memory(cachedir=expanduser('~/cache'), verbose=0)
+    dtypes_mapping = {
+        "float64": np.float64,
+        "float32": np.float32,
+    }

     if dataset == 'rcv1':
         rcv1 = fetch_rcv1()
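
The mapping pairs a human-readable label, used later to tag results and pick plot styles, with the actual NumPy dtype object used for casting. A minimal round-trip, assuming NumPy:

    import numpy as np

    dtypes_mapping = {"float64": np.float64, "float32": np.float32}
    for name, dt in dtypes_mapping.items():
        print(name, np.zeros(3, dtype=dt).dtype)  # float64, then float32
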
@@ -151,27 +159,32 @@ def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20,
         X = X[:n_samples]
         y = y[:n_samples]

-    cached_fit = mem.cache(fit_single)
+    # cached_fit = mem.cache(fit_single)
     out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
-        delayed(cached_fit)(solver, X, y,
+        delayed(fit_single)(solver, X, y,
                             penalty=penalty, single_target=single_target,
+                            dtype=dtype,
                             C=1, max_iter=max_iter, skip_slow=skip_slow)
         for solver in solvers
-        for penalty in penalties)
+        for penalty in penalties for dtype in dtypes_mapping.values())

     res = []
     idx = 0
-    for solver in solvers:
-        for penalty in penalties:
-            if not (skip_slow and solver == 'lightning' and penalty == 'l1'):
-                lr, times, train_scores, test_scores, accuracies = out[idx]
-                this_res = dict(solver=solver, penalty=penalty,
-                                single_target=single_target,
-                                times=times, train_scores=train_scores,
-                                test_scores=test_scores,
-                                accuracies=accuracies)
-                res.append(this_res)
-                idx += 1
+    for dtype_name in dtypes_mapping.keys():
+        for solver in solvers:
+            for penalty in penalties:
+                if not (skip_slow and
+                        solver == 'lightning' and
+                        penalty == 'l1'):
+                    lr, times, train_scores, test_scores, accuracies = out[idx]
+                    this_res = dict(solver=solver, penalty=penalty,
+                                    dtype=dtype_name,
+                                    single_target=single_target,
+                                    times=times, train_scores=train_scores,
+                                    test_scores=test_scores,
+                                    accuracies=accuracies)
+                    res.append(this_res)
+                    idx += 1

     with open('bench_saga.json', 'w+') as f:
         json.dump(res, f)
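
joblib's `Parallel` consumes the generator of `delayed` calls and returns results in exactly the order the tasks were yielded, which is what makes walking `out` with a running index possible at all. A standalone sketch of the fan-out pattern, with a toy stand-in for `fit_single` and assuming joblib is available:

    from joblib import Parallel, delayed

    def work(solver, penalty, dtype):      # toy stand-in for fit_single
        return '%s-%s-%s' % (solver, penalty, dtype)

    out = Parallel(n_jobs=2)(
        delayed(work)(s, p, d)
        for s in ['saga', 'liblinear']
        for p in ['l1', 'l2']
        for d in ['float64', 'float32'])
    print(out[0])                          # 'saga-l1-float64': task order is preserved
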
@@ -186,42 +199,62 @@ def plot():

     grouped = res.groupby(level=['single_target', 'penalty'])

-    colors = {'saga': 'blue', 'liblinear': 'orange', 'lightning': 'green'}
+    colors = {'saga': 'C0', 'liblinear': 'C1', 'lightning': 'C2'}
+    linestyles = {"float32": "--", "float64": "-"}
+    alpha = {"float64": 0.5, "float32": 1}

     for idx, group in grouped:
         single_target, penalty = idx
-        fig = plt.figure(figsize=(12, 4))
-        ax = fig.add_subplot(131)
-
-        train_scores = group['train_scores'].values
-        ref = np.min(np.concatenate(train_scores)) * 0.999
-
-        for scores, times, solver in zip(group['train_scores'], group['times'],
-                                         group['solver']):
-            scores = scores / ref - 1
-            ax.plot(times, scores, label=solver, color=colors[solver])
+        fig, axes = plt.subplots(figsize=(12, 4), ncols=4)
+        ax = axes[0]
+
+        for scores, times, solver, dtype in zip(group['train_scores'],
+                                                group['times'],
+                                                group['solver'],
+                                                group["dtype"]):
+            ax.plot(times, scores, label="%s - %s" % (solver, dtype),
+                    color=colors[solver],
+                    alpha=alpha[dtype],
+                    marker=".",
+                    linestyle=linestyles[dtype])
+            ax.axvline(times[-1], color=colors[solver],
+                       alpha=alpha[dtype],
+                       linestyle=linestyles[dtype])
         ax.set_xlabel('Time (s)')
         ax.set_ylabel('Training objective (relative to min)')
         ax.set_yscale('log')

-        ax = fig.add_subplot(132)
+        ax = axes[1]

-        test_scores = group['test_scores'].values
-        ref = np.min(np.concatenate(test_scores)) * 0.999
+        for scores, times, solver, dtype in zip(group['test_scores'],
+                                                group['times'],
+                                                group['solver'],
+                                                group["dtype"]):
+            ax.plot(times, scores, label=solver, color=colors[solver],
+                    linestyle=linestyles[dtype],
+                    marker=".",
+                    alpha=alpha[dtype])
+            ax.axvline(times[-1], color=colors[solver],
+                       alpha=alpha[dtype],
+                       linestyle=linestyles[dtype])

-        for scores, times, solver in zip(group['test_scores'], group['times'],
-                                         group['solver']):
-            scores = scores / ref - 1
-            ax.plot(times, scores, label=solver, color=colors[solver])
         ax.set_xlabel('Time (s)')
         ax.set_ylabel('Test objective (relative to min)')
         ax.set_yscale('log')

-        ax = fig.add_subplot(133)
+        ax = axes[2]
+        for accuracy, times, solver, dtype in zip(group['accuracies'],
+                                                  group['times'],
+                                                  group['solver'],
+                                                  group["dtype"]):
+            ax.plot(times, accuracy, label="%s - %s" % (solver, dtype),
+                    alpha=alpha[dtype],
+                    marker=".",
+                    color=colors[solver], linestyle=linestyles[dtype])
+            ax.axvline(times[-1], color=colors[solver],
+                       alpha=alpha[dtype],
+                       linestyle=linestyles[dtype])

-        for accuracy, times, solver in zip(group['accuracies'], group['times'],
-                                           group['solver']):
-            ax.plot(times, accuracy, label=solver, color=colors[solver])
         ax.set_xlabel('Time (s)')
         ax.set_ylabel('Test accuracy')
         ax.legend()
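
In the new palette, `'C0'`, `'C1'`, ... are matplotlib shorthand for entries in the active property cycle (available since matplotlib 2.0), so one color per solver can be combined with a linestyle and alpha per dtype. A minimal sketch of that encoding, with illustrative data:

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    # same solver color, dtype distinguished by linestyle and alpha
    ax.plot([0, 1], [1.0, 0.5], color='C0', linestyle='-', alpha=0.5,
            label='saga - float64')
    ax.plot([0, 1], [1.0, 0.6], color='C0', linestyle='--',
            label='saga - float32')
    ax.legend()
    plt.show()
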
@@ -231,14 +264,31 @@ def plot():
         name += '.png'
         fig.tight_layout()
         fig.subplots_adjust(top=0.9)
+
+        ax = axes[3]
+        for scores, times, solver, dtype in zip(group['train_scores'],
+                                                group['times'],
+                                                group['solver'],
+                                                group["dtype"]):
+            ax.plot(np.arange(len(scores)),
+                    scores, label="%s - %s" % (solver, dtype),
+                    marker=".",
+                    alpha=alpha[dtype],
+                    color=colors[solver], linestyle=linestyles[dtype])
+
+        ax.set_yscale("log")
+        ax.set_xlabel('# iterations')
+        ax.set_ylabel('Objective function')
+        ax.legend()
+
         plt.savefig(name)
-        plt.close(fig)


 if __name__ == '__main__':
-    solvers = ['saga', 'liblinear', 'lightning']
+    solvers = ['saga', 'liblinear']
     penalties = ['l1', 'l2']
     single_target = True
-    exp(solvers, penalties, single_target, n_samples=None, n_jobs=1,
-        dataset='20newspaper', max_iter=20)
+    exp(solvers, penalties, single_target,
+        n_samples=None, n_jobs=1,
+        dataset='rcv1', max_iter=10)
     plot()
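
After a run, `exp` leaves its measurements in `bench_saga.json`, which `plot` reads back; the records can also be inspected directly. A sketch, assuming the benchmark has completed in the current directory:

    import json

    with open('bench_saga.json') as f:
        res = json.load(f)
    for r in res:
        # keys as written by exp(): solver, penalty, dtype, times, ...
        print(r['solver'], r['dtype'], r['penalty'], r['times'][-1])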
