5
5
"""
6
6
import json
7
7
import time
8
- from os .path import expanduser
9
8
10
9
import matplotlib .pyplot as plt
11
10
import numpy as np
12
11
13
12
from sklearn .datasets import fetch_rcv1 , load_iris , load_digits , \
14
13
fetch_20newsgroups_vectorized
15
- from sklearn .externals .joblib import delayed , Parallel , Memory
14
+ from sklearn .externals .joblib import delayed , Parallel
16
15
from sklearn .linear_model import LogisticRegression
17
16
from sklearn .metrics import log_loss
18
17
from sklearn .model_selection import train_test_split
21
20
22
21
23
22
def fit_single (solver , X , y , penalty = 'l2' , single_target = True , C = 1 ,
24
- max_iter = 10 , skip_slow = False ):
23
+ max_iter = 10 , skip_slow = False , dtype = np . float64 ):
25
24
if skip_slow and solver == 'lightning' and penalty == 'l1' :
26
25
print ('skip_slowping l1 logistic regression with solver lightning.' )
27
26
return
@@ -37,7 +36,8 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
37
36
multi_class = 'ovr'
38
37
else :
39
38
multi_class = 'multinomial'
40
-
39
+ X = X .astype (dtype )
40
+ y = y .astype (dtype )
41
41
X_train , X_test , y_train , y_test = train_test_split (X , y , random_state = 42 ,
42
42
stratify = y )
43
43
n_samples = X_train .shape [0 ]
@@ -69,11 +69,15 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
69
69
multi_class = multi_class ,
70
70
C = C ,
71
71
penalty = penalty ,
72
- fit_intercept = False , tol = 1e-24 ,
72
+ fit_intercept = False , tol = 0 ,
73
73
max_iter = this_max_iter ,
74
74
random_state = 42 ,
75
75
)
76
+
77
+ # Touch the training data once before timing so the CPU cache is in a comparable state for every fit call
78
+ X_train .max ()
76
79
t0 = time .clock ()
80
+
77
81
lr .fit (X_train , y_train )
78
82
train_time = time .clock () - t0
79
83
@@ -106,9 +110,13 @@ def _predict_proba(lr, X):
106
110
return softmax (pred )
107
111
108
112
109
- def exp (solvers , penalties , single_target , n_samples = 30000 , max_iter = 20 ,
113
+ def exp (solvers , penalties , single_target ,
114
+ n_samples = 30000 , max_iter = 20 ,
110
115
dataset = 'rcv1' , n_jobs = 1 , skip_slow = False ):
111
- mem = Memory (cachedir = expanduser ('~/cache' ), verbose = 0 )
116
+ dtypes_mapping = {
117
+ "float64" : np .float64 ,
118
+ "float32" : np .float32 ,
119
+ }
112
120
113
121
if dataset == 'rcv1' :
114
122
rcv1 = fetch_rcv1 ()
@@ -151,27 +159,32 @@ def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20,
151
159
X = X [:n_samples ]
152
160
y = y [:n_samples ]
153
161
154
- cached_fit = mem .cache (fit_single )
162
+ # cached_fit = mem.cache(fit_single)
155
163
out = Parallel (n_jobs = n_jobs , mmap_mode = None )(
156
- delayed (cached_fit )(solver , X , y ,
164
+ delayed (fit_single )(solver , X , y ,
157
165
penalty = penalty , single_target = single_target ,
166
+ dtype = dtype ,
158
167
C = 1 , max_iter = max_iter , skip_slow = skip_slow )
159
168
for solver in solvers
160
- for penalty in penalties )
169
+ for penalty in penalties for dtype in dtypes_mapping . values () )
161
170
162
171
res = []
163
172
idx = 0
164
- for solver in solvers :
165
- for penalty in penalties :
166
- if not (skip_slow and solver == 'lightning' and penalty == 'l1' ):
167
- lr , times , train_scores , test_scores , accuracies = out [idx ]
168
- this_res = dict (solver = solver , penalty = penalty ,
169
- single_target = single_target ,
170
- times = times , train_scores = train_scores ,
171
- test_scores = test_scores ,
172
- accuracies = accuracies )
173
- res .append (this_res )
174
- idx += 1
173
+ for dtype_name in dtypes_mapping .keys ():
174
+ for solver in solvers :
175
+ for penalty in penalties :
176
+ if not (skip_slow and
177
+ solver == 'lightning' and
178
+ penalty == 'l1' ):
179
+ lr , times , train_scores , test_scores , accuracies = out [idx ]
180
+ this_res = dict (solver = solver , penalty = penalty ,
181
+ dtype = dtype_name ,
182
+ single_target = single_target ,
183
+ times = times , train_scores = train_scores ,
184
+ test_scores = test_scores ,
185
+ accuracies = accuracies )
186
+ res .append (this_res )
187
+ idx += 1
175
188
176
189
with open ('bench_saga.json' , 'w+' ) as f :
177
190
json .dump (res , f )
@@ -186,42 +199,62 @@ def plot():
186
199
187
200
grouped = res .groupby (level = ['single_target' , 'penalty' ])
188
201
189
- colors = {'saga' : 'blue' , 'liblinear' : 'orange' , 'lightning' : 'green' }
202
+ colors = {'saga' : 'C0' , 'liblinear' : 'C1' , 'lightning' : 'C2' }
203
+ linestyles = {"float32" : "--" , "float64" : "-" }
204
+ alpha = {"float64" : 0.5 , "float32" : 1 }
190
205
191
206
for idx , group in grouped :
192
207
single_target , penalty = idx
193
- fig = plt .figure (figsize = (12 , 4 ))
194
- ax = fig .add_subplot (131 )
195
-
196
- train_scores = group ['train_scores' ].values
197
- ref = np .min (np .concatenate (train_scores )) * 0.999
198
-
199
- for scores , times , solver in zip (group ['train_scores' ], group ['times' ],
200
- group ['solver' ]):
201
- scores = scores / ref - 1
202
- ax .plot (times , scores , label = solver , color = colors [solver ])
208
+ fig , axes = plt .subplots (figsize = (12 , 4 ), ncols = 4 )
209
+ ax = axes [0 ]
210
+
211
+ for scores , times , solver , dtype in zip (group ['train_scores' ],
212
+ group ['times' ],
213
+ group ['solver' ],
214
+ group ["dtype" ]):
215
+ ax .plot (times , scores , label = "%s - %s" % (solver , dtype ),
216
+ color = colors [solver ],
217
+ alpha = alpha [dtype ],
218
+ marker = "." ,
219
+ linestyle = linestyles [dtype ])
220
+ ax .axvline (times [- 1 ], color = colors [solver ],
221
+ alpha = alpha [dtype ],
222
+ linestyle = linestyles [dtype ])
203
223
ax .set_xlabel ('Time (s)' )
204
224
ax .set_ylabel ('Training objective (relative to min)' )
205
225
ax .set_yscale ('log' )
206
226
207
- ax = fig . add_subplot ( 132 )
227
+ ax = axes [ 1 ]
208
228
209
- test_scores = group ['test_scores' ].values
210
- ref = np .min (np .concatenate (test_scores )) * 0.999
229
+ for scores , times , solver , dtype in zip (group ['test_scores' ],
230
+ group ['times' ],
231
+ group ['solver' ],
232
+ group ["dtype" ]):
233
+ ax .plot (times , scores , label = solver , color = colors [solver ],
234
+ linestyle = linestyles [dtype ],
235
+ marker = "." ,
236
+ alpha = alpha [dtype ])
237
+ ax .axvline (times [- 1 ], color = colors [solver ],
238
+ alpha = alpha [dtype ],
239
+ linestyle = linestyles [dtype ])
211
240
212
- for scores , times , solver in zip (group ['test_scores' ], group ['times' ],
213
- group ['solver' ]):
214
- scores = scores / ref - 1
215
- ax .plot (times , scores , label = solver , color = colors [solver ])
216
241
ax .set_xlabel ('Time (s)' )
217
242
ax .set_ylabel ('Test objective (relative to min)' )
218
243
ax .set_yscale ('log' )
219
244
220
- ax = fig .add_subplot (133 )
245
+ ax = axes [2 ]
246
+ for accuracy , times , solver , dtype in zip (group ['accuracies' ],
247
+ group ['times' ],
248
+ group ['solver' ],
249
+ group ["dtype" ]):
250
+ ax .plot (times , accuracy , label = "%s - %s" % (solver , dtype ),
251
+ alpha = alpha [dtype ],
252
+ marker = "." ,
253
+ color = colors [solver ], linestyle = linestyles [dtype ])
254
+ ax .axvline (times [- 1 ], color = colors [solver ],
255
+ alpha = alpha [dtype ],
256
+ linestyle = linestyles [dtype ])
221
257
222
- for accuracy , times , solver in zip (group ['accuracies' ], group ['times' ],
223
- group ['solver' ]):
224
- ax .plot (times , accuracy , label = solver , color = colors [solver ])
225
258
ax .set_xlabel ('Time (s)' )
226
259
ax .set_ylabel ('Test accuracy' )
227
260
ax .legend ()
@@ -231,14 +264,31 @@ def plot():
231
264
name += '.png'
232
265
fig .tight_layout ()
233
266
fig .subplots_adjust (top = 0.9 )
267
+
268
+ ax = axes [3 ]
269
+ for scores , times , solver , dtype in zip (group ['train_scores' ],
270
+ group ['times' ],
271
+ group ['solver' ],
272
+ group ["dtype" ]):
273
+ ax .plot (np .arange (len (scores )),
274
+ scores , label = "%s - %s" % (solver , dtype ),
275
+ marker = "." ,
276
+ alpha = alpha [dtype ],
277
+ color = colors [solver ], linestyle = linestyles [dtype ])
278
+
279
+ ax .set_yscale ("log" )
280
+ ax .set_xlabel ('# iterations' )
281
+ ax .set_ylabel ('Objective function' )
282
+ ax .legend ()
283
+
234
284
plt .savefig (name )
235
- plt .close (fig )
236
285
237
286
238
287
if __name__ == '__main__':
    # Script entry point: run the benchmark with a fixed configuration
    # (exp dumps its results to 'bench_saga.json'), then draw the figures.
    solvers = ['saga', 'liblinear']
    penalties = ['l1', 'l2']
    single_target = True
    exp(solvers, penalties, single_target,
        n_samples=None, n_jobs=1,
        dataset='rcv1', max_iter=10)
    plot()
0 commit comments