55"""
66import json
77import time
8- from os .path import expanduser
98
109import matplotlib .pyplot as plt
1110import numpy as np
1211
1312from sklearn .datasets import fetch_rcv1 , load_iris , load_digits , \
1413 fetch_20newsgroups_vectorized
15- from sklearn .externals .joblib import delayed , Parallel , Memory
14+ from sklearn .externals .joblib import delayed , Parallel
1615from sklearn .linear_model import LogisticRegression
1716from sklearn .metrics import log_loss
1817from sklearn .model_selection import train_test_split
@@ -70,11 +69,15 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
7069 multi_class = multi_class ,
7170 C = C ,
7271 penalty = penalty ,
73- fit_intercept = False , tol = 1e-24 ,
72+ fit_intercept = False , tol = 0 ,
7473 max_iter = this_max_iter ,
7574 random_state = 42 ,
7675 )
76+
77+ # Touch X_train before starting the timer so the CPU cache is in a comparable state for every fit call
78+ X_train .max ()
7779 t0 = time .clock ()
80+
7881 lr .fit (X_train , y_train )
7982 train_time = time .clock () - t0
8083
@@ -98,6 +101,7 @@ def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
98101 accuracies .append (accuracy )
99102 times .append (train_time )
100103 return lr , times , train_scores , test_scores , accuracies
104+ return True
101105
102106
103107def _predict_proba (lr , X ):
@@ -107,14 +111,13 @@ def _predict_proba(lr, X):
107111 return softmax (pred )
108112
109113
110- def exp (solvers , penalties , single_target , dtypes = [ np . float64 , np . float32 ],
114+ def exp (solvers , penalties , single_target ,
111115 n_samples = 30000 , max_iter = 20 ,
112116 dataset = 'rcv1' , n_jobs = 1 , skip_slow = False ):
113- mem = Memory (cachedir = expanduser ('~/cache' ), verbose = 0 )
114- dtypes_mapping = {"float32" : np .float32 ,
115- "float64" : np .float64 }
116- if np .float32 in dtypes and "lightning" in solvers :
117- raise ValueError ("Lightning does not support np.float32" )
117+ dtypes_mapping = {
118+ "float64" : np .float64 ,
119+ "float32" : np .float32 ,
120+ }
118121
119122 if dataset == 'rcv1' :
120123 rcv1 = fetch_rcv1 ()
@@ -154,12 +157,12 @@ def exp(solvers, penalties, single_target, dtypes=[np.float64, np.float32],
154157 y_n [y <= 16 ] = 0
155158 y = y_n
156159
157- # X = X[:n_samples]
158- # y = y[:n_samples]
160+ X = X [:n_samples ]
161+ y = y [:n_samples ]
159162
160- cached_fit = mem .cache (fit_single )
163+ # cached_fit = mem.cache(fit_single)
161164 out = Parallel (n_jobs = n_jobs , mmap_mode = None )(
162- delayed (cached_fit )(solver , X , y ,
165+ delayed (fit_single )(solver , X , y ,
163166 penalty = penalty , single_target = single_target ,
164167 dtype = dtype ,
165168 C = 1 , max_iter = max_iter , skip_slow = skip_slow )
@@ -197,54 +200,62 @@ def plot():
197200
198201 grouped = res .groupby (level = ['single_target' , 'penalty' ])
199202
200- colors = {'saga' : 'blue ' , 'liblinear' : 'orange ' , 'lightning' : 'green ' }
203+ colors = {'saga' : 'C0 ' , 'liblinear' : 'C1 ' , 'lightning' : 'C2 ' }
201204 linestyles = {"float32" : "--" , "float64" : "-" }
205+ alpha = {"float64" : 0.5 , "float32" : 1 }
202206
203207 for idx , group in grouped :
204208 single_target , penalty = idx
205- fig = plt .figure (figsize = (12 , 4 ))
206- ax = fig .add_subplot (131 )
207-
208- train_scores = group ['train_scores' ].values
209- ref = np .min (np .concatenate (train_scores )) * 0.999
209+ fig , axes = plt .subplots (figsize = (12 , 4 ), ncols = 4 )
210+ ax = axes [0 ]
210211
211212 for scores , times , solver , dtype in zip (group ['train_scores' ],
212213 group ['times' ],
213214 group ['solver' ],
214215 group ["dtype" ]):
215- scores = scores / ref - 1
216216 ax .plot (times , scores , label = "%s - %s" % (solver , dtype ),
217217 color = colors [solver ],
218+ alpha = alpha [dtype ],
219+ marker = "." ,
218220 linestyle = linestyles [dtype ])
221+ ax .axvline (times [- 1 ], color = colors [solver ],
222+ alpha = alpha [dtype ],
223+ linestyle = linestyles [dtype ])
219224 ax .set_xlabel ('Time (s)' )
220225 ax .set_ylabel ('Training objective (relative to min)' )
221226 ax .set_yscale ('log' )
222227
223- ax = fig .add_subplot (132 )
224-
225- test_scores = group ['test_scores' ].values
226- ref = np .min (np .concatenate (test_scores )) * 0.999
228+ ax = axes [1 ]
227229
228230 for scores , times , solver , dtype in zip (group ['test_scores' ],
229231 group ['times' ],
230232 group ['solver' ],
231233 group ["dtype" ]):
232- scores = scores / ref - 1
233234 ax .plot (times , scores , label = solver , color = colors [solver ],
234- linestyle = linestyles [dtype ])
235+ linestyle = linestyles [dtype ],
236+ marker = "." ,
237+ alpha = alpha [dtype ])
238+ ax .axvline (times [- 1 ], color = colors [solver ],
239+ alpha = alpha [dtype ],
240+ linestyle = linestyles [dtype ])
235241
236242 ax .set_xlabel ('Time (s)' )
237243 ax .set_ylabel ('Test objective (relative to min)' )
238244 ax .set_yscale ('log' )
239245
240- ax = fig .add_subplot (133 )
241-
246+ ax = axes [2 ]
242247 for accuracy , times , solver , dtype in zip (group ['accuracies' ],
243248 group ['times' ],
244249 group ['solver' ],
245250 group ["dtype" ]):
246251 ax .plot (times , accuracy , label = "%s - %s" % (solver , dtype ),
252+ alpha = alpha [dtype ],
253+ marker = "." ,
247254 color = colors [solver ], linestyle = linestyles [dtype ])
255+ ax .axvline (times [- 1 ], color = colors [solver ],
256+ alpha = alpha [dtype ],
257+ linestyle = linestyles [dtype ])
258+
248259 ax .set_xlabel ('Time (s)' )
249260 ax .set_ylabel ('Test accuracy' )
250261 ax .legend ()
@@ -254,14 +265,31 @@ def plot():
254265 name += '.png'
255266 fig .tight_layout ()
256267 fig .subplots_adjust (top = 0.9 )
268+
269+ ax = axes [3 ]
270+ for scores , times , solver , dtype in zip (group ['train_scores' ],
271+ group ['times' ],
272+ group ['solver' ],
273+ group ["dtype" ]):
274+ ax .plot (np .arange (len (scores )),
275+ scores , label = "%s - %s" % (solver , dtype ),
276+ marker = "." ,
277+ alpha = alpha [dtype ],
278+ color = colors [solver ], linestyle = linestyles [dtype ])
279+
280+ ax .set_yscale ("log" )
281+ ax .set_xlabel ('# iterations' )
282+ ax .set_ylabel ('Objective function' )
283+ ax .legend ()
284+
257285 plt .savefig (name )
258- plt .close (fig )
259286
260287
261288if __name__ == '__main__' :
262- solvers = ['saga' , 'liblinear' ] #'lightning']
289+ solvers = ['saga' , 'liblinear' ]
263290 penalties = ['l1' , 'l2' ]
264291 single_target = True
265- exp (solvers , penalties , single_target , n_samples = None , n_jobs = 1 ,
266- dataset = 'rcv1' , max_iter = 20 )
292+ exp (solvers , penalties , single_target ,
293+ n_samples = None , n_jobs = 1 ,
294+ dataset = 'rcv1' , max_iter = 10 )
267295 plot ()
0 commit comments