@@ -28,37 +28,26 @@ def calc_time_and_err(tf, arr):
2828 return meas_time , meas_err
2929
3030
31- def compute_feature_bench (arr , n_components , batch_size ):
32- print ("===========================" )
33- print ("Computing feature bench for n_components %i, batch_size %i" % (
34- n_components , batch_size ))
31+ def compute_pca_bench (arr , n_components ):  # benchmark a plain PCA with n_components on arr via calc_time_and_err
3532 print ("===========================" )
36-
33+ print ( "Computing PCA bench for n_components %i" % n_components )
3734 pca = PCA (n_components = n_components )
3835 pca_time , pca_err = calc_time_and_err (pca , arr )
36+ return pca_time , pca_err  # the (time, error) pair exactly as produced by calc_time_and_err
3937
40- ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
41- ipca_time , ipca_err = calc_time_and_err (ipca , arr )
4238
39+ def compute_rpca_bench (arr , n_components , random_seed = 1999 ):  # benchmark RandomizedPCA; random_seed is forwarded as its random_state
40+ print ("===========================" )
41+ print ("Computing RandomizedPCA bench for n_components %i" % n_components )
43- rpca = RandomizedPCA (n_components = n_components , random_state = 1999 )
42+ rpca = RandomizedPCA (n_components = n_components , random_state = random_seed )  # honor the caller's seed instead of a hard-coded 1999
4443 rpca_time , rpca_err = calc_time_and_err (rpca , arr )
45- return pca_time , rpca_time , ipca_time , pca_err , rpca_err , ipca_err
46-
47-
48- def compute_pca_bench (arr , n_components ):
49- print ("===========================" )
50- print ("Computing PCA bench for n_components %i" % n_components )
51- print ("===========================" )
52- pca = PCA (n_components = n_components )
53- pca_time , pca_err = calc_time_and_err (pca , arr )
54- return pca_time , pca_err
44+ return rpca_time , rpca_err  # the (time, error) pair exactly as produced by calc_time_and_err
5545
5646
5747def compute_ipca_bench (arr , n_components , batch_size ):
5848 print ("===========================" )
5949 print ("Computing IncrementalPCA bench for n_components %i, batch_size %i" % (
6050 n_components , batch_size ))
61- print ("===========================" )
6251
6352 ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
6453 ipca_time , ipca_err = calc_time_and_err (ipca , arr )
@@ -78,7 +67,7 @@ def plot_feature_times(all_pca_times, all_rpca_times, all_ipca_times,
7867 plt .ylabel ("Time (seconds)" )
7968
8069 ax = plt .gca ()
81- n_ticks = 11
70+ n_ticks = 10
8271 step_size = arr .shape [1 ] // 10
8372 ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
8473 ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
@@ -98,26 +87,28 @@ def plot_feature_errors(all_pca_err, all_rpca_err, all_ipca_err,
9887 plt .ylabel ("Mean absolute error" )
9988
10089 ax = plt .gca ()
101- n_ticks = 11
90+ n_ticks = 10
10291 step_size = arr .shape [1 ] // 10
10392 ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
10493 ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
10594 step_size )))
10695
10796
108- def plot_batch_times (all_pca_times , all_ipca_times , n_features , arr ):
97+ def plot_batch_times (all_pca_times , all_rpca_times , all_ipca_times ,
98+ n_features , arr ):
10999 plt .figure ()
110100 plt .plot (all_pca_times , label = "PCA" )
101+ plt .plot (all_rpca_times , label = "RandomizedPCA" )
111102 plt .plot (all_ipca_times , label = "IncrementalPCA" )
112103 plt .legend (loc = "lower left" )
113- plt .suptitle ("Algorithm runtime vs. batch_size %i\n \
104+ plt .suptitle ("Algorithm runtime vs. batch_size for n_features %i\n \
114105 Low-rank matix (effective_rank 10), size %i x %i" % (
115106 n_features , arr .shape [0 ], arr .shape [1 ]))
116107 plt .xlabel ("Batch size" )
117108 plt .ylabel ("Time (seconds)" )
118109
119110 ax = plt .gca ()
120- n_ticks = 11
111+ n_ticks = 10
121112 step_size = arr .shape [0 ] // 10
122113 ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
123114 ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
@@ -136,10 +127,11 @@ def plot_batch_errors(all_pca_err, all_ipca_err, n_features, arr):
136127 plt .ylabel ("Mean absolute error" )
137128
138129 ax = plt .gca ()
139- n_ticks = 11
130+ n_ticks = 10
140131 step_size = arr .shape [0 ] // 10
141132 ax .xaxis .set_major_locator (LinearLocator (numticks = n_ticks ))
142- ax .set_xticklabels (list (range (1 , n_ticks * step_size - 1 , step_size )))
133+ ax .set_xticklabels (list (range (step_size , n_ticks * step_size - 1 ,
134+ step_size )))
143135
144136
145137def fixed_batch_size_comparison (arr ):
@@ -155,8 +147,9 @@ def fixed_batch_size_comparison(arr):
155147 bs = 1000
156148 # Compare runtimes and error for fixed batch size
157149 for nf in all_features :
158- (pca_time , rpca_time , ipca_time , pca_err ,
159- rpca_err , ipca_err ) = compute_feature_bench (arr , nf , bs )
150+ pca_time , pca_err = compute_pca_bench (arr , nf )
151+ rpca_time , rpca_err = compute_rpca_bench (arr , nf )
152+ ipca_time , ipca_err = compute_ipca_bench (arr , nf , bs )
160153 all_pca_times .append (pca_time )
161154 all_rpca_times .append (rpca_time )
162155 all_ipca_times .append (ipca_time )
@@ -177,7 +170,7 @@ def fixed_batch_size_comparison(arr):
177170
178171def variable_batch_size_comparison (arr ):
179172 batch_sizes = list (map (int , np .linspace (arr .shape [0 ] // 10 , arr .shape [0 ],
180- num = 5 )))
173+ num = 10 )))
181174 all_features = list (map (int , np .linspace (arr .shape [1 ] // 10 , arr .shape [1 ],
182175 num = 4 )))
183176 for nf in all_features :
@@ -187,6 +180,12 @@ def variable_batch_size_comparison(arr):
187180 all_pca_times .extend ([pca_time ] * len (batch_sizes ))
188181 all_pca_err .extend ([pca_err ] * len (batch_sizes ))
189182
183+ all_rpca_times = []
184+ all_rpca_err = []
185+ rpca_time , rpca_err = compute_rpca_bench (arr , nf )
186+ all_rpca_times .extend ([rpca_time ] * len (batch_sizes ))
187+ all_rpca_err .extend ([rpca_err ] * len (batch_sizes ))
188+
190189 all_ipca_times = []
191190 all_ipca_err = []
192191 for bs in batch_sizes :
@@ -195,14 +194,19 @@ def variable_batch_size_comparison(arr):
195194 all_ipca_err .append (ipca_err )
196195
197196 all_pca_times = np .array (all_pca_times )
198- all_ipca_times = np .array (all_ipca_times )
199197 all_pca_err = np .array (all_pca_err )
198+
199+ all_rpca_times = np .array (all_rpca_times )
200+ all_rpca_err = np .array (all_rpca_err )
201+
202+ all_ipca_times = np .array (all_ipca_times )
200203 all_ipca_err = np .array (all_ipca_err )
201204
202- plot_batch_times (all_pca_times , all_ipca_times , nf , arr )
205+ plot_batch_times (all_pca_times , all_rpca_times , all_ipca_times , nf , arr )
206+ # RandomizedPCA error is always worse (approx 100x) than other PCA tests
203207 plot_batch_errors (all_pca_err , all_ipca_err , nf , arr )
204208
205- faces = fetch_lfw_people (resize = .15 , min_faces_per_person = 5 )
209+ faces = fetch_lfw_people (resize = .2 , min_faces_per_person = 5 )
206210# limit dataset to 5000 people (don't care who they are!)
207211X = faces .data [:5000 ]
208212n_samples , h , w = faces .images .shape
0 commit comments