@@ -136,8 +136,8 @@ def _parallel_predict_proba(trees, X, n_classes, n_outputs):
136136
137137def _parallel_predict_paths (trees , X ):
138138 """Private function used to compute a batch of prediction paths within a job."""
139- return [tree .predict ( X , return_paths = True ) for tree in trees ]
140-
139+ return [tree .decision_paths ( X ) for tree in trees ]
140+
141141
142142def _parallel_predict_regression (trees , X ):
143143 """Private function used to compute a batch of predictions within a job."""
@@ -303,6 +303,40 @@ def _validate_y(self, y):
303303 # Default implementation
304304 return y
305305
def decision_paths(self, X):
    """Return the decision paths followed by the samples of X in every tree.

    No class or regression value is computed here; the method only gathers
    what each fitted tree's ``decision_paths`` returns for ``X``.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples. If ``X`` is not already a 2d array of dtype
        ``DTYPE`` it is converted with ``array2d``.

    Returns
    -------
    paths : list of length n_estimators
        One entry per tree, in the order the trees are stored in
        ``self.estimators_``. Each entry is the value returned by that
        tree's ``decision_paths(X)``; it is expected to be an array of
        shape = [n_samples, max_depth + 1] holding, for each sample, the
        node ids visited starting with the root node id, padded with -1 on
        the right when a path is shorter than max_depth + 1 (tree-level
        contract, not visible from this method).
    """
    # Check data
    if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
        X = array2d(X, dtype=DTYPE)

    # Assign chunk of trees to jobs (the tree count is not needed here).
    n_jobs, _, starts = _partition_estimators(self)

    # Parallel loop: each job handles one slice of the estimators and
    # returns a list with one path array per tree of that slice.
    per_job_paths = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                             backend="threading")(
        delayed(_parallel_predict_paths)(
            self.estimators_[starts[i]:starts[i + 1]], X)
        for i in range(n_jobs))

    # Flatten the per-job lists into a single list with one entry per tree.
    return [tree_paths
            for job_paths in per_job_paths
            for tree_paths in job_paths]
338+
339+
306340 @property
307341 def feature_importances_ (self ):
308342 """Return the feature importances (the higher, the more important the
@@ -404,7 +438,7 @@ def _validate_y(self, y):
404438
405439 return y
406440
407- def predict (self , X , return_paths = True ):
441+ def predict (self , X ):
408442 """Predict class for X.
409443
410444 The predicted class of an input sample is computed as the majority
@@ -420,25 +454,6 @@ def predict(self, X, return_paths = True):
420454 y : array of shape = [n_samples] or [n_samples, n_outputs]
421455 The predicted classes.
422456 """
423-
424-
425- if return_paths :
426- # Check data
427- if getattr (X , "dtype" , None ) != DTYPE or X .ndim != 2 :
428- X = array2d (X , dtype = DTYPE )
429-
430- # Assign chunk of trees to jobs
431- n_jobs , n_trees , starts = _partition_estimators (self )
432-
433- # Parallel loop
434- path_list = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
435- backend = "threading" )(
436- delayed (_parallel_predict_paths )(
437- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
438- for i in range (n_jobs ))
439- #unpack the nested list and return
440- return [lst for med_lst in path_list for lst in med_lst ]
441-
442457 n_samples = len (X )
443458 proba = self .predict_proba (X )
444459
@@ -567,7 +582,7 @@ def __init__(self,
567582 random_state = random_state ,
568583 verbose = verbose )
569584
570- def predict (self , X , return_paths = False ):
585+ def predict (self , X ):
571586 """Predict regression target for X.
572587
573588 The predicted regression target of an input sample is computed as the
@@ -591,27 +606,16 @@ def predict(self, X, return_paths = False):
591606 n_jobs , n_trees , starts = _partition_estimators (self )
592607
593608 # Parallel loop
594- if return_paths :
595- path_list = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
596- backend = "threading" )(
597- delayed (_parallel_predict_paths )(
598- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
599- for i in range (n_jobs ))
600- #unpack the nested list and return
601- return [lst for med_lst in path_list for lst in med_lst ]
602- else :
603- all_y_hat = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
604- backend = "threading" )(
605- delayed (_parallel_predict_regression )(
606- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
607- for i in range (n_jobs ))
608- # Reduce
609- y_hat = sum (all_y_hat ) / len (self .estimators_ )
610-
611- return y_hat
609+ all_y_hat = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
610+ backend = "threading" )(
611+ delayed (_parallel_predict_regression )(
612+ self .estimators_ [starts [i ]:starts [i + 1 ]], X )
613+ for i in range (n_jobs ))
612614
613-
614-
615+ # Reduce
616+ y_hat = sum (all_y_hat ) / len (self .estimators_ )
617+
618+ return y_hat
615619
616620 def _set_oob_score (self , X , y ):
617621 n_samples = y .shape [0 ]
0 commit comments