Skip to content

Commit 08960ec

Browse files
committed
ENH: add boxcox in fixes
1 parent 3b29485 commit 08960ec

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

sklearn/preprocessing/data.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from ..utils import check_array
2525
from ..utils.extmath import row_norms
2626
from ..utils.extmath import _incremental_mean_and_var
27-
from ..utils.fixes import nanpercentile
27+
from ..utils.fixes import boxcox, nanpercentile
2828
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
2929
inplace_csr_row_normalize_l2)
3030
from ..utils.sparsefuncs import (inplace_column_scale,
@@ -2480,10 +2480,7 @@ def fit(self, X, y=None):
24802480
# the computation of lambda is influenced by NaNs and we need to
24812481
# get rid of them to compute it.
24822482
_, lmbda = stats.boxcox(col[~np.isnan(col)], lmbda=None)
2483-
# FIXME: stats.boxcox should be changed by special.boxcox which
2484-
# handles NaN and does not raise warnings. Check SciPy 0.14.X
2485-
with np.errstate(invalid='ignore'): # hide NaN comparison warnings
2486-
col_trans = stats.boxcox(col, lmbda)
2483+
col_trans = boxcox(col, lmbda)
24872484
self.lambdas_.append(lmbda)
24882485
transformed.append(col_trans)
24892486

@@ -2507,11 +2504,8 @@ def transform(self, X):
25072504
check_is_fitted(self, 'lambdas_')
25082505
X = self._check_input(X, check_positive=True, check_shape=True)
25092506

2510-
# FIXME: stats.boxcox should be changed by special.boxcox which handles
2511-
# NaN and does not raise warnings. Check SciPy 0.14.X
2512-
with np.errstate(invalid='ignore'): # hide NaN comparison warnings
2513-
for i, lmbda in enumerate(self.lambdas_):
2514-
X[:, i] = stats.boxcox(X[:, i], lmbda)
2507+
for i, lmbda in enumerate(self.lambdas_):
2508+
X[:, i] = boxcox(X[:, i], lmbda)
25152509

25162510
if self.standardize:
25172511
X = self._scaler.transform(X)

sklearn/utils/fixes.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,17 @@ def divide(x1, x2, out=None, dtype=None):
7070
return out
7171

7272

73+
# scipy.special.boxcox (SciPy >= 0.14) handles NaN without raising warnings
74+
if sp_version < (0, 14):
75+
from scipy import stats
76+
77+
def boxcox(x, lmbda):
78+
with np.errstate(invalid='ignore'):
79+
return stats.boxcox(x, lmbda)
80+
else:
81+
from scipy.special import boxcox
82+
83+
7384
if sp_version < (0, 15):
7485
# Backport fix for scikit-learn/scikit-learn#2986 / scipy/scipy#4142
7586
from ._scipy_sparse_lsqr_backport import lsqr as sparse_lsqr

0 commit comments

Comments
 (0)