Skip to content

Commit 662fe00

Browse files
committed
Merge pull request #3250 from mjbommar/isotonic-refactor-2
[MRG+1] Refactor and clean IsotonicRegression
2 parents dcf48a5 + 3636210 commit 662fe00

File tree

2 files changed: +105 −50 lines changed

sklearn/isotonic.py

Lines changed: 56 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,10 @@ class IsotonicRegression(BaseEstimator, TransformerMixin, RegressorMixin):
202202
`X_max_` : float
203203
Maximum value of input array `X_` for right bound.
204204
205+
`f_` : function
206+
The stepwise interpolating function that covers the domain
207+
X_.
208+
205209
References
206210
----------
207211
Isotonic Median Regression: A Linear Programming Approach
@@ -218,7 +222,41 @@ def __init__(self, y_min=None, y_max=None, increasing=True,
218222

219223
def _check_fit_data(self, X, y, sample_weight=None):
220224
if len(X.shape) != 1:
221-
raise ValueError("X should be a vector")
225+
raise ValueError("X should be a 1d array")
226+
227+
def _build_f(self, X, y):
228+
"""Build the f_ interp1d function."""
229+
230+
# Handle the out_of_bounds argument by setting bounds_error
231+
if self.out_of_bounds not in ["raise", "nan", "clip"]:
232+
raise ValueError("The argument ``out_of_bounds`` must be in "
233+
"'nan', 'clip', 'raise'; got {0}"
234+
.format(self.out_of_bounds))
235+
236+
bounds_error = self.out_of_bounds == "raise"
237+
self.f_ = interpolate.interp1d(X, y, kind='linear',
238+
bounds_error=bounds_error)
239+
240+
def _build_y(self, X, y, sample_weight):
    """Build the y_ IsotonicRegression.

    Sorts the training data by (X, y), fits the isotonic regression on
    the sorted values, and stores ``X_``, ``y_`` and ``increasing_``.

    Returns
    -------
    order_inv : ndarray
        Permutation mapping the sorted fitted values back to the order
        of the input samples.
    """
    X, y, sample_weight = check_arrays(X, y, sample_weight,
                                       sparse_format='dense')
    y = as_float_array(y)
    self._check_fit_data(X, y, sample_weight)

    # Determine increasing if auto-determination requested
    if self.increasing == 'auto':
        self.increasing_ = check_increasing(X, y)
    else:
        self.increasing_ = self.increasing

    # Sort by X, breaking ties with y, and keep the inverse permutation
    # so callers can restore input order.
    order = np.lexsort((y, X))
    order_inv = np.argsort(order)
    # BUG FIX: sample_weight must be permuted together with y, otherwise
    # each weight is paired with the wrong observation after sorting.
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)[order]
    self.X_ = as_float_array(X[order], copy=False)
    self.y_ = isotonic_regression(y[order], sample_weight, self.y_min,
                                  self.y_max, increasing=self.increasing_)

    return order_inv
222260

223261
def fit(self, X, y, sample_weight=None, weight=None):
224262
"""Fit the model using X, y as training data.
@@ -251,26 +289,16 @@ def fit(self, X, y, sample_weight=None, weight=None):
251289
DeprecationWarning)
252290
sample_weight = weight
253291

254-
X, y, sample_weight = check_arrays(X, y, sample_weight,
255-
sparse_format='dense')
256-
y = as_float_array(y)
257-
self._check_fit_data(X, y, sample_weight)
258-
259-
# Determine increasing if auto-determination requested
260-
if self.increasing == 'auto':
261-
self.increasing_ = check_increasing(X, y)
262-
else:
263-
self.increasing_ = self.increasing
264-
265-
order = np.argsort(X)
266-
self.X_ = as_float_array(X[order], copy=False)
267-
self.y_ = isotonic_regression(y[order], sample_weight, self.y_min,
268-
self.y_max, increasing=self.increasing_)
292+
# Build y_
293+
order_inv = self._build_y(X, y, sample_weight)
269294

270295
# Handle the left and right bounds on X
271296
self.X_min_ = np.min(self.X_)
272297
self.X_max_ = np.max(self.X_)
273298

299+
# Build f_
300+
self._build_f(self.X_, self.y_)
301+
274302
return self
275303

276304
def transform(self, T):
@@ -288,25 +316,17 @@ def transform(self, T):
288316
"""
289317
T = as_float_array(T)
290318
if len(T.shape) != 1:
291-
raise ValueError("X should be a vector")
292-
293-
# Handle the out_of_bounds argument by setting bounds_error and T
294-
if self.out_of_bounds == "raise":
295-
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
296-
bounds_error=True)
297-
elif self.out_of_bounds == "nan":
298-
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
299-
bounds_error=False)
300-
elif self.out_of_bounds == "clip":
301-
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
302-
bounds_error=False)
303-
T = np.clip(T, self.X_min_, self.X_max_)
304-
else:
319+
raise ValueError("Isotonic regression input should be a 1d array")
320+
321+
# Handle the out_of_bounds argument by clipping if needed
322+
if self.out_of_bounds not in ["raise", "nan", "clip"]:
305323
raise ValueError("The argument ``out_of_bounds`` must be in "
306324
"'nan', 'clip', 'raise'; got {0}"
307325
.format(self.out_of_bounds))
308326

309-
return f(T)
327+
if self.out_of_bounds == "clip":
328+
T = np.clip(T, self.X_min_, self.X_max_)
329+
return self.f_(T)
310330

311331
def fit_transform(self, X, y, sample_weight=None, weight=None):
312332
"""Fit model and transform y by linear interpolation.
@@ -340,27 +360,16 @@ def fit_transform(self, X, y, sample_weight=None, weight=None):
340360
DeprecationWarning)
341361
sample_weight = weight
342362

343-
X, y, sample_weight = check_arrays(X, y, sample_weight,
344-
sparse_format='dense')
345-
y = as_float_array(y)
346-
self._check_fit_data(X, y, sample_weight)
347-
348-
# Determine increasing if auto-determination requested
349-
if self.increasing == 'auto':
350-
self.increasing_ = check_increasing(X, y)
351-
else:
352-
self.increasing_ = self.increasing
353-
354-
order = np.lexsort((y, X))
355-
order_inv = np.argsort(order)
356-
self.X_ = as_float_array(X[order], copy=False)
357-
self.y_ = isotonic_regression(y[order], sample_weight, self.y_min,
358-
self.y_max, increasing=self.increasing_)
363+
# Build y_
364+
order_inv = self._build_y(X, y, sample_weight)
359365

360366
# Handle the left and right bounds on X
361367
self.X_min_ = np.min(self.X_)
362368
self.X_max_ = np.max(self.X_)
363369

370+
# Build f_
371+
self._build_f(self.X_, self.y_)
372+
364373
return self.y_[order_inv]
365374

366375
def predict(self, T):

sklearn/tests/test_isotonic.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from sklearn.utils.testing import assert_raises, assert_array_equal,\
77
assert_true, assert_false, assert_equal
88

9-
from sklearn.utils.testing import assert_warns_message, assert_no_warnings
9+
from sklearn.utils.testing import assert_warns, assert_warns_message,\
10+
assert_no_warnings
1011

1112

1213
def test_check_increasing_up():
@@ -212,10 +213,55 @@ def test_isotonic_regression_oob_bad():
212213

213214
# Create model and fit
214215
ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz")
215-
ir.fit(x, y)
216216

217217
# Make sure that we throw an error for bad out_of_bounds value
218-
assert_raises(ValueError, ir.predict, [min(x)-10, max(x)+10])
218+
assert_raises(ValueError, ir.fit, x, y)
219+
220+
221+
def test_isotonic_regression_oob_bad_after():
    """Setting a bad out_of_bounds after fitting must fail in transform."""
    # Training data
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Fit with a valid out_of_bounds value first
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Corrupt the parameter afterwards; transform should reject it
    ir.out_of_bounds = "xyz"
    assert_raises(ValueError, ir.transform, x)
233+
234+
235+
def test_isotonic_fit_weight_deprecation():
    """fit(..., weight=...) must emit a DeprecationWarning."""
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))
    n = len(y)

    ir = IsotonicRegression()
    assert_warns(DeprecationWarning, ir.fit, x, y,
                 weight=[1.0 / n] * n)
244+
245+
246+
def test_isotonic_fit_transform_weight_deprecation():
    """fit_transform(..., weight=...) must emit a DeprecationWarning."""
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))
    n = len(y)

    ir = IsotonicRegression()
    assert_warns(DeprecationWarning, ir.fit_transform, x, y,
                 weight=[1.0 / n] * n)
255+
256+
257+
def test_isotonic_regression_weight_deprecation():
    """isotonic_regression(..., weight=...) must emit a DeprecationWarning."""
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    n = len(y)

    assert_warns(DeprecationWarning, isotonic_regression, y,
                 weight=[1.0 / n] * n)
219265

220266

221267
if __name__ == "__main__":

0 commit comments

Comments
 (0)