
Commit 9cfacf1

maikia, ogrisel, and agramfort authored
DEP Deprecate 'normalize' in ridge models (#17772)
Co-authored-by: Olivier Grisel <[email protected]>
Co-authored-by: Alexandre Gramfort <[email protected]>
1 parent 877c6e6 commit 9cfacf1

File tree

9 files changed (+186, -81 lines)


doc/whats_new/v1.0.rst

Lines changed: 9 additions & 3 deletions
@@ -193,12 +193,18 @@ Changelog
   Motivation for this deprecation: ``normalize`` parameter did not take any
   effect if ``fit_intercept`` was set to False and therefore was deemed
   confusing.
-  The behavior of the deprecated LinearRegression(normalize=True) can be
+  The behavior of the deprecated LinearModel(normalize=True) can be
   reproduced with :class:`~sklearn.pipeline.Pipeline` with
-  :class:`~sklearn.preprocessing.StandardScaler`as follows:
-  make_pipeline(StandardScaler(with_mean=False), LinearRegression()).
+  :class:`~sklearn.preprocessing.StandardScaler` (where LinearModel is
+  LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as
+  follows:
+  make_pipeline(StandardScaler(with_mean=False), LinearModel()).
+  LinearRegression was deprecated in:
   :pr:`17743` by :user:`Maria Telenczuk <maikia>` and
   :user:`Alexandre Gramfort <agramfort>`.
+  Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV were deprecated in:
+  :pr:`17772` by :user:`Maria Telenczuk <maikia>` and
+  :user:`Alexandre Gramfort <agramfort>`.

 - |Fix|: `sample_weight` are now fully taken into account in linear models
   when `normalize=True` for both feature centering and feature
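A minimal sketch of the replacement described in this changelog entry, on synthetic data; only the make_pipeline(StandardScaler(with_mean=False), LinearModel()) pattern comes from the text above.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = rng.randn(100)

# Instead of LinearRegression(normalize=True), scale in a preprocessing step:
model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
model.fit(X, y)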

examples/linear_model/plot_huber_vs_ridge.py

Lines changed: 2 additions & 2 deletions
@@ -43,15 +43,15 @@
 colors = ['r-', 'b-', 'y-', 'm-']

 x = np.linspace(X.min(), X.max(), 7)
-epsilon_values = [1.35, 1.5, 1.75, 1.9]
+epsilon_values = [1, 1.5, 1.75, 1.9]
 for k, epsilon in enumerate(epsilon_values):
     huber = HuberRegressor(alpha=0.0, epsilon=epsilon)
     huber.fit(X, y)
     coef_ = huber.coef_ * x + huber.intercept_
     plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)

 # Fit a ridge regressor to compare it to huber regressor.
-ridge = Ridge(alpha=0.0, random_state=0, normalize=True)
+ridge = Ridge(alpha=0.0, random_state=0)
 ridge.fit(X, y)
 coef_ridge = ridge.coef_
 coef_ = ridge.coef_ * x + ridge.intercept_
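A note on why dropping normalize=True is safe in this particular example: with alpha=0.0, Ridge reduces to ordinary least squares, and an OLS fit with an intercept is invariant to per-feature rescaling. A minimal check of that invariance on synthetic data (not part of the example itself):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 3.0 + 0.1 * rng.randn(50)

scale = np.linalg.norm(X, axis=0)           # per-column rescaling factor
plain = Ridge(alpha=0.0).fit(X, y)          # unpenalized, i.e. plain OLS
scaled = Ridge(alpha=0.0).fit(X / scale, y)

# Predictions agree once the same rescaling is applied at predict time.
np.testing.assert_allclose(plain.predict(X), scaled.predict(X / scale), rtol=1e-6)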

sklearn/linear_model/_base.py

Lines changed: 43 additions & 22 deletions
@@ -101,38 +101,59 @@ def _deprecate_normalize(normalize, default, estimator_name):
     else:
         _normalize = normalize

+    pipeline_msg = (
+        "If you wish to scale the data, use Pipeline with a StandardScaler "
+        "in a preprocessing stage. To reproduce the previous behavior:\n\n"
+        "from sklearn.pipeline import make_pipeline\n\n"
+        "model = make_pipeline(StandardScaler(with_mean=False), "
+        f"{estimator_name}())\n\n"
+        "If you wish to pass a sample_weight parameter, you need to pass it "
+        "as a fit parameter to each step of the pipeline as follows:\n\n"
+        "kwargs = {s[0] + '__sample_weight': sample_weight for s "
+        "in model.steps}\n"
+        "model.fit(X, y, **kwargs)\n\n"
+    )
+
+    if estimator_name == 'Ridge' or estimator_name == 'RidgeClassifier':
+        alpha_msg = 'Set parameter alpha to: original_alpha * n_samples. '
+    elif 'Lasso' in estimator_name:
+        alpha_msg = (
+            'Set parameter alpha to: original_alpha * np.sqrt(n_samples). '
+        )
+    elif 'ElasticNet' in estimator_name:
+        alpha_msg = (
+            'Set parameter alpha to original_alpha * np.sqrt(n_samples) if '
+            'l1_ratio is 1, and to original_alpha * n_samples if l1_ratio is '
+            '0. For other values of l1_ratio, no analytic formula is '
+            'available.'
+        )
+    elif estimator_name == 'RidgeCV' or estimator_name == 'RidgeClassifierCV':
+        alpha_msg = 'Set parameter alphas to: original_alphas * n_samples. '
+    else:
+        alpha_msg = ""
+
     if default and normalize == 'deprecated':
         warnings.warn(
             "The default of 'normalize' will be set to False in version 1.2 "
-            "and deprecated in version 1.4. \nPass normalize=False and use "
-            "Pipeline with a StandardScaler in a preprocessing stage if you "
-            "wish to reproduce the previous behavior:\n"
-            "model = make_pipeline(StandardScaler(with_mean=False), \n"
-            f"{estimator_name}(normalize=False))\n"
-            "If you wish to use additional parameters in "
-            "the fit() you can include them as follows:\n"
-            "kwargs = {model.steps[-1][0] + "
-            "'__<your_param_name>': <your_param_value>}\n"
-            "model.fit(X, y, **kwargs)", FutureWarning
+            "and deprecated in version 1.4.\n" +
+            pipeline_msg + alpha_msg,
+            FutureWarning
         )
     elif normalize != 'deprecated' and normalize and not default:
         warnings.warn(
             "'normalize' was deprecated in version 1.0 and will be "
-            "removed in 1.2 \nIf you still wish to normalize use "
-            "Pipeline with a StandardScaler in a preprocessing stage if you "
-            "wish to reproduce the previous behavior:\n"
-            "model = make_pipeline(StandardScaler(with_mean=False), "
-            f"{estimator_name}()). \nIf you wish to use additional "
-            "parameters in the fit() you can include them as follows: "
-            "kwargs = {model.steps[-1][0] + "
-            "'__<your_param_name>': <your_param_value>}\n"
-            "model.fit(X, y, **kwargs)", FutureWarning
+            "removed in 1.2.\n" +
+            pipeline_msg + alpha_msg, FutureWarning
         )
     elif not normalize and not default:
         warnings.warn(
-            "'normalize' was deprecated in version 1.0 and will be"
-            " removed in 1.2 Don't set 'normalize' parameter"
-            " and leave it to its default value", FutureWarning
+            "'normalize' was deprecated in version 1.0 and will be "
+            "removed in 1.2. "
+            "Please leave the normalize parameter to its default value to "
+            "silence this warning. The default behavior of this estimator "
+            "is to not do any normalization. If normalization is needed "
+            "please use sklearn.preprocessing.StandardScaler instead.",
+            FutureWarning
         )

     return _normalize
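The rewritten warning message above also tells users how to keep passing sample_weight once they move to a pipeline. A minimal sketch of that routing pattern with illustrative data; it assumes a scikit-learn release where StandardScaler.fit accepts sample_weight (0.24+):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = rng.randn(100)
sample_weight = rng.uniform(0.5, 1.5, size=100)

model = make_pipeline(StandardScaler(with_mean=False), Ridge(alpha=1.0))

# Route the same sample_weight to every step of the pipeline in fit().
kwargs = {s[0] + "__sample_weight": sample_weight for s in model.steps}
model.fit(X, y, **kwargs)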

sklearn/linear_model/_glm/tests/test_glm.py

Lines changed: 2 additions & 0 deletions
@@ -294,6 +294,8 @@ def test_warm_start(fit_intercept):
     assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4)


+# FIXME: 'normalize' to be removed in 1.2 in LinearRegression
+@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)])
 @pytest.mark.parametrize('fit_intercept', [True, False])
 @pytest.mark.parametrize('sample_weight', [None, True])
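A minimal sketch (not part of the commit) of how the filterwarnings mark used above behaves: it takes a standard "action:message" warning-filter spec, so any warning whose message starts with "'normalize' was deprecated" is ignored and cannot fail a suite run with warnings turned into errors. It assumes a scikit-learn release (1.0/1.1) where normalize is still accepted:

import numpy as np
import pytest

from sklearn.linear_model import Ridge


@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
def test_fit_with_deprecated_normalize_stays_quiet():
    X = np.random.RandomState(0).randn(20, 3)
    y = X @ np.array([1.0, 2.0, 3.0])
    # Emits a FutureWarning on 1.0/1.1; the mark above swallows it, so this
    # test only checks that fit() still succeeds.
    Ridge(alpha=1.0, normalize=True).fit(X, y)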

sklearn/linear_model/_ridge.py

Lines changed: 44 additions & 17 deletions
@@ -17,7 +17,8 @@
 from scipy import sparse
 from scipy.sparse import linalg as sp_linalg

-from ._base import LinearClassifierMixin, LinearModel, _rescale_data
+from ._base import LinearClassifierMixin, LinearModel
+from ._base import _deprecate_normalize, _rescale_data
 from ._sag import sag_solver
 from ..base import RegressorMixin, MultiOutputMixin, is_classifier
 from ..utils.extmath import safe_sparse_dot
@@ -521,9 +522,9 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
 class _BaseRidge(LinearModel, metaclass=ABCMeta):
     @abstractmethod
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
-                 random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None, tol=1e-3,
+                 solver="auto", random_state=None):
         self.alpha = alpha
         self.fit_intercept = fit_intercept
         self.normalize = normalize
@@ -535,7 +536,11 @@ def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,

     def fit(self, X, y, sample_weight=None):

-        # all other solvers work at both float precision levels
+        self._normalize = _deprecate_normalize(
+            self.normalize, default=False,
+            estimator_name=self.__class__.__name__
+        )
+
         _dtype = [np.float64, np.float32]
         _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),
                                                   self.solver)
@@ -570,7 +575,7 @@ def fit(self, X, y, sample_weight=None):

         # when X is sparse we only remove offset from y
         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, self.fit_intercept, self._normalize, self.copy_X,
             sample_weight=sample_weight, return_mean=True)

         if solver == 'sag' and sparse.issparse(X) and self.fit_intercept:
@@ -640,6 +645,10 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     copy_X : bool, default=True
         If True, X will be copied; else, it may be overwritten.

@@ -731,9 +740,9 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     Ridge()
     """
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
-                 random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None, tol=1e-3,
+                 solver="auto", random_state=None):
         super().__init__(
             alpha=alpha, fit_intercept=fit_intercept,
             normalize=normalize, copy_X=copy_X,
@@ -794,6 +803,10 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     copy_X : bool, default=True
         If True, X will be copied; else, it may be overwritten.

@@ -889,9 +902,10 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
     0.9595...
     """
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, class_weight=None,
-                 solver="auto", random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None,
+                 tol=1e-3, class_weight=None, solver="auto",
+                 random_state=None):
         super().__init__(
             alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,
             copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,
@@ -1115,7 +1129,7 @@ class _RidgeGCV(LinearModel):
     """
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *,
-                 fit_intercept=True, normalize=False,
+                 fit_intercept=True, normalize='deprecated',
                  scoring=None, copy_X=True,
                  gcv_mode=None, store_cv_values=False,
                  is_clf=False, alpha_per_target=False):
@@ -1451,6 +1465,11 @@ def fit(self, X, y, sample_weight=None):
         -------
         self : object
         """
+        _normalize = _deprecate_normalize(
+            self.normalize, default=False,
+            estimator_name=self.__class__.__name__
+        )
+
         X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],
                                    dtype=[np.float64],
                                    multi_output=True, y_numeric=True)
@@ -1470,7 +1489,7 @@ def fit(self, X, y, sample_weight=None):
                 "negative or null value instead.".format(self.alphas))

         X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, self.fit_intercept, _normalize, self.copy_X,
             sample_weight=sample_weight)

         gcv_mode = _check_gcv_mode(X, self.gcv_mode)
@@ -1584,7 +1603,7 @@ def fit(self, X, y, sample_weight=None):
 class _BaseRidgeCV(LinearModel):
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *,
-                 fit_intercept=True, normalize=False, scoring=None,
+                 fit_intercept=True, normalize='deprecated', scoring=None,
                  cv=None, gcv_mode=None, store_cv_values=False,
                  alpha_per_target=False):
         self.alphas = np.asarray(alphas)
@@ -1699,6 +1718,10 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and will be removed in
+            1.2.
+
     scoring : string, callable, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
@@ -1828,6 +1851,10 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     scoring : string, callable, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
@@ -1911,8 +1938,8 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
     """
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *, fit_intercept=True,
-                 normalize=False, scoring=None, cv=None, class_weight=None,
-                 store_cv_values=False):
+                 normalize='deprecated', scoring=None, cv=None,
+                 class_weight=None, store_cv_values=False):
         super().__init__(
             alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,
             scoring=scoring, cv=cv, store_cv_values=store_cv_values)
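For Ridge and RidgeClassifier, the warning text added in _base.py also says to rescale alpha by n_samples when moving to the pipeline form. A minimal sketch of that migration on synthetic data; the original alpha value is illustrative, and the rescaling rule is taken from the alpha_msg text above rather than derived here:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
n_samples = 200
X = rng.randn(n_samples, 4)
y = X @ np.array([0.5, -1.0, 2.0, 0.0]) + rng.randn(n_samples)

original_alpha = 0.5  # the alpha previously used with Ridge(normalize=True)

# Replacement suggested by the warning for Ridge(alpha=original_alpha, normalize=True):
migrated = make_pipeline(
    StandardScaler(with_mean=False),
    Ridge(alpha=original_alpha * n_samples),
)
migrated.fit(X, y)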

sklearn/linear_model/tests/test_base.py

Lines changed: 0 additions & 28 deletions
@@ -159,7 +159,6 @@ def test_error_on_wrong_normalize():
     error_msg = "Leave 'normalize' to its default"
     with pytest.raises(ValueError, match=error_msg):
         _deprecate_normalize(normalize, default, 'estimator')
-    ValueError


 @pytest.mark.parametrize('normalize', [True, False, 'deprecated'])
@@ -222,33 +221,6 @@ def test_linear_regression_sparse(random_state=0):
     assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)


-@pytest.mark.parametrize(
-    'normalize, n_warnings, warning',
-    [(True, 1, FutureWarning),
-     (False, 1, FutureWarning),
-     ("deprecated", 0, None)]
-)
-# FIXME remove test in 1.4
-def test_linear_regression_normalize_deprecation(
-    normalize, n_warnings, warning
-):
-    # check that we issue a FutureWarning when normalize was set in
-    # LinearRegression
-    rng = check_random_state(0)
-    n_samples = 200
-    n_features = 2
-    X = rng.randn(n_samples, n_features)
-    X[X < 0.1] = 0.0
-    y = rng.rand(n_samples)
-
-    model = LinearRegression(normalize=normalize)
-    with pytest.warns(warning) as record:
-        model.fit(X, y)
-    assert len(record) == n_warnings
-    if n_warnings:
-        assert "'normalize' was deprecated" in str(record[0].message)
-
-
 # FIXME: 'normalize' to be removed in 1.2 in LinearRegression
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize('normalize', [True, False])
New file — Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+# Author: Maria Telenczuk <https://github.com/maikia>
+#
+# License: BSD 3 clause
+
+import pytest
+
+import numpy as np
+
+from sklearn.base import is_classifier
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import RidgeCV
+from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import RidgeClassifierCV
+
+from sklearn.utils import check_random_state
+
+
+@pytest.mark.parametrize(
+    'normalize, n_warnings, warning_category',
+    [(True, 1, FutureWarning),
+     (False, 1, FutureWarning),
+     ("deprecated", 0, None)]
+)
+@pytest.mark.parametrize(
+    "estimator",
+    [LinearRegression, Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV]
+)
+# FIXME remove test in 1.2
+def test_linear_model_normalize_deprecation_message(
+    estimator,
+    normalize, n_warnings, warning_category
+):
+    # check that we issue a FutureWarning when normalize was set in
+    # linear model
+    rng = check_random_state(0)
+    n_samples = 200
+    n_features = 2
+    X = rng.randn(n_samples, n_features)
+    X[X < 0.1] = 0.0
+    y = rng.rand(n_samples)
+    if is_classifier(estimator):
+        y = np.sign(y)
+
+    model = estimator(normalize=normalize)
+    with pytest.warns(warning_category) as record:
+        model.fit(X, y)
+    # Filter record in case other unrelated warnings are raised
+    unwanted = [r for r in record if r.category != warning_category]
+    if len(unwanted):
+        msg = "unexpected warnings:\n"
+        for w in unwanted:
+            msg += str(w)
+            msg += "\n"
+        raise AssertionError(msg)
+    wanted = [r for r in record if r.category == warning_category]
+    if warning_category is not None:
+        assert "'normalize' was deprecated" in str(wanted[0].message)
+    assert len(wanted) == n_warnings
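Downstream code that still passes normalize during the deprecation window (scikit-learn 1.0/1.1) can silence this FutureWarning locally rather than suite-wide; a minimal sketch, not part of the commit:

import warnings

import numpy as np
from sklearn.linear_model import Ridge

X = np.random.RandomState(0).randn(30, 2)
y = X @ np.array([1.0, -1.0])

with warnings.catch_warnings():
    # Match the start of the message emitted by _deprecate_normalize.
    warnings.filterwarnings(
        "ignore",
        message="'normalize' was deprecated",
        category=FutureWarning,
    )
    Ridge(normalize=True).fit(X, y)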

0 commit comments
