
Commit 9cfacf1

maikia, ogrisel, and agramfort authored
DEP Deprecate 'normalize' in ridge models (#17772)
Co-authored-by: Olivier Grisel <[email protected]>
Co-authored-by: Alexandre Gramfort <[email protected]>
1 parent 877c6e6 commit 9cfacf1

File tree

9 files changed (+186, -81 lines)


doc/whats_new/v1.0.rst

Lines changed: 9 additions & 3 deletions
@@ -193,12 +193,18 @@ Changelog
   Motivation for this deprecation: ``normalize`` parameter did not take any
   effect if ``fit_intercept`` was set to False and therefore was deemed
   confusing.
-  The behavior of the deprecated LinearRegression(normalize=True) can be
+  The behavior of the deprecated LinearModel(normalize=True) can be
   reproduced with :class:`~sklearn.pipeline.Pipeline` with
-  :class:`~sklearn.preprocessing.StandardScaler`as follows:
-  make_pipeline(StandardScaler(with_mean=False), LinearRegression()).
+  :class:`~sklearn.preprocessing.StandardScaler` (where LinearModel is
+  LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as
+  follows:
+  make_pipeline(StandardScaler(with_mean=False), LinearModel()).
+  LinearRegression was deprecated in:
   :pr:`17743` by :user:`Maria Telenczuk <maikia>` and
   :user:`Alexandre Gramfort <agramfort>`.
+  Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV were deprecated in:
+  :pr:`17772` by :user:`Maria Telenczuk <maikia>` and
+  :user:`Alexandre Gramfort <agramfort>`.

 - |Fix|: `sample_weight` are now fully taken into account in linear models
   when `normalize=True` for both feature centering and feature
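A minimal sketch of the replacement described in this changelog entry, on synthetic data; only the make_pipeline(StandardScaler(with_mean=False), LinearModel()) pattern comes from the text above.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = rng.randn(100)

# Instead of LinearRegression(normalize=True), scale in a preprocessing step:
model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
model.fit(X, y)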

examples/linear_model/plot_huber_vs_ridge.py

Lines changed: 2 additions & 2 deletions
@@ -43,15 +43,15 @@
 colors = ['r-', 'b-', 'y-', 'm-']

 x = np.linspace(X.min(), X.max(), 7)
-epsilon_values = [1.35, 1.5, 1.75, 1.9]
+epsilon_values = [1, 1.5, 1.75, 1.9]
 for k, epsilon in enumerate(epsilon_values):
     huber = HuberRegressor(alpha=0.0, epsilon=epsilon)
     huber.fit(X, y)
     coef_ = huber.coef_ * x + huber.intercept_
     plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)

 # Fit a ridge regressor to compare it to huber regressor.
-ridge = Ridge(alpha=0.0, random_state=0, normalize=True)
+ridge = Ridge(alpha=0.0, random_state=0)
 ridge.fit(X, y)
 coef_ridge = ridge.coef_
 coef_ = ridge.coef_ * x + ridge.intercept_
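A note on why dropping normalize=True is safe in this particular example: with alpha=0.0, Ridge reduces to ordinary least squares, and an OLS fit with an intercept is invariant to per-feature rescaling. A minimal check of that invariance on synthetic data (not part of the example itself):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 3.0 + 0.1 * rng.randn(50)

scale = np.linalg.norm(X, axis=0)           # per-column rescaling factor
plain = Ridge(alpha=0.0).fit(X, y)          # unpenalized, i.e. plain OLS
scaled = Ridge(alpha=0.0).fit(X / scale, y)

# Predictions agree once the same rescaling is applied at predict time.
np.testing.assert_allclose(plain.predict(X), scaled.predict(X / scale), rtol=1e-6)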

sklearn/linear_model/_base.py

Lines changed: 43 additions & 22 deletions
@@ -101,38 +101,59 @@ def _deprecate_normalize(normalize, default, estimator_name):
     else:
         _normalize = normalize

+    pipeline_msg = (
+        "If you wish to scale the data, use Pipeline with a StandardScaler "
+        "in a preprocessing stage. To reproduce the previous behavior:\n\n"
+        "from sklearn.pipeline import make_pipeline\n\n"
+        "model = make_pipeline(StandardScaler(with_mean=False), "
+        f"{estimator_name}())\n\n"
+        "If you wish to pass a sample_weight parameter, you need to pass it "
+        "as a fit parameter to each step of the pipeline as follows:\n\n"
+        "kwargs = {s[0] + '__sample_weight': sample_weight for s "
+        "in model.steps}\n"
+        "model.fit(X, y, **kwargs)\n\n"
+    )
+
+    if estimator_name == 'Ridge' or estimator_name == 'RidgeClassifier':
+        alpha_msg = 'Set parameter alpha to: original_alpha * n_samples. '
+    elif 'Lasso' in estimator_name:
+        alpha_msg = (
+            'Set parameter alpha to: original_alpha * np.sqrt(n_samples). '
+        )
+    elif 'ElasticNet' in estimator_name:
+        alpha_msg = (
+            'Set parameter alpha to original_alpha * np.sqrt(n_samples) if '
+            'l1_ratio is 1, and to original_alpha * n_samples if l1_ratio is '
+            '0. For other values of l1_ratio, no analytic formula is '
+            'available.'
+        )
+    elif estimator_name == 'RidgeCV' or estimator_name == 'RidgeClassifierCV':
+        alpha_msg = 'Set parameter alphas to: original_alphas * n_samples. '
+    else:
+        alpha_msg = ""
+
     if default and normalize == 'deprecated':
         warnings.warn(
             "The default of 'normalize' will be set to False in version 1.2 "
-            "and deprecated in version 1.4. \nPass normalize=False and use "
-            "Pipeline with a StandardScaler in a preprocessing stage if you "
-            "wish to reproduce the previous behavior:\n"
-            "model = make_pipeline(StandardScaler(with_mean=False), \n"
-            f"{estimator_name}(normalize=False))\n"
-            "If you wish to use additional parameters in "
-            "the fit() you can include them as follows:\n"
-            "kwargs = {model.steps[-1][0] + "
-            "'__<your_param_name>': <your_param_value>}\n"
-            "model.fit(X, y, **kwargs)", FutureWarning
+            "and deprecated in version 1.4.\n" +
+            pipeline_msg + alpha_msg,
+            FutureWarning
         )
     elif normalize != 'deprecated' and normalize and not default:
         warnings.warn(
             "'normalize' was deprecated in version 1.0 and will be "
-            "removed in 1.2 \nIf you still wish to normalize use "
-            "Pipeline with a StandardScaler in a preprocessing stage if you "
-            "wish to reproduce the previous behavior:\n"
-            "model = make_pipeline(StandardScaler(with_mean=False), "
-            f"{estimator_name}()). \nIf you wish to use additional "
-            "parameters in the fit() you can include them as follows: "
-            "kwargs = {model.steps[-1][0] + "
-            "'__<your_param_name>': <your_param_value>}\n"
-            "model.fit(X, y, **kwargs)", FutureWarning
+            "removed in 1.2.\n" +
+            pipeline_msg + alpha_msg, FutureWarning
         )
     elif not normalize and not default:
         warnings.warn(
-            "'normalize' was deprecated in version 1.0 and will be"
-            " removed in 1.2 Don't set 'normalize' parameter"
-            " and leave it to its default value", FutureWarning
+            "'normalize' was deprecated in version 1.0 and will be "
+            "removed in 1.2. "
+            "Please leave the normalize parameter to its default value to "
+            "silence this warning. The default behavior of this estimator "
+            "is to not do any normalization. If normalization is needed "
+            "please use sklearn.preprocessing.StandardScaler instead.",
+            FutureWarning
         )

     return _normalize
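The rewritten warning message above also tells users how to keep passing sample_weight once they move to a pipeline. A minimal sketch of that routing pattern with illustrative data; it assumes a scikit-learn release where StandardScaler.fit accepts sample_weight (0.24+):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = rng.randn(100)
sample_weight = rng.uniform(0.5, 1.5, size=100)

model = make_pipeline(StandardScaler(with_mean=False), Ridge(alpha=1.0))

# Route the same sample_weight to every step of the pipeline in fit().
kwargs = {s[0] + "__sample_weight": sample_weight for s in model.steps}
model.fit(X, y, **kwargs)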

sklearn/linear_model/_glm/tests/test_glm.py

Lines changed: 2 additions & 0 deletions
@@ -294,6 +294,8 @@ def test_warm_start(fit_intercept):
     assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4)


+# FIXME: 'normalize' to be removed in 1.2 in LinearRegression
+@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)])
 @pytest.mark.parametrize('fit_intercept', [True, False])
 @pytest.mark.parametrize('sample_weight', [None, True])
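A minimal sketch (not part of the commit) of how the filterwarnings mark used above behaves: it takes a standard "action:message" warning-filter spec, so any warning whose message starts with "'normalize' was deprecated" is ignored and cannot fail a suite run with warnings turned into errors. It assumes a scikit-learn release (1.0/1.1) where normalize is still accepted:

import numpy as np
import pytest

from sklearn.linear_model import Ridge


@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
def test_fit_with_deprecated_normalize_stays_quiet():
    X = np.random.RandomState(0).randn(20, 3)
    y = X @ np.array([1.0, 2.0, 3.0])
    # Emits a FutureWarning on 1.0/1.1; the mark above swallows it, so this
    # test only checks that fit() still succeeds.
    Ridge(alpha=1.0, normalize=True).fit(X, y)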

sklearn/linear_model/_ridge.py

Lines changed: 44 additions & 17 deletions
@@ -17,7 +17,8 @@
 from scipy import sparse
 from scipy.sparse import linalg as sp_linalg

-from ._base import LinearClassifierMixin, LinearModel, _rescale_data
+from ._base import LinearClassifierMixin, LinearModel
+from ._base import _deprecate_normalize, _rescale_data
 from ._sag import sag_solver
 from ..base import RegressorMixin, MultiOutputMixin, is_classifier
 from ..utils.extmath import safe_sparse_dot
@@ -521,9 +522,9 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
 class _BaseRidge(LinearModel, metaclass=ABCMeta):
     @abstractmethod
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
-                 random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None, tol=1e-3,
+                 solver="auto", random_state=None):
         self.alpha = alpha
         self.fit_intercept = fit_intercept
         self.normalize = normalize
@@ -535,7 +536,11 @@ def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,

     def fit(self, X, y, sample_weight=None):

-        # all other solvers work at both float precision levels
+        self._normalize = _deprecate_normalize(
+            self.normalize, default=False,
+            estimator_name=self.__class__.__name__
+        )
+
         _dtype = [np.float64, np.float32]
         _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),
                                                   self.solver)
@@ -570,7 +575,7 @@ def fit(self, X, y, sample_weight=None):

         # when X is sparse we only remove offset from y
         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, self.fit_intercept, self._normalize, self.copy_X,
             sample_weight=sample_weight, return_mean=True)

         if solver == 'sag' and sparse.issparse(X) and self.fit_intercept:
@@ -640,6 +645,10 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     copy_X : bool, default=True
         If True, X will be copied; else, it may be overwritten.

@@ -731,9 +740,9 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     Ridge()
     """
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
-                 random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None, tol=1e-3,
+                 solver="auto", random_state=None):
         super().__init__(
             alpha=alpha, fit_intercept=fit_intercept,
             normalize=normalize, copy_X=copy_X,
@@ -794,6 +803,10 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     copy_X : bool, default=True
         If True, X will be copied; else, it may be overwritten.

@@ -889,9 +902,10 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
     0.9595...
     """
     @_deprecate_positional_args
-    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,
-                 copy_X=True, max_iter=None, tol=1e-3, class_weight=None,
-                 solver="auto", random_state=None):
+    def __init__(self, alpha=1.0, *, fit_intercept=True,
+                 normalize='deprecated', copy_X=True, max_iter=None,
+                 tol=1e-3, class_weight=None, solver="auto",
+                 random_state=None):
         super().__init__(
             alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,
             copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,
@@ -1115,7 +1129,7 @@ class _RidgeGCV(LinearModel):
     """
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *,
-                 fit_intercept=True, normalize=False,
+                 fit_intercept=True, normalize='deprecated',
                  scoring=None, copy_X=True,
                  gcv_mode=None, store_cv_values=False,
                  is_clf=False, alpha_per_target=False):
@@ -1451,6 +1465,11 @@ def fit(self, X, y, sample_weight=None):
         -------
         self : object
         """
+        _normalize = _deprecate_normalize(
+            self.normalize, default=False,
+            estimator_name=self.__class__.__name__
+        )
+
         X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],
                                    dtype=[np.float64],
                                    multi_output=True, y_numeric=True)
@@ -1470,7 +1489,7 @@ def fit(self, X, y, sample_weight=None):
                 "negative or null value instead.".format(self.alphas))

         X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, self.fit_intercept, _normalize, self.copy_X,
             sample_weight=sample_weight)

         gcv_mode = _check_gcv_mode(X, self.gcv_mode)
@@ -1584,7 +1603,7 @@ def fit(self, X, y, sample_weight=None):
 class _BaseRidgeCV(LinearModel):
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *,
-                 fit_intercept=True, normalize=False, scoring=None,
+                 fit_intercept=True, normalize='deprecated', scoring=None,
                  cv=None, gcv_mode=None, store_cv_values=False,
                  alpha_per_target=False):
         self.alphas = np.asarray(alphas)
@@ -1699,6 +1718,10 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and will be removed in
+            1.2.
+
     scoring : string, callable, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
@@ -1828,6 +1851,10 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
         :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.

+        .. deprecated:: 1.0
+            ``normalize`` was deprecated in version 1.0 and
+            will be removed in 1.2.
+
     scoring : string, callable, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
@@ -1911,8 +1938,8 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
     """
     @_deprecate_positional_args
     def __init__(self, alphas=(0.1, 1.0, 10.0), *, fit_intercept=True,
-                 normalize=False, scoring=None, cv=None, class_weight=None,
-                 store_cv_values=False):
+                 normalize='deprecated', scoring=None, cv=None,
+                 class_weight=None, store_cv_values=False):
         super().__init__(
             alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,
             scoring=scoring, cv=cv, store_cv_values=store_cv_values)
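For Ridge and RidgeClassifier, the warning text added in _base.py also says to rescale alpha by n_samples when moving to the pipeline form. A minimal sketch of that migration on synthetic data; the original alpha value is illustrative, and the rescaling rule is taken from the alpha_msg text above rather than derived here:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
n_samples = 200
X = rng.randn(n_samples, 4)
y = X @ np.array([0.5, -1.0, 2.0, 0.0]) + rng.randn(n_samples)

original_alpha = 0.5  # the alpha previously used with Ridge(normalize=True)

# Replacement suggested by the warning for Ridge(alpha=original_alpha, normalize=True):
migrated = make_pipeline(
    StandardScaler(with_mean=False),
    Ridge(alpha=original_alpha * n_samples),
)
migrated.fit(X, y)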

sklearn/linear_model/tests/test_base.py

Lines changed: 0 additions & 28 deletions
@@ -159,7 +159,6 @@ def test_error_on_wrong_normalize():
     error_msg = "Leave 'normalize' to its default"
     with pytest.raises(ValueError, match=error_msg):
         _deprecate_normalize(normalize, default, 'estimator')
-    ValueError


 @pytest.mark.parametrize('normalize', [True, False, 'deprecated'])
@@ -222,33 +221,6 @@ def test_linear_regression_sparse(random_state=0):
     assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)


-@pytest.mark.parametrize(
-    'normalize, n_warnings, warning',
-    [(True, 1, FutureWarning),
-     (False, 1, FutureWarning),
-     ("deprecated", 0, None)]
-)
-# FIXME remove test in 1.4
-def test_linear_regression_normalize_deprecation(
-    normalize, n_warnings, warning
-):
-    # check that we issue a FutureWarning when normalize was set in
-    # LinearRegression
-    rng = check_random_state(0)
-    n_samples = 200
-    n_features = 2
-    X = rng.randn(n_samples, n_features)
-    X[X < 0.1] = 0.0
-    y = rng.rand(n_samples)
-
-    model = LinearRegression(normalize=normalize)
-    with pytest.warns(warning) as record:
-        model.fit(X, y)
-    assert len(record) == n_warnings
-    if n_warnings:
-        assert "'normalize' was deprecated" in str(record[0].message)
-
-
 # FIXME: 'normalize' to be removed in 1.2 in LinearRegression
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize('normalize', [True, False])
New file — Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+# Author: Maria Telenczuk <https://github.com/maikia>
+#
+# License: BSD 3 clause
+
+import pytest
+
+import numpy as np
+
+from sklearn.base import is_classifier
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import RidgeCV
+from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import RidgeClassifierCV
+
+from sklearn.utils import check_random_state
+
+
+@pytest.mark.parametrize(
+    'normalize, n_warnings, warning_category',
+    [(True, 1, FutureWarning),
+     (False, 1, FutureWarning),
+     ("deprecated", 0, None)]
+)
+@pytest.mark.parametrize(
+    "estimator",
+    [LinearRegression, Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV]
+)
+# FIXME remove test in 1.2
+def test_linear_model_normalize_deprecation_message(
+    estimator,
+    normalize, n_warnings, warning_category
+):
+    # check that we issue a FutureWarning when normalize was set in
+    # linear model
+    rng = check_random_state(0)
+    n_samples = 200
+    n_features = 2
+    X = rng.randn(n_samples, n_features)
+    X[X < 0.1] = 0.0
+    y = rng.rand(n_samples)
+    if is_classifier(estimator):
+        y = np.sign(y)
+
+    model = estimator(normalize=normalize)
+    with pytest.warns(warning_category) as record:
+        model.fit(X, y)
+    # Filter record in case other unrelated warnings are raised
+    unwanted = [r for r in record if r.category != warning_category]
+    if len(unwanted):
+        msg = "unexpected warnings:\n"
+        for w in unwanted:
+            msg += str(w)
+            msg += "\n"
+        raise AssertionError(msg)
+    wanted = [r for r in record if r.category == warning_category]
+    if warning_category is not None:
+        assert "'normalize' was deprecated" in str(wanted[0].message)
+    assert len(wanted) == n_warnings
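Downstream code that still passes normalize during the deprecation window (scikit-learn 1.0/1.1) can silence this FutureWarning locally rather than suite-wide; a minimal sketch, not part of the commit:

import warnings

import numpy as np
from sklearn.linear_model import Ridge

X = np.random.RandomState(0).randn(30, 2)
y = X @ np.array([1.0, -1.0])

with warnings.catch_warnings():
    # Match the start of the message emitted by _deprecate_normalize.
    warnings.filterwarnings(
        "ignore",
        message="'normalize' was deprecated",
        category=FutureWarning,
    )
    Ridge(normalize=True).fit(X, y)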

0 commit comments
