|
22 | 22 | # %% |
23 | 23 |
|
24 | 24 | import numpy as np |
| 25 | +import warnings |
25 | 26 | from sklearn.base import BaseEstimator |
26 | 27 | from sklearn.base import ClassifierMixin |
| 28 | +from sklearn.base import RegressorMixin |
27 | 29 | from sklearn.base import MetaEstimatorMixin |
28 | 30 | from sklearn.base import TransformerMixin |
29 | 31 | from sklearn.base import clone |
30 | 32 | from sklearn.utils.metadata_requests import RequestType |
31 | 33 | from sklearn.utils.metadata_requests import metadata_request_factory |
32 | 34 | from sklearn.utils.metadata_requests import MetadataRouter |
33 | 35 | from sklearn.utils.validation import check_is_fitted |
| 36 | +from sklearn.linear_model import LinearRegression |
34 | 37 |
|
35 | 38 | N, M = 100, 4 |
36 | 39 | X = np.random.rand(N, M) |
@@ -519,3 +522,103 @@ def transform(self, X, bar=None): |
519 | 522 | ), |
520 | 523 | ) |
521 | 524 | est.fit(X, y, foo=my_weights, bar=my_groups).predict(X[:3], bar=my_groups) |
| 525 | + |
| 526 | +# %% |
# Deprecation / Default Value Change
# ----------------------------------
| 529 | +# In this section we show how one should handle the case where a router becomes |
| 530 | +# also a consumer, especially when it consumes the same metadata as its |
| 531 | +# sub-estimator. In this case, a warning should be raised for a while, to let |
| 532 | +# users know the behavior is changed from previous versions. |
| 533 | + |
| 534 | + |
class MetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator):
    """A minimal meta-regressor that routes fit-time metadata to a sub-estimator.

    Parameters
    ----------
    estimator : estimator object
        The sub-estimator to be cloned and fitted. Metadata requested by it
        (e.g. ``sample_weight``) is routed through this meta-estimator.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y, **fit_params):
        """Fit the sub-estimator, forwarding only the metadata it requested.

        Returns
        -------
        self : MetaRegressor
            The fitted meta-estimator.
        """
        # Raise if the caller passed metadata that no consumer requested
        # (ignore_extras=False).
        metadata_request_factory(self).fit.validate_metadata(
            ignore_extras=False, self_metadata=super(), kwargs=fit_params
        )
        # Select exactly the kwargs the sub-estimator's ``fit`` requested.
        fit_params_ = metadata_request_factory(self.estimator).fit.get_method_input(
            ignore_extras=False, kwargs=fit_params
        )
        self.estimator_ = clone(self.estimator).fit(X, y, **fit_params_)
        # BUG FIX: ``fit`` must return ``self`` per the scikit-learn API
        # convention so calls can be chained, e.g. ``est.fit(...).predict(...)``
        # as done elsewhere in this file.
        return self

    def get_metadata_request(self):
        """Return this estimator's metadata request, built from the sub-estimator's."""
        router = MetadataRouter().add(
            self.estimator, mapping="one-to-one", overwrite=False, mask=True
        )
        return router.get_metadata_request()
| 553 | + |
| 554 | + |
| 555 | +# %% |
| 556 | +# As explained above, this is now a valid usage: |
| 557 | + |
# The inner estimator explicitly requests ``sample_weight``; the router
# therefore accepts and forwards it.
weighted_lr = LinearRegression().fit_requests(sample_weight=True)
reg = MetaRegressor(estimator=weighted_lr)
reg.fit(X, y, sample_weight=my_weights)
| 560 | + |
| 561 | + |
| 562 | +# %% |
| 563 | +# Now imagine we further develop ``MetaRegressor`` and it now also *consumes* |
| 564 | +# ``sample_weight``: |
| 565 | + |
| 566 | + |
class SampledMetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator):
    """A meta-regressor which *also consumes* ``sample_weight`` itself.

    The ``RequestType.WARN`` default request value makes routing warn users
    that ``sample_weight`` is now consumed by this estimator as well — a
    behavior change from previous versions.

    Parameters
    ----------
    estimator : estimator object
        The sub-estimator to be cloned and fitted.
    """

    # WARN (rather than True/False) triggers a warning when ``sample_weight``
    # is passed without being explicitly requested.
    __metadata_request__sample_weight = {"fit": {"sample_weight": RequestType.WARN}}

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit the sub-estimator, consuming and routing ``sample_weight``.

        Returns
        -------
        self : SampledMetaRegressor
            The fitted meta-estimator.
        """
        # Fold the explicitly-named parameter back into the metadata dict so
        # validation and routing see one uniform set of kwargs.
        if sample_weight is not None:
            fit_params["sample_weight"] = sample_weight
        metadata_request_factory(self).fit.validate_metadata(
            ignore_extras=False, self_metadata=super(), kwargs=fit_params
        )
        # ignore_extras=True: metadata consumed only by *this* estimator need
        # not also be requested by the sub-estimator.
        estimator_fit_params = metadata_request_factory(
            self.estimator
        ).fit.get_method_input(ignore_extras=True, kwargs=fit_params)
        self.estimator_ = clone(self.estimator).fit(X, y, **estimator_fit_params)
        # BUG FIX: return ``self`` per the scikit-learn ``fit`` convention so
        # calls can be chained.
        return self

    def __init__(self, estimator):
        self.estimator = estimator

    def get_metadata_request(self):
        """Compose this estimator's own request with the sub-estimator's."""
        router = (
            MetadataRouter()
            .add(super(), mapping="one-to-one", overwrite=False, mask=False)
            .add(self.estimator, mapping="one-to-one", overwrite="smart", mask=True)
        )
        return router.get_metadata_request()
| 591 | + |
| 592 | + |
| 593 | +# %% |
# The above implementation is almost no different from ``MetaRegressor``, and
# because of the default request value defined in
# ``__metadata_request__sample_weight``, a warning is raised.
| 597 | + |
inner_est = LinearRegression().fit_requests(sample_weight=False)
with warnings.catch_warnings(record=True) as captured:
    SampledMetaRegressor(estimator=inner_est).fit(X, y, sample_weight=my_weights)
for warning in captured:
    print(warning.message)
| 604 | + |
| 605 | + |
| 606 | +# %% |
# When an estimator supports metadata which wasn't supported before, the
# following pattern can be used to warn users about it.
| 609 | + |
| 610 | + |
class ExampleRegressor(RegressorMixin, BaseEstimator):
    """A toy regressor whose ``fit`` newly accepts ``sample_weight``.

    The ``RequestType.WARN`` default request value makes routers warn users
    that ``sample_weight`` is supported but was not explicitly requested.
    """

    __metadata_request__sample_weight = {"fit": {"sample_weight": RequestType.WARN}}

    def fit(self, X, y, sample_weight=None):
        """No-op fit; returns the estimator itself."""
        return self

    def predict(self, X):
        """Predict zero for every input sample."""
        n_samples = len(X)
        return np.zeros(n_samples)
| 619 | + |
| 620 | + |
with warnings.catch_warnings(record=True) as captured:
    meta = MetaRegressor(estimator=ExampleRegressor())
    meta.fit(X, y, sample_weight=my_weights)
for warning in captured:
    print(warning.message)
0 commit comments