FIX check (and enforce) that estimators can accept different dtypes.

amueller · amueller · commit c2f0b31679a5 · 2015-02-04T19:47:58.000+01:00
diff --git a/sklearn/cluster/spectral.py b/sklearn/cluster/spectral.py
@@ -243,7 +243,7 @@ def spectral_clustering(affinity, n_clusters=8, n_components=None,
     This algorithm solves the normalized cut for k=2: it is a
     normalized spectral clustering.
     """
-    if not assign_labels in ('kmeans', 'discretize'):
+    if assign_labels not in ('kmeans', 'discretize'):
         raise ValueError("The 'assign_labels' parameter should be "
                          "'kmeans' or 'discretize', but '%s' was given"
                          % assign_labels)
@@ -415,7 +415,7 @@ def fit(self, X):
             OR, if affinity==`precomputed`, a precomputed affinity
             matrix of shape (n_samples, n_samples)
         """
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float)
         if X.shape[0] == X.shape[1] and self.affinity != "precomputed":
             warnings.warn("The spectral clustering API has changed. ``fit``"
                           "now constructs an affinity matrix from data. To use"
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
@@ -389,7 +389,6 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
     if selection not in ['random', 'cyclic']:
         raise ValueError("selection should be either random or cyclic.")
     random = (selection == 'random')
-    models = []
 
     if not multi_output:
         coefs = np.empty((n_features, n_alphas), dtype=np.float64)
@@ -1016,7 +1015,7 @@ def fit(self, X, y):
             # Let us not impose fortran ordering or float64 so far: it is
             # not useful for the cross-validation loop and will be done
             # by the model fitting itself
-            X = check_array(X, 'csc', copy=False)
+            X = check_array(X, 'csc', copy=False, dtype=np.float64)
             if sparse.isspmatrix(X):
                 if not np.may_share_memory(reference_to_old_X.data, X.data):
                     # X is a sparse matrix and has been copied
@@ -1418,6 +1417,7 @@ def __init__(self, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
         self.random_state = random_state
         self.selection = selection
 
+
 ###############################################################################
 # Multi Task ElasticNet and Lasso models (with joint feature selection)
 
diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py
@@ -805,7 +805,7 @@ def fit(self, X, y):
         self : object
             returns an instance of self.
         """
-        X, y = check_X_y(X, y)
+        X, y = check_X_y(X, y, dtype=np.float)
         cv = check_cv(self.cv, X, y, classifier=False)
         max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
                     if not self.max_iter
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
@@ -30,6 +30,7 @@
 from sklearn.utils.estimator_checks import (
     check_parameters_default_constructible,
     check_estimator_sparse_data,
+    check_estimators_dtypes,
     check_transformer,
     check_clustering,
     check_clusterer_compute_labels_predict,
@@ -87,12 +88,14 @@ def test_non_meta_estimators():
     estimators = all_estimators(type_filter=['classifier', 'regressor',
                                              'transformer', 'cluster'])
     for name, Estimator in estimators:
+        if name not in CROSS_DECOMPOSITION + ['SelectFdr']:
+            yield check_estimators_dtypes, name, Estimator
+
         if name not in CROSS_DECOMPOSITION + ['Imputer']:
             # Test that all estimators check their input for NaN's and infs
             yield check_estimators_nan_inf, name, Estimator
 
-        if (name not in ['CCA', '_CCA', 'PLSCanonical', 'PLSRegression',
-                         'PLSSVD', 'GaussianProcess']):
+        if name not in CROSS_DECOMPOSITION + ['GaussianProcess']:
             # FIXME!
             # in particular GaussianProcess!
             yield check_estimators_overwrite_params, name, Estimator
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -252,6 +252,35 @@ def _check_transformer(name, Transformer, X, y):
             assert_raises(ValueError, transformer.transform, X.T)
 
 
+def check_estimators_dtypes(name, Estimator):  # smoke test: fit and predict on float32/float64/int64/int32 X
+    rnd = np.random.RandomState(0)  # fixed seed, so the data is the same on every run
+    X_train_32 = 4 * rnd.uniform(size=(10, 3)).astype(np.float32)  # 10x3 uniform draws, scaled by 4
+    X_train_64 = X_train_32.astype(np.float64)
+    X_train_int_64 = X_train_32.astype(np.int64)  # integer cast drops the fractional part
+    X_train_int_32 = X_train_32.astype(np.int32)
+    y = X_train_int_64[:, 0]  # targets: first column of the int64 data
+    y = multioutput_estimator_convert_y_2d(name, y)  # helper defined elsewhere; presumably makes y 2-D where needed -- confirm
+    for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:
+        with warnings.catch_warnings(record=True):  # record (rather than show) warnings from the constructor
+            estimator = Estimator()
+        set_fast_parameters(estimator)  # helper defined elsewhere; presumably reduces fitting cost -- confirm
+        set_random_state(estimator, 1)
+        if issubclass(Estimator, ClusterMixin):
+            estimator.fit(X_train)  # ClusterMixin subclasses are fit without y
+        else:
+            estimator.fit(X_train, y)
+
+        for method in ["predict", "transform", "decision_function",
+                       "predict_proba"]:  # call whichever exist; return values are discarded
+            try:
+                if hasattr(estimator, method):
+                    getattr(estimator, method)(X_train)
+            except NotImplementedError:
+                # FIXME
+                # non-standard handling of ducktyping in BaggingEstimator
+                pass
+
+
 def check_estimators_nan_inf(name, Estimator):
     rnd = np.random.RandomState(0)
     X_train_finite = rnd.uniform(size=(10, 3))