@@ -1,22 +1,50 @@
-# Author: Peter Prettenhofer <[email protected]>
-#         Mathieu Blondel (partial_fit support)
-#         Rob Zinkov (passive-aggressive)
-#         Lars Buitinck
-#
-# License: BSD 3 clause
+{{py:
 
+"""
+Template file to easily generate fused types consistent code using Tempita
+(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).
 
+Generated file: _sgd_fast.pyx
+
+Each relevant function is duplicated for the dtypes float and double.
+The keywords between double braces are substituted in setup.py.
+
+Authors: Peter Prettenhofer <[email protected]>
+         Mathieu Blondel (partial_fit support)
+         Rob Zinkov (passive-aggressive)
+         Lars Buitinck
+
+License: BSD 3 clause
+"""
+
+# The dtypes are defined as follows (name_suffix, c_type, np_type)
+dtypes = [
+    ("64", "double", "np.float64"),
+    ("32", "float", "np.float32"),
+]
+
+}}
+
+#------------------------------------------------------------------------------
+
+"""
+SGD implementation
+WARNING: Do not edit .pyx file directly, it is generated from .pyx.tp
+"""
+
+from cython cimport floating
 import numpy as np
 from time import time
 
 from libc.math cimport exp, log, pow, fabs
 cimport numpy as cnp
 from numpy.math cimport INFINITY
 cdef extern from "_sgd_fast_helpers.h":
-    bint skl_isfinite(double) nogil
+    bint skl_isfinite32(float) nogil
+    bint skl_isfinite64(double) nogil
 
-from ..utils._weight_vector cimport WeightVector64 as WeightVector
-from ..utils._seq_dataset cimport SequentialDataset64 as SequentialDataset
+from ..utils._weight_vector cimport WeightVector32, WeightVector64
+from ..utils._seq_dataset cimport SequentialDataset32, SequentialDataset64
 
 cnp.import_array()
 
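The `{{py: ... }}` header above defines the `dtypes` list that drives code generation: every `{{for ...}}`/`{{endfor}}` region below is emitted once per `(name_suffix, c_type, np_type)` entry, with the double-brace keywords substituted textually before Cython ever sees the file. A minimal, self-contained sketch of that expansion, using the Tempita bundled with Cython (the tiny template string here is illustrative, not the real build step):

    # Sketch: expand a Tempita template the way the .pyx.tp build step does.
    from Cython import Tempita

    template = Tempita.Template(
        "{{for name_suffix, c_type, np_type in dtypes}}\n"
        "def _plain_sgd{{name_suffix}}(const {{c_type}}[::1] weights): ...\n"
        "{{endfor}}\n"
    )
    print(template.substitute(dtypes=[
        ("64", "double", "np.float64"),
        ("32", "float", "np.float32"),
    ]))
    # Emits a _plain_sgd64 variant typed with double and a _plain_sgd32
    # variant typed with float.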
@@ -372,37 +400,48 @@ cdef class SquaredEpsilonInsensitive(Regression):
     def __reduce__(self):
         return SquaredEpsilonInsensitive, (self.epsilon,)
 
-
-def _plain_sgd(const double[::1] weights,
-               double intercept,
-               const double[::1] average_weights,
-               double average_intercept,
-               LossFunction loss,
-               int penalty_type,
-               double alpha, double C,
-               double l1_ratio,
-               SequentialDataset dataset,
-               const unsigned char[::1] validation_mask,
-               bint early_stopping, validation_score_cb,
-               int n_iter_no_change,
-               unsigned int max_iter, double tol, int fit_intercept,
-               int verbose, bint shuffle, cnp.uint32_t seed,
-               double weight_pos, double weight_neg,
-               int learning_rate, double eta0,
-               double power_t,
-               bint one_class,
-               double t=1.0,
-               double intercept_decay=1.0,
-               int average=0):
+{{for name_suffix, c_type, np_type in dtypes}}
+
+def _plain_sgd{{name_suffix}}(
+    const {{c_type}}[::1] weights,
+    double intercept,
+    const {{c_type}}[::1] average_weights,
+    double average_intercept,
+    LossFunction loss,
+    int penalty_type,
+    double alpha,
+    double C,
+    double l1_ratio,
+    SequentialDataset{{name_suffix}} dataset,
+    const unsigned char[::1] validation_mask,
+    bint early_stopping,
+    validation_score_cb,
+    int n_iter_no_change,
+    unsigned int max_iter,
+    double tol,
+    int fit_intercept,
+    int verbose,
+    bint shuffle,
+    cnp.uint32_t seed,
+    double weight_pos,
+    double weight_neg,
+    int learning_rate,
+    double eta0,
+    double power_t,
+    bint one_class,
+    double t=1.0,
+    double intercept_decay=1.0,
+    int average=0,
+):
     """SGD for generic loss functions and penalties with optional averaging
 
     Parameters
     ----------
-    weights : ndarray[double, ndim=1]
+    weights : ndarray[{{c_type}}, ndim=1]
         The allocated vector of weights.
     intercept : double
         The initial intercept.
-    average_weights : ndarray[double, ndim=1]
+    average_weights : ndarray[{{c_type}}, ndim=1]
         The average weights as computed for ASGD. Should be None if average
         is 0.
     average_intercept : double
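Schematically, each generated `_plain_sgd{{name_suffix}}` variant runs the classic per-sample update: predict, compute the loss gradient, shrink for the penalty, then step. A minimal NumPy sketch under simplifying assumptions (squared loss, plain L2 penalty, constant learning rate, no averaging, validation, or early stopping; all names are illustrative):

    # Sketch of the per-sample update loop, not the real implementation.
    import numpy as np

    def plain_sgd_sketch(X, y, alpha=1e-4, eta0=0.01, max_iter=5):
        w = np.zeros(X.shape[1], dtype=X.dtype)
        intercept = 0.0
        for epoch in range(max_iter):
            for i in range(X.shape[0]):
                p = X[i] @ w + intercept       # prediction
                dloss = p - y[i]               # gradient of squared loss
                w *= 1.0 - eta0 * alpha        # L2 shrinkage
                w -= eta0 * dloss * X[i]       # gradient step
                intercept -= eta0 * dloss      # fit_intercept behaviour
        return w, intercept

Note that the arithmetic stays in the input dtype, which is exactly why separate 32- and 64-bit variants are generated.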
@@ -489,8 +528,8 @@ def _plain_sgd(const double[::1] weights,
     cdef Py_ssize_t n_samples = dataset.n_samples
     cdef Py_ssize_t n_features = weights.shape[0]
 
-    cdef WeightVector w = WeightVector(weights, average_weights)
-    cdef double *x_data_ptr = NULL
+    cdef WeightVector{{name_suffix}} w = WeightVector{{name_suffix}}(weights, average_weights)
+    cdef {{c_type}} *x_data_ptr = NULL
     cdef int *x_ind_ptr = NULL
 
     # helper variables
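`WeightVector{{name_suffix}}` stores the weights in a scaled representation, true weights = `wscale * w_data`, so the multiplicative L2 shrinkage applied on every sample costs O(1) instead of O(n_features); `l1penalty` further below compensates for this by dividing by `wscale`. A rough Python rendering of the idea (class and method names are illustrative, not the real cimported API):

    # Sketch of the scaled-weight-vector trick behind WeightVector.
    class ScaledWeights:
        def __init__(self, w_data):
            self.w_data = w_data      # raw buffer; true weights = wscale * w_data
            self.wscale = 1.0

        def scale(self, factor):
            self.wscale *= factor     # O(1) shrink of the whole vector

        def add(self, x_values, x_indices, update):
            # true update w += update * x, folded through the scale factor
            for v, idx in zip(x_values, x_indices):
                self.w_data[idx] += (update / self.wscale) * v

        def dot(self, x_values, x_indices):
            return self.wscale * sum(self.w_data[idx] * v
                                     for v, idx in zip(x_values, x_indices))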
@@ -505,9 +544,9 @@ def _plain_sgd(const double[::1] weights,
     cdef double score = 0.0
     cdef double best_loss = INFINITY
     cdef double best_score = -INFINITY
-    cdef double y = 0.0
-    cdef double sample_weight
-    cdef double class_weight = 1.0
+    cdef {{c_type}} y = 0.0
+    cdef {{c_type}} sample_weight
+    cdef {{c_type}} class_weight = 1.0
     cdef unsigned int count = 0
     cdef unsigned int train_count = n_samples - np.sum(validation_mask)
     cdef unsigned int epoch = 0
@@ -520,10 +559,10 @@ def _plain_sgd(const double[::1] weights,
     cdef long long sample_index
 
     # q vector is only used for L1 regularization
-    cdef double[::1] q = None
-    cdef double *q_data_ptr = NULL
+    cdef {{c_type}}[::1] q = None
+    cdef {{c_type}} *q_data_ptr = NULL
     if penalty_type == L1 or penalty_type == ELASTICNET:
-        q = np.zeros((n_features,), dtype=np.float64, order="c")
+        q = np.zeros((n_features,), dtype={{np_type}}, order="c")
         q_data_ptr = &q[0]
     cdef double u = 0.0
@@ -627,7 +666,7 @@ def _plain_sgd(const double[::1] weights,
 
                 if penalty_type == L1 or penalty_type == ELASTICNET:
                     u += (l1_ratio * eta * alpha)
-                    l1penalty(w, q_data_ptr, x_ind_ptr, xnnz, u)
+                    l1penalty{{name_suffix}}(w, q_data_ptr, x_ind_ptr, xnnz, u)
 
                 t += 1
                 count += 1
@@ -694,15 +733,28 @@ def _plain_sgd(const double[::1] weights,
             epoch + 1
         )
 
+{{endfor}}
+
+
+cdef inline bint skl_isfinite(floating w) noexcept nogil:
+    if floating is float:
+        return skl_isfinite32(w)
+    else:
+        return skl_isfinite64(w)
 
-cdef bint any_nonfinite(const double *w, int n) noexcept nogil:
+
+cdef inline bint any_nonfinite(const floating *w, int n) noexcept nogil:
     for i in range(n):
         if not skl_isfinite(w[i]):
             return True
     return 0
 
 
-cdef double sqnorm(double * x_data_ptr, int * x_ind_ptr, int xnnz) noexcept nogil:
+cdef inline double sqnorm(
+    floating * x_data_ptr,
+    int * x_ind_ptr,
+    int xnnz,
+) noexcept nogil:
     cdef double x_norm = 0.0
     cdef int j
     cdef double z
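The helpers above use Cython's `floating` fused type, which specializes at compile time, while the Tempita loop emits one `_plain_sgd32` and one `_plain_sgd64`; the Python-side caller therefore has to pick the variant matching the array dtype. A hedged sketch of that dispatch (the helper and the module path are assumptions; only the two generated names come from this file):

    # Sketch: dtype-based dispatch to the generated variants.
    import numpy as np
    from ._sgd_fast import _plain_sgd32, _plain_sgd64  # generated module, path assumed

    def plain_sgd(weights, *args, **kwargs):
        if weights.dtype == np.float32:
            return _plain_sgd32(weights, *args, **kwargs)
        return _plain_sgd64(weights, *args, **kwargs)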
@@ -712,8 +764,15 @@ cdef double sqnorm(double * x_data_ptr, int * x_ind_ptr, int xnnz) noexcept nogil:
     return x_norm
 
 
-cdef void l1penalty(WeightVector w, double * q_data_ptr,
-                    int * x_ind_ptr, int xnnz, double u) noexcept nogil:
+{{for name_suffix, c_type, np_type in dtypes}}
+
+cdef void l1penalty{{name_suffix}}(
+    WeightVector{{name_suffix}} w,
+    {{c_type}} *q_data_ptr,
+    int *x_ind_ptr,
+    int xnnz,
+    double u,
+) noexcept nogil:
     """Apply the L1 penalty to each updated feature
 
     This implements the truncated gradient approach by
@@ -723,7 +782,7 @@ cdef void l1penalty(WeightVector w, double * q_data_ptr,
     cdef int j = 0
     cdef int idx = 0
     cdef double wscale = w.wscale
-    cdef double *w_data_ptr = w.w_data_ptr
+    cdef {{c_type}} *w_data_ptr = w.w_data_ptr
     for j in range(xnnz):
         idx = x_ind_ptr[j]
         z = w_data_ptr[idx]
@@ -736,3 +795,5 @@ cdef void l1penalty(WeightVector w, double * q_data_ptr,
                 0.0, w_data_ptr[idx] + ((u - q_data_ptr[idx]) / wscale))
 
         q_data_ptr[idx] += wscale * (w_data_ptr[idx] - z)
+
+{{endfor}}
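For reference, the truncated-gradient update that `l1penalty{{name_suffix}}` applies reads more easily with the `wscale` factor folded out (i.e., assuming `wscale == 1`): each nonzero feature is pulled toward zero by the cumulative L1 penalty `u`, clipped at zero rather than allowed to change sign, and the amount actually applied is remembered in `q`. A plain-Python sketch mirroring the code above:

    # Sketch of the cumulative/truncated L1 penalty, assuming wscale == 1.
    def l1penalty_sketch(w, q, nonzero_indices, u):
        # u: total cumulative L1 penalty so far; q[idx]: penalty already
        # applied to w[idx] in earlier calls.
        for idx in nonzero_indices:
            z = w[idx]
            if z > 0.0:
                w[idx] = max(0.0, z - (u + q[idx]))
            elif z < 0.0:
                w[idx] = min(0.0, z + (u - q[idx]))
            q[idx] += w[idx] - z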