MAINT remove overflow for RAND_R_MAX (#24955)

glemaitre · jjerphan · web-flow · commit 68a74272a235 · 2022-11-17T11:47:21.000+01:00
Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
diff --git a/sklearn/utils/_random.pxd b/sklearn/utils/_random.pxd
@@ -12,6 +12,8 @@ cdef enum:
     # Max value for our rand_r replacement (near the bottom).
     # We don't use RAND_MAX because it's different across platforms and
     # particularly tiny on Windows/MSVC.
+    # It corresponds to the maximum representable value for
+    # 32-bit signed integers (i.e. 2^31 - 1).
     RAND_R_MAX = 0x7FFFFFFF
 
 cpdef sample_without_replacement(cnp.int_t n_population,
@@ -30,14 +32,8 @@ cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil:
     seed[0] ^= <UINT32_t>(seed[0] >> 17)
     seed[0] ^= <UINT32_t>(seed[0] << 5)
 
-    # Note: we must be careful with the final line cast to np.uint32 so that
-    # the function behaves consistently across platforms.
-    #
-    # The following cast might yield different results on different platforms:
-    # wrong_cast = <UINT32_t> RAND_R_MAX + 1
-    #
-    # We can use:
-    # good_cast = <UINT32_t>(RAND_R_MAX + 1)
-    # or:
-    # cdef np.uint32_t another_good_cast = <UINT32_t>RAND_R_MAX + 1
-    return seed[0] % <UINT32_t>(RAND_R_MAX + 1)
+    # Use the modulo to make sure that we don't return a value greater than the
+    # maximum representable value for signed 32-bit integers (i.e. 2^31 - 1).
+    # Note that the parentheses are needed to avoid overflow: here
+    # RAND_R_MAX is cast to UINT32_t before 1 is added.
+    return seed[0] % ((<UINT32_t>RAND_R_MAX) + 1)