Skip to content

Commit cd0634e

Browse files
committed
Use a sentinel for managing vectors' memory
StdVectorSentinel makes a proper life-cycle management for std::vectors' buffers possible. Duplication seems needed as fused types can't be used as attributes. It's possible to obtain a missing symbol (`_ZSt28__throw_bad_array_new_lengthv`) at runtime. This is unrelated to the implementation here, and there are issues reporting the problem, e.g.: cython/cython#4218. A temporary workaround: stan-dev/pystan#294 (comment)
1 parent b8fe6e1 commit cd0634e

File tree

1 file changed

+62
-25
lines changed

1 file changed

+62
-25
lines changed

sklearn/metrics/_parallel_reductions.pyx

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ np.import_array()
1616

1717
from libc.stdlib cimport free, malloc
1818
from libcpp.vector cimport vector
19+
from cpython.object cimport PyObject
1920
from cython.operator cimport dereference as deref
2021
from cython.parallel cimport parallel, prange
22+
from cpython.ref cimport Py_INCREF
2123

2224
from scipy.sparse import issparse
2325

@@ -45,12 +47,6 @@ from ..utils._typedefs cimport ITYPE_t, DTYPE_t, DITYPE_t
4547
from ..utils._typedefs cimport ITYPECODE, DTYPECODE
4648
from ..utils._typedefs import ITYPE, DTYPE
4749

48-
# As type covariance is not supported for C++ container via Cython,
49-
# we need to redefine a fused type
50-
ctypedef fused vector_vector_DITYPE_t:
51-
vector[vector[ITYPE_t]]
52-
vector[vector[DTYPE_t]]
53-
5450
# TODO: This has been introduced in Cython 3.0, change for `libcpp.algorithm.move` once Cython 3 is used
5551
# Introduction in Cython:
5652
# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L47
@@ -59,21 +55,63 @@ cdef extern from "<algorithm>" namespace "std" nogil:
5955

6056
######################
6157
## std::vector to np.ndarray coercion
62-
# TODO: for now using this simple solution: https://stackoverflow.com/a/23873586
63-
# A better solution would make sure of using the same allocator implementations.
64-
# Those implementations depend on the runtimes' allocator which can be different
65-
# in some configuration and thus would make the program crash.
58+
# As type covariance is not supported for C++ container via Cython,
59+
# we need to redefine fused types.
60+
ctypedef fused vector_DITYPE_t:
61+
vector[ITYPE_t]
62+
vector[DTYPE_t]
63+
64+
ctypedef fused vector_vector_DITYPE_t:
65+
vector[vector[ITYPE_t]]
66+
vector[vector[DTYPE_t]]
6667

6768
cdef extern from "numpy/arrayobject.h":
68-
void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
69+
int PyArray_SetBaseObject(np.ndarray arr, PyObject *obj) nogil except -1
70+
71+
cdef class StdVectorSentinel:
72+
pass
73+
74+
cdef class StdVectorSentinelDTYPE(StdVectoqrSentinel):
75+
cdef vector[DTYPE_t] vec
6976

70-
cdef np.ndarray[DITYPE_t, ndim=1] buffer_to_numpy_array(DITYPE_t * ptr, np.npy_intp size):
71-
""" Create a numpy ndarray given a buffer and its size. """
72-
typenum = DTYPECODE if DITYPE_t is DTYPE_t else ITYPECODE
73-
cdef np.ndarray[DITYPE_t, ndim=1] arr = np.PyArray_SimpleNewFromData(1, &size, typenum, ptr)
77+
@staticmethod
78+
cdef StdVectorSentinel create_for(vector[DTYPE_t] * vec_ptr):
79+
sentinel = StdVectorSentinelDTYPE()
80+
sentinel.vec.swap(deref(vec_ptr))
81+
return sentinel
82+
83+
cdef class StdVectorSentinelITYPE(StdVectorSentinel):
84+
cdef vector[ITYPE_t] vec
85+
86+
@staticmethod
87+
cdef StdVectorSentinel create_for(vector[ITYPE_t] * vec_ptr):
88+
sentinel = StdVectorSentinelITYPE()
89+
sentinel.vec.swap(deref(vec_ptr))
90+
return sentinel
91+
92+
93+
cdef np.ndarray vector_to_numpy_array(vector_DITYPE_t * vect_ptr):
94+
""" Create a numpy ndarray given a C++ vector.
95+
96+
This registers a Sentinel as the base object for the numpy array
97+
freeing the C++ vector when it must.
98+
"""
99+
typenum = DTYPECODE if vector_DITYPE_t is vector[DTYPE_t] else ITYPECODE
100+
cdef:
101+
np.npy_intp size = deref(vect_ptr).size()
102+
np.ndarray arr = np.PyArray_SimpleNewFromData(1, &size, typenum, deref(vect_ptr).data())
103+
StdVectorSentinel sentinel
104+
105+
if vector_DITYPE_t is vector[DTYPE_t]:
106+
sentinel = StdVectorSentinelDTYPE.create_for(vect_ptr)
107+
else:
108+
sentinel = StdVectorSentinelITYPE.create_for(vect_ptr)
74109

75110
# Makes the numpy array responsible to the life-cycle of its buffer.
76-
PyArray_ENABLEFLAGS(arr, np.NPY_OWNDATA)
111+
# A reference to the sentinel will be stolen by the call bellow,
112+
# so we increase its reference count.
113+
Py_INCREF(sentinel)
114+
PyArray_SetBaseObject(arr, <PyObject*>sentinel)
77115
return arr
78116

79117
cdef np.ndarray[object, ndim=1] _coerce_vectors_to_np_nd_arrays(vector_vector_DITYPE_t* vecs):
@@ -82,8 +120,7 @@ cdef np.ndarray[object, ndim=1] _coerce_vectors_to_np_nd_arrays(vector_vector_DI
82120
np.ndarray[object, ndim=1] np_arrays_of_np_arrays = np.empty(n, dtype=np.ndarray)
83121

84122
for i in range(n):
85-
np_arrays_of_np_arrays[i] = buffer_to_numpy_array(deref(vecs)[i].data(),
86-
deref(vecs)[i].size())
123+
np_arrays_of_np_arrays[i] = vector_to_numpy_array(&(deref(vecs)[i]))
87124

88125
return np_arrays_of_np_arrays
89126

@@ -1016,10 +1053,7 @@ cdef class RadiusNeighborhood(PairwiseDistancesReduction):
10161053
bint return_distance = False,
10171054
bint sort_results = False
10181055
):
1019-
# This won't be freed for reasons stated at their definition.
10201056
self.neigh_indices = new vector[vector[ITYPE_t]](self.n_X)
1021-
1022-
# This will be freed then solely if return_distance = False
10231057
self.neigh_distances = new vector[vector[DTYPE_t]](self.n_X)
10241058

10251059
self.sort_results = sort_results
@@ -1044,11 +1078,14 @@ cdef class RadiusNeighborhood(PairwiseDistancesReduction):
10441078
def _finalise_compute(self,
10451079
bint return_distance
10461080
):
1081+
10471082
if return_distance:
1048-
return (_coerce_vectors_to_np_nd_arrays(self.neigh_distances),
1083+
res = (_coerce_vectors_to_np_nd_arrays(self.neigh_distances),
10491084
_coerce_vectors_to_np_nd_arrays(self.neigh_indices))
1085+
else:
1086+
res = _coerce_vectors_to_np_nd_arrays(self.neigh_indices)
10501087

1051-
# We need to free the buffers here because they won't be managed
1052-
# by a numpy array then.
10531088
del self.neigh_distances
1054-
return _coerce_vectors_to_np_nd_arrays(self.neigh_indices)
1089+
del self.neigh_indices
1090+
1091+
return res

0 commit comments

Comments
 (0)