Skip to content

Commit 0edbff8

Browse files
committed
FIX & TST at least min_samples to be considered a core sample
1 parent 7ad8b57 commit 0edbff8

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

sklearn/cluster/dbscan_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
142142
labels = -np.ones(X.shape[0], dtype=np.intp)
143143

144144
# A list of all core samples found.
145-
core_samples = np.asarray(n_neighbors > min_samples, dtype=np.uint8)
145+
core_samples = np.asarray(n_neighbors >= min_samples, dtype=np.uint8)
146146
dbscan_inner(core_samples, neighborhoods, labels)
147147
return np.where(core_samples)[0], labels
148148

sklearn/cluster/tests/test_dbscan.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,32 @@ def test_weighted_dbscan():
259259
assert_array_equal(core1, core5)
260260
assert_array_equal(label1, label5)
261261
assert_array_equal(label1, est.labels_)
262+
263+
264+
def test_dbscan_core_samples_toy():
    """Check core-sample detection and labelling on a 1-D toy dataset.

    A sample is core when its neighborhood count is at least
    ``min_samples`` (``n_neighbors >= min_samples``).  The expectations
    below rely on each sample being counted in its own neighborhood:
    with ``min_samples=1`` even isolated samples are core.
    """
    X = [[0], [2], [3], [4], [6], [8], [10]]
    n_samples = len(X)

    # Degenerate case: every sample is a core sample, either with its own
    # cluster or including close core samples.
    core_samples, labels = dbscan(X, eps=1, min_samples=1)
    assert_array_equal(core_samples, np.arange(n_samples))
    assert_array_equal(labels, [0, 1, 1, 1, 2, 3, 4])

    # With eps=1 and min_samples=2 only the 3 samples from the dense area
    # are core samples. All other points are isolated and considered noise.
    core_samples, labels = dbscan(X, eps=1, min_samples=2)
    assert_array_equal(core_samples, [1, 2, 3])
    assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])

    # Only the sample in the middle of the dense area is core. Its two
    # neighbors are edges. Remaining samples are noise.
    # The labels must be rebound here; otherwise the assertion below would
    # silently re-check the labels of the min_samples=2 run.
    core_samples, labels = dbscan(X, eps=1, min_samples=3)
    assert_array_equal(core_samples, [2])
    assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])

    # It's no longer possible to extract core samples with eps=1: everything
    # is noise.
    core_samples, labels = dbscan(X, eps=1, min_samples=4)
    assert_array_equal(core_samples, [])
    assert_array_equal(labels, -np.ones(n_samples))

0 commit comments

Comments
 (0)