Skip to content

Commit c41e2ed

Browse files
committed
Add non-regression test
1 parent cb10786 commit c41e2ed

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

sklearn/cluster/tests/test_k_means.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,24 +69,31 @@ def test_kmeans_results(representation, algo, dtype):
6969

7070

7171
@pytest.mark.parametrize('distribution', ['normal', 'blobs'])
72-
def test_elkan_results(distribution):
72+
@pytest.mark.parametrize('tol', [1e-2, 1e-4, 1e-8])
73+
def test_elkan_results(distribution, tol):
7374
# check that results are identical between lloyd and elkan algorithms
7475
rnd = np.random.RandomState(0)
7576
if distribution == 'normal':
76-
X = rnd.normal(size=(50, 10))
77+
X = rnd.normal(size=(5000, 10))
7778
else:
7879
X, _ = make_blobs(random_state=rnd)
7980

80-
km_full = KMeans(algorithm='full', n_clusters=5, random_state=0, n_init=1)
81+
km_full = KMeans(algorithm='full', n_clusters=5,
82+
random_state=0, n_init=1, tol=tol)
8183
km_elkan = KMeans(algorithm='elkan', n_clusters=5,
82-
random_state=0, n_init=1)
84+
random_state=0, n_init=1, tol=tol)
8385

8486
km_full.fit(X)
8587
km_elkan.fit(X)
8688
assert_array_almost_equal(km_elkan.cluster_centers_,
8789
km_full.cluster_centers_)
8890
assert_array_equal(km_elkan.labels_, km_full.labels_)
8991

92+
# The number of iterations and inertia should be close but not
93+
# necessarily exactly the same because of rounding errors.
94+
assert km_elkan.n_iter_ == pytest.approx(km_full.n_iter_, rel=0.01)
95+
assert km_elkan.inertia_ == pytest.approx(km_full.inertia_, rel=1e-6)
96+
9097

9198
def test_labels_assignment_and_inertia():
9299
# pure numpy implementation as easily auditable reference gold

0 commit comments

Comments
 (0)