Skip to content

Commit 2ab7c62

Browse files
committed
FIX remove duplicates in MultiLabelBinarizer
1 parent 0e0ac76 commit 2ab7c62

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

sklearn/preprocessing/label.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -558,7 +558,7 @@ def _transform(self, y, class_mapping):
558 558          indices = array.array('i')
559 559          indptr = array.array('i', [0])
560 560          for labels in y:
561     -            indices.extend(class_mapping[label] for label in labels)
    561 +            indices.extend(set(class_mapping[label] for label in labels))
562 562              indptr.append(len(indices))
563 563          data = np.ones(len(indices), dtype=int)
564 564          return sp.csr_matrix((data, indices, indptr),

sklearn/preprocessing/tests/test_label.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -371,3 +371,12 @@ def test_mutlilabel_binarizer_non_integer_labels():
371 371
372 372      lsb = MultiLabelBinarizer()
373 373      assert_raises(TypeError, lsb.fit_transform, [({}), ({}, {'a': 'b'})])
    374 +
    375 +
    376 +def test_mutlilabel_binarizer_non_unique():
    377 +    inp = [(1, 1, 1, 0)]
    378 +    indicator_mat = np.array([[1, 1]])
    379 +    lsb = MultiLabelBinarizer()
    380 +    assert_array_equal(lsb.fit_transform(inp), indicator_mat)
    381 +
    382 +    assert_array_equal(lsb.inverse_transform(np.array([[1, 3]])), [(0, 1,)])

0 commit comments

Comments
 (0)