Skip to content

Commit 91ae454

Browse files
committed
fix scala 2.13
1 parent 4626614 commit 91ae454

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -176,23 +176,18 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String)
176176
.map(_.headOption.getOrElse(Double.NaN))
177177

178178
case Imputer.mode =>
179+
// Keep in line with sklearn.impute.SimpleImputer (using scipy.stats.mode).
180+
// If there is more than one mode, choose the smallest one.
179181
val modes = dataset.select(cols: _*).rdd.flatMap { row =>
180182
Iterator.range(0, numCols).flatMap { i =>
181183
// Ignore null.
182184
if (row.isNullAt(i)) Iterator.empty else Iterator.single((i, row.getDouble(i)), 1L)
183185
}
184-
}.reduceByKey(_ + _).map { case ((i, v), c) => (i, (v, c))
185-
}.reduceByKey { case ((v1, c1), (v2, c2)) =>
186-
if (c1 > c2) {
187-
(v1, c1)
188-
} else if (c1 < c2) {
189-
(v2, c2)
190-
} else {
191-
// Keep in line with sklearn.impute.SimpleImputer (using scipy.stats.mode).
192-
// If there is more than one mode, choose the smallest one.
193-
(math.min(v1, v2), c1)
194-
}
195-
}.mapValues(_._1).collectAsMap()
186+
}.reduceByKey(_ + _).map { case ((i, v), c) =>
187+
// Negate the value so that the max under the default (Long, Double) ordering picks the highest count and, on ties, the smallest value.
188+
(i, (c, -v))
189+
}.reduceByKey(Ordering.apply[(Long, Double)].max
190+
).mapValues(-_._2).collectAsMap()
196191
Array.tabulate(numCols)(i => modes.getOrElse(i, Double.NaN))
197192
}
198193

0 commit comments

Comments
 (0)