1 file changed: +6 -2 lines changed
mllib/src/main/scala/org/apache/spark/ml/feature
Columns: original file line number, diff line number, diff line change
@@ -355,7 +355,7 @@ object Word2VecModel extends MLReadable[Word2VecModel] {
355355 sc : SparkContext ,
356356 numWords : Int ,
357357 vectorSize : Int ): Int = {
358- val floatSize = 4.0 // Use Double to help avoid overflow
358+ val floatSize = 4L // Use Long to help avoid overflow
359359 val averageWordSize = 15
360360 // [SPARK-11994] - We want to partition the model in partitions smaller than
361361 // spark.kryoserializer.buffer.max
@@ -365,7 +365,11 @@ object Word2VecModel extends MLReadable[Word2VecModel] {
365365 // Assuming an average word size of 15 bytes, the formula is:
366366 // (floatSize * vectorSize + 15) * numWords
367367 val approximateSizeInBytes = (floatSize * vectorSize + averageWordSize) * numWords
368- ((approximateSizeInBytes / bufferSizeInBytes) + 1 ).toInt
368+ val numPartitions = (approximateSizeInBytes / bufferSizeInBytes) + 1
369+ require(numPartitions < 10e8 , s " Word2VecModel calculated that it needs $numPartitions " +
370+ s " partitions to save this model, which is too large. Try increasing " +
371+ s " spark.kryoserializer.buffer.max so that Word2VecModel can use fewer partitions. " )
372+ numPartitions.toInt
369373 }
370374 }
371375
You can’t perform that action at this time.
0 commit comments