
Commit cee1c8c

resolve the comments
1 parent a22b804 commit cee1c8c

2 files changed, +12 −4 lines


core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 3 additions & 1 deletion
@@ -758,7 +758,7 @@ private[spark] class MapOutputTrackerMaster(
       endMapIndex: Int,
       startPartition: Int,
       endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
-    logDebug(s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex - $endMapIndex" +
+    logDebug(s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex-$endMapIndex" +
       s"partitions $startPartition-$endPartition")
     shuffleStatuses.get(shuffleId) match {
       case Some(shuffleStatus) =>
@@ -822,6 +822,8 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
       endMapIndex: Int,
       startPartition: Int,
       endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
+    logDebug(s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex-$endMapIndex" +
+      s"partitions $startPartition-$endPartition")
     val statuses = getStatuses(shuffleId, conf)
     try {
       MapOutputTracker.convertMapStatuses(
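
In both hunks the debug message is built from two concatenated interpolated strings with no separator at the join point, so the rendered line runs the map-index range straight into "partitions". A minimal, self-contained sketch (made-up values, not from this commit) of what actually gets logged:

object LogLineDemo extends App {
  val shuffleId = 0
  val (startMapIndex, endMapIndex) = (0, 5)
  val (startPartition, endPartition) = (0, 10)
  // Mirrors the concatenation in the diff; note no space before "partitions".
  val msg = s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex-$endMapIndex" +
    s"partitions $startPartition-$endPartition"
  println(msg) // Fetching outputs for shuffle 0, mappers 0-5partitions 0-10
}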

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedPartitions.scala

Lines changed: 9 additions & 3 deletions
@@ -74,7 +74,7 @@ case class OptimizeSkewedPartitions(conf: SQLConf) extends Rule[SparkPlan] {
         SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_MAX_SPLITS), mapPartitionSizes.length)
       val avgPartitionSize = mapPartitionSizes.sum / maxSplits
       val advisoryPartitionSize = math.max(avgPartitionSize,
-        conf.getConf(SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE))
+        conf.getConf(SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD))
       val partitionIndices = mapPartitionSizes.indices
       val partitionStartIndices = ArrayBuffer[Int]()
       var postMapPartitionSize = mapPartitionSizes(0)
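
The only change in this hunk is the floor of the advisory split size: it is now the skewed-partition size threshold rather than the generic post-shuffle target input size. A worked sketch with made-up byte counts (the threshold value stands in for the SQLConf entry):

object AdvisorySizeDemo extends App {
  val mapPartitionSizes = Seq(400L, 100L, 300L, 200L) // per-map output sizes, bytes
  val maxSplits = math.min(5, mapPartitionSizes.length) // configured cap vs. map count = 4
  val avgPartitionSize = mapPartitionSizes.sum / maxSplits // 1000 / 4 = 250
  val skewedPartitionSizeThreshold = 300L // stand-in for the config value
  // A split is never advised to be smaller than the threshold.
  val advisoryPartitionSize = math.max(avgPartitionSize, skewedPartitionSizeThreshold)
  println(advisoryPartitionSize) // 300
}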
@@ -168,11 +168,13 @@ case class OptimizeSkewedPartitions(conf: SQLConf) extends Rule[SparkPlan] {
           // obtaining the raw data size of per partition,
           val leftSkewedReader = SkewedShufflePartitionReader(
             left, partitionId, leftMapIdStartIndices(i), leftEndMapId)
+          val leftSort = smj.left.asInstanceOf[SortExec].copy(child = leftSkewedReader)

           val rightSkewedReader = SkewedShufflePartitionReader(right, partitionId,
             rightMapIdStartIndices(j), rightEndMapId)
-          subJoins += SortMergeJoinExec(leftKeys, rightKeys, joinType, condition,
-            leftSkewedReader, rightSkewedReader)
+          val rightSort = smj.right.asInstanceOf[SortExec].copy(child = rightSkewedReader)
+          subJoins += SortMergeJoinExec(leftKeys, rightKeys, joinType, condition,
+            leftSort, rightSort)
         }
       }
     }
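
The substantive fix in this hunk: SortMergeJoinExec requires both children to be sorted on the join keys, and the previous code fed the unsorted skewed readers into the join directly. The new code copies the join's original SortExec nodes, substituting each per-split reader as the new child. A simplified sketch of that copy pattern with stand-in types (the real SortExec and SparkPlan live in Spark's execution package):

object SortRewrapDemo extends App {
  sealed trait Plan
  case class Reader(partitionId: Int, startMapIndex: Int, endMapIndex: Int) extends Plan
  case class Sort(sortOrder: Seq[String], child: Plan) extends Plan

  // The sort that originally sat above the full shuffle read.
  val originalSort = Sort(Seq("join_key ASC"), Reader(0, 0, 100))
  // A reader covering one slice of the skewed partition's map outputs.
  val skewedReader = Reader(partitionId = 0, startMapIndex = 0, endMapIndex = 10)
  // copy(child = ...) keeps the sort spec while swapping in the new child --
  // the same move as smj.left.asInstanceOf[SortExec].copy(child = leftSkewedReader).
  val rewrapped = originalSort.copy(child = skewedReader)
  println(rewrapped)
}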
@@ -240,6 +242,10 @@ case class SkewedShufflePartitionReader(
   }
   private var cachedSkewedShuffleRDD: SkewedShuffledRowRDD = null

+  override def nodeName: String = s"SkewedShuffleReader SkewedShuffleQueryStage: ${child}" +
+    s" SkewedPartition: ${partitionIndex} startMapIndex: ${startMapIndex}" +
+    s" endMapIndex: ${endMapIndex}"
+
   override def doExecute(): RDD[InternalRow] = {
     if (cachedSkewedShuffleRDD == null) {
       cachedSkewedShuffleRDD = child match {
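
The added nodeName override controls the label this operator prints with in plan strings, which is what surfaces the skew-split metadata (partition index and map-index range) in explain output. A stand-in sketch of the mechanism, not Spark's actual TreeNode hierarchy:

object NodeNameDemo extends App {
  abstract class PlanNode {
    def nodeName: String = getClass.getSimpleName // default label for a plan node
  }
  case class SkewedReaderDemo(partitionIndex: Int, startMapIndex: Int, endMapIndex: Int)
      extends PlanNode {
    override def nodeName: String =
      s"SkewedShuffleReader SkewedPartition: $partitionIndex" +
        s" startMapIndex: $startMapIndex endMapIndex: $endMapIndex"
  }
  println(SkewedReaderDemo(3, 0, 10).nodeName)
  // SkewedShuffleReader SkewedPartition: 3 startMapIndex: 0 endMapIndex: 10
}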
