Commit a422a7f
[SPARK-24012][SQL] Union of map and other compatible column
1 parent: cce4694

4 files changed: +60 −16 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 23 additions & 8 deletions
@@ -267,7 +267,11 @@ object TypeCoercion {
       case s: Union if s.childrenResolved &&
           s.children.forall(_.output.length == s.children.head.output.length) && !s.resolved =>
         val newChildren: Seq[LogicalPlan] = buildNewChildrenWithWiderTypes(s.children)
-        s.makeCopy(Array(newChildren))
+        if (newChildren != s.children) {
+          s.makeCopy(Array(newChildren))
+        } else {
+          s
+        }
     }

     /** Build new children with the widest types for each attribute among all the children */
@@ -279,7 +283,7 @@
       val targetTypes: Seq[DataType] =
         getWidestTypes(children, attrIndex = 0, mutable.Queue[DataType]())

-      if (targetTypes.nonEmpty) {
+      if (!targetTypes.forall(null == _)) {
         // Add an extra Project if the targetTypes are different from the original types.
         children.map(widenTypes(_, targetTypes))
       } else {
@@ -296,24 +300,35 @@
       // Return the result after the widen data types have been found for all the children
       if (attrIndex >= children.head.output.length) return castedTypes.toSeq

+      val types = children.map(_.output(attrIndex).dataType)
       // For the attrIndex-th attribute, find the widest type
-      findWiderCommonType(children.map(_.output(attrIndex).dataType)) match {
+      findWiderCommonType(types) match {
         // If unable to find an appropriate widen type for this column, return an empty Seq
-        case None => Seq.empty[DataType]
+        case None =>
+          castedTypes.enqueue(null)
         // Otherwise, record the result in the queue and find the type for the next column
-        case Some(widenType) =>
+        case Some(widenType) if types.exists(_ != widenType) =>
           castedTypes.enqueue(widenType)
-          getWidestTypes(children, attrIndex + 1, castedTypes)
+        case _ =>
+          castedTypes.enqueue(null)
       }
+      getWidestTypes(children, attrIndex + 1, castedTypes)
     }

     /** Given a plan, add an extra project on top to widen some columns' data types. */
     private def widenTypes(plan: LogicalPlan, targetTypes: Seq[DataType]): LogicalPlan = {
+      var changed = false
       val casted = plan.output.zip(targetTypes).map {
-        case (e, dt) if e.dataType != dt => Alias(Cast(e, dt), e.name)()
+        case (e, dt) if null != dt && e.dataType != dt =>
+          changed = true
+          Alias(Cast(e, dt), e.name)()
         case (e, _) => e
       }
-      Project(casted, plan)
+      if (changed) {
+        Project(casted, plan)
+      } else {
+        plan
+      }
     }
   }
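Note: the net effect of this change is that getWidestTypes now records a null placeholder for a column instead of abandoning the whole widening pass, both when the column's types have no wider common type and when no cast is needed, and widenTypes and the Union rule then leave anything that would not actually change untouched. The sketch below is a minimal, self-contained illustration of that per-column bookkeeping, not the Catalyst code itself; the simplified DataType hierarchy and the findWiderCommonType stub are stand-ins invented for the example.

// Toy model of the per-column widening with null markers described above.
object WidenSketch {
  // Simplified stand-ins for Catalyst's DataType and findWiderCommonType.
  sealed trait DataType
  case object IntType extends DataType
  case object StringType extends DataType
  case class MapType(key: DataType, value: DataType) extends DataType

  def findWiderCommonType(types: Seq[DataType]): Option[DataType] = types match {
    case ts if ts.forall(_ == ts.head) => Some(ts.head)                            // identical types
    case ts if ts.forall(t => t == IntType || t == StringType) => Some(StringType) // int widens to string
    case _ => None                                                                 // e.g. incompatible maps
  }

  // For each column (its types across all Union children), the widened type,
  // or null when the column should be left as-is.
  def widestTypes(columns: Seq[Seq[DataType]]): Seq[DataType] =
    columns.map { types =>
      findWiderCommonType(types) match {
        case Some(wider) if types.exists(_ != wider) => wider // a real cast is needed
        case _ => null                                        // no common type, or nothing to cast
      }
    }

  def main(args: Array[String]): Unit = {
    // Column types of the two children in the new test query:
    // child 1: (map<int,int>, string), child 2: (map<int,int>, int).
    val mapCol = Seq(MapType(IntType, IntType), MapType(IntType, IntType))
    val otherCol = Seq(StringType, IntType)
    println(widestTypes(Seq(mapCol, otherCol))) // List(null, StringType)
  }
}

Running the sketch prints List(null, StringType): the map column is left alone while the int column is widened to string, which mirrors the schema recorded in the updated union.sql.out below.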

sql/core/src/test/resources/sql-tests/inputs/union.sql

Lines changed: 5 additions & 0 deletions
@@ -35,6 +35,11 @@ FROM (SELECT col AS col
 SELECT col
 FROM p3) T1) T2;

+-- SPARK-24012 Union of map and other compatible columns.
+SELECT map(1, 2), 'str'
+UNION ALL
+SELECT map(1, 2, 3, NULL), 1;
+
 -- Clean-up
 DROP VIEW IF EXISTS t1;
 DROP VIEW IF EXISTS t2;

sql/core/src/test/resources/sql-tests/results/union.sql.out

Lines changed: 19 additions & 8 deletions
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 14
+-- Number of queries: 15


 -- !query 0
@@ -105,40 +105,51 @@ struct<x:int,col:int>


 -- !query 9
-DROP VIEW IF EXISTS t1
+SELECT map(1, 2), 'str'
+UNION ALL
+SELECT map(1, 2, 3, NULL), 1
 -- !query 9 schema
-struct<>
+struct<map(1, 2):map<int,int>,str:string>
 -- !query 9 output
-
+{1:2,3:null}	1
+{1:2}	str


 -- !query 10
-DROP VIEW IF EXISTS t2
+DROP VIEW IF EXISTS t1
 -- !query 10 schema
 struct<>
 -- !query 10 output



 -- !query 11
-DROP VIEW IF EXISTS p1
+DROP VIEW IF EXISTS t2
 -- !query 11 schema
 struct<>
 -- !query 11 output



 -- !query 12
-DROP VIEW IF EXISTS p2
+DROP VIEW IF EXISTS p1
 -- !query 12 schema
 struct<>
 -- !query 12 output



 -- !query 13
-DROP VIEW IF EXISTS p3
+DROP VIEW IF EXISTS p2
 -- !query 13 schema
 struct<>
 -- !query 13 output

+
+
+-- !query 14
+DROP VIEW IF EXISTS p3
+-- !query 14 schema
+struct<>
+-- !query 14 output
+
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 13 additions & 0 deletions
@@ -896,6 +896,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
   }

+  test("SPARK-24012 Union of map and other compatible columns") {
+    checkAnswer(
+      sql(
+        """
+          |SELECT map(1, 2), 'str'
+          |UNION ALL
+          |SELECT map(1, 2, 3, NULL), 1""".stripMargin),
+      Row.fromSeq(Seq(Map(1 -> 2), "str")) ::
+      Row.fromSeq(Seq(Map(1 -> 2, 3 -> null), "1")) ::
+      Nil
+    )
+  }
+
   test("EXCEPT") {
     checkAnswer(
       sql("SELECT * FROM lowerCaseData EXCEPT SELECT * FROM upperCaseData"),
