Skip to content

Commit 0f8e5dd

Browse files
yaooqinncloud-fan
authored andcommitted
[SPARK-34003][SQL] Fix Rule conflicts between PaddingAndLengthCheckForCharVarchar and ResolveAggregateFunctions
### What changes were proposed in this pull request? ResolveAggregateFunctions is a hacky rule and it calls `executeSameContext` to generate a `resolved agg` to determine which unresolved sort attribute should be pushed into the agg. However, after we add the PaddingAndLengthCheckForCharVarchar rule which will rewrite the query output, thus, the `resolved agg` cannot match original attributes anymore. It causes some dissociative sort attribute to be pushed in and fails the query ``` logtalk [info] Failed to analyze query: org.apache.spark.sql.AnalysisException: expression 'testcat.t1.`v`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 [info] [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 ``` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31027 from yaooqinn/SPARK-34003. Authored-by: Kent Yao <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent b95a847 commit 0f8e5dd

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2400,16 +2400,22 @@ class Analyzer(override val catalogManager: CatalogManager)
24002400
// to push down this ordering expression and can reference the original aggregate
24012401
// expression instead.
24022402
val needsPushDown = ArrayBuffer.empty[NamedExpression]
2403-
val evaluatedOrderings = resolvedAliasedOrdering.zip(unresolvedSortOrders).map {
2404-
case (evaluated, order) =>
2403+
val orderToAlias = unresolvedSortOrders.zip(aliasedOrdering)
2404+
val evaluatedOrderings = resolvedAliasedOrdering.zip(orderToAlias).map {
2405+
case (evaluated, (order, aliasOrder)) =>
24052406
val index = originalAggExprs.indexWhere {
24062407
case Alias(child, _) => child semanticEquals evaluated.child
24072408
case other => other semanticEquals evaluated.child
24082409
}
24092410

24102411
if (index == -1) {
2411-
needsPushDown += evaluated
2412-
order.copy(child = evaluated.toAttribute)
2412+
if (CharVarcharUtils.getRawType(evaluated.metadata).nonEmpty) {
2413+
needsPushDown += aliasOrder
2414+
order.copy(child = aliasOrder)
2415+
} else {
2416+
needsPushDown += evaluated
2417+
order.copy(child = evaluated.toAttribute)
2418+
}
24132419
} else {
24142420
order.copy(child = originalAggExprs(index).toAttribute)
24152421
}

sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
466466
Row("c"))
467467
}
468468
}
469+
470+
test("SPARK-34003: fix char/varchar fails w/ both group by and order by ") {
471+
withTable("t") {
472+
sql(s"CREATE TABLE t(v VARCHAR(3), i INT) USING $format")
473+
sql("INSERT INTO t VALUES ('c', 1)")
474+
checkAnswer(sql("SELECT v, sum(i) FROM t GROUP BY v ORDER BY v"), Row("c", 1))
475+
}
476+
}
469477
}
470478

471479
// Some basic char/varchar tests which doesn't rely on table implementation.

0 commit comments

Comments
 (0)