Skip to content

Commit 2134196

Browse files
bogdanrdccloud-fan
authored andcommitted
[SPARK-20854][SQL] Extend hint syntax to support expressions
## What changes were proposed in this pull request? SQL hint syntax: * support expressions such as strings, numbers, etc. instead of only identifiers as it is currently. * support multiple hints, which was missing compared to the DataFrame syntax. DataFrame API: * support any parameters in DataFrame.hint instead of just strings ## How was this patch tested? Existing tests. New tests in PlanParserSuite. New suite DataFrameHintSuite. Author: Bogdan Raducanu <[email protected]> Closes #18086 from bogdanrdc/SPARK-20854.
1 parent 8efc6e9 commit 2134196

File tree

9 files changed

+225
-26
lines changed

9 files changed

+225
-26
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ querySpecification
371371
(RECORDREADER recordReader=STRING)?
372372
fromClause?
373373
(WHERE where=booleanExpression)?)
374-
| ((kind=SELECT hint? setQuantifier? namedExpressionSeq fromClause?
374+
| ((kind=SELECT (hints+=hint)* setQuantifier? namedExpressionSeq fromClause?
375375
| fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?)
376376
lateralView*
377377
(WHERE where=booleanExpression)?
@@ -381,12 +381,12 @@ querySpecification
381381
;
382382

383383
hint
384-
: '/*+' hintStatement '*/'
384+
: '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/'
385385
;
386386

387387
hintStatement
388388
: hintName=identifier
389-
| hintName=identifier '(' parameters+=identifier (',' parameters+=identifier)* ')'
389+
| hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')'
390390
;
391391

392392
fromClause

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
1919

2020
import java.util.Locale
2121

22+
import org.apache.spark.sql.AnalysisException
2223
import org.apache.spark.sql.catalyst.plans.logical._
2324
import org.apache.spark.sql.catalyst.rules.Rule
2425
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
@@ -91,7 +92,12 @@ object ResolveHints {
9192
ResolvedHint(h.child, HintInfo(isBroadcastable = Option(true)))
9293
} else {
9394
// Otherwise, find within the subtree query plans that should be broadcasted.
94-
applyBroadcastHint(h.child, h.parameters.toSet)
95+
applyBroadcastHint(h.child, h.parameters.map {
96+
case tableName: String => tableName
97+
case tableId: UnresolvedAttribute => tableId.name
98+
case unsupported => throw new AnalysisException("Broadcast hint parameter should be " +
99+
s"an identifier or string but was $unsupported (${unsupported.getClass}")
100+
}.toSet)
95101
}
96102
}
97103
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,9 @@ package object dsl {
381381

382382
def analyze: LogicalPlan =
383383
EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan))
384+
385+
def hint(name: String, parameters: Any*): LogicalPlan =
386+
UnresolvedHint(name, parameters, logicalPlan)
384387
}
385388
}
386389
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
407407
val withWindow = withDistinct.optionalMap(windows)(withWindows)
408408

409409
// Hint
410-
withWindow.optionalMap(hint)(withHints)
410+
hints.asScala.foldRight(withWindow)(withHints)
411411
}
412412
}
413413

@@ -533,13 +533,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
533533
}
534534

535535
/**
536-
* Add a [[UnresolvedHint]] to a logical plan.
536+
* Add [[UnresolvedHint]]s to a logical plan.
537537
*/
538538
private def withHints(
539539
ctx: HintContext,
540540
query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
541-
val stmt = ctx.hintStatement
542-
UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(_.getText), query)
541+
var plan = query
542+
ctx.hintStatements.asScala.reverse.foreach { case stmt =>
543+
plan = UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(expression), plan)
544+
}
545+
plan
543546
}
544547

545548
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ import org.apache.spark.sql.internal.SQLConf
2323
/**
2424
* A general hint for the child that is not yet resolved. This node is generated by the parser and
2525
* should be removed This node will be eliminated post analysis.
26-
* A pair of (name, parameters).
26+
* @param name the name of the hint
27+
* @param parameters the parameters of the hint
28+
* @param child the [[LogicalPlan]] on which this hint applies
2729
*/
28-
case class UnresolvedHint(name: String, parameters: Seq[String], child: LogicalPlan)
30+
case class UnresolvedHint(name: String, parameters: Seq[Any], child: LogicalPlan)
2931
extends UnaryNode {
3032

3133
override lazy val resolved: Boolean = false
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql
19+
20+
import org.apache.spark.sql.catalyst.analysis.AnalysisTest
21+
import org.apache.spark.sql.catalyst.dsl.expressions._
22+
import org.apache.spark.sql.catalyst.dsl.plans._
23+
import org.apache.spark.sql.catalyst.expressions._
24+
import org.apache.spark.sql.catalyst.plans.logical._
25+
26+
class DSLHintSuite extends AnalysisTest {
27+
lazy val a = 'a.int
28+
lazy val b = 'b.string
29+
lazy val c = 'c.string
30+
lazy val r1 = LocalRelation(a, b, c)
31+
32+
test("various hint parameters") {
33+
comparePlans(
34+
r1.hint("hint1"),
35+
UnresolvedHint("hint1", Seq(), r1)
36+
)
37+
38+
comparePlans(
39+
r1.hint("hint1", 1, "a"),
40+
UnresolvedHint("hint1", Seq(1, "a"), r1)
41+
)
42+
43+
comparePlans(
44+
r1.hint("hint1", 1, $"a"),
45+
UnresolvedHint("hint1", Seq(1, $"a"), r1)
46+
)
47+
48+
comparePlans(
49+
r1.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")),
50+
UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")), r1)
51+
)
52+
}
53+
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.parser
1919

2020
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
21-
import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
21+
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
2222
import org.apache.spark.sql.catalyst.expressions._
2323
import org.apache.spark.sql.catalyst.plans._
2424
import org.apache.spark.sql.catalyst.plans.logical._
@@ -527,47 +527,117 @@ class PlanParserSuite extends PlanTest {
527527
val m = intercept[ParseException] {
528528
parsePlan("SELECT /*+ HINT() */ * FROM t")
529529
}.getMessage
530-
assert(m.contains("no viable alternative at input"))
531-
532-
// Hive compatibility: No database.
533-
val m2 = intercept[ParseException] {
534-
parsePlan("SELECT /*+ MAPJOIN(default.t) */ * from default.t")
535-
}.getMessage
536-
assert(m2.contains("mismatched input '.' expecting {')', ','}"))
530+
assert(m.contains("mismatched input"))
537531

538532
// Disallow space as the delimiter.
539533
val m3 = intercept[ParseException] {
540534
parsePlan("SELECT /*+ INDEX(a b c) */ * from default.t")
541535
}.getMessage
542-
assert(m3.contains("mismatched input 'b' expecting {')', ','}"))
536+
assert(m3.contains("mismatched input 'b' expecting"))
543537

544538
comparePlans(
545539
parsePlan("SELECT /*+ HINT */ * FROM t"),
546540
UnresolvedHint("HINT", Seq.empty, table("t").select(star())))
547541

548542
comparePlans(
549543
parsePlan("SELECT /*+ BROADCASTJOIN(u) */ * FROM t"),
550-
UnresolvedHint("BROADCASTJOIN", Seq("u"), table("t").select(star())))
544+
UnresolvedHint("BROADCASTJOIN", Seq($"u"), table("t").select(star())))
551545

552546
comparePlans(
553547
parsePlan("SELECT /*+ MAPJOIN(u) */ * FROM t"),
554-
UnresolvedHint("MAPJOIN", Seq("u"), table("t").select(star())))
548+
UnresolvedHint("MAPJOIN", Seq($"u"), table("t").select(star())))
555549

556550
comparePlans(
557551
parsePlan("SELECT /*+ STREAMTABLE(a,b,c) */ * FROM t"),
558-
UnresolvedHint("STREAMTABLE", Seq("a", "b", "c"), table("t").select(star())))
552+
UnresolvedHint("STREAMTABLE", Seq($"a", $"b", $"c"), table("t").select(star())))
559553

560554
comparePlans(
561555
parsePlan("SELECT /*+ INDEX(t, emp_job_ix) */ * FROM t"),
562-
UnresolvedHint("INDEX", Seq("t", "emp_job_ix"), table("t").select(star())))
556+
UnresolvedHint("INDEX", Seq($"t", $"emp_job_ix"), table("t").select(star())))
563557

564558
comparePlans(
565559
parsePlan("SELECT /*+ MAPJOIN(`default.t`) */ * from `default.t`"),
566-
UnresolvedHint("MAPJOIN", Seq("default.t"), table("default.t").select(star())))
560+
UnresolvedHint("MAPJOIN", Seq(UnresolvedAttribute.quoted("default.t")),
561+
table("default.t").select(star())))
567562

568563
comparePlans(
569564
parsePlan("SELECT /*+ MAPJOIN(t) */ a from t where true group by a order by a"),
570-
UnresolvedHint("MAPJOIN", Seq("t"),
565+
UnresolvedHint("MAPJOIN", Seq($"t"),
571566
table("t").where(Literal(true)).groupBy('a)('a)).orderBy('a.asc))
572567
}
568+
569+
test("SPARK-20854: select hint syntax with expressions") {
570+
comparePlans(
571+
parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"),
572+
UnresolvedHint("HINT1", Seq($"a",
573+
UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)),
574+
table("t").select(star())
575+
)
576+
)
577+
578+
comparePlans(
579+
parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"),
580+
UnresolvedHint("HINT1", Seq($"a",
581+
UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)),
582+
table("t").select(star())
583+
)
584+
)
585+
586+
comparePlans(
587+
parsePlan("SELECT /*+ HINT1(a, 5, 'a', b) */ * from t"),
588+
UnresolvedHint("HINT1", Seq($"a", Literal(5), Literal("a"), $"b"),
589+
table("t").select(star())
590+
)
591+
)
592+
593+
comparePlans(
594+
parsePlan("SELECT /*+ HINT1('a', (b, c), (1, 2)) */ * from t"),
595+
UnresolvedHint("HINT1",
596+
Seq(Literal("a"),
597+
CreateStruct($"b" :: $"c" :: Nil),
598+
CreateStruct(Literal(1) :: Literal(2) :: Nil)),
599+
table("t").select(star())
600+
)
601+
)
602+
}
603+
604+
test("SPARK-20854: multiple hints") {
605+
comparePlans(
606+
parsePlan("SELECT /*+ HINT1(a, 1) hint2(b, 2) */ * from t"),
607+
UnresolvedHint("HINT1", Seq($"a", Literal(1)),
608+
UnresolvedHint("hint2", Seq($"b", Literal(2)),
609+
table("t").select(star())
610+
)
611+
)
612+
)
613+
614+
comparePlans(
615+
parsePlan("SELECT /*+ HINT1(a, 1),hint2(b, 2) */ * from t"),
616+
UnresolvedHint("HINT1", Seq($"a", Literal(1)),
617+
UnresolvedHint("hint2", Seq($"b", Literal(2)),
618+
table("t").select(star())
619+
)
620+
)
621+
)
622+
623+
comparePlans(
624+
parsePlan("SELECT /*+ HINT1(a, 1) */ /*+ hint2(b, 2) */ * from t"),
625+
UnresolvedHint("HINT1", Seq($"a", Literal(1)),
626+
UnresolvedHint("hint2", Seq($"b", Literal(2)),
627+
table("t").select(star())
628+
)
629+
)
630+
)
631+
632+
comparePlans(
633+
parsePlan("SELECT /*+ HINT1(a, 1), hint2(b, 2) */ /*+ hint3(c, 3) */ * from t"),
634+
UnresolvedHint("HINT1", Seq($"a", Literal(1)),
635+
UnresolvedHint("hint2", Seq($"b", Literal(2)),
636+
UnresolvedHint("hint3", Seq($"c", Literal(3)),
637+
table("t").select(star())
638+
)
639+
)
640+
)
641+
)
642+
}
573643
}

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1190,7 +1190,7 @@ class Dataset[T] private[sql](
11901190
* @since 2.2.0
11911191
*/
11921192
@scala.annotation.varargs
1193-
def hint(name: String, parameters: String*): Dataset[T] = withTypedPlan {
1193+
def hint(name: String, parameters: Any*): Dataset[T] = withTypedPlan {
11941194
UnresolvedHint(name, parameters, logicalPlan)
11951195
}
11961196

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql
19+
20+
import org.apache.spark.sql.catalyst.plans.PlanTest
21+
import org.apache.spark.sql.catalyst.plans.logical._
22+
import org.apache.spark.sql.test.SharedSQLContext
23+
24+
class DataFrameHintSuite extends PlanTest with SharedSQLContext {
25+
import testImplicits._
26+
lazy val df = spark.range(10)
27+
28+
private def check(df: Dataset[_], expected: LogicalPlan) = {
29+
comparePlans(
30+
df.queryExecution.logical,
31+
expected
32+
)
33+
}
34+
35+
test("various hint parameters") {
36+
check(
37+
df.hint("hint1"),
38+
UnresolvedHint("hint1", Seq(),
39+
df.logicalPlan
40+
)
41+
)
42+
43+
check(
44+
df.hint("hint1", 1, "a"),
45+
UnresolvedHint("hint1", Seq(1, "a"), df.logicalPlan)
46+
)
47+
48+
check(
49+
df.hint("hint1", 1, $"a"),
50+
UnresolvedHint("hint1", Seq(1, $"a"),
51+
df.logicalPlan
52+
)
53+
)
54+
55+
check(
56+
df.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")),
57+
UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")),
58+
df.logicalPlan
59+
)
60+
)
61+
}
62+
}

0 commit comments

Comments
 (0)