Skip to content

Commit e0957b1

Browse files
committed
Add column pruning for the right side of LeftSemi join.
1 parent 5fa0a05 commit e0957b1

File tree

1 file changed

+18
-6
lines changed
  • sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer

1 file changed

+18
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ object Optimizer extends RuleExecutor[LogicalPlan] {
5252
* - Inserting Projections beneath the following operators:
5353
* - Aggregate
5454
* - Project <- Join
55+
* - LeftSemiJoin
5556
* - Collapse adjacent projections, performing alias substitution.
5657
*/
5758
object ColumnPruning extends Rule[LogicalPlan] {
@@ -67,15 +68,18 @@ object ColumnPruning extends Rule[LogicalPlan] {
6768
projectList.flatMap(_.references).toSet ++ condition.map(_.references).getOrElse(Set.empty)
6869

6970
/** Applies a projection only when the child is producing unnecessary attributes */
70-
def prunedChild(c: LogicalPlan) =
71-
if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) {
72-
Project(allReferences.filter(c.outputSet.contains).toSeq, c)
73-
} else {
74-
c
75-
}
71+
def prunedChild(c: LogicalPlan) = ColumnPruning.prunedChild(c, allReferences)
7672

7773
Project(projectList, Join(prunedChild(left), prunedChild(right), joinType, condition))
7874

75+
// Eliminate unneeded attributes from right side of a LeftSemiJoin.
76+
case Join(left, right, LeftSemi, condition) =>
77+
// Collect the list of all references required to evaluate the condition.
78+
val allReferences: Set[Attribute] =
79+
condition.map(_.references).getOrElse(Set.empty)
80+
81+
Join(left, prunedChild(right, allReferences), LeftSemi, condition)
82+
7983
// Combine adjacent Projects.
8084
case Project(projectList1, Project(projectList2, child)) =>
8185
// Create a map of Aliases to their values from the child projection.
@@ -97,6 +101,14 @@ object ColumnPruning extends Rule[LogicalPlan] {
97101
// Eliminate no-op Projects
98102
case Project(projectList, child) if child.output == projectList => child
99103
}
104+
105+
/**
 * Returns `c` unchanged when every attribute in its output set is referenced;
 * otherwise wraps it in a [[Project]] that keeps only the referenced attributes.
 *
 * @param c             the child plan to (possibly) prune
 * @param allReferences the attributes referenced by the caller; entries that `c`
 *                      does not produce are ignored
 */
private def prunedChild(c: LogicalPlan, allReferences: Set[Attribute]) = {
  // Referenced attributes that this child actually produces.
  val needed = allReferences.filter(c.outputSet.contains)
  // Attributes the child produces that are not referenced.
  val unused = c.outputSet -- needed

  if (unused.isEmpty) c else Project(needed.toSeq, c)
}
100112
}
101113

102114
/**

0 commit comments

Comments
 (0)