-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Closed
Labels
Description
one of the TPC-DS complex query seems to have semi-join pushdown issues that didn't apply optimization from #11937
see:
WITH sr_tmp AS
(
SELECT sr_item_sk,
sr_ticket_number,
sr_return_quantity
FROM store_returns
WHERE sr_reason_sk IN
(
SELECT r_reason_sk
FROM reason
WHERE r_reason_desc = 'reason 50')
),
sr_tmp2 AS
(
SELECT sr_ticket_number
FROM store_returns
WHERE sr_reason_sk IN
(
SELECT r_reason_sk
FROM reason
WHERE r_reason_desc = 'reason 50')
),
ss_tmp AS (
SELECT ss_customer_sk, ss_item_sk, ss_ticket_number, ss_quantity, ss_sales_price
FROM store_sales
WHERE ss_ticket_number IN (SELECT sr_ticket_number FROM sr_tmp2)
)
SELECT
/*+ aggOptions(num_groups_limit='1000000000') */
ss_customer_sk,
Sum(
CASE
WHEN sr_return_quantity IS NOT NULL THEN ( ss_quantity - sr_return_quantity ) * ss_sales_price
ELSE ( ss_quantity * ss_sales_price )
END) act_sales
FROM ss_tmp
LEFT OUTER JOIN sr_tmp
ON (
sr_item_sk = ss_item_sk
AND sr_ticket_number = ss_ticket_number)
GROUP BY ss_customer_sk
limit 100;
the issue was at the inner most join:
Execution Plan
LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], offset=[0], fetch=[100])
PinotLogicalSortExchange(distribution=[hash], collation=[[1, 0]], isSortOnSender=[false], isSortOnReceiver=[true])
LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100])
LogicalProject(ss_customer_sk=[$0], act_sales=[CASE(=($2, 0), null:DOUBLE, $1)])
LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($2)])
PinotLogicalExchange(distribution=[hash[0]])
LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)])
LogicalProject(ss_customer_sk=[$0], $f1=[CASE(IS NOT NULL($8), *(-($3, $8), $4), $5)])
LogicalJoin(condition=[AND(=($6, $1), =($7, $2))], joinType=[left])
PinotLogicalExchange(distribution=[hash[1, 2]])
LogicalProject(ss_customer_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$4], ss_quantity=[$2], ss_sales_price=[$3], EXPR$0=[*($2, $3)])
PinotLogicalExchange(distribution=[hash[4]])
LogicalJoin(condition=[=($4, $6)], joinType=[semi])
LogicalProject(ss_customer_sk=[$6], ss_item_sk=[$13], ss_quantity=[$19], ss_sales_price=[$20], ss_ticket_number=[$24])
LogicalTableScan(table=[[store_sales]])
PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])
------------ THIS JOIN NODE BELOW SHOULD BE SEMI ------------
LogicalJoin(condition=[=($0, $2)], joinType=[inner])
PinotLogicalExchange(distribution=[hash[0]])
LogicalProject(sr_reason_sk=[$10], sr_ticket_number=[$22])
LogicalTableScan(table=[[store_returns]])
PinotLogicalExchange(distribution=[hash[0]])
LogicalAggregate(group=[{0}])
PinotLogicalExchange(distribution=[hash[0]])
LogicalAggregate(group=[{1}])
LogicalProject(r_reason_desc=[$3], r_reason_sk=[$5])
LogicalFilter(condition=[=($3, 'reason 50')])
LogicalTableScan(table=[[reason]])
PinotLogicalExchange(distribution=[hash[0, 1]])
LogicalProject(sr_item_sk=[$0], sr_ticket_number=[$3], sr_return_quantity=[$2])
PinotLogicalExchange(distribution=[hash[1]])
LogicalJoin(condition=[=($1, $5)], joinType=[semi])
LogicalProject(sr_item_sk=[$8], sr_reason_sk=[$10], sr_return_quantity=[$14], sr_ticket_number=[$22])
LogicalTableScan(table=[[store_returns]])
PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])
LogicalProject(r_reason_desc=[$3], r_reason_sk=[$5])
LogicalFilter(condition=[=($3, 'reason 50')])
LogicalTableScan(table=[[reason]])