Skip to content

Commit 34ef01d

Browse files
committed
perf: avoid cartesian product when an index is defined
Fixed issue #3216 The WHERE clause is applied AFTER the Cartesian product is created by chained MatchNodeSteps. The solution is to push ID() filters down into the MatchNodeStep itself when possible.
1 parent 7bc3237 commit 34ef01d

File tree

3 files changed

+400
-4
lines changed

3 files changed

+400
-4
lines changed

engine/src/main/java/com/arcadedb/query/opencypher/executor/CypherExecutionPlan.java

Lines changed: 122 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,9 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
813813
final Set<String> matchVariables = new HashSet<>();
814814
final boolean isOptional = matchClause.isOptional();
815815

816+
// Extract ID filters from WHERE clause (if present) for pushdown optimization
817+
final WhereClause whereClause = matchClause.hasWhereClause() ? matchClause.getWhereClause() : statement.getWhereClause();
818+
816819
AbstractExecutionStep matchChainStart = null;
817820

818821
for (int patternIndex = 0; patternIndex < pathPatterns.size(); patternIndex++) {
@@ -822,7 +825,10 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
822825
final NodePattern nodePattern = pathPattern.getFirstNode();
823826
final String variable = nodePattern.getVariable() != null ? nodePattern.getVariable() : ("n" + patternIndex);
824827
matchVariables.add(variable);
825-
final MatchNodeStep matchStep = new MatchNodeStep(variable, nodePattern, context);
828+
829+
// OPTIMIZATION: Extract ID filter for this variable to avoid Cartesian product
830+
final String idFilter = extractIdFilter(whereClause, variable);
831+
final MatchNodeStep matchStep = new MatchNodeStep(variable, nodePattern, context, idFilter);
826832

827833
if (isOptional) {
828834
if (matchChainStart == null) {
@@ -847,7 +853,10 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
847853

848854
if (!sourceAlreadyBound) {
849855
matchVariables.add(sourceVar);
850-
final MatchNodeStep sourceStep = new MatchNodeStep(sourceVar, sourceNode, context);
856+
857+
// OPTIMIZATION: Extract ID filter for source variable to avoid Cartesian product
858+
final String sourceIdFilter = extractIdFilter(whereClause, sourceVar);
859+
final MatchNodeStep sourceStep = new MatchNodeStep(sourceVar, sourceNode, context, sourceIdFilter);
851860

852861
if (isOptional) {
853862
if (matchChainStart == null) {
@@ -1005,7 +1014,11 @@ public String prettyPrint(final int depth, final int indent) {
10051014
final NodePattern nodePattern = pathPattern.getFirstNode();
10061015
final String variable = nodePattern.getVariable() != null ? nodePattern.getVariable() : ("n" + patternIndex);
10071016
matchVariables.add(variable); // Track variable for OPTIONAL MATCH
1008-
final MatchNodeStep matchStep = new MatchNodeStep(variable, nodePattern, context);
1017+
1018+
// OPTIMIZATION: Extract ID filter from WHERE clause (if present) for pushdown
1019+
final WhereClause matchWhere = matchClause.hasWhereClause() ? matchClause.getWhereClause() : statement.getWhereClause();
1020+
final String idFilter = extractIdFilter(matchWhere, variable);
1021+
final MatchNodeStep matchStep = new MatchNodeStep(variable, nodePattern, context, idFilter);
10091022

10101023
if (isOptional) {
10111024
// For optional match, chain within the match steps only
@@ -1038,8 +1051,12 @@ public String prettyPrint(final int depth, final int indent) {
10381051
// Only track the source variable if we're creating a new binding for it
10391052
matchVariables.add(sourceVar);
10401053

1054+
// OPTIMIZATION: Extract ID filter from WHERE clause (if present) for pushdown
1055+
final WhereClause matchWhere = matchClause.hasWhereClause() ? matchClause.getWhereClause() : statement.getWhereClause();
1056+
final String sourceIdFilter = extractIdFilter(matchWhere, sourceVar);
1057+
10411058
// Start with source node (or chain if we have previous patterns)
1042-
final MatchNodeStep sourceStep = new MatchNodeStep(sourceVar, sourceNode, context);
1059+
final MatchNodeStep sourceStep = new MatchNodeStep(sourceVar, sourceNode, context, sourceIdFilter);
10431060

10441061
if (isOptional) {
10451062
// For optional match, chain within the match steps only
@@ -1464,4 +1481,105 @@ private AbstractExecutionStep tryCreateTypeCountOptimization(final CommandContex
14641481
final String outputAlias = returnItem.getOutputName();
14651482
return new TypeCountStep(typeName, outputAlias, context);
14661483
}
1484+
1485+
/**
1486+
* Extracts ID filters from a WHERE clause for a specific variable.
1487+
* Looks for predicates like: ID(variable) = "value" or ID(variable) = $param
1488+
* <p>
1489+
* This optimization is critical for performance when matching by ID.
1490+
* Without it, MATCH (a),(b) WHERE ID(a) = x AND ID(b) = y would create
1491+
* a Cartesian product of ALL vertices before filtering (extremely slow).
1492+
*
1493+
* @param whereClause the WHERE clause to analyze
1494+
* @param variable the variable to extract ID filter for
1495+
* @return the ID value to filter by, or null if no ID filter found
1496+
*/
1497+
private String extractIdFilter(final WhereClause whereClause, final String variable) {
1498+
if (whereClause == null || whereClause.getConditionExpression() == null)
1499+
return null;
1500+
1501+
final BooleanExpression condition = whereClause.getConditionExpression();
1502+
1503+
// Try to extract ID filter from the condition expression
1504+
// The condition may be an AND expression containing multiple predicates
1505+
// We need to find the one that matches: ID(variable) = value
1506+
return extractIdFilterFromExpression(condition, variable);
1507+
}
1508+
1509+
/**
1510+
* Recursively extracts ID filter from a boolean expression.
1511+
*/
1512+
private String extractIdFilterFromExpression(final BooleanExpression expr, final String variable) {
1513+
if (expr == null)
1514+
return null;
1515+
1516+
// Check if this is a comparison expression (ID(var) = value)
1517+
if (expr instanceof ComparisonExpression) {
1518+
final ComparisonExpression compExpr = (ComparisonExpression) expr;
1519+
1520+
// Check if left side is ID(variable)
1521+
final Expression left = compExpr.getLeft();
1522+
if (left instanceof FunctionCallExpression) {
1523+
final FunctionCallExpression funcExpr = (FunctionCallExpression) left;
1524+
if ("id".equalsIgnoreCase(funcExpr.getFunctionName()) && funcExpr.getArguments().size() == 1) {
1525+
final Expression arg = funcExpr.getArguments().get(0);
1526+
if (arg instanceof VariableExpression) {
1527+
final VariableExpression varExpr = (VariableExpression) arg;
1528+
if (variable.equals(varExpr.getVariableName())) {
1529+
// Found ID(variable) - extract the value from right side
1530+
final Expression right = compExpr.getRight();
1531+
return evaluateIdValue(right);
1532+
}
1533+
}
1534+
}
1535+
}
1536+
}
1537+
1538+
// Check if this is a logical AND expression - recursively search both sides
1539+
if (expr instanceof LogicalExpression) {
1540+
final LogicalExpression logExpr = (LogicalExpression) expr;
1541+
if (logExpr.getOperator() == LogicalExpression.Operator.AND) {
1542+
final String leftResult = extractIdFilterFromExpression(logExpr.getLeft(), variable);
1543+
if (leftResult != null)
1544+
return leftResult;
1545+
return extractIdFilterFromExpression(logExpr.getRight(), variable);
1546+
}
1547+
}
1548+
1549+
return null;
1550+
}
1551+
1552+
/**
1553+
* Evaluates an expression to extract the ID value (literal or parameter).
1554+
*/
1555+
private String evaluateIdValue(final Expression expr) {
1556+
if (expr == null)
1557+
return null;
1558+
1559+
// Handle literal string values
1560+
if (expr instanceof LiteralExpression) {
1561+
final LiteralExpression litExpr = (LiteralExpression) expr;
1562+
final Object value = litExpr.getValue();
1563+
return value != null ? value.toString() : null;
1564+
}
1565+
1566+
// Handle parameter references
1567+
if (expr instanceof ParameterExpression) {
1568+
final ParameterExpression paramExpr = (ParameterExpression) expr;
1569+
final String paramName = paramExpr.getParameterName();
1570+
if (parameters != null && parameters.containsKey(paramName)) {
1571+
final Object value = parameters.get(paramName);
1572+
return value != null ? value.toString() : null;
1573+
}
1574+
}
1575+
1576+
// Handle property access for UNWIND scenarios (e.g., row.source_id)
1577+
if (expr instanceof PropertyAccessExpression) {
1578+
// Can't evaluate at plan time - would need runtime context
1579+
// This is handled differently via parameter substitution
1580+
return null;
1581+
}
1582+
1583+
return null;
1584+
}
14671585
}

engine/src/main/java/com/arcadedb/query/opencypher/executor/steps/MatchNodeStep.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
public class MatchNodeStep extends AbstractExecutionStep {
5252
private final String variable;
5353
private final NodePattern pattern;
54+
private final String idFilter; // Optional ID filter to apply (e.g., "#1:0")
5455

5556
/**
5657
* Creates a match node step.
@@ -60,9 +61,22 @@ public class MatchNodeStep extends AbstractExecutionStep {
6061
* @param context command context
6162
*/
6263
public MatchNodeStep(final String variable, final NodePattern pattern, final CommandContext context) {
64+
this(variable, pattern, context, null);
65+
}
66+
67+
/**
68+
* Creates a match node step with ID filter optimization.
69+
*
70+
* @param variable variable name to bind vertices to
71+
* @param pattern node pattern to match
72+
* @param context command context
73+
* @param idFilter optional ID filter to apply (e.g., "#1:0")
74+
*/
75+
public MatchNodeStep(final String variable, final NodePattern pattern, final CommandContext context, final String idFilter) {
6376
super(context);
6477
this.variable = variable;
6578
this.pattern = pattern;
79+
this.idFilter = idFilter;
6680
}
6781

6882
@Override
@@ -192,8 +206,30 @@ public void close() {
192206
* Gets an iterator for vertices matching the pattern.
193207
* OPTIMIZATION: Uses indexes for property equality constraints when available.
194208
* Supports composite indexes with partial key matching (leftmost prefix).
209+
* OPTIMIZATION: Uses ID filter when available to return single vertex.
195210
*/
196211
private Iterator<Identifiable> getVertexIterator() {
212+
// OPTIMIZATION: If ID filter is present, look up the specific vertex by ID
213+
// This is critical for performance when matching by ID (e.g., WHERE ID(a) = "#1:0")
214+
// Without this optimization, MATCH (a),(b) WHERE ID(a) = x AND ID(b) = y
215+
// would create a Cartesian product of ALL vertices before filtering
216+
if (idFilter != null && !idFilter.isEmpty()) {
217+
try {
218+
final com.arcadedb.database.RID rid = new com.arcadedb.database.RID(context.getDatabase(), idFilter);
219+
final Identifiable vertex = context.getDatabase().lookupByRID(rid, true);
220+
if (vertex != null) {
221+
// Return single-element iterator for the matched vertex
222+
return List.of(vertex).iterator();
223+
} else {
224+
// ID not found - return empty iterator
225+
return List.<Identifiable>of().iterator();
226+
}
227+
} catch (final Exception e) {
228+
// Invalid ID format - return empty iterator
229+
return List.<Identifiable>of().iterator();
230+
}
231+
}
232+
197233
if (pattern.hasLabels()) {
198234
final String label = pattern.getFirstLabel();
199235

0 commit comments

Comments
 (0)