Skip to content

Commit c06f408

Browse files
authored
feat(sql): add support for indexing of symbol columns in materialized views (#5961)
1 parent f6d596e commit c06f408

3 files changed

Lines changed: 328 additions & 20 deletions

File tree

core/src/main/java/io/questdb/griffin/SqlCompilerImpl.java

Lines changed: 118 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,28 @@ private static long copyOrderedBatchedVarcharTimestamp(
660660
return rowCount;
661661
}
662662

663+
private static int estimateIndexValueBlockSizeFromReader(SqlExecutionContext executionContext, TableToken matViewToken, int columnIndex) {
664+
final int indexValueBlockSize;
665+
try (TableReader reader = executionContext.getReader(matViewToken)) {
666+
int symbolCount = reader.getSymbolMapReader(columnIndex).getSymbolCount();
667+
// we are looking to estimate how many rowids we will need to store for each
668+
// symbol per partition. To do that wee are assuming the following formula:
669+
// max(2, table_row_count / table_partition_count / symbol_count / 4)
670+
// here:
671+
// 2 being the minimum number of row IDs per symbol
672+
// div by 4 - we are looking to have 4 chained block for an average symbol to make sure we
673+
// do not create overly-sparse index for symbols with row count below average.
674+
long v = Math.max(2, reader.size() / reader.getPartitionCount() / symbolCount / 4);
675+
if ((int) v != v) {
676+
// overflow, assign sensible default (large)
677+
indexValueBlockSize = 50_000_000;
678+
} else {
679+
indexValueBlockSize = (int) v;
680+
}
681+
}
682+
return indexValueBlockSize;
683+
}
684+
663685
private static boolean isIPv4UpdateCast(int from, int to) {
664686
return (from == ColumnType.STRING && to == ColumnType.IPv4)
665687
|| (from == ColumnType.IPv4 && to == ColumnType.STRING)
@@ -982,13 +1004,13 @@ private void alterTableChangeSymbolCapacity(
9821004
}
9831005
expectKeyword(lexer, "capacity");
9841006

985-
CharSequence tok = expectToken(lexer, "symbol capacity");
1007+
CharSequence tok = expectToken(lexer, "numeric capacity");
9861008

9871009
final boolean negative;
9881010
final int errorPos = lexer.lastTokenPosition();
9891011
if (Chars.equals(tok, '-')) {
9901012
negative = true;
991-
tok = expectToken(lexer, "symbol capacity");
1013+
tok = expectToken(lexer, "numeric capacity");
9921014
} else {
9931015
negative = false;
9941016
}
@@ -1602,7 +1624,7 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
16021624
throw SqlException.$(lexer.lastTokenPosition(), "materialized view does not exist");
16031625
}
16041626

1605-
try (TableRecordMetadata tableMetadata = executionContext.getMetadataForWrite(matViewToken)) {
1627+
try (TableRecordMetadata tableMetadata = engine.getTableMetadata(matViewToken)) {
16061628
tok = expectToken(lexer, "'alter' or 'resume' or 'suspend'");
16071629
if (isAlterKeyword(tok)) {
16081630
expectKeyword(lexer, "column");
@@ -1615,16 +1637,85 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
16151637
.put("' does not exist in materialized view '").put(matViewToken.getTableName()).put('\'');
16161638
}
16171639

1618-
expectKeyword(lexer, "symbol");
1619-
alterTableChangeSymbolCapacity(
1620-
securityContext,
1621-
matViewNamePosition,
1622-
matViewToken,
1623-
columnNamePosition,
1624-
columnName,
1625-
tableMetadata,
1626-
columnIndex
1627-
);
1640+
tok = expectToken(lexer, "'symbol capacity', 'add index' or 'drop index'");
1641+
if (SqlKeywords.isSymbolKeyword(tok)) {
1642+
alterTableChangeSymbolCapacity(
1643+
securityContext,
1644+
matViewNamePosition,
1645+
matViewToken,
1646+
columnNamePosition,
1647+
columnName,
1648+
tableMetadata,
1649+
columnIndex
1650+
);
1651+
} else if (SqlKeywords.isAddKeyword(tok)) {
1652+
expectKeyword(lexer, "index");
1653+
1654+
if (tableMetadata.isColumnIndexed(columnIndex)) {
1655+
throw SqlException.walRecoverable(columnNamePosition).put("column '").put(columnName)
1656+
.put("' already indexed");
1657+
}
1658+
int columnType = tableMetadata.getColumnType(columnIndex);
1659+
if (columnType != ColumnType.SYMBOL) {
1660+
throw SqlException.walRecoverable(columnNamePosition).put("column '").put(columnName)
1661+
.put("' is of type '").put(ColumnType.nameOf(columnType)).put("'. Index supports column type 'SYMBOL' only.");
1662+
}
1663+
1664+
final int indexValueBlockSize;
1665+
final boolean sizeInferred;
1666+
1667+
tok = SqlUtil.fetchNext(lexer);
1668+
if (tok == null) {
1669+
indexValueBlockSize = estimateIndexValueBlockSizeFromReader(executionContext, matViewToken, columnIndex);
1670+
sizeInferred = true;
1671+
} else {
1672+
if (!SqlKeywords.isCapacityKeyword(tok)) {
1673+
throw SqlException.$(lexer.lastTokenPosition(), "'capacity' keyword expected");
1674+
}
1675+
tok = expectToken(lexer, "index capacity value");
1676+
try {
1677+
indexValueBlockSize = Numbers.parseInt(tok);
1678+
sizeInferred = false;
1679+
} catch (NumericException e) {
1680+
throw SqlException.$(lexer.lastTokenPosition(), "index capacity value must be numeric");
1681+
}
1682+
}
1683+
1684+
LOG.info().$("adding mat view index [viewName=").$(matViewToken)
1685+
.$(", column=").$safe(columnName)
1686+
.$(", indexValueBlockSize=").$(indexValueBlockSize)
1687+
.$(", sizeInferred=").$(sizeInferred)
1688+
.I$();
1689+
1690+
alterTableColumnAddIndex(
1691+
securityContext,
1692+
matViewNamePosition,
1693+
matViewToken,
1694+
columnNamePosition,
1695+
columnName,
1696+
tableMetadata,
1697+
indexValueBlockSize
1698+
);
1699+
} else if (SqlKeywords.isDropKeyword(tok)) {
1700+
expectKeyword(lexer, "index");
1701+
1702+
if (!tableMetadata.isColumnIndexed(columnIndex)) {
1703+
throw SqlException.walRecoverable(columnNamePosition).put("column '").put(columnName)
1704+
.put("' is not indexed");
1705+
}
1706+
1707+
alterTableColumnDropIndex(
1708+
securityContext,
1709+
matViewNamePosition,
1710+
matViewToken,
1711+
columnNamePosition,
1712+
columnName,
1713+
tableMetadata
1714+
);
1715+
1716+
} else {
1717+
throw SqlException.$(lexer.lastTokenPosition(), "'symbol capacity', 'add index' or 'drop index' expected");
1718+
}
16281719
} else if (isSetKeyword(tok)) {
16291720
tok = expectToken(lexer, "'ttl' or 'refresh'");
16301721
if (isTtlKeyword(tok)) {
@@ -1732,7 +1823,7 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
17321823
} catch (NumericException e) {
17331824
throw SqlException.$(lexer.lastTokenPosition(), "invalid START timestamp value");
17341825
}
1735-
tok = expectToken(lexer, "'time zone'");
1826+
tok = maybeExpectToken(lexer, "'time zone'", false);
17361827

17371828
if (isTimeKeyword(tok)) {
17381829
expectKeyword(lexer, "zone");
@@ -3949,16 +4040,27 @@ private void validateAndOptimiseInsertAsSelect(SqlExecutionContext executionCont
39494040

39504041
protected static CharSequence expectToken(GenericLexer lexer, CharSequence expected) throws SqlException {
39514042
CharSequence tok = SqlUtil.fetchNext(lexer);
4043+
39524044
if (tok == null) {
39534045
throw SqlException.position(lexer.getPosition()).put(expected).put(" expected");
39544046
}
4047+
4048+
if (Chars.equals(tok, ';')) {
4049+
throw SqlException.position(lexer.lastTokenPosition()).put(expected).put(" expected");
4050+
}
39554051
return tok;
39564052
}
39574053

39584054
protected static CharSequence maybeExpectToken(GenericLexer lexer, CharSequence expected, boolean expect) throws SqlException {
39594055
CharSequence tok = SqlUtil.fetchNext(lexer);
3960-
if (expect && tok == null) {
3961-
throw SqlException.position(lexer.getPosition()).put(expected).put(" expected");
4056+
if (expect) {
4057+
if (tok == null) {
4058+
throw SqlException.position(lexer.getPosition()).put(expected).put(" expected");
4059+
}
4060+
4061+
if (Chars.equals(tok, ';')) {
4062+
throw SqlException.position(lexer.lastTokenPosition()).put(expected).put(" expected");
4063+
}
39624064
}
39634065
return tok;
39644066
}

0 commit comments

Comments
 (0)