@@ -660,6 +660,28 @@ private static long copyOrderedBatchedVarcharTimestamp(
660660 return rowCount ;
661661 }
662662
663+ private static int estimateIndexValueBlockSizeFromReader (SqlExecutionContext executionContext , TableToken matViewToken , int columnIndex ) {
664+ final int indexValueBlockSize ;
665+ try (TableReader reader = executionContext .getReader (matViewToken )) {
666+ int symbolCount = reader .getSymbolMapReader (columnIndex ).getSymbolCount ();
667+ // we are looking to estimate how many rowids we will need to store for each
668+ // symbol per partition. To do that wee are assuming the following formula:
669+ // max(2, table_row_count / table_partition_count / symbol_count / 4)
670+ // here:
671+ // 2 being the minimum number of row IDs per symbol
672+ // div by 4 - we are looking to have 4 chained block for an average symbol to make sure we
673+ // do not create overly-sparse index for symbols with row count below average.
674+ long v = Math .max (2 , reader .size () / reader .getPartitionCount () / symbolCount / 4 );
675+ if ((int ) v != v ) {
676+ // overflow, assign sensible default (large)
677+ indexValueBlockSize = 50_000_000 ;
678+ } else {
679+ indexValueBlockSize = (int ) v ;
680+ }
681+ }
682+ return indexValueBlockSize ;
683+ }
684+
663685 private static boolean isIPv4UpdateCast (int from , int to ) {
664686 return (from == ColumnType .STRING && to == ColumnType .IPv4 )
665687 || (from == ColumnType .IPv4 && to == ColumnType .STRING )
@@ -982,13 +1004,13 @@ private void alterTableChangeSymbolCapacity(
9821004 }
9831005 expectKeyword (lexer , "capacity" );
9841006
985- CharSequence tok = expectToken (lexer , "symbol capacity" );
1007+ CharSequence tok = expectToken (lexer , "numeric capacity" );
9861008
9871009 final boolean negative ;
9881010 final int errorPos = lexer .lastTokenPosition ();
9891011 if (Chars .equals (tok , '-' )) {
9901012 negative = true ;
991- tok = expectToken (lexer , "symbol capacity" );
1013+ tok = expectToken (lexer , "numeric capacity" );
9921014 } else {
9931015 negative = false ;
9941016 }
@@ -1602,7 +1624,7 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
16021624 throw SqlException .$ (lexer .lastTokenPosition (), "materialized view does not exist" );
16031625 }
16041626
1605- try (TableRecordMetadata tableMetadata = executionContext . getMetadataForWrite (matViewToken )) {
1627+ try (TableRecordMetadata tableMetadata = engine . getTableMetadata (matViewToken )) {
16061628 tok = expectToken (lexer , "'alter' or 'resume' or 'suspend'" );
16071629 if (isAlterKeyword (tok )) {
16081630 expectKeyword (lexer , "column" );
@@ -1615,16 +1637,85 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
16151637 .put ("' does not exist in materialized view '" ).put (matViewToken .getTableName ()).put ('\'' );
16161638 }
16171639
1618- expectKeyword (lexer , "symbol" );
1619- alterTableChangeSymbolCapacity (
1620- securityContext ,
1621- matViewNamePosition ,
1622- matViewToken ,
1623- columnNamePosition ,
1624- columnName ,
1625- tableMetadata ,
1626- columnIndex
1627- );
1640+ tok = expectToken (lexer , "'symbol capacity', 'add index' or 'drop index'" );
1641+ if (SqlKeywords .isSymbolKeyword (tok )) {
1642+ alterTableChangeSymbolCapacity (
1643+ securityContext ,
1644+ matViewNamePosition ,
1645+ matViewToken ,
1646+ columnNamePosition ,
1647+ columnName ,
1648+ tableMetadata ,
1649+ columnIndex
1650+ );
1651+ } else if (SqlKeywords .isAddKeyword (tok )) {
1652+ expectKeyword (lexer , "index" );
1653+
1654+ if (tableMetadata .isColumnIndexed (columnIndex )) {
1655+ throw SqlException .walRecoverable (columnNamePosition ).put ("column '" ).put (columnName )
1656+ .put ("' already indexed" );
1657+ }
1658+ int columnType = tableMetadata .getColumnType (columnIndex );
1659+ if (columnType != ColumnType .SYMBOL ) {
1660+ throw SqlException .walRecoverable (columnNamePosition ).put ("column '" ).put (columnName )
1661+ .put ("' is of type '" ).put (ColumnType .nameOf (columnType )).put ("'. Index supports column type 'SYMBOL' only." );
1662+ }
1663+
1664+ final int indexValueBlockSize ;
1665+ final boolean sizeInferred ;
1666+
1667+ tok = SqlUtil .fetchNext (lexer );
1668+ if (tok == null ) {
1669+ indexValueBlockSize = estimateIndexValueBlockSizeFromReader (executionContext , matViewToken , columnIndex );
1670+ sizeInferred = true ;
1671+ } else {
1672+ if (!SqlKeywords .isCapacityKeyword (tok )) {
1673+ throw SqlException .$ (lexer .lastTokenPosition (), "'capacity' keyword expected" );
1674+ }
1675+ tok = expectToken (lexer , "index capacity value" );
1676+ try {
1677+ indexValueBlockSize = Numbers .parseInt (tok );
1678+ sizeInferred = false ;
1679+ } catch (NumericException e ) {
1680+ throw SqlException .$ (lexer .lastTokenPosition (), "index capacity value must be numeric" );
1681+ }
1682+ }
1683+
1684+ LOG .info ().$ ("adding mat view index [viewName=" ).$ (matViewToken )
1685+ .$ (", column=" ).$safe (columnName )
1686+ .$ (", indexValueBlockSize=" ).$ (indexValueBlockSize )
1687+ .$ (", sizeInferred=" ).$ (sizeInferred )
1688+ .I$ ();
1689+
1690+ alterTableColumnAddIndex (
1691+ securityContext ,
1692+ matViewNamePosition ,
1693+ matViewToken ,
1694+ columnNamePosition ,
1695+ columnName ,
1696+ tableMetadata ,
1697+ indexValueBlockSize
1698+ );
1699+ } else if (SqlKeywords .isDropKeyword (tok )) {
1700+ expectKeyword (lexer , "index" );
1701+
1702+ if (!tableMetadata .isColumnIndexed (columnIndex )) {
1703+ throw SqlException .walRecoverable (columnNamePosition ).put ("column '" ).put (columnName )
1704+ .put ("' is not indexed" );
1705+ }
1706+
1707+ alterTableColumnDropIndex (
1708+ securityContext ,
1709+ matViewNamePosition ,
1710+ matViewToken ,
1711+ columnNamePosition ,
1712+ columnName ,
1713+ tableMetadata
1714+ );
1715+
1716+ } else {
1717+ throw SqlException .$ (lexer .lastTokenPosition (), "'symbol capacity', 'add index' or 'drop index' expected" );
1718+ }
16281719 } else if (isSetKeyword (tok )) {
16291720 tok = expectToken (lexer , "'ttl' or 'refresh'" );
16301721 if (isTtlKeyword (tok )) {
@@ -1732,7 +1823,7 @@ private void compileAlterMatView(SqlExecutionContext executionContext) throws Sq
17321823 } catch (NumericException e ) {
17331824 throw SqlException .$ (lexer .lastTokenPosition (), "invalid START timestamp value" );
17341825 }
1735- tok = expectToken (lexer , "'time zone'" );
1826+ tok = maybeExpectToken (lexer , "'time zone'" , false );
17361827
17371828 if (isTimeKeyword (tok )) {
17381829 expectKeyword (lexer , "zone" );
@@ -3949,16 +4040,27 @@ private void validateAndOptimiseInsertAsSelect(SqlExecutionContext executionCont
39494040
39504041 protected static CharSequence expectToken (GenericLexer lexer , CharSequence expected ) throws SqlException {
39514042 CharSequence tok = SqlUtil .fetchNext (lexer );
4043+
39524044 if (tok == null ) {
39534045 throw SqlException .position (lexer .getPosition ()).put (expected ).put (" expected" );
39544046 }
4047+
4048+ if (Chars .equals (tok , ';' )) {
4049+ throw SqlException .position (lexer .lastTokenPosition ()).put (expected ).put (" expected" );
4050+ }
39554051 return tok ;
39564052 }
39574053
39584054 protected static CharSequence maybeExpectToken (GenericLexer lexer , CharSequence expected , boolean expect ) throws SqlException {
39594055 CharSequence tok = SqlUtil .fetchNext (lexer );
3960- if (expect && tok == null ) {
3961- throw SqlException .position (lexer .getPosition ()).put (expected ).put (" expected" );
4056+ if (expect ) {
4057+ if (tok == null ) {
4058+ throw SqlException .position (lexer .getPosition ()).put (expected ).put (" expected" );
4059+ }
4060+
4061+ if (Chars .equals (tok , ';' )) {
4062+ throw SqlException .position (lexer .lastTokenPosition ()).put (expected ).put (" expected" );
4063+ }
39624064 }
39634065 return tok ;
39644066 }
0 commit comments