Skip to content

Commit 45f45ea

Browse files
authored
Feat(duckdb): add support for SUMMARIZE (#3840)
1 parent 4c912cd commit 45f45ea

File tree

6 files changed

+26
-7
lines changed

6 files changed

+26
-7
lines changed

sqlglot/dialects/duckdb.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -238,14 +238,15 @@ class Tokenizer(tokens.Tokenizer):
238238
"POSITIONAL": TokenType.POSITIONAL,
239239
"SIGNED": TokenType.INT,
240240
"STRING": TokenType.TEXT,
241-
"UBIGINT": TokenType.UBIGINT,
242-
"UINTEGER": TokenType.UINT,
243-
"USMALLINT": TokenType.USMALLINT,
244-
"UTINYINT": TokenType.UTINYINT,
241+
"SUMMARIZE": TokenType.SUMMARIZE,
245242
"TIMESTAMP_S": TokenType.TIMESTAMP_S,
246243
"TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
247244
"TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
248245
"TIMESTAMP_US": TokenType.TIMESTAMP,
246+
"UBIGINT": TokenType.UBIGINT,
247+
"UINTEGER": TokenType.UINT,
248+
"USMALLINT": TokenType.USMALLINT,
249+
"UTINYINT": TokenType.UTINYINT,
249250
"VARCHAR": TokenType.TEXT,
250251
}
251252
KEYWORDS.pop("/*+")

sqlglot/expressions.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1439,6 +1439,11 @@ class Describe(Expression):
14391439
arg_types = {"this": True, "style": False, "kind": False, "expressions": False}
14401440

14411441

1442+
# https://duckdb.org/docs/guides/meta/summarize.html
1443+
class Summarize(Expression):
1444+
arg_types = {"this": True, "table": False}
1445+
1446+
14421447
class Kill(Expression):
14431448
arg_types = {"this": True, "kind": False}
14441449

sqlglot/generator.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4067,3 +4067,7 @@ def pad_sql(self, expression: exp.Pad) -> str:
40674067
fill_pattern = "' '"
40684068

40694069
return self.func(f"{prefix}PAD", expression.this, expression.expression, fill_pattern)
4070+
4071+
def summarize_sql(self, expression: exp.Summarize) -> str:
4072+
table = " TABLE" if expression.args.get("table") else ""
4073+
return f"SUMMARIZE{table} {self.sql(expression.this)}"

sqlglot/parser.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2825,12 +2825,14 @@ def _parse_select(
28252825
this = self._parse_derived_table_values()
28262826
elif from_:
28272827
this = exp.select("*").from_(from_.this, copy=False)
2828+
elif self._match(TokenType.SUMMARIZE):
2829+
table = self._match(TokenType.TABLE)
2830+
this = self._parse_select() or self._parse_string() or self._parse_table()
2831+
return self.expression(exp.Summarize, this=this, table=table)
28282832
else:
28292833
this = None
28302834

2831-
if parse_set_operation:
2832-
return self._parse_set_operations(this)
2833-
return this
2835+
return self._parse_set_operations(this) if parse_set_operation else this
28342836

28352837
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
28362838
if not skip_with_token and not self._match(TokenType.WITH):

sqlglot/tokens.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -364,6 +364,7 @@ class TokenType(AutoName):
364364
STORAGE_INTEGRATION = auto()
365365
STRAIGHT_JOIN = auto()
366366
STRUCT = auto()
367+
SUMMARIZE = auto()
367368
TABLE_SAMPLE = auto()
368369
TAG = auto()
369370
TEMPORARY = auto()

tests/dialects/test_duckdb.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -293,6 +293,12 @@ def test_duckdb(self):
293293
self.validate_identity("x -> '$.family'")
294294
self.validate_identity("CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))")
295295
self.validate_identity("SELECT * FROM foo WHERE bar > $baz AND bla = $bob")
296+
self.validate_identity("SUMMARIZE tbl").assert_is(exp.Summarize)
297+
self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize)
298+
self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)")
299+
self.validate_identity(
300+
"SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'"
301+
).assert_is(exp.Summarize)
296302
self.validate_identity(
297303
"SELECT * FROM x LEFT JOIN UNNEST(y)", "SELECT * FROM x LEFT JOIN UNNEST(y) ON TRUE"
298304
)

0 commit comments

Comments
 (0)