@@ -174,7 +174,6 @@ class Parser(parser.Parser):
174174 "DATE_FORMAT" : _build_date_format ,
175175 "DATE_SUB" : build_date_delta (exp .DateSub , default_unit = None ),
176176 "DATESUB" : build_date_delta (exp .DateSub , default_unit = None ),
177- "EXTRACT" : exp .RegexpExtract .from_arg_list ,
178177 "FORMATDATETIME" : _build_date_format ,
179178 "JSONEXTRACTSTRING" : build_json_extract_path (
180179 exp .JSONExtractScalar , zero_based_indexing = False
@@ -347,7 +346,6 @@ class Parser(parser.Parser):
347346 "QUANTILE" : lambda self : self ._parse_quantile (),
348347 }
349348
350- FUNCTION_PARSERS .pop ("EXTRACT" )
351349 FUNCTION_PARSERS .pop ("MATCH" )
352350
353351 NO_PAREN_FUNCTION_PARSERS = parser .Parser .NO_PAREN_FUNCTION_PARSERS .copy ()
@@ -410,6 +408,23 @@ class Parser(parser.Parser):
410408 "INDEX" ,
411409 }
412410
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of an ``extract`` call.

    Two spellings are distinguished by looking at the token that follows
    the first operand:

    * If a ``FROM`` token follows, the parser is rewound to where it
      started and the parent class' ``_parse_extract`` handles the input.
    * Otherwise a second operand is parsed and both operands are wrapped
      in an ``exp.Anonymous`` node named ``"extract"``.

    Anonymous is returned on purpose: extract and regexpExtract have
    different semantics, so parsing extract(foo, bar) into RegexpExtract
    can potentially break queries. E.g., `extract('foobar', 'b')` works,
    but CH crashes for `regexpExtract('foobar', 'b')`.

    TODO: can we somehow convert the former into an equivalent
    `regexpExtract` call?
    """
    # Remember where we started so the FROM form can be re-parsed from scratch.
    start = self._index
    first_arg = self._parse_bitwise()

    if not self._match(TokenType.FROM):
        # Function-call form. The comma match result is not checked here.
        self._match(TokenType.COMMA)
        second_arg = self._parse_bitwise()
        return self.expression(
            exp.Anonymous, this="extract", expressions=[first_arg, second_arg]
        )

    # `<operand> FROM ...` form: undo the lookahead and defer to the parent parser.
    self._retreat(start)
    return super()._parse_extract()
427+
413428 def _parse_assignment (self ) -> t .Optional [exp .Expression ]:
414429 this = super ()._parse_assignment ()
415430
0 commit comments