sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) is folded into LowerHex to simplify transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) is folded into Hex to simplify transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
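

# Illustrative note (not part of the module): these builders canonicalize
# function calls at parse time. For instance, `build_mod` below parenthesizes
# binary operands, which is observable through the public API:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
#     '(a + 1) % 7'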


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
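
    # Illustrative usage sketch: the parser is normally driven through a Dialect,
    # e.g. via the top-level sqlglot API, rather than instantiated directly:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1").sql()
    #     'SELECT 1'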

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
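
    # Illustrative sketch: dialects customize parsing by overriding these class-level
    # tables in their own Parser subclass (the dialect and function names below are
    # hypothetical):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }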

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }
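
    # Illustrative note: because many keyword tokens are included in ID_VAR_TOKENS,
    # they can double as identifiers or aliases where the grammar allows it, e.g.
    # (assuming the default dialect):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS offset").sql()
    #     'SELECT 1 AS offset'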

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
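
    # Illustrative note: the operator tables above encode binary-operator precedence.
    # Parsing descends roughly through assignment -> disjunction -> conjunction ->
    # equality -> comparison -> bitwise -> term -> factor -> unary, so FACTOR
    # operators (e.g. *) bind tighter than TERM operators (e.g. +): `a + b * c`
    # parses as Add(a, Mul(b, c)).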

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
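
    # Illustrative example: the DCOLON entry above is what turns `x::INT` into a
    # Cast node, and ARROW/DARROW handle the JSON extraction operators:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x::INT")).__name__
    #     'Cast'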

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
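
    # Illustrative note: a statement is dispatched on its leading token through
    # STATEMENT_PARSERS; anything unmatched falls through to expression parsing:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("CREATE TABLE t (c INT)")).__name__
    #     'Create'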

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
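
    # Illustrative examples of the range/predicate parsers above:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x BETWEEN 1 AND 2")).__name__
    #     'Between'
    #     >>> type(sqlglot.parse_one("x IN (1, 2)")).__name__
    #     'In'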

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
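
    # Illustrative note: given Hive-style `PARTITIONED BY (BUCKET(16, col))`, the
    # literal 16 is detected as the first argument above and the pair is swapped,
    # so both the Hive and Trino spellings produce
    # exp.PartitionedByBucket(this=col, expression=16).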

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False
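
    # Illustrative sketch: dialect parsers toggle the feature flags above with
    # plain class-level overrides (hypothetical dialect):
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         STRING_ALIASES = True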

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
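        # Illustrative usage via the top-level API, which delegates here (parse_one
        # accepts an `into` argument for this purpose):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> isinstance(sqlglot.parse_one("SELECT 1", into=exp.Select), exp.Select)
        #     True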
1623 """ 1624 errors = [] 1625 for expression_type in ensure_list(expression_types): 1626 parser = self.EXPRESSION_PARSERS.get(expression_type) 1627 if not parser: 1628 raise TypeError(f"No parser registered for {expression_type}") 1629 1630 try: 1631 return self._parse(parser, raw_tokens, sql) 1632 except ParseError as e: 1633 e.errors[0]["into_expression"] = expression_type 1634 errors.append(e) 1635 1636 raise ParseError( 1637 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1638 errors=merge_errors(errors), 1639 ) from errors[-1] 1640 1641 def _parse( 1642 self, 1643 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1644 raw_tokens: t.List[Token], 1645 sql: t.Optional[str] = None, 1646 ) -> t.List[t.Optional[exp.Expression]]: 1647 self.reset() 1648 self.sql = sql or "" 1649 1650 total = len(raw_tokens) 1651 chunks: t.List[t.List[Token]] = [[]] 1652 1653 for i, token in enumerate(raw_tokens): 1654 if token.token_type == TokenType.SEMICOLON: 1655 if token.comments: 1656 chunks.append([token]) 1657 1658 if i < total - 1: 1659 chunks.append([]) 1660 else: 1661 chunks[-1].append(token) 1662 1663 expressions = [] 1664 1665 for tokens in chunks: 1666 self._index = -1 1667 self._tokens = tokens 1668 self._advance() 1669 1670 expressions.append(parse_method(self)) 1671 1672 if self._index < len(self._tokens): 1673 self.raise_error("Invalid expression / Unexpected token") 1674 1675 self.check_errors() 1676 1677 return expressions 1678 1679 def check_errors(self) -> None: 1680 """Logs or raises any found errors, depending on the chosen error level setting.""" 1681 if self.error_level == ErrorLevel.WARN: 1682 for error in self.errors: 1683 logger.error(str(error)) 1684 elif self.error_level == ErrorLevel.RAISE and self.errors: 1685 raise ParseError( 1686 concat_messages(self.errors, self.max_errors), 1687 errors=merge_errors(self.errors), 1688 ) 1689 1690 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1691 """ 1692 Appends an error in the list of recorded errors or raises it, depending on the chosen 1693 error level setting. 1694 """ 1695 token = token or self._curr or self._prev or Token.string("") 1696 start = token.start 1697 end = token.end + 1 1698 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1699 highlight = self.sql[start:end] 1700 end_context = self.sql[end : end + self.error_message_context] 1701 1702 error = ParseError.new( 1703 f"{message}. Line {token.line}, Col: {token.col}.\n" 1704 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1705 description=message, 1706 line=token.line, 1707 col=token.col, 1708 start_context=start_context, 1709 highlight=highlight, 1710 end_context=end_context, 1711 ) 1712 1713 if self.error_level == ErrorLevel.IMMEDIATE: 1714 raise error 1715 1716 self.errors.append(error) 1717 1718 def expression( 1719 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1720 ) -> E: 1721 """ 1722 Creates a new, validated Expression. 1723 1724 Args: 1725 exp_class: The expression class to instantiate. 1726 comments: An optional list of comments to attach to the expression. 1727 kwargs: The arguments to set for the expression along with their respective values. 1728 1729 Returns: 1730 The target expression. 
1731 """ 1732 instance = exp_class(**kwargs) 1733 instance.add_comments(comments) if comments else self._add_comments(instance) 1734 return self.validate_expression(instance) 1735 1736 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1737 if expression and self._prev_comments: 1738 expression.add_comments(self._prev_comments) 1739 self._prev_comments = None 1740 1741 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1742 """ 1743 Validates an Expression, making sure that all its mandatory arguments are set. 1744 1745 Args: 1746 expression: The expression to validate. 1747 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1748 1749 Returns: 1750 The validated expression. 1751 """ 1752 if self.error_level != ErrorLevel.IGNORE: 1753 for error_message in expression.error_messages(args): 1754 self.raise_error(error_message) 1755 1756 return expression 1757 1758 def _find_sql(self, start: Token, end: Token) -> str: 1759 return self.sql[start.start : end.end + 1] 1760 1761 def _is_connected(self) -> bool: 1762 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1763 1764 def _advance(self, times: int = 1) -> None: 1765 self._index += times 1766 self._curr = seq_get(self._tokens, self._index) 1767 self._next = seq_get(self._tokens, self._index + 1) 1768 1769 if self._index > 0: 1770 self._prev = self._tokens[self._index - 1] 1771 self._prev_comments = self._prev.comments 1772 else: 1773 self._prev = None 1774 self._prev_comments = None 1775 1776 def _retreat(self, index: int) -> None: 1777 if index != self._index: 1778 self._advance(index - self._index) 1779 1780 def _warn_unsupported(self) -> None: 1781 if len(self._tokens) <= 1: 1782 return 1783 1784 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1785 # interested in emitting a warning for the one being currently processed. 1786 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1787 1788 logger.warning( 1789 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1790 ) 1791 1792 def _parse_command(self) -> exp.Command: 1793 self._warn_unsupported() 1794 return self.expression( 1795 exp.Command, 1796 comments=self._prev_comments, 1797 this=self._prev.text.upper(), 1798 expression=self._parse_string(), 1799 ) 1800 1801 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1802 """ 1803 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1805 solve this by setting & resetting the parser state accordingly 1806 """ 1807 index = self._index 1808 error_level = self.error_level 1809 1810 self.error_level = ErrorLevel.IMMEDIATE 1811 try: 1812 this = parse_method() 1813 except ParseError: 1814 this = None 1815 finally: 1816 if not this or retreat: 1817 self._retreat(index) 1818 self.error_level = error_level 1819 1820 return this 1821 1822 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1823 start = self._prev 1824 exists = self._parse_exists() if allow_exists else None 1825 1826 self._match(TokenType.ON) 1827 1828 materialized = self._match_text_seq("MATERIALIZED") 1829 kind = self._match_set(self.CREATABLES) and self._prev 1830 if not kind: 1831 return self._parse_as_command(start) 1832 1833 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1834 this = self._parse_user_defined_function(kind=kind.token_type) 1835 elif kind.token_type == TokenType.TABLE: 1836 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1837 elif kind.token_type == TokenType.COLUMN: 1838 this = self._parse_column() 1839 else: 1840 this = self._parse_id_var() 1841 1842 self._match(TokenType.IS) 1843 1844 return self.expression( 1845 exp.Comment, 1846 this=this, 1847 kind=kind.text, 1848 expression=self._parse_string(), 1849 exists=exists, 1850 materialized=materialized, 1851 ) 1852 1853 def _parse_to_table( 1854 self, 1855 ) -> exp.ToTableProperty: 1856 table = self._parse_table_parts(schema=True) 1857 return self.expression(exp.ToTableProperty, this=table) 1858 1859 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1860 def _parse_ttl(self) -> exp.Expression: 1861 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1862 this = self._parse_bitwise() 1863 1864 if self._match_text_seq("DELETE"): 1865 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1866 if self._match_text_seq("RECOMPRESS"): 1867 return self.expression( 1868 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1869 ) 1870 if self._match_text_seq("TO", "DISK"): 1871 return self.expression( 1872 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1873 ) 1874 if self._match_text_seq("TO", "VOLUME"): 1875 return self.expression( 1876 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1877 ) 1878 1879 return this 1880 1881 expressions = self._parse_csv(_parse_ttl_action) 1882 where = self._parse_where() 1883 group = self._parse_group() 1884 1885 aggregates = None 1886 if group and self._match(TokenType.SET): 1887 aggregates = self._parse_csv(self._parse_set_item) 1888 1889 return self.expression( 1890 exp.MergeTreeTTL, 1891 expressions=expressions, 1892 where=where, 1893 group=group, 1894 aggregates=aggregates, 1895 ) 1896 1897 def _parse_statement(self) -> t.Optional[exp.Expression]: 1898 if self._curr is None: 1899 return None 1900 1901 if self._match_set(self.STATEMENT_PARSERS): 1902 comments = self._prev_comments 1903 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1904 stmt.add_comments(comments, prepend=True) 1905 return stmt 1906 1907 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1908 return self._parse_command() 1909 1910 expression = self._parse_expression() 1911 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1912 return
self._parse_query_modifiers(expression) 1913 1914 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1915 start = self._prev 1916 temporary = self._match(TokenType.TEMPORARY) 1917 materialized = self._match_text_seq("MATERIALIZED") 1918 1919 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1920 if not kind: 1921 return self._parse_as_command(start) 1922 1923 concurrently = self._match_text_seq("CONCURRENTLY") 1924 if_exists = exists or self._parse_exists() 1925 1926 if kind == "COLUMN": 1927 this = self._parse_column() 1928 else: 1929 this = self._parse_table_parts( 1930 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1931 ) 1932 1933 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1934 1935 if self._match(TokenType.L_PAREN, advance=False): 1936 expressions = self._parse_wrapped_csv(self._parse_types) 1937 else: 1938 expressions = None 1939 1940 return self.expression( 1941 exp.Drop, 1942 exists=if_exists, 1943 this=this, 1944 expressions=expressions, 1945 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1946 temporary=temporary, 1947 materialized=materialized, 1948 cascade=self._match_text_seq("CASCADE"), 1949 constraints=self._match_text_seq("CONSTRAINTS"), 1950 purge=self._match_text_seq("PURGE"), 1951 cluster=cluster, 1952 concurrently=concurrently, 1953 ) 1954 1955 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1956 return ( 1957 self._match_text_seq("IF") 1958 and (not not_ or self._match(TokenType.NOT)) 1959 and self._match(TokenType.EXISTS) 1960 ) 1961 1962 def _parse_create(self) -> exp.Create | exp.Command: 1963 # Note: this can't be None because we've matched a statement parser 1964 start = self._prev 1965 1966 replace = ( 1967 start.token_type == TokenType.REPLACE 1968 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1969 or self._match_pair(TokenType.OR, TokenType.ALTER) 1970 ) 1971 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1972 1973 unique = self._match(TokenType.UNIQUE) 1974 1975 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1976 clustered = True 1977 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1978 "COLUMNSTORE" 1979 ): 1980 clustered = False 1981 else: 1982 clustered = None 1983 1984 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1985 self._advance() 1986 1987 properties = None 1988 create_token = self._match_set(self.CREATABLES) and self._prev 1989 1990 if not create_token: 1991 # exp.Properties.Location.POST_CREATE 1992 properties = self._parse_properties() 1993 create_token = self._match_set(self.CREATABLES) and self._prev 1994 1995 if not properties or not create_token: 1996 return self._parse_as_command(start) 1997 1998 concurrently = self._match_text_seq("CONCURRENTLY") 1999 exists = self._parse_exists(not_=True) 2000 this = None 2001 expression: t.Optional[exp.Expression] = None 2002 indexes = None 2003 no_schema_binding = None 2004 begin = None 2005 end = None 2006 clone = None 2007 2008 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2009 nonlocal properties 2010 if properties and temp_props: 2011 properties.expressions.extend(temp_props.expressions) 2012 elif temp_props: 2013 properties = temp_props 2014 2015 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2016 this = self._parse_user_defined_function(kind=create_token.token_type) 2017 2018 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2019 extend_props(self._parse_properties()) 2020 2021 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2022 extend_props(self._parse_properties()) 2023 2024 if not expression: 2025 if self._match(TokenType.COMMAND): 2026 expression = self._parse_as_command(self._prev) 2027 else: 2028 begin = self._match(TokenType.BEGIN) 2029 return_ = self._match_text_seq("RETURN") 2030 2031 if self._match(TokenType.STRING, advance=False): 2032 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2033 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2034 expression = self._parse_string() 2035 extend_props(self._parse_properties()) 2036 else: 2037 expression = self._parse_user_defined_function_expression() 2038 2039 end = self._match_text_seq("END") 2040 2041 if return_: 2042 expression = self.expression(exp.Return, this=expression) 2043 elif create_token.token_type == TokenType.INDEX: 2044 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2045 if not self._match(TokenType.ON): 2046 index = self._parse_id_var() 2047 anonymous = False 2048 else: 2049 index = None 2050 anonymous = True 2051 2052 this = self._parse_index(index=index, anonymous=anonymous) 2053 elif create_token.token_type in self.DB_CREATABLES: 2054 table_parts = self._parse_table_parts( 2055 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2056 ) 2057 2058 # exp.Properties.Location.POST_NAME 2059 self._match(TokenType.COMMA) 2060 extend_props(self._parse_properties(before=True)) 2061 2062 this = self._parse_schema(this=table_parts) 2063 2064 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2065 extend_props(self._parse_properties()) 2066 2067 has_alias = self._match(TokenType.ALIAS) 2068 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2069 # exp.Properties.Location.POST_ALIAS 2070 extend_props(self._parse_properties()) 2071 2072 if create_token.token_type == TokenType.SEQUENCE: 2073 expression = self._parse_types() 2074 extend_props(self._parse_properties()) 2075 else: 2076 expression = self._parse_ddl_select() 2077 2078 # Some dialects also support using a table as an alias instead of a SELECT. 2079 # Here we fall back to this as an alternative.
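            # (Illustrative: e.g. Teradata accepts `CREATE TABLE t2 AS t1 WITH DATA`,
            # where the name after AS refers to a source table rather than a SELECT.)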
2080 if not expression and has_alias: 2081 expression = self._try_parse(self._parse_table_parts) 2082 2083 if create_token.token_type == TokenType.TABLE: 2084 # exp.Properties.Location.POST_EXPRESSION 2085 extend_props(self._parse_properties()) 2086 2087 indexes = [] 2088 while True: 2089 index = self._parse_index() 2090 2091 # exp.Properties.Location.POST_INDEX 2092 extend_props(self._parse_properties()) 2093 if not index: 2094 break 2095 else: 2096 self._match(TokenType.COMMA) 2097 indexes.append(index) 2098 elif create_token.token_type == TokenType.VIEW: 2099 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2100 no_schema_binding = True 2101 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2102 extend_props(self._parse_properties()) 2103 2104 shallow = self._match_text_seq("SHALLOW") 2105 2106 if self._match_texts(self.CLONE_KEYWORDS): 2107 copy = self._prev.text.lower() == "copy" 2108 clone = self.expression( 2109 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2110 ) 2111 2112 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2113 return self._parse_as_command(start) 2114 2115 create_kind_text = create_token.text.upper() 2116 return self.expression( 2117 exp.Create, 2118 this=this, 2119 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2120 replace=replace, 2121 refresh=refresh, 2122 unique=unique, 2123 expression=expression, 2124 exists=exists, 2125 properties=properties, 2126 indexes=indexes, 2127 no_schema_binding=no_schema_binding, 2128 begin=begin, 2129 end=end, 2130 clone=clone, 2131 concurrently=concurrently, 2132 clustered=clustered, 2133 ) 2134 2135 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2136 seq = exp.SequenceProperties() 2137 2138 options = [] 2139 index = self._index 2140 2141 while self._curr: 2142 self._match(TokenType.COMMA) 2143 if self._match_text_seq("INCREMENT"): 2144 self._match_text_seq("BY") 2145 self._match_text_seq("=") 2146 seq.set("increment", self._parse_term()) 2147 elif self._match_text_seq("MINVALUE"): 2148 seq.set("minvalue", self._parse_term()) 2149 elif self._match_text_seq("MAXVALUE"): 2150 seq.set("maxvalue", self._parse_term()) 2151 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2152 self._match_text_seq("=") 2153 seq.set("start", self._parse_term()) 2154 elif self._match_text_seq("CACHE"): 2155 # T-SQL allows empty CACHE which is initialized dynamically 2156 seq.set("cache", self._parse_number() or True) 2157 elif self._match_text_seq("OWNED", "BY"): 2158 # "OWNED BY NONE" is the default 2159 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2160 else: 2161 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2162 if opt: 2163 options.append(opt) 2164 else: 2165 break 2166 2167 seq.set("options", options if options else None) 2168 return None if self._index == index else seq 2169 2170 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2171 # only used for teradata currently 2172 self._match(TokenType.COMMA) 2173 2174 kwargs = { 2175 "no": self._match_text_seq("NO"), 2176 "dual": self._match_text_seq("DUAL"), 2177 "before": self._match_text_seq("BEFORE"), 2178 "default": self._match_text_seq("DEFAULT"), 2179 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2180 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2181 "after": self._match_text_seq("AFTER"), 2182 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2183 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2184 } 2185 2186 if self._match_texts(self.PROPERTY_PARSERS): 2187 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2188 try: 2189 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2190 except TypeError: 2191 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2192 2193 return None 2194 2195 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2196 return self._parse_wrapped_csv(self._parse_property) 2197 2198 def _parse_property(self) -> t.Optional[exp.Expression]: 2199 if self._match_texts(self.PROPERTY_PARSERS): 2200 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2201 2202 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2203 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2204 2205 if self._match_text_seq("COMPOUND", "SORTKEY"): 2206 return self._parse_sortkey(compound=True) 2207 2208 if self._match_text_seq("SQL", "SECURITY"): 2209 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2210 2211 index = self._index 2212 key = self._parse_column() 2213 2214 if not self._match(TokenType.EQ): 2215 self._retreat(index) 2216 return self._parse_sequence_properties() 2217 2218 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2219 if isinstance(key, exp.Column): 2220 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2221 2222 value = self._parse_bitwise() or self._parse_var(any_token=True) 2223 2224 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2225 if isinstance(value, exp.Column): 2226 value = exp.var(value.name) 2227 2228 return self.expression(exp.Property, this=key, value=value) 2229 2230 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2231 if self._match_text_seq("BY"): 2232 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2233 2234 self._match(TokenType.ALIAS) 2235 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2236 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2237 2238 return self.expression( 2239 exp.FileFormatProperty, 2240 this=( 2241 self.expression( 2242 exp.InputOutputFormat, 2243 input_format=input_format, 2244 output_format=output_format, 2245 ) 2246 if input_format or output_format 2247 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2248 ), 2249 hive_format=True, 2250 ) 2251 2252 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2253 field = self._parse_field() 2254 if isinstance(field, exp.Identifier) and not field.quoted: 2255 field = exp.var(field) 2256 2257 return field 2258 2259 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2260 self._match(TokenType.EQ) 2261 self._match(TokenType.ALIAS) 2262 2263 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2264 2265 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2266 properties = [] 2267 while True: 2268 if before: 2269 prop = self._parse_property_before() 2270 else: 2271 prop = self._parse_property() 2272 if not prop: 2273 break 2274 for p in ensure_list(prop): 2275 properties.append(p) 2276 2277 if properties: 2278 return self.expression(exp.Properties, expressions=properties) 2279 
2280 return None 2281 2282 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2283 return self.expression( 2284 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2285 ) 2286 2287 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2288 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2289 security_specifier = self._prev.text.upper() 2290 return self.expression(exp.SecurityProperty, this=security_specifier) 2291 return None 2292 2293 def _parse_settings_property(self) -> exp.SettingsProperty: 2294 return self.expression( 2295 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2296 ) 2297 2298 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2299 if self._index >= 2: 2300 pre_volatile_token = self._tokens[self._index - 2] 2301 else: 2302 pre_volatile_token = None 2303 2304 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2305 return exp.VolatileProperty() 2306 2307 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2308 2309 def _parse_retention_period(self) -> exp.Var: 2310 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2311 number = self._parse_number() 2312 number_str = f"{number} " if number else "" 2313 unit = self._parse_var(any_token=True) 2314 return exp.var(f"{number_str}{unit}") 2315 2316 def _parse_system_versioning_property( 2317 self, with_: bool = False 2318 ) -> exp.WithSystemVersioningProperty: 2319 self._match(TokenType.EQ) 2320 prop = self.expression( 2321 exp.WithSystemVersioningProperty, 2322 **{ # type: ignore 2323 "on": True, 2324 "with": with_, 2325 }, 2326 ) 2327 2328 if self._match_text_seq("OFF"): 2329 prop.set("on", False) 2330 return prop 2331 2332 self._match(TokenType.ON) 2333 if self._match(TokenType.L_PAREN): 2334 while self._curr and not self._match(TokenType.R_PAREN): 2335 if self._match_text_seq("HISTORY_TABLE", "="): 2336 prop.set("this", self._parse_table_parts()) 2337 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2338 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2339 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2340 prop.set("retention_period", self._parse_retention_period()) 2341 2342 self._match(TokenType.COMMA) 2343 2344 return prop 2345 2346 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2347 self._match(TokenType.EQ) 2348 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2349 prop = self.expression(exp.DataDeletionProperty, on=on) 2350 2351 if self._match(TokenType.L_PAREN): 2352 while self._curr and not self._match(TokenType.R_PAREN): 2353 if self._match_text_seq("FILTER_COLUMN", "="): 2354 prop.set("filter_column", self._parse_column()) 2355 elif self._match_text_seq("RETENTION_PERIOD", "="): 2356 prop.set("retention_period", self._parse_retention_period()) 2357 2358 self._match(TokenType.COMMA) 2359 2360 return prop 2361 2362 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2363 kind = "HASH" 2364 expressions: t.Optional[t.List[exp.Expression]] = None 2365 if self._match_text_seq("BY", "HASH"): 2366 expressions = self._parse_wrapped_csv(self._parse_id_var) 2367 elif self._match_text_seq("BY", "RANDOM"): 2368 kind = "RANDOM" 2369 2370 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2371 buckets: t.Optional[exp.Expression] = None 2372 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2373 buckets = self._parse_number() 2374 2375 return self.expression( 2376 exp.DistributedByProperty, 2377 expressions=expressions, 2378 kind=kind, 2379 buckets=buckets, 2380 order=self._parse_order(), 2381 ) 2382 2383 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2384 self._match_text_seq("KEY") 2385 expressions = self._parse_wrapped_id_vars() 2386 return self.expression(expr_type, expressions=expressions) 2387 2388 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2389 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2390 prop = self._parse_system_versioning_property(with_=True) 2391 self._match_r_paren() 2392 return prop 2393 2394 if self._match(TokenType.L_PAREN, advance=False): 2395 return self._parse_wrapped_properties() 2396 2397 if self._match_text_seq("JOURNAL"): 2398 return self._parse_withjournaltable() 2399 2400 if self._match_texts(self.VIEW_ATTRIBUTES): 2401 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2402 2403 if self._match_text_seq("DATA"): 2404 return self._parse_withdata(no=False) 2405 elif self._match_text_seq("NO", "DATA"): 2406 return self._parse_withdata(no=True) 2407 2408 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2409 return self._parse_serde_properties(with_=True) 2410 2411 if self._match(TokenType.SCHEMA): 2412 return self.expression( 2413 exp.WithSchemaBindingProperty, 2414 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2415 ) 2416 2417 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2418 return self.expression( 2419 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2420 ) 2421 2422 if not self._next: 2423 return None 2424 2425 return self._parse_withisolatedloading() 2426 2427 def _parse_procedure_option(self) -> exp.Expression | None: 2428 if self._match_text_seq("EXECUTE", "AS"): 2429 return self.expression( 2430 exp.ExecuteAsProperty, 2431 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2432 or self._parse_string(), 2433 ) 2434 2435 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2436 2437 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2438 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2439 self._match(TokenType.EQ) 2440 2441 user = self._parse_id_var() 2442 self._match(TokenType.PARAMETER) 2443 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2444 2445 if not user or not host: 2446 return None 2447 2448 return exp.DefinerProperty(this=f"{user}@{host}") 2449 2450 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2451 self._match(TokenType.TABLE) 2452 self._match(TokenType.EQ) 2453 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2454 2455 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2456 return self.expression(exp.LogProperty, no=no) 2457 2458 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2459 return self.expression(exp.JournalProperty, **kwargs) 2460 2461 def _parse_checksum(self) -> exp.ChecksumProperty: 2462 self._match(TokenType.EQ) 2463 2464 on = None 2465 if self._match(TokenType.ON): 2466 on = True 2467 elif self._match_text_seq("OFF"): 2468 on = False 2469 2470 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2471 2472 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2473 return self.expression( 2474 
exp.Cluster, 2475 expressions=( 2476 self._parse_wrapped_csv(self._parse_ordered) 2477 if wrapped 2478 else self._parse_csv(self._parse_ordered) 2479 ), 2480 ) 2481 2482 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2483 self._match_text_seq("BY") 2484 2485 self._match_l_paren() 2486 expressions = self._parse_csv(self._parse_column) 2487 self._match_r_paren() 2488 2489 if self._match_text_seq("SORTED", "BY"): 2490 self._match_l_paren() 2491 sorted_by = self._parse_csv(self._parse_ordered) 2492 self._match_r_paren() 2493 else: 2494 sorted_by = None 2495 2496 self._match(TokenType.INTO) 2497 buckets = self._parse_number() 2498 self._match_text_seq("BUCKETS") 2499 2500 return self.expression( 2501 exp.ClusteredByProperty, 2502 expressions=expressions, 2503 sorted_by=sorted_by, 2504 buckets=buckets, 2505 ) 2506 2507 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2508 if not self._match_text_seq("GRANTS"): 2509 self._retreat(self._index - 1) 2510 return None 2511 2512 return self.expression(exp.CopyGrantsProperty) 2513 2514 def _parse_freespace(self) -> exp.FreespaceProperty: 2515 self._match(TokenType.EQ) 2516 return self.expression( 2517 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2518 ) 2519 2520 def _parse_mergeblockratio( 2521 self, no: bool = False, default: bool = False 2522 ) -> exp.MergeBlockRatioProperty: 2523 if self._match(TokenType.EQ): 2524 return self.expression( 2525 exp.MergeBlockRatioProperty, 2526 this=self._parse_number(), 2527 percent=self._match(TokenType.PERCENT), 2528 ) 2529 2530 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2531 2532 def _parse_datablocksize( 2533 self, 2534 default: t.Optional[bool] = None, 2535 minimum: t.Optional[bool] = None, 2536 maximum: t.Optional[bool] = None, 2537 ) -> exp.DataBlocksizeProperty: 2538 self._match(TokenType.EQ) 2539 size = self._parse_number() 2540 2541 units = None 2542 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2543 units = self._prev.text 2544 2545 return self.expression( 2546 exp.DataBlocksizeProperty, 2547 size=size, 2548 units=units, 2549 default=default, 2550 minimum=minimum, 2551 maximum=maximum, 2552 ) 2553 2554 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2555 self._match(TokenType.EQ) 2556 always = self._match_text_seq("ALWAYS") 2557 manual = self._match_text_seq("MANUAL") 2558 never = self._match_text_seq("NEVER") 2559 default = self._match_text_seq("DEFAULT") 2560 2561 autotemp = None 2562 if self._match_text_seq("AUTOTEMP"): 2563 autotemp = self._parse_schema() 2564 2565 return self.expression( 2566 exp.BlockCompressionProperty, 2567 always=always, 2568 manual=manual, 2569 never=never, 2570 default=default, 2571 autotemp=autotemp, 2572 ) 2573 2574 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2575 index = self._index 2576 no = self._match_text_seq("NO") 2577 concurrent = self._match_text_seq("CONCURRENT") 2578 2579 if not self._match_text_seq("ISOLATED", "LOADING"): 2580 self._retreat(index) 2581 return None 2582 2583 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2584 return self.expression( 2585 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2586 ) 2587 2588 def _parse_locking(self) -> exp.LockingProperty: 2589 if self._match(TokenType.TABLE): 2590 kind = "TABLE" 2591 elif self._match(TokenType.VIEW): 2592 kind = "VIEW" 2593 elif self._match(TokenType.ROW): 2594 kind 
= "ROW" 2595 elif self._match_text_seq("DATABASE"): 2596 kind = "DATABASE" 2597 else: 2598 kind = None 2599 2600 if kind in ("DATABASE", "TABLE", "VIEW"): 2601 this = self._parse_table_parts() 2602 else: 2603 this = None 2604 2605 if self._match(TokenType.FOR): 2606 for_or_in = "FOR" 2607 elif self._match(TokenType.IN): 2608 for_or_in = "IN" 2609 else: 2610 for_or_in = None 2611 2612 if self._match_text_seq("ACCESS"): 2613 lock_type = "ACCESS" 2614 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2615 lock_type = "EXCLUSIVE" 2616 elif self._match_text_seq("SHARE"): 2617 lock_type = "SHARE" 2618 elif self._match_text_seq("READ"): 2619 lock_type = "READ" 2620 elif self._match_text_seq("WRITE"): 2621 lock_type = "WRITE" 2622 elif self._match_text_seq("CHECKSUM"): 2623 lock_type = "CHECKSUM" 2624 else: 2625 lock_type = None 2626 2627 override = self._match_text_seq("OVERRIDE") 2628 2629 return self.expression( 2630 exp.LockingProperty, 2631 this=this, 2632 kind=kind, 2633 for_or_in=for_or_in, 2634 lock_type=lock_type, 2635 override=override, 2636 ) 2637 2638 def _parse_partition_by(self) -> t.List[exp.Expression]: 2639 if self._match(TokenType.PARTITION_BY): 2640 return self._parse_csv(self._parse_assignment) 2641 return [] 2642 2643 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2644 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2645 if self._match_text_seq("MINVALUE"): 2646 return exp.var("MINVALUE") 2647 if self._match_text_seq("MAXVALUE"): 2648 return exp.var("MAXVALUE") 2649 return self._parse_bitwise() 2650 2651 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2652 expression = None 2653 from_expressions = None 2654 to_expressions = None 2655 2656 if self._match(TokenType.IN): 2657 this = self._parse_wrapped_csv(self._parse_bitwise) 2658 elif self._match(TokenType.FROM): 2659 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2660 self._match_text_seq("TO") 2661 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2662 elif self._match_text_seq("WITH", "(", "MODULUS"): 2663 this = self._parse_number() 2664 self._match_text_seq(",", "REMAINDER") 2665 expression = self._parse_number() 2666 self._match_r_paren() 2667 else: 2668 self.raise_error("Failed to parse partition bound spec.") 2669 2670 return self.expression( 2671 exp.PartitionBoundSpec, 2672 this=this, 2673 expression=expression, 2674 from_expressions=from_expressions, 2675 to_expressions=to_expressions, 2676 ) 2677 2678 # https://www.postgresql.org/docs/current/sql-createtable.html 2679 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2680 if not self._match_text_seq("OF"): 2681 self._retreat(self._index - 1) 2682 return None 2683 2684 this = self._parse_table(schema=True) 2685 2686 if self._match(TokenType.DEFAULT): 2687 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2688 elif self._match_text_seq("FOR", "VALUES"): 2689 expression = self._parse_partition_bound_spec() 2690 else: 2691 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2692 2693 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2694 2695 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2696 self._match(TokenType.EQ) 2697 return self.expression( 2698 exp.PartitionedByProperty, 2699 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2700 ) 2701 2702 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2703 if self._match_text_seq("AND", 
"STATISTICS"): 2704 statistics = True 2705 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2706 statistics = False 2707 else: 2708 statistics = None 2709 2710 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2711 2712 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2713 if self._match_text_seq("SQL"): 2714 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2715 return None 2716 2717 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2718 if self._match_text_seq("SQL", "DATA"): 2719 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2720 return None 2721 2722 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2723 if self._match_text_seq("PRIMARY", "INDEX"): 2724 return exp.NoPrimaryIndexProperty() 2725 if self._match_text_seq("SQL"): 2726 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2727 return None 2728 2729 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2730 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2731 return exp.OnCommitProperty() 2732 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2733 return exp.OnCommitProperty(delete=True) 2734 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2735 2736 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2737 if self._match_text_seq("SQL", "DATA"): 2738 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2739 return None 2740 2741 def _parse_distkey(self) -> exp.DistKeyProperty: 2742 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2743 2744 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2745 table = self._parse_table(schema=True) 2746 2747 options = [] 2748 while self._match_texts(("INCLUDING", "EXCLUDING")): 2749 this = self._prev.text.upper() 2750 2751 id_var = self._parse_id_var() 2752 if not id_var: 2753 return None 2754 2755 options.append( 2756 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2757 ) 2758 2759 return self.expression(exp.LikeProperty, this=table, expressions=options) 2760 2761 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2762 return self.expression( 2763 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2764 ) 2765 2766 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2767 self._match(TokenType.EQ) 2768 return self.expression( 2769 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2770 ) 2771 2772 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2773 self._match_text_seq("WITH", "CONNECTION") 2774 return self.expression( 2775 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2776 ) 2777 2778 def _parse_returns(self) -> exp.ReturnsProperty: 2779 value: t.Optional[exp.Expression] 2780 null = None 2781 is_table = self._match(TokenType.TABLE) 2782 2783 if is_table: 2784 if self._match(TokenType.LT): 2785 value = self.expression( 2786 exp.Schema, 2787 this="TABLE", 2788 expressions=self._parse_csv(self._parse_struct_types), 2789 ) 2790 if not self._match(TokenType.GT): 2791 self.raise_error("Expecting >") 2792 else: 2793 value = self._parse_schema(exp.var("TABLE")) 2794 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2795 null = True 2796 value = None 2797 else: 2798 value = self._parse_types() 2799 2800 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2801 2802 def _parse_describe(self) -> exp.Describe: 2803 kind = self._match_set(self.CREATABLES) and self._prev.text 2804 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2805 if self._match(TokenType.DOT): 2806 style = None 2807 self._retreat(self._index - 2) 2808 2809 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2810 2811 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2812 this = self._parse_statement() 2813 else: 2814 this = self._parse_table(schema=True) 2815 2816 properties = self._parse_properties() 2817 expressions = properties.expressions if properties else None 2818 partition = self._parse_partition() 2819 return self.expression( 2820 exp.Describe, 2821 this=this, 2822 style=style, 2823 kind=kind, 2824 expressions=expressions, 2825 partition=partition, 2826 format=format, 2827 ) 2828 2829 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2830 kind = self._prev.text.upper() 2831 expressions = [] 2832 2833 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2834 if self._match(TokenType.WHEN): 2835 expression = self._parse_disjunction() 2836 self._match(TokenType.THEN) 2837 else: 2838 expression = None 2839 2840 else_ = self._match(TokenType.ELSE) 2841 2842 if not self._match(TokenType.INTO): 2843 return None 2844 2845 return self.expression( 2846 exp.ConditionalInsert, 2847 this=self.expression( 2848 exp.Insert, 2849 this=self._parse_table(schema=True), 2850 expression=self._parse_derived_table_values(), 2851 ), 2852 expression=expression, 2853 else_=else_, 2854 ) 2855 2856 expression = parse_conditional_insert() 2857 while expression is not None: 2858 expressions.append(expression) 2859 expression = parse_conditional_insert() 2860 2861 return self.expression( 2862 exp.MultitableInserts, 2863 kind=kind, 2864 comments=comments, 2865 expressions=expressions, 2866 source=self._parse_table(), 2867 ) 2868 2869 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2870 comments = [] 2871 hint = self._parse_hint() 2872 overwrite = self._match(TokenType.OVERWRITE) 2873 ignore = self._match(TokenType.IGNORE) 2874 local = self._match_text_seq("LOCAL") 2875 alternative = None 2876 is_function = None 2877 2878 if self._match_text_seq("DIRECTORY"): 2879 this: t.Optional[exp.Expression] = self.expression( 2880 exp.Directory, 2881 this=self._parse_var_or_string(), 2882 local=local, 2883 row_format=self._parse_row_format(match_row=True), 2884 ) 2885 else: 2886 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2887 comments += ensure_list(self._prev_comments) 2888 return self._parse_multitable_inserts(comments) 2889 2890 if self._match(TokenType.OR): 2891 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2892 2893 self._match(TokenType.INTO) 2894 comments += ensure_list(self._prev_comments) 2895 self._match(TokenType.TABLE) 2896 is_function = self._match(TokenType.FUNCTION) 2897 2898 this = ( 2899 self._parse_table(schema=True, parse_partition=True) 2900 if not is_function 2901 else self._parse_function() 2902 ) 2903 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2904 this.set("alias", self._parse_table_alias()) 2905 2906 returning = self._parse_returning() 2907 2908 return self.expression( 2909 exp.Insert, 2910 comments=comments, 2911 hint=hint, 2912 is_function=is_function, 2913 this=this, 
2914 stored=self._match_text_seq("STORED") and self._parse_stored(), 2915 by_name=self._match_text_seq("BY", "NAME"), 2916 exists=self._parse_exists(), 2917 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2918 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2919 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2920 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2921 conflict=self._parse_on_conflict(), 2922 returning=returning or self._parse_returning(), 2923 overwrite=overwrite, 2924 alternative=alternative, 2925 ignore=ignore, 2926 source=self._match(TokenType.TABLE) and self._parse_table(), 2927 ) 2928 2929 def _parse_kill(self) -> exp.Kill: 2930 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2931 2932 return self.expression( 2933 exp.Kill, 2934 this=self._parse_primary(), 2935 kind=kind, 2936 ) 2937 2938 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2939 conflict = self._match_text_seq("ON", "CONFLICT") 2940 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2941 2942 if not conflict and not duplicate: 2943 return None 2944 2945 conflict_keys = None 2946 constraint = None 2947 2948 if conflict: 2949 if self._match_text_seq("ON", "CONSTRAINT"): 2950 constraint = self._parse_id_var() 2951 elif self._match(TokenType.L_PAREN): 2952 conflict_keys = self._parse_csv(self._parse_id_var) 2953 self._match_r_paren() 2954 2955 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2956 if self._prev.token_type == TokenType.UPDATE: 2957 self._match(TokenType.SET) 2958 expressions = self._parse_csv(self._parse_equality) 2959 else: 2960 expressions = None 2961 2962 return self.expression( 2963 exp.OnConflict, 2964 duplicate=duplicate, 2965 expressions=expressions, 2966 action=action, 2967 conflict_keys=conflict_keys, 2968 constraint=constraint, 2969 where=self._parse_where(), 2970 ) 2971 2972 def _parse_returning(self) -> t.Optional[exp.Returning]: 2973 if not self._match(TokenType.RETURNING): 2974 return None 2975 return self.expression( 2976 exp.Returning, 2977 expressions=self._parse_csv(self._parse_expression), 2978 into=self._match(TokenType.INTO) and self._parse_table_part(), 2979 ) 2980 2981 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2982 if not self._match(TokenType.FORMAT): 2983 return None 2984 return self._parse_row_format() 2985 2986 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2987 index = self._index 2988 with_ = with_ or self._match_text_seq("WITH") 2989 2990 if not self._match(TokenType.SERDE_PROPERTIES): 2991 self._retreat(index) 2992 return None 2993 return self.expression( 2994 exp.SerdeProperties, 2995 **{ # type: ignore 2996 "expressions": self._parse_wrapped_properties(), 2997 "with": with_, 2998 }, 2999 ) 3000 3001 def _parse_row_format( 3002 self, match_row: bool = False 3003 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3004 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3005 return None 3006 3007 if self._match_text_seq("SERDE"): 3008 this = self._parse_string() 3009 3010 serde_properties = self._parse_serde_properties() 3011 3012 return self.expression( 3013 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3014 ) 3015 3016 self._match_text_seq("DELIMITED") 3017 3018 kwargs = {} 3019 3020 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3021 kwargs["fields"] = self._parse_string() 3022 if self._match_text_seq("ESCAPED", "BY"): 3023 kwargs["escaped"] = self._parse_string() 3024 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3025 kwargs["collection_items"] = self._parse_string() 3026 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3027 kwargs["map_keys"] = self._parse_string() 3028 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3029 kwargs["lines"] = self._parse_string() 3030 if self._match_text_seq("NULL", "DEFINED", "AS"): 3031 kwargs["null"] = self._parse_string() 3032 3033 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3034 3035 def _parse_load(self) -> exp.LoadData | exp.Command: 3036 if self._match_text_seq("DATA"): 3037 local = self._match_text_seq("LOCAL") 3038 self._match_text_seq("INPATH") 3039 inpath = self._parse_string() 3040 overwrite = self._match(TokenType.OVERWRITE) 3041 self._match_pair(TokenType.INTO, TokenType.TABLE) 3042 3043 return self.expression( 3044 exp.LoadData, 3045 this=self._parse_table(schema=True), 3046 local=local, 3047 overwrite=overwrite, 3048 inpath=inpath, 3049 partition=self._parse_partition(), 3050 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3051 serde=self._match_text_seq("SERDE") and self._parse_string(), 3052 ) 3053 return self._parse_as_command(self._prev) 3054 3055 def _parse_delete(self) -> exp.Delete: 3056 # This handles MySQL's "Multiple-Table Syntax" 3057 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3058 tables = None 3059 if not self._match(TokenType.FROM, advance=False): 3060 tables = self._parse_csv(self._parse_table) or None 3061 3062 returning = self._parse_returning() 3063 3064 return self.expression( 3065 exp.Delete, 3066 tables=tables, 3067 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3068 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3069 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3070 where=self._parse_where(), 3071 returning=returning or self._parse_returning(), 3072 limit=self._parse_limit(), 3073 ) 3074 3075 def _parse_update(self) -> exp.Update: 3076 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3077 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3078 returning = self._parse_returning() 3079 return self.expression( 3080 exp.Update, 3081 **{ # type: ignore 3082 "this": this, 3083 "expressions": expressions, 3084 "from": self._parse_from(joins=True), 3085 "where": self._parse_where(), 3086 "returning": returning or self._parse_returning(), 3087 "order": self._parse_order(), 3088 "limit": self._parse_limit(), 3089 }, 3090 ) 3091 3092 def _parse_use(self) -> exp.Use: 3093 return self.expression( 3094 exp.Use, 3095 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3096 this=self._parse_table(schema=False), 3097 ) 3098 3099 def _parse_uncache(self) -> exp.Uncache: 3100 if not self._match(TokenType.TABLE): 3101 self.raise_error("Expecting TABLE after UNCACHE") 3102 3103 return self.expression( 3104 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3105 ) 3106 3107 def _parse_cache(self) -> exp.Cache: 3108 lazy = self._match_text_seq("LAZY") 3109 self._match(TokenType.TABLE) 3110 table = self._parse_table(schema=True) 3111 3112 options = [] 3113 if self._match_text_seq("OPTIONS"): 3114 self._match_l_paren() 3115 k = 
self._parse_string() 3116 self._match(TokenType.EQ) 3117 v = self._parse_string() 3118 options = [k, v] 3119 self._match_r_paren() 3120 3121 self._match(TokenType.ALIAS) 3122 return self.expression( 3123 exp.Cache, 3124 this=table, 3125 lazy=lazy, 3126 options=options, 3127 expression=self._parse_select(nested=True), 3128 ) 3129 3130 def _parse_partition(self) -> t.Optional[exp.Partition]: 3131 if not self._match_texts(self.PARTITION_KEYWORDS): 3132 return None 3133 3134 return self.expression( 3135 exp.Partition, 3136 subpartition=self._prev.text.upper() == "SUBPARTITION", 3137 expressions=self._parse_wrapped_csv(self._parse_assignment), 3138 ) 3139 3140 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3141 def _parse_value_expression() -> t.Optional[exp.Expression]: 3142 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3143 return exp.var(self._prev.text.upper()) 3144 return self._parse_expression() 3145 3146 if self._match(TokenType.L_PAREN): 3147 expressions = self._parse_csv(_parse_value_expression) 3148 self._match_r_paren() 3149 return self.expression(exp.Tuple, expressions=expressions) 3150 3151 # In some dialects we can have VALUES 1, 2, which results in 1 column & 2 rows. 3152 expression = self._parse_expression() 3153 if expression: 3154 return self.expression(exp.Tuple, expressions=[expression]) 3155 return None 3156 3157 def _parse_projections(self) -> t.List[exp.Expression]: 3158 return self._parse_expressions() 3159 3160 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3161 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3162 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3163 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3164 ) 3165 elif self._match(TokenType.FROM): 3166 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3167 # Support parentheses for duckdb FROM-first syntax 3168 select = self._parse_select() 3169 if select: 3170 select.set("from", from_) 3171 this = select 3172 else: 3173 this = exp.select("*").from_(t.cast(exp.From, from_)) 3174 else: 3175 this = ( 3176 self._parse_table(consume_pipe=True) 3177 if table 3178 else self._parse_select(nested=True, parse_set_operation=False) 3179 ) 3180 3181 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3182 # in case a modifier (e.g.
join) is following 3183 if table and isinstance(this, exp.Values) and this.alias: 3184 alias = this.args["alias"].pop() 3185 this = exp.Table(this=this, alias=alias) 3186 3187 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3188 3189 return this 3190 3191 def _parse_select( 3192 self, 3193 nested: bool = False, 3194 table: bool = False, 3195 parse_subquery_alias: bool = True, 3196 parse_set_operation: bool = True, 3197 consume_pipe: bool = True, 3198 ) -> t.Optional[exp.Expression]: 3199 query = self._parse_select_query( 3200 nested=nested, 3201 table=table, 3202 parse_subquery_alias=parse_subquery_alias, 3203 parse_set_operation=parse_set_operation, 3204 ) 3205 3206 if ( 3207 consume_pipe 3208 and self._match(TokenType.PIPE_GT, advance=False) 3209 and isinstance(query, exp.Query) 3210 ): 3211 query = self._parse_pipe_syntax_query(query) 3212 query = query.subquery(copy=False) if query and table else query 3213 3214 return query 3215 3216 def _parse_select_query( 3217 self, 3218 nested: bool = False, 3219 table: bool = False, 3220 parse_subquery_alias: bool = True, 3221 parse_set_operation: bool = True, 3222 ) -> t.Optional[exp.Expression]: 3223 cte = self._parse_with() 3224 3225 if cte: 3226 this = self._parse_statement() 3227 3228 if not this: 3229 self.raise_error("Failed to parse any statement following CTE") 3230 return cte 3231 3232 if "with" in this.arg_types: 3233 this.set("with", cte) 3234 else: 3235 self.raise_error(f"{this.key} does not support CTE") 3236 this = cte 3237 3238 return this 3239 3240 # duckdb supports leading with FROM x 3241 from_ = ( 3242 self._parse_from(consume_pipe=True) 3243 if self._match(TokenType.FROM, advance=False) 3244 else None 3245 ) 3246 3247 if self._match(TokenType.SELECT): 3248 comments = self._prev_comments 3249 3250 hint = self._parse_hint() 3251 3252 if self._next and not self._next.token_type == TokenType.DOT: 3253 all_ = self._match(TokenType.ALL) 3254 distinct = self._match_set(self.DISTINCT_TOKENS) 3255 else: 3256 all_, distinct = None, None 3257 3258 kind = ( 3259 self._match(TokenType.ALIAS) 3260 and self._match_texts(("STRUCT", "VALUE")) 3261 and self._prev.text.upper() 3262 ) 3263 3264 if distinct: 3265 distinct = self.expression( 3266 exp.Distinct, 3267 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3268 ) 3269 3270 if all_ and distinct: 3271 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3272 3273 operation_modifiers = [] 3274 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3275 operation_modifiers.append(exp.var(self._prev.text.upper())) 3276 3277 limit = self._parse_limit(top=True) 3278 projections = self._parse_projections() 3279 3280 this = self.expression( 3281 exp.Select, 3282 kind=kind, 3283 hint=hint, 3284 distinct=distinct, 3285 expressions=projections, 3286 limit=limit, 3287 operation_modifiers=operation_modifiers or None, 3288 ) 3289 this.comments = comments 3290 3291 into = self._parse_into() 3292 if into: 3293 this.set("into", into) 3294 3295 if not from_: 3296 from_ = self._parse_from() 3297 3298 if from_: 3299 this.set("from", from_) 3300 3301 this = self._parse_query_modifiers(this) 3302 elif (table or nested) and self._match(TokenType.L_PAREN): 3303 this = self._parse_wrapped_select(table=table) 3304 3305 # We return early here so that the UNION isn't attached to the subquery by the 3306 # following call to _parse_set_operations, but instead becomes the parent node 3307 self._match_r_paren() 3308 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3309 elif self._match(TokenType.VALUES, advance=False): 3310 this = self._parse_derived_table_values() 3311 elif from_: 3312 this = exp.select("*").from_(from_.this, copy=False) 3313 elif self._match(TokenType.SUMMARIZE): 3314 table = self._match(TokenType.TABLE) 3315 this = self._parse_select() or self._parse_string() or self._parse_table() 3316 return self.expression(exp.Summarize, this=this, table=table) 3317 elif self._match(TokenType.DESCRIBE): 3318 this = self._parse_describe() 3319 elif self._match_text_seq("STREAM"): 3320 this = self._parse_function() 3321 if this: 3322 this = self.expression(exp.Stream, this=this) 3323 else: 3324 self._retreat(self._index - 1) 3325 else: 3326 this = None 3327 3328 return self._parse_set_operations(this) if parse_set_operation else this 3329 3330 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3331 self._match_text_seq("SEARCH") 3332 3333 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3334 3335 if not kind: 3336 return None 3337 3338 self._match_text_seq("FIRST", "BY") 3339 3340 return self.expression( 3341 exp.RecursiveWithSearch, 3342 kind=kind, 3343 this=self._parse_id_var(), 3344 expression=self._match_text_seq("SET") and self._parse_id_var(), 3345 using=self._match_text_seq("USING") and self._parse_id_var(), 3346 ) 3347 3348 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3349 if not skip_with_token and not self._match(TokenType.WITH): 3350 return None 3351 3352 comments = self._prev_comments 3353 recursive = self._match(TokenType.RECURSIVE) 3354 3355 last_comments = None 3356 expressions = [] 3357 while True: 3358 cte = self._parse_cte() 3359 if isinstance(cte, exp.CTE): 3360 expressions.append(cte) 3361 if last_comments: 3362 cte.add_comments(last_comments) 3363 3364 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3365 break 3366 else: 3367 self._match(TokenType.WITH) 3368 3369 last_comments = self._prev_comments 3370 3371 return self.expression( 3372 exp.With, 3373 comments=comments, 3374 expressions=expressions, 3375 recursive=recursive, 3376 search=self._parse_recursive_with_search(), 3377 ) 3378 3379 def _parse_cte(self) -> t.Optional[exp.CTE]: 3380 index = self._index 3381 3382 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3383 if not alias or not alias.this: 3384 self.raise_error("Expected CTE to have alias") 3385 3386 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3387 self._retreat(index) 3388 return None 3389 3390 comments = self._prev_comments 3391 3392 if self._match_text_seq("NOT", "MATERIALIZED"): 3393 materialized = False 3394 elif self._match_text_seq("MATERIALIZED"): 3395 materialized = True 3396 else: 3397 materialized = None 3398 3399 cte = self.expression( 3400 exp.CTE, 3401 this=self._parse_wrapped(self._parse_statement), 3402 alias=alias, 3403 materialized=materialized, 3404 comments=comments, 3405 ) 3406 3407 values = cte.this 3408 if isinstance(values, exp.Values): 3409 if values.alias: 3410 cte.set("this", exp.select("*").from_(values)) 3411 else: 3412 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3413 3414 return cte 3415 3416 def _parse_table_alias( 3417 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3418 ) -> t.Optional[exp.TableAlias]: 3419 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3420 # so this section tries to parse the clause 
version and if it fails, it treats the token 3421 # as an identifier (alias) 3422 if self._can_parse_limit_or_offset(): 3423 return None 3424 3425 any_token = self._match(TokenType.ALIAS) 3426 alias = ( 3427 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3428 or self._parse_string_as_identifier() 3429 ) 3430 3431 index = self._index 3432 if self._match(TokenType.L_PAREN): 3433 columns = self._parse_csv(self._parse_function_parameter) 3434 self._match_r_paren() if columns else self._retreat(index) 3435 else: 3436 columns = None 3437 3438 if not alias and not columns: 3439 return None 3440 3441 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3442 3443 # We bubble up comments from the Identifier to the TableAlias 3444 if isinstance(alias, exp.Identifier): 3445 table_alias.add_comments(alias.pop_comments()) 3446 3447 return table_alias 3448 3449 def _parse_subquery( 3450 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3451 ) -> t.Optional[exp.Subquery]: 3452 if not this: 3453 return None 3454 3455 return self.expression( 3456 exp.Subquery, 3457 this=this, 3458 pivots=self._parse_pivots(), 3459 alias=self._parse_table_alias() if parse_alias else None, 3460 sample=self._parse_table_sample(), 3461 ) 3462 3463 def _implicit_unnests_to_explicit(self, this: E) -> E: 3464 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3465 3466 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3467 for i, join in enumerate(this.args.get("joins") or []): 3468 table = join.this 3469 normalized_table = table.copy() 3470 normalized_table.meta["maybe_column"] = True 3471 normalized_table = _norm(normalized_table, dialect=self.dialect) 3472 3473 if isinstance(table, exp.Table) and not join.args.get("on"): 3474 if normalized_table.parts[0].name in refs: 3475 table_as_column = table.to_column() 3476 unnest = exp.Unnest(expressions=[table_as_column]) 3477 3478 # Table.to_column creates a parent Alias node that we want to convert to 3479 # a TableAlias and attach to the Unnest, so it matches the parser's output 3480 if isinstance(table.args.get("alias"), exp.TableAlias): 3481 table_as_column.replace(table_as_column.this) 3482 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3483 3484 table.replace(unnest) 3485 3486 refs.add(normalized_table.alias_or_name) 3487 3488 return this 3489 3490 def _parse_query_modifiers( 3491 self, this: t.Optional[exp.Expression] 3492 ) -> t.Optional[exp.Expression]: 3493 if isinstance(this, self.MODIFIABLES): 3494 for join in self._parse_joins(): 3495 this.append("joins", join) 3496 for lateral in iter(self._parse_lateral, None): 3497 this.append("laterals", lateral) 3498 3499 while True: 3500 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3501 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3502 key, expression = parser(self) 3503 3504 if expression: 3505 this.set(key, expression) 3506 if key == "limit": 3507 offset = expression.args.pop("offset", None) 3508 3509 if offset: 3510 offset = exp.Offset(expression=offset) 3511 this.set("offset", offset) 3512 3513 limit_by_expressions = expression.expressions 3514 expression.set("expressions", None) 3515 offset.set("expressions", limit_by_expressions) 3516 continue 3517 break 3518 3519 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3520 this = self._implicit_unnests_to_explicit(this) 3521 3522 return this 3523 3524 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3525 start = self._curr 3526 while self._curr: 3527 self._advance() 3528 3529 end = self._tokens[self._index - 1] 3530 return exp.Hint(expressions=[self._find_sql(start, end)]) 3531 3532 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3533 return self._parse_function_call() 3534 3535 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3536 start_index = self._index 3537 should_fallback_to_string = False 3538 3539 hints = [] 3540 try: 3541 for hint in iter( 3542 lambda: self._parse_csv( 3543 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3544 ), 3545 [], 3546 ): 3547 hints.extend(hint) 3548 except ParseError: 3549 should_fallback_to_string = True 3550 3551 if should_fallback_to_string or self._curr: 3552 self._retreat(start_index) 3553 return self._parse_hint_fallback_to_string() 3554 3555 return self.expression(exp.Hint, expressions=hints) 3556 3557 def _parse_hint(self) -> t.Optional[exp.Hint]: 3558 if self._match(TokenType.HINT) and self._prev_comments: 3559 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3560 3561 return None 3562 3563 def _parse_into(self) -> t.Optional[exp.Into]: 3564 if not self._match(TokenType.INTO): 3565 return None 3566 3567 temp = self._match(TokenType.TEMPORARY) 3568 unlogged = self._match_text_seq("UNLOGGED") 3569 self._match(TokenType.TABLE) 3570 3571 return self.expression( 3572 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3573 ) 3574 3575 def _parse_from( 3576 self, 3577 joins: bool = False, 3578 skip_from_token: bool = False, 3579 consume_pipe: bool = False, 3580 ) -> t.Optional[exp.From]: 3581 if not skip_from_token and not self._match(TokenType.FROM): 3582 return None 3583 3584 return self.expression( 3585 exp.From, 3586 comments=self._prev_comments, 3587 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3588 ) 3589 3590 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3591 return self.expression( 3592 exp.MatchRecognizeMeasure, 3593 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3594 this=self._parse_expression(), 3595 ) 3596 3597 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3598 if not self._match(TokenType.MATCH_RECOGNIZE): 3599 return None 3600 3601 self._match_l_paren() 3602 3603 partition = self._parse_partition_by() 3604 order = self._parse_order() 3605 3606 measures = ( 3607 self._parse_csv(self._parse_match_recognize_measure) 3608 if self._match_text_seq("MEASURES") 3609 else None 3610 ) 3611 3612 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3613 rows = exp.var("ONE ROW PER MATCH") 3614 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3615 text = "ALL ROWS PER MATCH" 3616 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3617 text += " SHOW EMPTY MATCHES" 3618 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3619 text += " OMIT EMPTY MATCHES" 3620 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3621 text += " WITH UNMATCHED ROWS" 3622 rows = exp.var(text) 3623 else: 3624 rows = None 3625 3626 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3627 text = "AFTER MATCH SKIP" 3628 if self._match_text_seq("PAST", "LAST", "ROW"): 3629 text += " PAST LAST ROW" 3630 elif self._match_text_seq("TO", "NEXT", "ROW"): 3631 text += " TO NEXT ROW" 3632 elif self._match_text_seq("TO", "FIRST"): 3633 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3634 elif self._match_text_seq("TO", "LAST"): 3635 text += f" TO LAST {self._advance_any().text}" # type: ignore 3636 after = exp.var(text) 3637 else: 3638 after = None 3639 3640 if self._match_text_seq("PATTERN"): 3641 self._match_l_paren() 3642 3643 if not self._curr: 3644 self.raise_error("Expecting )", self._curr) 3645 3646 paren = 1 3647 start = self._curr 3648 3649 while self._curr and paren > 0: 3650 if self._curr.token_type == TokenType.L_PAREN: 3651 paren += 1 3652 if self._curr.token_type == TokenType.R_PAREN: 3653 paren -= 1 3654 3655 end = self._prev 3656 self._advance() 3657 3658 if paren > 0: 3659 self.raise_error("Expecting )", self._curr) 3660 3661 pattern = exp.var(self._find_sql(start, end)) 3662 else: 3663 pattern = None 3664 3665 define = ( 3666 self._parse_csv(self._parse_name_as_expression) 3667 if self._match_text_seq("DEFINE") 3668 else None 3669 ) 3670 3671 self._match_r_paren() 3672 3673 return self.expression( 3674 exp.MatchRecognize, 3675 partition_by=partition, 3676 order=order, 3677 measures=measures, 3678 rows=rows, 3679 after=after, 3680 pattern=pattern, 3681 define=define, 3682 alias=self._parse_table_alias(), 3683 ) 3684 3685 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3686 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3687 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3688 cross_apply = False 3689 3690 if cross_apply is not None: 3691 this = self._parse_select(table=True) 3692 view = None 3693 outer = None 3694 elif self._match(TokenType.LATERAL): 3695 this = self._parse_select(table=True) 3696 view = self._match(TokenType.VIEW) 3697 outer = self._match(TokenType.OUTER) 3698 else: 3699 return None 3700 3701 if not this: 3702 this = ( 3703 self._parse_unnest() 3704 or self._parse_function() 3705 or self._parse_id_var(any_token=False) 3706 ) 3707 3708 while self._match(TokenType.DOT): 3709 this = exp.Dot( 3710 this=this, 3711 expression=self._parse_function() or self._parse_id_var(any_token=False), 3712 ) 3713 3714 ordinality: t.Optional[bool] = None 3715 3716 if view: 3717 table = self._parse_id_var(any_token=False) 3718 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3719 table_alias: t.Optional[exp.TableAlias] = self.expression( 3720 exp.TableAlias, this=table, columns=columns 3721 ) 3722 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3723 # We move the alias from the lateral's child node to the lateral itself 3724 table_alias = this.args["alias"].pop() 3725 else: 3726 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3727 table_alias = self._parse_table_alias() 3728 3729 return self.expression( 3730 exp.Lateral, 3731 this=this, 3732 view=view, 3733 outer=outer, 3734 alias=table_alias, 3735 cross_apply=cross_apply, 3736 ordinality=ordinality, 3737 ) 3738 3739 def _parse_join_parts( 3740 self, 3741 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3742 return ( 3743 self._match_set(self.JOIN_METHODS) and self._prev, 3744 self._match_set(self.JOIN_SIDES) and self._prev, 3745 self._match_set(self.JOIN_KINDS) and self._prev, 3746 ) 3747 3748 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3749 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3750 this = self._parse_column() 3751 if isinstance(this, exp.Column): 3752 return this.this 3753 return this 3754 3755 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3756 3757 def _parse_join( 3758 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3759 ) -> t.Optional[exp.Join]: 3760 if self._match(TokenType.COMMA): 3761 table = self._try_parse(self._parse_table) 3762 cross_join = self.expression(exp.Join, this=table) if table else None 3763 3764 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3765 cross_join.set("kind", "CROSS") 3766 3767 return cross_join 3768 3769 index = self._index 3770 method, side, kind = self._parse_join_parts() 3771 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3772 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3773 join_comments = self._prev_comments 3774 3775 if not skip_join_token and not join: 3776 self._retreat(index) 3777 kind = None 3778 method = None 3779 side = None 3780 3781 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3782 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3783 3784 if not skip_join_token and not join and not outer_apply and not cross_apply: 3785 return None 3786 3787 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3788 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3789 kwargs["expressions"] = self._parse_csv( 3790 lambda: self._parse_table(parse_bracket=parse_bracket) 3791 ) 3792 3793 if method: 3794 kwargs["method"] = method.text 3795 if side: 3796 kwargs["side"] = side.text 3797 if kind: 3798 kwargs["kind"] = kind.text 3799 if hint: 3800 kwargs["hint"] = hint 3801 3802 if self._match(TokenType.MATCH_CONDITION): 3803 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3804 3805 if self._match(TokenType.ON): 3806 kwargs["on"] = self._parse_assignment() 3807 elif self._match(TokenType.USING): 3808 kwargs["using"] = self._parse_using_identifiers() 3809 elif ( 3810 not (outer_apply or cross_apply) 3811 and not isinstance(kwargs["this"], exp.Unnest) 3812 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3813 ): 3814 index = self._index 3815 joins: t.Optional[list] = list(self._parse_joins()) 3816 3817 if joins and self._match(TokenType.ON): 3818 kwargs["on"] = self._parse_assignment() 3819 elif joins and self._match(TokenType.USING): 3820 kwargs["using"] = self._parse_using_identifiers() 3821 else: 3822 joins = None 3823 self._retreat(index) 3824 3825 kwargs["this"].set("joins", joins if joins else None) 3826 3827 kwargs["pivots"] = self._parse_pivots() 3828 3829 comments = [c for token in (method, side, kind) if token for c in token.comments] 3830 comments = (join_comments or []) + comments 3831 return self.expression(exp.Join, comments=comments, **kwargs) 3832 3833 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3834 this = self._parse_assignment() 3835 3836 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3837 return this 3838 3839 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3840 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3841 3842 return this 3843 3844 def _parse_index_params(self) -> exp.IndexParameters: 3845 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3846 3847 if self._match(TokenType.L_PAREN, advance=False): 3848 columns = self._parse_wrapped_csv(self._parse_with_operator) 3849 else: 3850 columns = None 3851 3852 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3853 partition_by = self._parse_partition_by() 3854 
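        # Illustrative aside (not from the source): the remaining trailing index
        # options (storage parameters, tablespace, partial-index predicate, ON
        # clause) are consumed below. For example, a hypothetical Postgres-style
        # statement such as
        #
        #     CREATE INDEX idx ON t USING btree (a) INCLUDE (b)
        #         WITH (fillfactor = 70) WHERE a > 0
        #
        # would end up with using=btree, columns=(a), include=(b), with_storage
        # holding the fillfactor property, and where holding the predicate.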
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3855 tablespace = ( 3856 self._parse_var(any_token=True) 3857 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3858 else None 3859 ) 3860 where = self._parse_where() 3861 3862 on = self._parse_field() if self._match(TokenType.ON) else None 3863 3864 return self.expression( 3865 exp.IndexParameters, 3866 using=using, 3867 columns=columns, 3868 include=include, 3869 partition_by=partition_by, 3870 where=where, 3871 with_storage=with_storage, 3872 tablespace=tablespace, 3873 on=on, 3874 ) 3875 3876 def _parse_index( 3877 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3878 ) -> t.Optional[exp.Index]: 3879 if index or anonymous: 3880 unique = None 3881 primary = None 3882 amp = None 3883 3884 self._match(TokenType.ON) 3885 self._match(TokenType.TABLE) # hive 3886 table = self._parse_table_parts(schema=True) 3887 else: 3888 unique = self._match(TokenType.UNIQUE) 3889 primary = self._match_text_seq("PRIMARY") 3890 amp = self._match_text_seq("AMP") 3891 3892 if not self._match(TokenType.INDEX): 3893 return None 3894 3895 index = self._parse_id_var() 3896 table = None 3897 3898 params = self._parse_index_params() 3899 3900 return self.expression( 3901 exp.Index, 3902 this=index, 3903 table=table, 3904 unique=unique, 3905 primary=primary, 3906 amp=amp, 3907 params=params, 3908 ) 3909 3910 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3911 hints: t.List[exp.Expression] = [] 3912 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3913 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3914 hints.append( 3915 self.expression( 3916 exp.WithTableHint, 3917 expressions=self._parse_csv( 3918 lambda: self._parse_function() or self._parse_var(any_token=True) 3919 ), 3920 ) 3921 ) 3922 self._match_r_paren() 3923 else: 3924 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3925 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3926 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3927 3928 self._match_set((TokenType.INDEX, TokenType.KEY)) 3929 if self._match(TokenType.FOR): 3930 hint.set("target", self._advance_any() and self._prev.text.upper()) 3931 3932 hint.set("expressions", self._parse_wrapped_id_vars()) 3933 hints.append(hint) 3934 3935 return hints or None 3936 3937 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3938 return ( 3939 (not schema and self._parse_function(optional_parens=False)) 3940 or self._parse_id_var(any_token=False) 3941 or self._parse_string_as_identifier() 3942 or self._parse_placeholder() 3943 ) 3944 3945 def _parse_table_parts( 3946 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3947 ) -> exp.Table: 3948 catalog = None 3949 db = None 3950 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3951 3952 while self._match(TokenType.DOT): 3953 if catalog: 3954 # This allows nesting the table in arbitrarily many dot expressions if needed 3955 table = self.expression( 3956 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3957 ) 3958 else: 3959 catalog = db 3960 db = table 3961 # "" used for tsql FROM a..b case 3962 table = self._parse_table_part(schema=schema) or "" 3963 3964 if ( 3965 wildcard 3966 and self._is_connected() 3967 and (isinstance(table, exp.Identifier) or not table) 3968 and self._match(TokenType.STAR) 3969 ): 3970 if isinstance(table, exp.Identifier): 
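                # e.g. BigQuery-style wildcard tables such as `my_dataset.events_2023*`:
                # the star is folded into the trailing identifier's text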
3971 table.args["this"] += "*" 3972 else: 3973 table = exp.Identifier(this="*") 3974 3975 # We bubble up comments from the Identifier to the Table 3976 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3977 3978 if is_db_reference: 3979 catalog = db 3980 db = table 3981 table = None 3982 3983 if not table and not is_db_reference: 3984 self.raise_error(f"Expected table name but got {self._curr}") 3985 if not db and is_db_reference: 3986 self.raise_error(f"Expected database name but got {self._curr}") 3987 3988 table = self.expression( 3989 exp.Table, 3990 comments=comments, 3991 this=table, 3992 db=db, 3993 catalog=catalog, 3994 ) 3995 3996 changes = self._parse_changes() 3997 if changes: 3998 table.set("changes", changes) 3999 4000 at_before = self._parse_historical_data() 4001 if at_before: 4002 table.set("when", at_before) 4003 4004 pivots = self._parse_pivots() 4005 if pivots: 4006 table.set("pivots", pivots) 4007 4008 return table 4009 4010 def _parse_table( 4011 self, 4012 schema: bool = False, 4013 joins: bool = False, 4014 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4015 parse_bracket: bool = False, 4016 is_db_reference: bool = False, 4017 parse_partition: bool = False, 4018 consume_pipe: bool = False, 4019 ) -> t.Optional[exp.Expression]: 4020 lateral = self._parse_lateral() 4021 if lateral: 4022 return lateral 4023 4024 unnest = self._parse_unnest() 4025 if unnest: 4026 return unnest 4027 4028 values = self._parse_derived_table_values() 4029 if values: 4030 return values 4031 4032 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4033 if subquery: 4034 if not subquery.args.get("pivots"): 4035 subquery.set("pivots", self._parse_pivots()) 4036 return subquery 4037 4038 bracket = parse_bracket and self._parse_bracket(None) 4039 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4040 4041 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4042 self._parse_table 4043 ) 4044 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4045 4046 only = self._match(TokenType.ONLY) 4047 4048 this = t.cast( 4049 exp.Expression, 4050 bracket 4051 or rows_from 4052 or self._parse_bracket( 4053 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4054 ), 4055 ) 4056 4057 if only: 4058 this.set("only", only) 4059 4060 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4061 self._match_text_seq("*") 4062 4063 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4064 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4065 this.set("partition", self._parse_partition()) 4066 4067 if schema: 4068 return self._parse_schema(this=this) 4069 4070 version = self._parse_version() 4071 4072 if version: 4073 this.set("version", version) 4074 4075 if self.dialect.ALIAS_POST_TABLESAMPLE: 4076 this.set("sample", self._parse_table_sample()) 4077 4078 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4079 if alias: 4080 this.set("alias", alias) 4081 4082 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4083 return self.expression( 4084 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4085 ) 4086 4087 this.set("hints", self._parse_table_hints()) 4088 4089 if not this.args.get("pivots"): 4090 this.set("pivots", self._parse_pivots()) 4091 4092 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4093 this.set("sample", 
self._parse_table_sample()) 4094 4095 if joins: 4096 for join in self._parse_joins(): 4097 this.append("joins", join) 4098 4099 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4100 this.set("ordinality", True) 4101 this.set("alias", self._parse_table_alias()) 4102 4103 return this 4104 4105 def _parse_version(self) -> t.Optional[exp.Version]: 4106 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4107 this = "TIMESTAMP" 4108 elif self._match(TokenType.VERSION_SNAPSHOT): 4109 this = "VERSION" 4110 else: 4111 return None 4112 4113 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4114 kind = self._prev.text.upper() 4115 start = self._parse_bitwise() 4116 self._match_texts(("TO", "AND")) 4117 end = self._parse_bitwise() 4118 expression: t.Optional[exp.Expression] = self.expression( 4119 exp.Tuple, expressions=[start, end] 4120 ) 4121 elif self._match_text_seq("CONTAINED", "IN"): 4122 kind = "CONTAINED IN" 4123 expression = self.expression( 4124 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4125 ) 4126 elif self._match(TokenType.ALL): 4127 kind = "ALL" 4128 expression = None 4129 else: 4130 self._match_text_seq("AS", "OF") 4131 kind = "AS OF" 4132 expression = self._parse_type() 4133 4134 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4135 4136 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4137 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4138 index = self._index 4139 historical_data = None 4140 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4141 this = self._prev.text.upper() 4142 kind = ( 4143 self._match(TokenType.L_PAREN) 4144 and self._match_texts(self.HISTORICAL_DATA_KIND) 4145 and self._prev.text.upper() 4146 ) 4147 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4148 4149 if expression: 4150 self._match_r_paren() 4151 historical_data = self.expression( 4152 exp.HistoricalData, this=this, kind=kind, expression=expression 4153 ) 4154 else: 4155 self._retreat(index) 4156 4157 return historical_data 4158 4159 def _parse_changes(self) -> t.Optional[exp.Changes]: 4160 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4161 return None 4162 4163 information = self._parse_var(any_token=True) 4164 self._match_r_paren() 4165 4166 return self.expression( 4167 exp.Changes, 4168 information=information, 4169 at_before=self._parse_historical_data(), 4170 end=self._parse_historical_data(), 4171 ) 4172 4173 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4174 if not self._match(TokenType.UNNEST): 4175 return None 4176 4177 expressions = self._parse_wrapped_csv(self._parse_equality) 4178 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4179 4180 alias = self._parse_table_alias() if with_alias else None 4181 4182 if alias: 4183 if self.dialect.UNNEST_COLUMN_ONLY: 4184 if alias.args.get("columns"): 4185 self.raise_error("Unexpected extra column alias in unnest.") 4186 4187 alias.set("columns", [alias.this]) 4188 alias.set("this", None) 4189 4190 columns = alias.args.get("columns") or [] 4191 if offset and len(expressions) < len(columns): 4192 offset = columns.pop() 4193 4194 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4195 self._match(TokenType.ALIAS) 4196 offset = self._parse_id_var( 4197 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4198 ) or exp.to_identifier("offset") 4199 4200 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4201 
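    # Example (a minimal sketch, not part of the parser; assumes only the public
    # `sqlglot.parse_one` entry point): _parse_unnest above is what turns a
    # BigQuery-style UNNEST with an explicit offset alias into an exp.Unnest.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one(
    #         "SELECT v FROM UNNEST([1, 2, 3]) AS v WITH OFFSET AS pos",
    #         read="bigquery",
    #     )
    #     unnest = ast.find(exp.Unnest)
    #     # unnest.args["offset"] is the identifier `pos`; since BigQuery sets
    #     # UNNEST_COLUMN_ONLY, the alias `v` is stored as a column alias.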
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ...
ON col IN (row_val1, row_val2) 4308 return self._parse_in(this) 4309 if self._match(TokenType.ALIAS, advance=False): 4310 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4311 return self._parse_alias(this) 4312 4313 return this 4314 4315 this = self._parse_table() 4316 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4317 into = self._parse_unpivot_columns() 4318 using = self._match(TokenType.USING) and self._parse_csv( 4319 lambda: self._parse_alias(self._parse_function()) 4320 ) 4321 group = self._parse_group() 4322 4323 return self.expression( 4324 exp.Pivot, 4325 this=this, 4326 expressions=expressions, 4327 using=using, 4328 group=group, 4329 unpivot=is_unpivot, 4330 into=into, 4331 ) 4332 4333 def _parse_pivot_in(self) -> exp.In: 4334 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4335 this = self._parse_select_or_expression() 4336 4337 self._match(TokenType.ALIAS) 4338 alias = self._parse_bitwise() 4339 if alias: 4340 if isinstance(alias, exp.Column) and not alias.db: 4341 alias = alias.this 4342 return self.expression(exp.PivotAlias, this=this, alias=alias) 4343 4344 return this 4345 4346 value = self._parse_column() 4347 4348 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4349 self.raise_error("Expecting IN (") 4350 4351 if self._match(TokenType.ANY): 4352 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4353 else: 4354 exprs = self._parse_csv(_parse_aliased_expression) 4355 4356 self._match_r_paren() 4357 return self.expression(exp.In, this=value, expressions=exprs) 4358 4359 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4360 func = self._parse_function() 4361 if not func: 4362 self.raise_error("Expecting an aggregation function in PIVOT") 4363 4364 return self._parse_alias(func) 4365 4366 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4367 index = self._index 4368 include_nulls = None 4369 4370 if self._match(TokenType.PIVOT): 4371 unpivot = False 4372 elif self._match(TokenType.UNPIVOT): 4373 unpivot = True 4374 4375 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4376 if self._match_text_seq("INCLUDE", "NULLS"): 4377 include_nulls = True 4378 elif self._match_text_seq("EXCLUDE", "NULLS"): 4379 include_nulls = False 4380 else: 4381 return None 4382 4383 expressions = [] 4384 4385 if not self._match(TokenType.L_PAREN): 4386 self._retreat(index) 4387 return None 4388 4389 if unpivot: 4390 expressions = self._parse_csv(self._parse_column) 4391 else: 4392 expressions = self._parse_csv(self._parse_pivot_aggregation) 4393 4394 if not expressions: 4395 self.raise_error("Failed to parse PIVOT's aggregation list") 4396 4397 if not self._match(TokenType.FOR): 4398 self.raise_error("Expecting FOR") 4399 4400 fields = [] 4401 while True: 4402 field = self._try_parse(self._parse_pivot_in) 4403 if not field: 4404 break 4405 fields.append(field) 4406 4407 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4408 self._parse_bitwise 4409 ) 4410 4411 group = self._parse_group() 4412 4413 self._match_r_paren() 4414 4415 pivot = self.expression( 4416 exp.Pivot, 4417 expressions=expressions, 4418 fields=fields, 4419 unpivot=unpivot, 4420 include_nulls=include_nulls, 4421 default_on_null=default_on_null, 4422 group=group, 4423 ) 4424 4425 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4426 pivot.set("alias", self._parse_table_alias()) 4427 4428 if not unpivot: 4429 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4430 4431 columns: t.List[exp.Expression] = [] 4432 all_fields = [] 4433 for pivot_field in pivot.fields: 4434 pivot_field_expressions = pivot_field.expressions 4435 4436 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4437 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4438 continue 4439 4440 all_fields.append( 4441 [ 4442 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4443 for fld in pivot_field_expressions 4444 ] 4445 ) 4446 4447 if all_fields: 4448 if names: 4449 all_fields.append(names) 4450 4451 # Generate all possible combinations of the pivot columns 4452 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4453 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4454 for fld_parts_tuple in itertools.product(*all_fields): 4455 fld_parts = list(fld_parts_tuple) 4456 4457 if names and self.PREFIXED_PIVOT_COLUMNS: 4458 # Move the "name" to the front of the list 4459 fld_parts.insert(0, fld_parts.pop(-1)) 4460 4461 columns.append(exp.to_identifier("_".join(fld_parts))) 4462 4463 pivot.set("columns", columns) 4464 4465 return pivot 4466 4467 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4468 return [agg.alias for agg in aggregations if agg.alias] 4469 4470 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4471 if not skip_where_token and not self._match(TokenType.PREWHERE): 4472 return None 4473 4474 return self.expression( 4475 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4476 ) 4477 4478 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4479 if not skip_where_token and not self._match(TokenType.WHERE): 4480 return None 4481 4482 return self.expression( 4483 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4484 ) 4485 4486 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4487 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4488 return None 4489 comments = self._prev_comments 4490 4491 elements: t.Dict[str, t.Any] = defaultdict(list) 4492 4493 if self._match(TokenType.ALL): 4494 elements["all"] = True 4495 elif self._match(TokenType.DISTINCT): 4496 elements["all"] = False 4497 4498 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4499 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4500 4501 while True: 4502 index = self._index 4503 4504 elements["expressions"].extend( 4505 self._parse_csv( 4506 lambda: None 4507 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4508 else self._parse_assignment() 4509 ) 4510 ) 4511 4512 before_with_index = self._index 4513 with_prefix = self._match(TokenType.WITH) 4514 4515 if self._match(TokenType.ROLLUP): 4516 elements["rollup"].append( 4517 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4518 ) 4519 elif self._match(TokenType.CUBE): 4520 elements["cube"].append( 4521 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4522 ) 4523 elif self._match(TokenType.GROUPING_SETS): 4524 elements["grouping_sets"].append( 4525 self.expression( 4526 exp.GroupingSets, 4527 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4528 ) 4529 ) 4530 elif self._match_text_seq("TOTALS"): 4531 elements["totals"] = True # type: ignore 4532 4533 if 
before_with_index <= self._index <= before_with_index + 1: 4534 self._retreat(before_with_index) 4535 break 4536 4537 if index == self._index: 4538 break 4539 4540 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4541 4542 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4543 return self.expression( 4544 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4545 ) 4546 4547 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4548 if self._match(TokenType.L_PAREN): 4549 grouping_set = self._parse_csv(self._parse_column) 4550 self._match_r_paren() 4551 return self.expression(exp.Tuple, expressions=grouping_set) 4552 4553 return self._parse_column() 4554 4555 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4556 if not skip_having_token and not self._match(TokenType.HAVING): 4557 return None 4558 return self.expression( 4559 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4560 ) 4561 4562 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4563 if not self._match(TokenType.QUALIFY): 4564 return None 4565 return self.expression(exp.Qualify, this=self._parse_assignment()) 4566 4567 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4568 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4569 exp.Prior, this=self._parse_bitwise() 4570 ) 4571 connect = self._parse_assignment() 4572 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4573 return connect 4574 4575 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4576 if skip_start_token: 4577 start = None 4578 elif self._match(TokenType.START_WITH): 4579 start = self._parse_assignment() 4580 else: 4581 return None 4582 4583 self._match(TokenType.CONNECT_BY) 4584 nocycle = self._match_text_seq("NOCYCLE") 4585 connect = self._parse_connect_with_prior() 4586 4587 if not start and self._match(TokenType.START_WITH): 4588 start = self._parse_assignment() 4589 4590 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4591 4592 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4593 this = self._parse_id_var(any_token=True) 4594 if self._match(TokenType.ALIAS): 4595 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4596 return this 4597 4598 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4599 if self._match_text_seq("INTERPOLATE"): 4600 return self._parse_wrapped_csv(self._parse_name_as_expression) 4601 return None 4602 4603 def _parse_order( 4604 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4605 ) -> t.Optional[exp.Expression]: 4606 siblings = None 4607 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4608 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4609 return this 4610 4611 siblings = True 4612 4613 return self.expression( 4614 exp.Order, 4615 comments=self._prev_comments, 4616 this=this, 4617 expressions=self._parse_csv(self._parse_ordered), 4618 siblings=siblings, 4619 ) 4620 4621 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4622 if not self._match(token): 4623 return None 4624 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4625 4626 def _parse_ordered( 4627 self, parse_method: t.Optional[t.Callable] = None 4628 ) -> t.Optional[exp.Ordered]: 4629 this = parse_method() if parse_method else 
self._parse_assignment() 4630 if not this: 4631 return None 4632 4633 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4634 this = exp.var("ALL") 4635 4636 asc = self._match(TokenType.ASC) 4637 desc = self._match(TokenType.DESC) or (asc and False) 4638 4639 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4640 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4641 4642 nulls_first = is_nulls_first or False 4643 explicitly_null_ordered = is_nulls_first or is_nulls_last 4644 4645 if ( 4646 not explicitly_null_ordered 4647 and ( 4648 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4649 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4650 ) 4651 and self.dialect.NULL_ORDERING != "nulls_are_last" 4652 ): 4653 nulls_first = True 4654 4655 if self._match_text_seq("WITH", "FILL"): 4656 with_fill = self.expression( 4657 exp.WithFill, 4658 **{ # type: ignore 4659 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4660 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4661 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4662 "interpolate": self._parse_interpolate(), 4663 }, 4664 ) 4665 else: 4666 with_fill = None 4667 4668 return self.expression( 4669 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4670 ) 4671 4672 def _parse_limit_options(self) -> exp.LimitOptions: 4673 percent = self._match(TokenType.PERCENT) 4674 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4675 self._match_text_seq("ONLY") 4676 with_ties = self._match_text_seq("WITH", "TIES") 4677 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4678 4679 def _parse_limit( 4680 self, 4681 this: t.Optional[exp.Expression] = None, 4682 top: bool = False, 4683 skip_limit_token: bool = False, 4684 ) -> t.Optional[exp.Expression]: 4685 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4686 comments = self._prev_comments 4687 if top: 4688 limit_paren = self._match(TokenType.L_PAREN) 4689 expression = self._parse_term() if limit_paren else self._parse_number() 4690 4691 if limit_paren: 4692 self._match_r_paren() 4693 4694 limit_options = self._parse_limit_options() 4695 else: 4696 limit_options = None 4697 expression = self._parse_term() 4698 4699 if self._match(TokenType.COMMA): 4700 offset = expression 4701 expression = self._parse_term() 4702 else: 4703 offset = None 4704 4705 limit_exp = self.expression( 4706 exp.Limit, 4707 this=this, 4708 expression=expression, 4709 offset=offset, 4710 comments=comments, 4711 limit_options=limit_options, 4712 expressions=self._parse_limit_by(), 4713 ) 4714 4715 return limit_exp 4716 4717 if self._match(TokenType.FETCH): 4718 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4719 direction = self._prev.text.upper() if direction else "FIRST" 4720 4721 count = self._parse_field(tokens=self.FETCH_TOKENS) 4722 4723 return self.expression( 4724 exp.Fetch, 4725 direction=direction, 4726 count=count, 4727 limit_options=self._parse_limit_options(), 4728 ) 4729 4730 return this 4731 4732 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4733 if not self._match(TokenType.OFFSET): 4734 return this 4735 4736 count = self._parse_term() 4737 self._match_set((TokenType.ROW, TokenType.ROWS)) 4738 4739 return self.expression( 4740 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4741 ) 4742 4743 def _can_parse_limit_or_offset(self) -> 
bool: 4744 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4745 return False 4746 4747 index = self._index 4748 result = bool( 4749 self._try_parse(self._parse_limit, retreat=True) 4750 or self._try_parse(self._parse_offset, retreat=True) 4751 ) 4752 self._retreat(index) 4753 return result 4754 4755 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4756 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4757 4758 def _parse_locks(self) -> t.List[exp.Lock]: 4759 locks = [] 4760 while True: 4761 update, key = None, None 4762 if self._match_text_seq("FOR", "UPDATE"): 4763 update = True 4764 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4765 "LOCK", "IN", "SHARE", "MODE" 4766 ): 4767 update = False 4768 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4769 update, key = False, True 4770 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4771 update, key = True, True 4772 else: 4773 break 4774 4775 expressions = None 4776 if self._match_text_seq("OF"): 4777 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4778 4779 wait: t.Optional[bool | exp.Expression] = None 4780 if self._match_text_seq("NOWAIT"): 4781 wait = True 4782 elif self._match_text_seq("WAIT"): 4783 wait = self._parse_primary() 4784 elif self._match_text_seq("SKIP", "LOCKED"): 4785 wait = False 4786 4787 locks.append( 4788 self.expression( 4789 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4790 ) 4791 ) 4792 4793 return locks 4794 4795 def parse_set_operation( 4796 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4797 ) -> t.Optional[exp.Expression]: 4798 start = self._index 4799 _, side_token, kind_token = self._parse_join_parts() 4800 4801 side = side_token.text if side_token else None 4802 kind = kind_token.text if kind_token else None 4803 4804 if not self._match_set(self.SET_OPERATIONS): 4805 self._retreat(start) 4806 return None 4807 4808 token_type = self._prev.token_type 4809 4810 if token_type == TokenType.UNION: 4811 operation: t.Type[exp.SetOperation] = exp.Union 4812 elif token_type == TokenType.EXCEPT: 4813 operation = exp.Except 4814 else: 4815 operation = exp.Intersect 4816 4817 comments = self._prev.comments 4818 4819 if self._match(TokenType.DISTINCT): 4820 distinct: t.Optional[bool] = True 4821 elif self._match(TokenType.ALL): 4822 distinct = False 4823 else: 4824 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4825 if distinct is None: 4826 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4827 4828 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4829 "STRICT", "CORRESPONDING" 4830 ) 4831 if self._match_text_seq("CORRESPONDING"): 4832 by_name = True 4833 if not side and not kind: 4834 kind = "INNER" 4835 4836 on_column_list = None 4837 if by_name and self._match_texts(("ON", "BY")): 4838 on_column_list = self._parse_wrapped_csv(self._parse_column) 4839 4840 expression = self._parse_select( 4841 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4842 ) 4843 4844 return self.expression( 4845 operation, 4846 comments=comments, 4847 this=this, 4848 distinct=distinct, 4849 by_name=by_name, 4850 expression=expression, 4851 side=side, 4852 kind=kind, 4853 on=on_column_list, 4854 ) 4855 4856 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4857 while this: 4858 setop = self.parse_set_operation(this) 4859 if not setop: 4860 break 4861 this = setop 4862 4863 
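        # When MODIFIERS_ATTACHED_TO_SET_OP is set, trailing modifiers such as
        # ORDER BY and LIMIT are initially parsed as part of the right-most
        # SELECT, and the block below pops them onto the set operation itself.
        # A rough sketch of the observable behavior (assuming only the public
        # `sqlglot.parse_one` entry point):
        #
        #     import sqlglot
        #     ast = sqlglot.parse_one("SELECT 1 UNION SELECT 2 ORDER BY 1")
        #     # ast is an exp.Union, and the ORDER BY ends up in ast.args["order"]
        #     # rather than on the second SELECT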
if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4864 expression = this.expression 4865 4866 if expression: 4867 for arg in self.SET_OP_MODIFIERS: 4868 expr = expression.args.get(arg) 4869 if expr: 4870 this.set(arg, expr.pop()) 4871 4872 return this 4873 4874 def _parse_expression(self) -> t.Optional[exp.Expression]: 4875 return self._parse_alias(self._parse_assignment()) 4876 4877 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4878 this = self._parse_disjunction() 4879 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4880 # This allows us to parse <non-identifier token> := <expr> 4881 this = exp.column( 4882 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4883 ) 4884 4885 while self._match_set(self.ASSIGNMENT): 4886 if isinstance(this, exp.Column) and len(this.parts) == 1: 4887 this = this.this 4888 4889 this = self.expression( 4890 self.ASSIGNMENT[self._prev.token_type], 4891 this=this, 4892 comments=self._prev_comments, 4893 expression=self._parse_assignment(), 4894 ) 4895 4896 return this 4897 4898 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4899 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4900 4901 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4902 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4903 4904 def _parse_equality(self) -> t.Optional[exp.Expression]: 4905 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4906 4907 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4908 return self._parse_tokens(self._parse_range, self.COMPARISON) 4909 4910 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4911 this = this or self._parse_bitwise() 4912 negate = self._match(TokenType.NOT) 4913 4914 if self._match_set(self.RANGE_PARSERS): 4915 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4916 if not expression: 4917 return this 4918 4919 this = expression 4920 elif self._match(TokenType.ISNULL): 4921 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4922 4923 # Postgres supports ISNULL and NOTNULL for conditions. 
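        # e.g. `x NOTNULL` is parsed below into NOT (x IS NULL), complementing
        # the ISNULL branch above.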
4924 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4925 if self._match(TokenType.NOTNULL): 4926 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4927 this = self.expression(exp.Not, this=this) 4928 4929 if negate: 4930 this = self._negate_range(this) 4931 4932 if self._match(TokenType.IS): 4933 this = self._parse_is(this) 4934 4935 return this 4936 4937 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4938 if not this: 4939 return this 4940 4941 return self.expression(exp.Not, this=this) 4942 4943 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4944 index = self._index - 1 4945 negate = self._match(TokenType.NOT) 4946 4947 if self._match_text_seq("DISTINCT", "FROM"): 4948 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4949 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4950 4951 if self._match(TokenType.JSON): 4952 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4953 4954 if self._match_text_seq("WITH"): 4955 _with = True 4956 elif self._match_text_seq("WITHOUT"): 4957 _with = False 4958 else: 4959 _with = None 4960 4961 unique = self._match(TokenType.UNIQUE) 4962 self._match_text_seq("KEYS") 4963 expression: t.Optional[exp.Expression] = self.expression( 4964 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4965 ) 4966 else: 4967 expression = self._parse_primary() or self._parse_null() 4968 if not expression: 4969 self._retreat(index) 4970 return None 4971 4972 this = self.expression(exp.Is, this=this, expression=expression) 4973 return self.expression(exp.Not, this=this) if negate else this 4974 4975 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4976 unnest = self._parse_unnest(with_alias=False) 4977 if unnest: 4978 this = self.expression(exp.In, this=this, unnest=unnest) 4979 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4980 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4981 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4982 4983 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4984 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4985 else: 4986 this = self.expression(exp.In, this=this, expressions=expressions) 4987 4988 if matched_l_paren: 4989 self._match_r_paren(this) 4990 elif not self._match(TokenType.R_BRACKET, expression=this): 4991 self.raise_error("Expecting ]") 4992 else: 4993 this = self.expression(exp.In, this=this, field=self._parse_column()) 4994 4995 return this 4996 4997 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4998 symmetric = None 4999 if self._match_text_seq("SYMMETRIC"): 5000 symmetric = True 5001 elif self._match_text_seq("ASYMMETRIC"): 5002 symmetric = False 5003 5004 low = self._parse_bitwise() 5005 self._match(TokenType.AND) 5006 high = self._parse_bitwise() 5007 5008 return self.expression( 5009 exp.Between, 5010 this=this, 5011 low=low, 5012 high=high, 5013 symmetric=symmetric, 5014 ) 5015 5016 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5017 if not self._match(TokenType.ESCAPE): 5018 return this 5019 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5020 5021 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5022 index = self._index 5023 5024 if not 
self._match(TokenType.INTERVAL) and match_interval: 5025 return None 5026 5027 if self._match(TokenType.STRING, advance=False): 5028 this = self._parse_primary() 5029 else: 5030 this = self._parse_term() 5031 5032 if not this or ( 5033 isinstance(this, exp.Column) 5034 and not this.table 5035 and not this.this.quoted 5036 and this.name.upper() == "IS" 5037 ): 5038 self._retreat(index) 5039 return None 5040 5041 unit = self._parse_function() or ( 5042 not self._match(TokenType.ALIAS, advance=False) 5043 and self._parse_var(any_token=True, upper=True) 5044 ) 5045 5046 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5047 # each INTERVAL expression into this canonical form so it's easy to transpile 5048 if this and this.is_number: 5049 this = exp.Literal.string(this.to_py()) 5050 elif this and this.is_string: 5051 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5052 if parts and unit: 5053 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5054 unit = None 5055 self._retreat(self._index - 1) 5056 5057 if len(parts) == 1: 5058 this = exp.Literal.string(parts[0][0]) 5059 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5060 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5061 unit = self.expression( 5062 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5063 ) 5064 5065 interval = self.expression(exp.Interval, this=this, unit=unit) 5066 5067 index = self._index 5068 self._match(TokenType.PLUS) 5069 5070 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5071 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5072 return self.expression( 5073 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5074 ) 5075 5076 self._retreat(index) 5077 return interval 5078 5079 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5080 this = self._parse_term() 5081 5082 while True: 5083 if self._match_set(self.BITWISE): 5084 this = self.expression( 5085 self.BITWISE[self._prev.token_type], 5086 this=this, 5087 expression=self._parse_term(), 5088 ) 5089 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5090 this = self.expression( 5091 exp.DPipe, 5092 this=this, 5093 expression=self._parse_term(), 5094 safe=not self.dialect.STRICT_STRING_CONCAT, 5095 ) 5096 elif self._match(TokenType.DQMARK): 5097 this = self.expression( 5098 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5099 ) 5100 elif self._match_pair(TokenType.LT, TokenType.LT): 5101 this = self.expression( 5102 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5103 ) 5104 elif self._match_pair(TokenType.GT, TokenType.GT): 5105 this = self.expression( 5106 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5107 ) 5108 else: 5109 break 5110 5111 return this 5112 5113 def _parse_term(self) -> t.Optional[exp.Expression]: 5114 this = self._parse_factor() 5115 5116 while self._match_set(self.TERM): 5117 klass = self.TERM[self._prev.token_type] 5118 comments = self._prev_comments 5119 expression = self._parse_factor() 5120 5121 this = self.expression(klass, this=this, comments=comments, expression=expression) 5122 5123 if isinstance(this, exp.Collate): 5124 expr = this.expression 5125 5126 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5127 # fallback to Identifier / Var 5128 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5129 ident = expr.this 5130 if 
isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
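            # Concretely, for the Snowflake case mentioned above: given the input
            # SELECT DECIMAL FROM t, the DECIMAL keyword is expanded to
            # DECIMAL(38, 0) by a TYPE_CONVERTERS callable, but no precision
            # tokens were actually consumed, so the index difference stays at 1
            # and we fall through to parsing a column named DECIMAL instead.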

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the
                # data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed into
                # exp.Array, in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
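
    # Since exp.DataType.build round-trips through this method, nested types can be
    # built and re-rendered directly. A sketch (output is approximate):
    #
    #     >>> from sqlglot import exp
    #     >>> exp.DataType.build("ARRAY<INT>").sql()
    #     'ARRAY<INT>'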

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also
            # a type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
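
    # Dotted references fill the column/table/db/catalog slots from right to left. A
    # sketch (exp.Column works as an `into` target because it has an entry in
    # EXPRESSION_PARSERS; the names are illustrative):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> col = sqlglot.parse_one("a.b.c", into=exp.Column)
    #     >>> col.name, col.table, col.db
    #     ('c', 'b', 'a')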
col:"a'b") as 5579 # it'll roundtrip to a string literal in GET_PATH 5580 if isinstance(path, exp.Identifier) and path.quoted: 5581 escape = True 5582 5583 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5584 5585 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5586 # Databricks transforms it back to the colon/dot notation 5587 if json_path: 5588 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5589 5590 if json_path_expr: 5591 json_path_expr.set("escape", escape) 5592 5593 this = self.expression( 5594 exp.JSONExtract, 5595 this=this, 5596 expression=json_path_expr, 5597 variant_extract=True, 5598 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5599 ) 5600 5601 while casts: 5602 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5603 5604 return this 5605 5606 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5607 return self._parse_types() 5608 5609 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5610 this = self._parse_bracket(this) 5611 5612 while self._match_set(self.COLUMN_OPERATORS): 5613 op_token = self._prev.token_type 5614 op = self.COLUMN_OPERATORS.get(op_token) 5615 5616 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5617 field = self._parse_dcolon() 5618 if not field: 5619 self.raise_error("Expected type") 5620 elif op and self._curr: 5621 field = self._parse_column_reference() or self._parse_bracket() 5622 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5623 field = self._parse_column_ops(field) 5624 else: 5625 field = self._parse_field(any_token=True, anonymous_func=True) 5626 5627 # Function calls can be qualified, e.g., x.y.FOO() 5628 # This converts the final AST to a series of Dots leading to the function call 5629 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5630 if isinstance(field, (exp.Func, exp.Window)) and this: 5631 this = this.transform( 5632 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5633 ) 5634 5635 if op: 5636 this = op(self, this, field) 5637 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5638 this = self.expression( 5639 exp.Column, 5640 comments=this.comments, 5641 this=field, 5642 table=this.this, 5643 db=this.args.get("table"), 5644 catalog=this.args.get("db"), 5645 ) 5646 elif isinstance(field, exp.Window): 5647 # Move the exp.Dot's to the window's function 5648 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5649 field.set("this", window_func) 5650 this = field 5651 else: 5652 this = self.expression(exp.Dot, this=this, expression=field) 5653 5654 if field and field.comments: 5655 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5656 5657 this = self._parse_bracket(this) 5658 5659 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5660 5661 def _parse_paren(self) -> t.Optional[exp.Expression]: 5662 if not self._match(TokenType.L_PAREN): 5663 return None 5664 5665 comments = self._prev_comments 5666 query = self._parse_select() 5667 5668 if query: 5669 expressions = [query] 5670 else: 5671 expressions = self._parse_expressions() 5672 5673 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5674 5675 if not this and self._match(TokenType.R_PAREN, advance=False): 5676 this = self.expression(exp.Tuple) 5677 elif 

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
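
    # Function names that aren't registered in FUNCTIONS fall through to exp.Anonymous
    # rather than failing. A sketch (MY_UDF is just an illustrative name):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT MY_UDF(1)").find(exp.Anonymous).name
    #     'MY_UDF'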

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
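
    # A column definition therefore parses into an exp.ColumnDef holding the type as
    # `kind` and any constraints as exp.ColumnConstraint nodes. Sketch (illustrative
    # table/column names):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> cd = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").find(exp.ColumnDef)
    #     >>> cd.name, cd.args["kind"].sql(), len(cd.args["constraints"])
    #     ('x', 'INT', 1)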

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )
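
    # The ON DELETE / ON UPDATE actions collected above land in the ForeignKey node's
    # args. Sketch (illustrative table names):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sql = "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (id) ON DELETE CASCADE)"
    #     >>> sqlglot.parse_one(sql).find(exp.ForeignKey).args.get("delete")
    #     'CASCADE'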

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. The column is parsed into the
        corresponding expression type, e.g. `{d'yyyy-mm-dd'}` is parsed as a `Date`
        node, exactly as `DATE('yyyy-mm-dd')` would be.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression
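
    # Sketch: with this in place, the ODBC escape `{d '...'}` parses to the same node
    # as a plain DATE literal would (assuming the dialect's ODBC_DATETIME_LITERALS
    # maps "d" to exp.Date, as the base parser does):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT {d '2024-01-01'}").find(exp.Date) is not None
    #     True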

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
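
    # DuckDB-style brace syntax builds an exp.Struct through the L_BRACE branch in
    # _parse_bracket above. A sketch (the rendered output is approximate):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct).sql("duckdb")
    #     "{'a': 1}"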

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the
        # opposite order (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )
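
    # The ON NULL clause parsed by _parse_on_handling above ends up as a plain string
    # in the JSONObject args. Sketch:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT JSON_OBJECT('a' VALUE 1 NULL ON NULL)")
    #     >>> ast.find(exp.JSONObject).args.get("null_handling")
    #     'NULL ON NULL'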

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())

        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
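
    # Both POSITION argument orders (and the `IN` form) collapse into one StrPosition
    # shape above, so generators only deal with haystack/needle. Sketch (illustrative
    # column name):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> pos = sqlglot.parse_one("SELECT POSITION('@' IN email)").find(exp.StrPosition)
    #     >>> pos.this.name, pos.args["substr"].sql()
    #     ('email', "'@'")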
self.validate_expression(exp.Substring.from_arg_list(args), args) 6905 6906 def _parse_trim(self) -> exp.Trim: 6907 # https://www.w3resource.com/sql/character-functions/trim.php 6908 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6909 6910 position = None 6911 collation = None 6912 expression = None 6913 6914 if self._match_texts(self.TRIM_TYPES): 6915 position = self._prev.text.upper() 6916 6917 this = self._parse_bitwise() 6918 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6919 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6920 expression = self._parse_bitwise() 6921 6922 if invert_order: 6923 this, expression = expression, this 6924 6925 if self._match(TokenType.COLLATE): 6926 collation = self._parse_bitwise() 6927 6928 return self.expression( 6929 exp.Trim, this=this, position=position, expression=expression, collation=collation 6930 ) 6931 6932 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6933 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6934 6935 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6936 return self._parse_window(self._parse_id_var(), alias=True) 6937 6938 def _parse_respect_or_ignore_nulls( 6939 self, this: t.Optional[exp.Expression] 6940 ) -> t.Optional[exp.Expression]: 6941 if self._match_text_seq("IGNORE", "NULLS"): 6942 return self.expression(exp.IgnoreNulls, this=this) 6943 if self._match_text_seq("RESPECT", "NULLS"): 6944 return self.expression(exp.RespectNulls, this=this) 6945 return this 6946 6947 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6948 if self._match(TokenType.HAVING): 6949 self._match_texts(("MAX", "MIN")) 6950 max = self._prev.text.upper() != "MIN" 6951 return self.expression( 6952 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6953 ) 6954 6955 return this 6956 6957 def _parse_window( 6958 self, this: t.Optional[exp.Expression], alias: bool = False 6959 ) -> t.Optional[exp.Expression]: 6960 func = this 6961 comments = func.comments if isinstance(func, exp.Expression) else None 6962 6963 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6964 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6965 if self._match_text_seq("WITHIN", "GROUP"): 6966 order = self._parse_wrapped(self._parse_order) 6967 this = self.expression(exp.WithinGroup, this=this, expression=order) 6968 6969 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6970 self._match(TokenType.WHERE) 6971 this = self.expression( 6972 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6973 ) 6974 self._match_r_paren() 6975 6976 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6977 # Some dialects choose to implement and some do not. 6978 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6979 6980 # There is some code above in _parse_lambda that handles 6981 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6982 6983 # The below changes handle 6984 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
6985 6986 # Oracle allows both formats 6987 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6988 # and Snowflake chose to do the same for familiarity 6989 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6990 if isinstance(this, exp.AggFunc): 6991 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6992 6993 if ignore_respect and ignore_respect is not this: 6994 ignore_respect.replace(ignore_respect.this) 6995 this = self.expression(ignore_respect.__class__, this=this) 6996 6997 this = self._parse_respect_or_ignore_nulls(this) 6998 6999 # bigquery select from window x AS (partition by ...) 7000 if alias: 7001 over = None 7002 self._match(TokenType.ALIAS) 7003 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7004 return this 7005 else: 7006 over = self._prev.text.upper() 7007 7008 if comments and isinstance(func, exp.Expression): 7009 func.pop_comments() 7010 7011 if not self._match(TokenType.L_PAREN): 7012 return self.expression( 7013 exp.Window, 7014 comments=comments, 7015 this=this, 7016 alias=self._parse_id_var(False), 7017 over=over, 7018 ) 7019 7020 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7021 7022 first = self._match(TokenType.FIRST) 7023 if self._match_text_seq("LAST"): 7024 first = False 7025 7026 partition, order = self._parse_partition_and_order() 7027 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7028 7029 if kind: 7030 self._match(TokenType.BETWEEN) 7031 start = self._parse_window_spec() 7032 self._match(TokenType.AND) 7033 end = self._parse_window_spec() 7034 exclude = ( 7035 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7036 if self._match_text_seq("EXCLUDE") 7037 else None 7038 ) 7039 7040 spec = self.expression( 7041 exp.WindowSpec, 7042 kind=kind, 7043 start=start["value"], 7044 start_side=start["side"], 7045 end=end["value"], 7046 end_side=end["side"], 7047 exclude=exclude, 7048 ) 7049 else: 7050 spec = None 7051 7052 self._match_r_paren() 7053 7054 window = self.expression( 7055 exp.Window, 7056 comments=comments, 7057 this=this, 7058 partition_by=partition, 7059 order=order, 7060 spec=spec, 7061 alias=window_alias, 7062 over=over, 7063 first=first, 7064 ) 7065 7066 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7067 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7068 return self._parse_window(window, alias=alias) 7069 7070 return window 7071 7072 def _parse_partition_and_order( 7073 self, 7074 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7075 return self._parse_partition_by(), self._parse_order() 7076 7077 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7078 self._match(TokenType.BETWEEN) 7079 7080 return { 7081 "value": ( 7082 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7083 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7084 or self._parse_bitwise() 7085 ), 7086 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7087 } 7088 7089 def _parse_alias( 7090 self, this: t.Optional[exp.Expression], explicit: bool = False 7091 ) -> t.Optional[exp.Expression]: 7092 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7093 # so this section tries to parse the clause version and if it fails, it treats the token 7094 # as an identifier (alias) 7095 if self._can_parse_limit_or_offset(): 7096 return this 7097 7098 any_token = self._match(TokenType.ALIAS) 7099 comments = self._prev_comments or [] 7100 7101 if explicit and not any_token: 7102 return this 7103 7104 if self._match(TokenType.L_PAREN): 7105 aliases = self.expression( 7106 exp.Aliases, 7107 comments=comments, 7108 this=this, 7109 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7110 ) 7111 self._match_r_paren(aliases) 7112 return aliases 7113 7114 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7115 self.STRING_ALIASES and self._parse_string_as_identifier() 7116 ) 7117 7118 if alias: 7119 comments.extend(alias.pop_comments()) 7120 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7121 column = this.this 7122 7123 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7124 if not this.comments and column and column.comments: 7125 this.comments = column.pop_comments() 7126 7127 return this 7128 7129 def _parse_id_var( 7130 self, 7131 any_token: bool = True, 7132 tokens: t.Optional[t.Collection[TokenType]] = None, 7133 ) -> t.Optional[exp.Expression]: 7134 expression = self._parse_identifier() 7135 if not expression and ( 7136 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7137 ): 7138 quoted = self._prev.token_type == TokenType.STRING 7139 expression = self._identifier_expression(quoted=quoted) 7140 7141 return expression 7142 7143 def _parse_string(self) -> t.Optional[exp.Expression]: 7144 if self._match_set(self.STRING_PARSERS): 7145 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7146 return self._parse_placeholder() 7147 7148 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7149 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7150 if output: 7151 output.update_positions(self._prev) 7152 return output 7153 7154 def _parse_number(self) -> t.Optional[exp.Expression]: 7155 if self._match_set(self.NUMERIC_PARSERS): 7156 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7157 return self._parse_placeholder() 7158 7159 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7160 if self._match(TokenType.IDENTIFIER): 7161 return self._identifier_expression(quoted=True) 7162 return self._parse_placeholder() 7163 7164 def _parse_var( 7165 self, 7166 any_token: bool = False, 7167 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7168 upper: bool = False, 7169 ) -> t.Optional[exp.Expression]: 7170 if ( 7171 (any_token and self._advance_any()) 7172 or self._match(TokenType.VAR) 7173 or (self._match_set(tokens) if tokens else False) 7174 ): 7175 return self.expression( 7176 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7177 ) 7178 return self._parse_placeholder() 7179 7180 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7181 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7182 self._advance() 7183 return self._prev 7184 return None 7185 7186 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7187 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7188 7189 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7190 return self._parse_primary() or self._parse_var(any_token=True) 7191 7192 def _parse_null(self) -> t.Optional[exp.Expression]: 7193 if self._match_set(self.NULL_TOKENS): 7194 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7195 return self._parse_placeholder() 7196 7197 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7198 if self._match(TokenType.TRUE): 7199 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7200 if self._match(TokenType.FALSE): 7201 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7202 return self._parse_placeholder() 7203 7204 def _parse_star(self) -> t.Optional[exp.Expression]: 7205 if self._match(TokenType.STAR): 7206 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7207 return self._parse_placeholder() 7208 7209 def _parse_parameter(self) -> exp.Parameter: 7210 this = self._parse_identifier() or self._parse_primary_or_var() 7211 return self.expression(exp.Parameter, this=this) 7212 7213 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7214 if self._match_set(self.PLACEHOLDER_PARSERS): 7215 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7216 if placeholder: 7217 return placeholder 7218 self._advance(-1) 7219 return None 7220 7221 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7222 if not self._match_texts(keywords): 7223 return None 7224 if self._match(TokenType.L_PAREN, advance=False): 7225 return self._parse_wrapped_csv(self._parse_expression) 7226 7227 expression = self._parse_expression() 7228 return [expression] if expression else None 7229 7230 def _parse_csv( 7231 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7232 ) -> t.List[exp.Expression]: 7233 parse_result = parse_method() 7234 items = [parse_result] if parse_result is not None else [] 7235 7236 while self._match(sep): 7237 self._add_comments(parse_result) 7238 parse_result = parse_method() 7239 if parse_result is not None: 7240 items.append(parse_result) 7241 7242 return items 7243 7244 def _parse_tokens( 7245 self, parse_method: t.Callable, expressions: t.Dict 7246 ) -> t.Optional[exp.Expression]: 7247 this = parse_method() 7248 7249 while self._match_set(expressions): 7250 this = self.expression( 7251 expressions[self._prev.token_type], 7252 this=this, 7253 comments=self._prev_comments, 7254 expression=parse_method(), 7255 ) 7256 7257 return this 7258 7259 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7260 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7261 7262 def _parse_wrapped_csv( 7263 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7264 ) -> t.List[exp.Expression]: 7265 return self._parse_wrapped( 7266 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7267 ) 7268 7269 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7270 wrapped = self._match(TokenType.L_PAREN) 7271 if not wrapped and not optional: 7272 self.raise_error("Expecting (") 7273 parse_result = parse_method() 7274 if wrapped: 7275 self._match_r_paren() 7276 return parse_result 7277 7278 def _parse_expressions(self) -> t.List[exp.Expression]: 7279 return self._parse_csv(self._parse_expression) 7280 7281 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7282 return self._parse_select() or self._parse_set_operations( 7283 self._parse_alias(self._parse_assignment(), explicit=True) 7284 if alias 7285 else self._parse_assignment() 7286 ) 7287 7288 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7289 return self._parse_query_modifiers( 7290 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7291 ) 7292 7293 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7294 this = None 7295 if self._match_texts(self.TRANSACTION_KIND): 7296 this = self._prev.text 7297 7298 self._match_texts(("TRANSACTION", "WORK")) 7299 7300 modes = [] 7301 while True: 7302 mode = [] 7303 while self._match(TokenType.VAR): 7304 mode.append(self._prev.text) 7305 7306 if mode: 7307 modes.append(" ".join(mode)) 7308 if not self._match(TokenType.COMMA): 7309 break 7310 7311 return self.expression(exp.Transaction, this=this, modes=modes) 7312 7313 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7314 chain = None 7315 savepoint = None 7316 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7317 7318 self._match_texts(("TRANSACTION", "WORK")) 7319 7320 if self._match_text_seq("TO"): 7321 self._match_text_seq("SAVEPOINT") 7322 savepoint = self._parse_id_var() 7323 7324 if self._match(TokenType.AND): 7325 chain = not self._match_text_seq("NO") 7326 self._match_text_seq("CHAIN") 7327 7328 if is_rollback: 7329 return self.expression(exp.Rollback, savepoint=savepoint) 7330 7331 return self.expression(exp.Commit, chain=chain) 7332 7333 def _parse_refresh(self) -> exp.Refresh: 7334 self._match(TokenType.TABLE) 7335 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7336 7337 def _parse_column_def_with_exists(self): 7338 start = self._index 7339 self._match(TokenType.COLUMN) 7340 7341 exists_column = self._parse_exists(not_=True) 7342 expression = self._parse_field_def() 7343 7344 if not isinstance(expression, exp.ColumnDef): 7345 self._retreat(start) 7346 return None 7347 7348 expression.set("exists", exists_column) 7349 7350 return expression 7351 7352 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7353 if not self._prev.text.upper() == "ADD": 7354 return None 7355 7356 expression = self._parse_column_def_with_exists() 7357 if not expression: 7358 return None 7359 7360 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7361 if self._match_texts(("FIRST", "AFTER")): 7362 position = self._prev.text 7363 column_position = self.expression( 7364 exp.ColumnPosition, this=self._parse_column(), position=position 7365 ) 7366 expression.set("position", column_position) 7367 7368 return expression 7369 7370 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7371 drop = 
self._match(TokenType.DROP) and self._parse_drop() 7372 if drop and not isinstance(drop, exp.Command): 7373 drop.set("kind", drop.args.get("kind", "COLUMN")) 7374 return drop 7375 7376 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7377 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7378 return self.expression( 7379 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7380 ) 7381 7382 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7383 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7384 self._match_text_seq("ADD") 7385 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7386 return self.expression( 7387 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7388 ) 7389 7390 column_def = self._parse_add_column() 7391 if isinstance(column_def, exp.ColumnDef): 7392 return column_def 7393 7394 exists = self._parse_exists(not_=True) 7395 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7396 return self.expression( 7397 exp.AddPartition, 7398 exists=exists, 7399 this=self._parse_field(any_token=True), 7400 location=self._match_text_seq("LOCATION", advance=False) 7401 and self._parse_property(), 7402 ) 7403 7404 return None 7405 7406 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7407 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7408 or self._match_text_seq("COLUMNS") 7409 ): 7410 schema = self._parse_schema() 7411 7412 return ( 7413 ensure_list(schema) 7414 if schema 7415 else self._parse_csv(self._parse_column_def_with_exists) 7416 ) 7417 7418 return self._parse_csv(_parse_add_alteration) 7419 7420 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7421 if self._match_texts(self.ALTER_ALTER_PARSERS): 7422 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7423 7424 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7425 # keyword after ALTER we default to parsing this statement 7426 self._match(TokenType.COLUMN) 7427 column = self._parse_field(any_token=True) 7428 7429 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7430 return self.expression(exp.AlterColumn, this=column, drop=True) 7431 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7432 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7433 if self._match(TokenType.COMMENT): 7434 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7435 if self._match_text_seq("DROP", "NOT", "NULL"): 7436 return self.expression( 7437 exp.AlterColumn, 7438 this=column, 7439 drop=True, 7440 allow_null=True, 7441 ) 7442 if self._match_text_seq("SET", "NOT", "NULL"): 7443 return self.expression( 7444 exp.AlterColumn, 7445 this=column, 7446 allow_null=False, 7447 ) 7448 7449 if self._match_text_seq("SET", "VISIBLE"): 7450 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7451 if self._match_text_seq("SET", "INVISIBLE"): 7452 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7453 7454 self._match_text_seq("SET", "DATA") 7455 self._match_text_seq("TYPE") 7456 return self.expression( 7457 exp.AlterColumn, 7458 this=column, 7459 dtype=self._parse_types(), 7460 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7461 using=self._match(TokenType.USING) and self._parse_assignment(), 7462 ) 7463 7464 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7465 if 
self._match_texts(("ALL", "EVEN", "AUTO")): 7466 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7467 7468 self._match_text_seq("KEY", "DISTKEY") 7469 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7470 7471 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7472 if compound: 7473 self._match_text_seq("SORTKEY") 7474 7475 if self._match(TokenType.L_PAREN, advance=False): 7476 return self.expression( 7477 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7478 ) 7479 7480 self._match_texts(("AUTO", "NONE")) 7481 return self.expression( 7482 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7483 ) 7484 7485 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7486 index = self._index - 1 7487 7488 partition_exists = self._parse_exists() 7489 if self._match(TokenType.PARTITION, advance=False): 7490 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7491 7492 self._retreat(index) 7493 return self._parse_csv(self._parse_drop_column) 7494 7495 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7496 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7497 exists = self._parse_exists() 7498 old_column = self._parse_column() 7499 to = self._match_text_seq("TO") 7500 new_column = self._parse_column() 7501 7502 if old_column is None or to is None or new_column is None: 7503 return None 7504 7505 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7506 7507 self._match_text_seq("TO") 7508 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7509 7510 def _parse_alter_table_set(self) -> exp.AlterSet: 7511 alter_set = self.expression(exp.AlterSet) 7512 7513 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7514 "TABLE", "PROPERTIES" 7515 ): 7516 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7517 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7518 alter_set.set("expressions", [self._parse_assignment()]) 7519 elif self._match_texts(("LOGGED", "UNLOGGED")): 7520 alter_set.set("option", exp.var(self._prev.text.upper())) 7521 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7522 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7523 elif self._match_text_seq("LOCATION"): 7524 alter_set.set("location", self._parse_field()) 7525 elif self._match_text_seq("ACCESS", "METHOD"): 7526 alter_set.set("access_method", self._parse_field()) 7527 elif self._match_text_seq("TABLESPACE"): 7528 alter_set.set("tablespace", self._parse_field()) 7529 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7530 alter_set.set("file_format", [self._parse_field()]) 7531 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7532 alter_set.set("file_format", self._parse_wrapped_options()) 7533 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7534 alter_set.set("copy_options", self._parse_wrapped_options()) 7535 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7536 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7537 else: 7538 if self._match_text_seq("SERDE"): 7539 alter_set.set("serde", self._parse_field()) 7540 7541 properties = self._parse_wrapped(self._parse_properties, optional=True) 7542 alter_set.set("expressions", [properties]) 7543 7544 return alter_set 
7545 7546 def _parse_alter(self) -> exp.Alter | exp.Command: 7547 start = self._prev 7548 7549 alter_token = self._match_set(self.ALTERABLES) and self._prev 7550 if not alter_token: 7551 return self._parse_as_command(start) 7552 7553 exists = self._parse_exists() 7554 only = self._match_text_seq("ONLY") 7555 this = self._parse_table(schema=True) 7556 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7557 7558 if self._next: 7559 self._advance() 7560 7561 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7562 if parser: 7563 actions = ensure_list(parser(self)) 7564 not_valid = self._match_text_seq("NOT", "VALID") 7565 options = self._parse_csv(self._parse_property) 7566 7567 if not self._curr and actions: 7568 return self.expression( 7569 exp.Alter, 7570 this=this, 7571 kind=alter_token.text.upper(), 7572 exists=exists, 7573 actions=actions, 7574 only=only, 7575 options=options, 7576 cluster=cluster, 7577 not_valid=not_valid, 7578 ) 7579 7580 return self._parse_as_command(start) 7581 7582 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7583 start = self._prev 7584 # https://duckdb.org/docs/sql/statements/analyze 7585 if not self._curr: 7586 return self.expression(exp.Analyze) 7587 7588 options = [] 7589 while self._match_texts(self.ANALYZE_STYLES): 7590 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7591 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7592 else: 7593 options.append(self._prev.text.upper()) 7594 7595 this: t.Optional[exp.Expression] = None 7596 inner_expression: t.Optional[exp.Expression] = None 7597 7598 kind = self._curr and self._curr.text.upper() 7599 7600 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7601 this = self._parse_table_parts() 7602 elif self._match_text_seq("TABLES"): 7603 if self._match_set((TokenType.FROM, TokenType.IN)): 7604 kind = f"{kind} {self._prev.text.upper()}" 7605 this = self._parse_table(schema=True, is_db_reference=True) 7606 elif self._match_text_seq("DATABASE"): 7607 this = self._parse_table(schema=True, is_db_reference=True) 7608 elif self._match_text_seq("CLUSTER"): 7609 this = self._parse_table() 7610 # Try matching inner expr keywords before fallback to parse table. 
7611 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7612 kind = None 7613 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7614 else: 7615 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7616 kind = None 7617 this = self._parse_table_parts() 7618 7619 partition = self._try_parse(self._parse_partition) 7620 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7621 return self._parse_as_command(start) 7622 7623 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7624 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7625 "WITH", "ASYNC", "MODE" 7626 ): 7627 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7628 else: 7629 mode = None 7630 7631 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7632 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7633 7634 properties = self._parse_properties() 7635 return self.expression( 7636 exp.Analyze, 7637 kind=kind, 7638 this=this, 7639 mode=mode, 7640 partition=partition, 7641 properties=properties, 7642 expression=inner_expression, 7643 options=options, 7644 ) 7645 7646 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7647 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7648 this = None 7649 kind = self._prev.text.upper() 7650 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7651 expressions = [] 7652 7653 if not self._match_text_seq("STATISTICS"): 7654 self.raise_error("Expecting token STATISTICS") 7655 7656 if self._match_text_seq("NOSCAN"): 7657 this = "NOSCAN" 7658 elif self._match(TokenType.FOR): 7659 if self._match_text_seq("ALL", "COLUMNS"): 7660 this = "FOR ALL COLUMNS" 7661 if self._match_text_seq("COLUMNS"): 7662 this = "FOR COLUMNS" 7663 expressions = self._parse_csv(self._parse_column_reference) 7664 elif self._match_text_seq("SAMPLE"): 7665 sample = self._parse_number() 7666 expressions = [ 7667 self.expression( 7668 exp.AnalyzeSample, 7669 sample=sample, 7670 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7671 ) 7672 ] 7673 7674 return self.expression( 7675 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7676 ) 7677 7678 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7679 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7680 kind = None 7681 this = None 7682 expression: t.Optional[exp.Expression] = None 7683 if self._match_text_seq("REF", "UPDATE"): 7684 kind = "REF" 7685 this = "UPDATE" 7686 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7687 this = "UPDATE SET DANGLING TO NULL" 7688 elif self._match_text_seq("STRUCTURE"): 7689 kind = "STRUCTURE" 7690 if self._match_text_seq("CASCADE", "FAST"): 7691 this = "CASCADE FAST" 7692 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7693 ("ONLINE", "OFFLINE") 7694 ): 7695 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7696 expression = self._parse_into() 7697 7698 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7699 7700 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7701 this = self._prev.text.upper() 7702 if self._match_text_seq("COLUMNS"): 7703 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7704 return None 7705 7706 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7707 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7708 if self._match_text_seq("STATISTICS"): 7709 return self.expression(exp.AnalyzeDelete, kind=kind) 7710 return None 7711 7712 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7713 if self._match_text_seq("CHAINED", "ROWS"): 7714 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7715 return None 7716 7717 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7718 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7719 this = self._prev.text.upper() 7720 expression: t.Optional[exp.Expression] = None 7721 expressions = [] 7722 update_options = None 7723 7724 if self._match_text_seq("HISTOGRAM", "ON"): 7725 expressions = self._parse_csv(self._parse_column_reference) 7726 with_expressions = [] 7727 while self._match(TokenType.WITH): 7728 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7729 if self._match_texts(("SYNC", "ASYNC")): 7730 if self._match_text_seq("MODE", advance=False): 7731 with_expressions.append(f"{self._prev.text.upper()} MODE") 7732 self._advance() 7733 else: 7734 buckets = self._parse_number() 7735 if self._match_text_seq("BUCKETS"): 7736 with_expressions.append(f"{buckets} BUCKETS") 7737 if with_expressions: 7738 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7739 7740 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7741 TokenType.UPDATE, advance=False 7742 ): 7743 update_options = self._prev.text.upper() 7744 self._advance() 7745 elif self._match_text_seq("USING", "DATA"): 7746 expression = self.expression(exp.UsingData, this=self._parse_string()) 7747 7748 return self.expression( 7749 exp.AnalyzeHistogram, 7750 this=this, 7751 expressions=expressions, 7752 expression=expression, 7753 update_options=update_options, 7754 ) 7755 7756 def _parse_merge(self) -> exp.Merge: 7757 self._match(TokenType.INTO) 7758 target = self._parse_table() 7759 7760 if target and self._match(TokenType.ALIAS, advance=False): 7761 target.set("alias", self._parse_table_alias()) 7762 7763 self._match(TokenType.USING) 7764 using = self._parse_table() 7765 7766 self._match(TokenType.ON) 7767 on = self._parse_assignment() 7768 7769 return self.expression( 7770 exp.Merge, 7771 this=target, 7772 using=using, 7773 on=on, 7774 whens=self._parse_when_matched(), 7775 returning=self._parse_returning(), 7776 ) 7777 7778 def _parse_when_matched(self) -> exp.Whens: 7779 whens = [] 7780 7781 while self._match(TokenType.WHEN): 7782 matched = not self._match(TokenType.NOT) 7783 self._match_text_seq("MATCHED") 7784 source = ( 7785 False 7786 if self._match_text_seq("BY", "TARGET") 7787 else self._match_text_seq("BY", "SOURCE") 7788 ) 7789 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7790 7791 self._match(TokenType.THEN) 7792 7793 if self._match(TokenType.INSERT): 7794 this = self._parse_star() 7795 if this: 7796 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7797 else: 7798 then = self.expression( 7799 exp.Insert, 7800 this=exp.var("ROW") 7801 if self._match_text_seq("ROW") 7802 else self._parse_value(values=False), 7803 expression=self._match_text_seq("VALUES") and self._parse_value(), 7804 ) 7805 elif self._match(TokenType.UPDATE): 7806 expressions = self._parse_star() 7807 if expressions: 7808 then = self.expression(exp.Update, expressions=expressions) 7809 else: 7810 then = self.expression( 7811 exp.Update, 7812 
expressions=self._match(TokenType.SET) 7813 and self._parse_csv(self._parse_equality), 7814 ) 7815 elif self._match(TokenType.DELETE): 7816 then = self.expression(exp.Var, this=self._prev.text) 7817 else: 7818 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7819 7820 whens.append( 7821 self.expression( 7822 exp.When, 7823 matched=matched, 7824 source=source, 7825 condition=condition, 7826 then=then, 7827 ) 7828 ) 7829 return self.expression(exp.Whens, expressions=whens) 7830 7831 def _parse_show(self) -> t.Optional[exp.Expression]: 7832 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7833 if parser: 7834 return parser(self) 7835 return self._parse_as_command(self._prev) 7836 7837 def _parse_set_item_assignment( 7838 self, kind: t.Optional[str] = None 7839 ) -> t.Optional[exp.Expression]: 7840 index = self._index 7841 7842 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7843 return self._parse_set_transaction(global_=kind == "GLOBAL") 7844 7845 left = self._parse_primary() or self._parse_column() 7846 assignment_delimiter = self._match_texts(("=", "TO")) 7847 7848 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7849 self._retreat(index) 7850 return None 7851 7852 right = self._parse_statement() or self._parse_id_var() 7853 if isinstance(right, (exp.Column, exp.Identifier)): 7854 right = exp.var(right.name) 7855 7856 this = self.expression(exp.EQ, this=left, expression=right) 7857 return self.expression(exp.SetItem, this=this, kind=kind) 7858 7859 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7860 self._match_text_seq("TRANSACTION") 7861 characteristics = self._parse_csv( 7862 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7863 ) 7864 return self.expression( 7865 exp.SetItem, 7866 expressions=characteristics, 7867 kind="TRANSACTION", 7868 **{"global": global_}, # type: ignore 7869 ) 7870 7871 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7872 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7873 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7874 7875 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7876 index = self._index 7877 set_ = self.expression( 7878 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7879 ) 7880 7881 if self._curr: 7882 self._retreat(index) 7883 return self._parse_as_command(self._prev) 7884 7885 return set_ 7886 7887 def _parse_var_from_options( 7888 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7889 ) -> t.Optional[exp.Var]: 7890 start = self._curr 7891 if not start: 7892 return None 7893 7894 option = start.text.upper() 7895 continuations = options.get(option) 7896 7897 index = self._index 7898 self._advance() 7899 for keywords in continuations or []: 7900 if isinstance(keywords, str): 7901 keywords = (keywords,) 7902 7903 if self._match_text_seq(*keywords): 7904 option = f"{option} {' '.join(keywords)}" 7905 break 7906 else: 7907 if continuations or continuations is None: 7908 if raise_unmatched: 7909 self.raise_error(f"Unknown option {option}") 7910 7911 self._retreat(index) 7912 return None 7913 7914 return exp.var(option) 7915 7916 def _parse_as_command(self, start: Token) -> exp.Command: 7917 while self._curr: 7918 self._advance() 7919 text = self._find_sql(start, self._prev) 7920 size = len(start.text) 7921 self._warn_unsupported() 7922 return exp.Command(this=text[:size], 
expression=text[size:]) 7923 7924 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7925 settings = [] 7926 7927 self._match_l_paren() 7928 kind = self._parse_id_var() 7929 7930 if self._match(TokenType.L_PAREN): 7931 while True: 7932 key = self._parse_id_var() 7933 value = self._parse_primary() 7934 if not key and value is None: 7935 break 7936 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7937 self._match(TokenType.R_PAREN) 7938 7939 self._match_r_paren() 7940 7941 return self.expression( 7942 exp.DictProperty, 7943 this=this, 7944 kind=kind.this if kind else None, 7945 settings=settings, 7946 ) 7947 7948 def _parse_dict_range(self, this: str) -> exp.DictRange: 7949 self._match_l_paren() 7950 has_min = self._match_text_seq("MIN") 7951 if has_min: 7952 min = self._parse_var() or self._parse_primary() 7953 self._match_text_seq("MAX") 7954 max = self._parse_var() or self._parse_primary() 7955 else: 7956 max = self._parse_var() or self._parse_primary() 7957 min = exp.Literal.number(0) 7958 self._match_r_paren() 7959 return self.expression(exp.DictRange, this=this, min=min, max=max) 7960 7961 def _parse_comprehension( 7962 self, this: t.Optional[exp.Expression] 7963 ) -> t.Optional[exp.Comprehension]: 7964 index = self._index 7965 expression = self._parse_column() 7966 if not self._match(TokenType.IN): 7967 self._retreat(index - 1) 7968 return None 7969 iterator = self._parse_column() 7970 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7971 return self.expression( 7972 exp.Comprehension, 7973 this=this, 7974 expression=expression, 7975 iterator=iterator, 7976 condition=condition, 7977 ) 7978 7979 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7980 if self._match(TokenType.HEREDOC_STRING): 7981 return self.expression(exp.Heredoc, this=self._prev.text) 7982 7983 if not self._match_text_seq("$"): 7984 return None 7985 7986 tags = ["$"] 7987 tag_text = None 7988 7989 if self._is_connected(): 7990 self._advance() 7991 tags.append(self._prev.text.upper()) 7992 else: 7993 self.raise_error("No closing $ found") 7994 7995 if tags[-1] != "$": 7996 if self._is_connected() and self._match_text_seq("$"): 7997 tag_text = tags[-1] 7998 tags.append("$") 7999 else: 8000 self.raise_error("No closing $ found") 8001 8002 heredoc_start = self._curr 8003 8004 while self._curr: 8005 if self._match_text_seq(*tags, advance=False): 8006 this = self._find_sql(heredoc_start, self._prev) 8007 self._advance(len(tags)) 8008 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8009 8010 self._advance() 8011 8012 self.raise_error(f"No closing {''.join(tags)} found") 8013 return None 8014 8015 def _find_parser( 8016 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8017 ) -> t.Optional[t.Callable]: 8018 if not self._curr: 8019 return None 8020 8021 index = self._index 8022 this = [] 8023 while True: 8024 # The current token might be multiple words 8025 curr = self._curr.text.upper() 8026 key = curr.split(" ") 8027 this.append(curr) 8028 8029 self._advance() 8030 result, trie = in_trie(trie, key) 8031 if result == TrieResult.FAILED: 8032 break 8033 8034 if result == TrieResult.EXISTS: 8035 subparser = parsers[" ".join(this)] 8036 return subparser 8037 8038 self._retreat(index) 8039 return None 8040 8041 def _match(self, token_type, advance=True, expression=None): 8042 if not self._curr: 8043 return None 8044 8045 if self._curr.token_type == token_type: 8046 if advance: 8047 self._advance() 8048 self._add_comments(expression) 8049 return 
True 8050 8051 return None 8052 8053 def _match_set(self, types, advance=True): 8054 if not self._curr: 8055 return None 8056 8057 if self._curr.token_type in types: 8058 if advance: 8059 self._advance() 8060 return True 8061 8062 return None 8063 8064 def _match_pair(self, token_type_a, token_type_b, advance=True): 8065 if not self._curr or not self._next: 8066 return None 8067 8068 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8069 if advance: 8070 self._advance(2) 8071 return True 8072 8073 return None 8074 8075 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8076 if not self._match(TokenType.L_PAREN, expression=expression): 8077 self.raise_error("Expecting (") 8078 8079 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8080 if not self._match(TokenType.R_PAREN, expression=expression): 8081 self.raise_error("Expecting )") 8082 8083 def _match_texts(self, texts, advance=True): 8084 if ( 8085 self._curr 8086 and self._curr.token_type != TokenType.STRING 8087 and self._curr.text.upper() in texts 8088 ): 8089 if advance: 8090 self._advance() 8091 return True 8092 return None 8093 8094 def _match_text_seq(self, *texts, advance=True): 8095 index = self._index 8096 for text in texts: 8097 if ( 8098 self._curr 8099 and self._curr.token_type != TokenType.STRING 8100 and self._curr.text.upper() == text 8101 ): 8102 self._advance() 8103 else: 8104 self._retreat(index) 8105 return None 8106 8107 if not advance: 8108 self._retreat(index) 8109 8110 return True 8111 8112 def _replace_lambda( 8113 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8114 ) -> t.Optional[exp.Expression]: 8115 if not node: 8116 return node 8117 8118 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8119 8120 for column in node.find_all(exp.Column): 8121 typ = lambda_types.get(column.parts[0].name) 8122 if typ is not None: 8123 dot_or_id = column.to_dot() if column.table else column.this 8124 8125 if typ: 8126 dot_or_id = self.expression( 8127 exp.Cast, 8128 this=dot_or_id, 8129 to=typ, 8130 ) 8131 8132 parent = column.parent 8133 8134 while isinstance(parent, exp.Dot): 8135 if not isinstance(parent.parent, exp.Dot): 8136 parent.replace(dot_or_id) 8137 break 8138 parent = parent.parent 8139 else: 8140 if column is node: 8141 node = dot_or_id 8142 else: 8143 column.replace(dot_or_id) 8144 return node 8145 8146 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8147 start = self._prev 8148 8149 # Not to be confused with TRUNCATE(number, decimals) function call 8150 if self._match(TokenType.L_PAREN): 8151 self._retreat(self._index - 2) 8152 return self._parse_function() 8153 8154 # Clickhouse supports TRUNCATE DATABASE as well 8155 is_database = self._match(TokenType.DATABASE) 8156 8157 self._match(TokenType.TABLE) 8158 8159 exists = self._parse_exists(not_=False) 8160 8161 expressions = self._parse_csv( 8162 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8163 ) 8164 8165 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8166 8167 if self._match_text_seq("RESTART", "IDENTITY"): 8168 identity = "RESTART" 8169 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8170 identity = "CONTINUE" 8171 else: 8172 identity = None 8173 8174 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8175 option = self._prev.text 8176 else: 8177 option = None 8178 8179 partition = self._parse_partition() 
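    # Illustrative example (assumed input): for "TRUNCATE TABLE t1, t2 RESTART
    # IDENTITY CASCADE", by this point expressions holds both tables, identity is
    # "RESTART", option is "CASCADE" and no tokens remain, so the fallback below
    # is not taken and an exp.TruncateTable node is returned.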
8180 8181 # Fallback case 8182 if self._curr: 8183 return self._parse_as_command(start) 8184 8185 return self.expression( 8186 exp.TruncateTable, 8187 expressions=expressions, 8188 is_database=is_database, 8189 exists=exists, 8190 cluster=cluster, 8191 identity=identity, 8192 option=option, 8193 partition=partition, 8194 ) 8195 8196 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8197 this = self._parse_ordered(self._parse_opclass) 8198 8199 if not self._match(TokenType.WITH): 8200 return this 8201 8202 op = self._parse_var(any_token=True) 8203 8204 return self.expression(exp.WithOperator, this=this, op=op) 8205 8206 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8207 self._match(TokenType.EQ) 8208 self._match(TokenType.L_PAREN) 8209 8210 opts: t.List[t.Optional[exp.Expression]] = [] 8211 option: exp.Expression | None 8212 while self._curr and not self._match(TokenType.R_PAREN): 8213 if self._match_text_seq("FORMAT_NAME", "="): 8214 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8215 option = self._parse_format_name() 8216 else: 8217 option = self._parse_property() 8218 8219 if option is None: 8220 self.raise_error("Unable to parse option") 8221 break 8222 8223 opts.append(option) 8224 8225 return opts 8226 8227 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8228 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8229 8230 options = [] 8231 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8232 option = self._parse_var(any_token=True) 8233 prev = self._prev.text.upper() 8234 8235 # Different dialects might separate options and values by white space, "=" and "AS" 8236 self._match(TokenType.EQ) 8237 self._match(TokenType.ALIAS) 8238 8239 param = self.expression(exp.CopyParameter, this=option) 8240 8241 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8242 TokenType.L_PAREN, advance=False 8243 ): 8244 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8245 param.set("expressions", self._parse_wrapped_options()) 8246 elif prev == "FILE_FORMAT": 8247 # T-SQL's external file format case 8248 param.set("expression", self._parse_field()) 8249 else: 8250 param.set("expression", self._parse_unquoted_field()) 8251 8252 options.append(param) 8253 self._match(sep) 8254 8255 return options 8256 8257 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8258 expr = self.expression(exp.Credentials) 8259 8260 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8261 expr.set("storage", self._parse_field()) 8262 if self._match_text_seq("CREDENTIALS"): 8263 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8264 creds = ( 8265 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8266 ) 8267 expr.set("credentials", creds) 8268 if self._match_text_seq("ENCRYPTION"): 8269 expr.set("encryption", self._parse_wrapped_options()) 8270 if self._match_text_seq("IAM_ROLE"): 8271 expr.set("iam_role", self._parse_field()) 8272 if self._match_text_seq("REGION"): 8273 expr.set("region", self._parse_field()) 8274 8275 return expr 8276 8277 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8278 return self._parse_field() 8279 8280 def _parse_copy(self) -> exp.Copy | exp.Command: 8281 start = self._prev 8282 8283 self._match(TokenType.INTO) 8284 8285 this = ( 8286 self._parse_select(nested=True, parse_subquery_alias=False) 8287 if self._match(TokenType.L_PAREN, advance=False) 8288 else self._parse_table(schema=True) 
8289 ) 8290 8291 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8292 8293 files = self._parse_csv(self._parse_file_location) 8294 credentials = self._parse_credentials() 8295 8296 self._match_text_seq("WITH") 8297 8298 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8299 8300 # Fallback case 8301 if self._curr: 8302 return self._parse_as_command(start) 8303 8304 return self.expression( 8305 exp.Copy, 8306 this=this, 8307 kind=kind, 8308 credentials=credentials, 8309 files=files, 8310 params=params, 8311 ) 8312 8313 def _parse_normalize(self) -> exp.Normalize: 8314 return self.expression( 8315 exp.Normalize, 8316 this=self._parse_bitwise(), 8317 form=self._match(TokenType.COMMA) and self._parse_var(), 8318 ) 8319 8320 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8321 args = self._parse_csv(lambda: self._parse_lambda()) 8322 8323 this = seq_get(args, 0) 8324 decimals = seq_get(args, 1) 8325 8326 return expr_type( 8327 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8328 ) 8329 8330 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8331 star_token = self._prev 8332 8333 if self._match_text_seq("COLUMNS", "(", advance=False): 8334 this = self._parse_function() 8335 if isinstance(this, exp.Columns): 8336 this.set("unpack", True) 8337 return this 8338 8339 return self.expression( 8340 exp.Star, 8341 **{ # type: ignore 8342 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8343 "replace": self._parse_star_op("REPLACE"), 8344 "rename": self._parse_star_op("RENAME"), 8345 }, 8346 ).update_positions(star_token) 8347 8348 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8349 privilege_parts = [] 8350 8351 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8352 # (end of privilege list) or L_PAREN (start of column list) are met 8353 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8354 privilege_parts.append(self._curr.text.upper()) 8355 self._advance() 8356 8357 this = exp.var(" ".join(privilege_parts)) 8358 expressions = ( 8359 self._parse_wrapped_csv(self._parse_column) 8360 if self._match(TokenType.L_PAREN, advance=False) 8361 else None 8362 ) 8363 8364 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8365 8366 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8367 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8368 principal = self._parse_id_var() 8369 8370 if not principal: 8371 return None 8372 8373 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8374 8375 def _parse_grant(self) -> exp.Grant | exp.Command: 8376 start = self._prev 8377 8378 privileges = self._parse_csv(self._parse_grant_privilege) 8379 8380 self._match(TokenType.ON) 8381 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8382 8383 # Attempt to parse the securable e.g. 
MySQL allows names 8384 # such as "foo.*", "*.*" which are not easily parseable yet 8385 securable = self._try_parse(self._parse_table_parts) 8386 8387 if not securable or not self._match_text_seq("TO"): 8388 return self._parse_as_command(start) 8389 8390 principals = self._parse_csv(self._parse_grant_principal) 8391 8392 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8393 8394 if self._curr: 8395 return self._parse_as_command(start) 8396 8397 return self.expression( 8398 exp.Grant, 8399 privileges=privileges, 8400 kind=kind, 8401 securable=securable, 8402 principals=principals, 8403 grant_option=grant_option, 8404 ) 8405 8406 def _parse_overlay(self) -> exp.Overlay: 8407 return self.expression( 8408 exp.Overlay, 8409 **{ # type: ignore 8410 "this": self._parse_bitwise(), 8411 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8412 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8413 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8414 }, 8415 ) 8416 8417 def _parse_format_name(self) -> exp.Property: 8418 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8419 # for FILE_FORMAT = <format_name> 8420 return self.expression( 8421 exp.Property, 8422 this=exp.var("FORMAT_NAME"), 8423 value=self._parse_string() or self._parse_table_parts(), 8424 ) 8425 8426 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8427 args: t.List[exp.Expression] = [] 8428 8429 if self._match(TokenType.DISTINCT): 8430 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8431 self._match(TokenType.COMMA) 8432 8433 args.extend(self._parse_csv(self._parse_assignment)) 8434 8435 return self.expression( 8436 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8437 ) 8438 8439 def _identifier_expression( 8440 self, token: t.Optional[Token] = None, **kwargs: t.Any 8441 ) -> exp.Identifier: 8442 token = token or self._prev 8443 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8444 expression.update_positions(token) 8445 return expression 8446 8447 def _build_pipe_cte( 8448 self, 8449 query: exp.Query, 8450 expressions: t.List[exp.Expression], 8451 alias_cte: t.Optional[exp.TableAlias] = None, 8452 ) -> exp.Select: 8453 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8454 if alias_cte: 8455 new_cte = alias_cte 8456 else: 8457 self._pipe_cte_counter += 1 8458 new_cte = f"__tmp{self._pipe_cte_counter}" 8459 8460 with_ = query.args.get("with") 8461 ctes = with_.pop() if with_ else None 8462 8463 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8464 if ctes: 8465 new_select.set("with", ctes) 8466 8467 return new_select.with_(new_cte, as_=query, copy=False) 8468 8469 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8470 select = self._parse_select(consume_pipe=False) 8471 if not select: 8472 return query 8473 8474 return self._build_pipe_cte( 8475 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8476 ) 8477 8478 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8479 limit = self._parse_limit() 8480 offset = self._parse_offset() 8481 if limit: 8482 curr_limit = query.args.get("limit", limit) 8483 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8484 query.limit(limit, copy=False) 8485 if offset: 8486 curr_offset = query.args.get("offset") 8487 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
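    # Worked example (illustrative values): given an earlier "|> LIMIT 10 OFFSET 5"
    # and a current "|> LIMIT 3 OFFSET 2", curr_offset is 5 and the offset literal
    # built below becomes 5 + 2 = 7, while the comparison above keeps the smaller
    # limit (3).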
8488 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8489 8490 return query 8491 8492 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8493 this = self._parse_assignment() 8494 if self._match_text_seq("GROUP", "AND", advance=False): 8495 return this 8496 8497 this = self._parse_alias(this) 8498 8499 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8500 return self._parse_ordered(lambda: this) 8501 8502 return this 8503 8504 def _parse_pipe_syntax_aggregate_group_order_by( 8505 self, query: exp.Select, group_by_exists: bool = True 8506 ) -> exp.Select: 8507 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8508 aggregates_or_groups, orders = [], [] 8509 for element in expr: 8510 if isinstance(element, exp.Ordered): 8511 this = element.this 8512 if isinstance(this, exp.Alias): 8513 element.set("this", this.args["alias"]) 8514 orders.append(element) 8515 else: 8516 this = element 8517 aggregates_or_groups.append(this) 8518 8519 if group_by_exists: 8520 query.select(*aggregates_or_groups, copy=False).group_by( 8521 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8522 copy=False, 8523 ) 8524 else: 8525 query.select(*aggregates_or_groups, append=False, copy=False) 8526 8527 if orders: 8528 return query.order_by(*orders, append=False, copy=False) 8529 8530 return query 8531 8532 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8533 self._match_text_seq("AGGREGATE") 8534 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8535 8536 if self._match(TokenType.GROUP_BY) or ( 8537 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8538 ): 8539 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8540 8541 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8542 8543 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8544 first_setop = self.parse_set_operation(this=query) 8545 if not first_setop: 8546 return None 8547 8548 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8549 expr = self._parse_paren() 8550 return expr.assert_is(exp.Subquery).unnest() if expr else None 8551 8552 first_setop.this.pop() 8553 8554 setops = [ 8555 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8556 *self._parse_csv(_parse_and_unwrap_query), 8557 ] 8558 8559 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8560 with_ = query.args.get("with") 8561 ctes = with_.pop() if with_ else None 8562 8563 if isinstance(first_setop, exp.Union): 8564 query = query.union(*setops, copy=False, **first_setop.args) 8565 elif isinstance(first_setop, exp.Except): 8566 query = query.except_(*setops, copy=False, **first_setop.args) 8567 else: 8568 query = query.intersect(*setops, copy=False, **first_setop.args) 8569 8570 query.set("with", ctes) 8571 8572 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8573 8574 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8575 join = self._parse_join() 8576 if not join: 8577 return None 8578 8579 if isinstance(query, exp.Select): 8580 return query.join(join, copy=False) 8581 8582 return query 8583 8584 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8585 pivots = self._parse_pivots() 8586 if not pivots: 8587 return query 8588 8589 from_ = query.args.get("from") 8590 if from_: 8591 from_.this.set("pivots", pivots) 8592 8593 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8594 8595 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8596 self._match_text_seq("EXTEND") 8597 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8598 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8599 8600 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8601 sample = self._parse_table_sample() 8602 8603 with_ = query.args.get("with") 8604 if with_: 8605 with_.expressions[-1].this.set("sample", sample) 8606 else: 8607 query.set("sample", sample) 8608 8609 return query 8610 8611 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8612 if isinstance(query, exp.Subquery): 8613 query = exp.select("*").from_(query, copy=False) 8614 8615 if not query.args.get("from"): 8616 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8617 8618 while self._match(TokenType.PIPE_GT): 8619 start = self._curr 8620 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8621 if not parser: 8622 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8623 # keywords, making it tricky to disambiguate them without lookahead. The approach 8624 # here is to try and parse a set operation and if that fails, then try to parse a 8625 # join operator. If that fails as well, then the operator is not supported. 8626 parsed_query = self._parse_pipe_syntax_set_operator(query) 8627 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8628 if not parsed_query: 8629 self._retreat(start) 8630 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8631 break 8632 query = parsed_query 8633 else: 8634 query = parser(self, query) 8635 8636 return query 8637 8638 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8639 vars = self._parse_csv(self._parse_id_var) 8640 if not vars: 8641 return None 8642 8643 return self.expression( 8644 exp.DeclareItem, 8645 this=vars, 8646 kind=self._parse_types(), 8647 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8648 ) 8649 8650 def _parse_declare(self) -> exp.Declare | exp.Command: 8651 start = self._prev 8652 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8653 8654 if not expressions or self._curr: 8655 return self._parse_as_command(start) 8656 8657 return self.expression(exp.Declare, expressions=expressions) 8658 8659 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8660 exp_class = exp.Cast if strict else exp.TryCast 8661 8662 if exp_class == exp.TryCast: 8663 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8664 8665 return self.expression(exp_class, **kwargs)
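For a sense of how the pipe-syntax parsers above compose end to end, here is a minimal sketch via the public API (it assumes a recent sqlglot build whose BigQuery dialect tokenizes |> as PIPE_GT; the table and column names are illustrative):

import sqlglot

# Each |> operator is dispatched through PIPE_SYNTAX_TRANSFORM_PARSERS, and the
# intermediate results are stacked as CTEs (__tmp1, __tmp2, ...) by _build_pipe_cte.
sql = (
    "FROM orders "
    "|> WHERE amount > 10 "
    "|> AGGREGATE SUM(amount) AS total GROUP BY region "
    "|> LIMIT 5"
)
ast = sqlglot.parse_one(sql, dialect="bigquery")
print(ast.sql(dialect="duckdb"))  # rendered back as ordinary CTE-based SQL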
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
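The parenthesization in build_mod matters because % binds tighter than +: without the exp.Paren wrappers, MOD(a + 1, 7) would render as a + 1 % 7, i.e. a + (1 % 7). A quick round trip (illustrative; exact output may vary across sqlglot versions):

import sqlglot

# MOD(a + 1, 7) is parsed through build_mod and rendered with explicit grouping.
print(sqlglot.transpile("SELECT MOD(a + 1, 7)", read="mysql", write="duckdb")[0])
# e.g. SELECT (a + 1) % 7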
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
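A direct-call sketch of how the bracket flavor is tracked (DuckDB is used here only as an example dialect; bracket_notation is set only when the dialect declares HAS_DISTINCT_ARRAY_CONSTRUCTORS, otherwise the flag stays unset):

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

args = [exp.Literal.number(1), exp.Literal.number(2)]
arr = build_array_constructor(exp.Array, args, TokenType.L_BRACKET, DuckDB())
# True for the [...] spelling in distinguishing dialects; None everywhere else.
print(arr.args.get("bracket_notation"))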
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
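For example, Snowflake accepts both arities of CONVERT_TIMEZONE; a sketch of the resulting tree (this assumes the Snowflake dialect routes the function through this builder):

from sqlglot import exp, parse_one

# Three arguments map positionally onto source_tz / target_tz / timestamp; with
# two arguments, source_tz falls back to the dialect-supplied default (or None).
node = parse_one(
    "SELECT CONVERT_TIMEZONE('UTC', 'Europe/Paris', ts)", read="snowflake"
).find(exp.ConvertTimezone)
print(node.args["source_tz"], node.args["target_tz"])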
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 
TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 } 483 484 # Tokens that can represent identifiers 485 ID_VAR_TOKENS = { 486 TokenType.ALL, 487 TokenType.ATTACH, 488 TokenType.VAR, 489 TokenType.ANTI, 490 TokenType.APPLY, 491 TokenType.ASC, 492 TokenType.ASOF, 493 TokenType.AUTO_INCREMENT, 494 TokenType.BEGIN, 495 TokenType.BPCHAR, 496 TokenType.CACHE, 497 TokenType.CASE, 498 TokenType.COLLATE, 499 TokenType.COMMAND, 500 TokenType.COMMENT, 501 TokenType.COMMIT, 502 TokenType.CONSTRAINT, 503 TokenType.COPY, 504 TokenType.CUBE, 505 TokenType.CURRENT_SCHEMA, 506 TokenType.DEFAULT, 507 TokenType.DELETE, 508 TokenType.DESC, 509 TokenType.DESCRIBE, 510 TokenType.DETACH, 511 TokenType.DICTIONARY, 512 TokenType.DIV, 513 TokenType.END, 514 TokenType.EXECUTE, 515 TokenType.EXPORT, 516 TokenType.ESCAPE, 517 TokenType.FALSE, 518 TokenType.FIRST, 519 TokenType.FILTER, 520 TokenType.FINAL, 521 TokenType.FORMAT, 522 TokenType.FULL, 523 TokenType.GET, 524 TokenType.IDENTIFIER, 525 TokenType.IS, 526 TokenType.ISNULL, 527 TokenType.INTERVAL, 528 TokenType.KEEP, 529 TokenType.KILL, 530 TokenType.LEFT, 531 TokenType.LIMIT, 532 TokenType.LOAD, 533 TokenType.MERGE, 534 TokenType.NATURAL, 535 TokenType.NEXT, 536 TokenType.OFFSET, 537 TokenType.OPERATOR, 538 TokenType.ORDINALITY, 539 TokenType.OVERLAPS, 540 TokenType.OVERWRITE, 541 TokenType.PARTITION, 542 TokenType.PERCENT, 543 TokenType.PIVOT, 544 TokenType.PRAGMA, 545 TokenType.PUT, 546 TokenType.RANGE, 547 TokenType.RECURSIVE, 548 TokenType.REFERENCES, 549 TokenType.REFRESH, 550 TokenType.RENAME, 551 TokenType.REPLACE, 552 TokenType.RIGHT, 553 TokenType.ROLLUP, 554 TokenType.ROW, 555 TokenType.ROWS, 556 TokenType.SEMI, 557 TokenType.SET, 558 TokenType.SETTINGS, 559 TokenType.SHOW, 560 TokenType.TEMPORARY, 561 TokenType.TOP, 562 TokenType.TRUE, 563 TokenType.TRUNCATE, 564 TokenType.UNIQUE, 565 
TokenType.UNNEST, 566 TokenType.UNPIVOT, 567 TokenType.UPDATE, 568 TokenType.USE, 569 TokenType.VOLATILE, 570 TokenType.WINDOW, 571 *CREATABLES, 572 *SUBQUERY_PREDICATES, 573 *TYPE_TOKENS, 574 *NO_PAREN_FUNCTIONS, 575 } 576 ID_VAR_TOKENS.remove(TokenType.UNION) 577 578 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 579 TokenType.ANTI, 580 TokenType.APPLY, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.WINDOW, 646 TokenType.XOR, 647 *TYPE_TOKENS, 648 *SUBQUERY_PREDICATES, 649 } 650 651 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.AND: exp.And, 653 } 654 655 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.COLON_EQ: exp.PropertyEQ, 657 } 658 659 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.OR: exp.Or, 661 } 662 663 EQUALITY = { 664 TokenType.EQ: exp.EQ, 665 TokenType.NEQ: exp.NEQ, 666 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 667 } 668 669 COMPARISON = { 670 TokenType.GT: exp.GT, 671 TokenType.GTE: exp.GTE, 672 TokenType.LT: exp.LT, 673 TokenType.LTE: exp.LTE, 674 } 675 676 BITWISE = { 677 TokenType.AMP: exp.BitwiseAnd, 678 TokenType.CARET: exp.BitwiseXor, 679 TokenType.PIPE: exp.BitwiseOr, 680 } 681 682 TERM = { 683 TokenType.DASH: exp.Sub, 684 TokenType.PLUS: exp.Add, 685 TokenType.MOD: exp.Mod, 686 TokenType.COLLATE: exp.Collate, 687 } 688 689 FACTOR = { 690 TokenType.DIV: exp.IntDiv, 691 TokenType.LR_ARROW: exp.Distance, 692 TokenType.SLASH: exp.Div, 693 TokenType.STAR: exp.Mul, 694 } 695 696 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 697 698 TIMES = { 699 TokenType.TIME, 700 TokenType.TIMETZ, 701 } 702 703 TIMESTAMPS = { 704 TokenType.TIMESTAMP, 705 TokenType.TIMESTAMPNTZ, 706 TokenType.TIMESTAMPTZ, 707 TokenType.TIMESTAMPLTZ, 708 *TIMES, 709 } 710 711 SET_OPERATIONS = { 712 TokenType.UNION, 713 TokenType.INTERSECT, 714 TokenType.EXCEPT, 715 } 716 717 JOIN_METHODS = { 718 TokenType.ASOF, 719 TokenType.NATURAL, 720 TokenType.POSITIONAL, 721 } 722 723 JOIN_SIDES = { 724 TokenType.LEFT, 725 TokenType.RIGHT, 726 TokenType.FULL, 727 } 728 729 JOIN_KINDS = { 730 TokenType.ANTI, 731 TokenType.CROSS, 732 TokenType.INNER, 733 
TokenType.OUTER, 734 TokenType.SEMI, 735 TokenType.STRAIGHT_JOIN, 736 } 737 738 JOIN_HINTS: t.Set[str] = set() 739 740 LAMBDAS = { 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=expressions, 748 ), 749 TokenType.FARROW: lambda self, expressions: self.expression( 750 exp.Kwarg, 751 this=exp.var(expressions[0].name), 752 expression=self._parse_assignment(), 753 ), 754 } 755 756 COLUMN_OPERATORS = { 757 TokenType.DOT: None, 758 TokenType.DOTCOLON: lambda self, this, to: self.expression( 759 exp.JSONCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.DCOLON: lambda self, this, to: self.build_cast( 764 strict=self.STRICT_CAST, this=this, to=to 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 EXPRESSION_PARSERS = { 796 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 797 exp.Column: lambda self: self._parse_column(), 798 exp.Condition: lambda self: self._parse_assignment(), 799 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 800 exp.Expression: lambda self: self._parse_expression(), 801 exp.From: lambda self: self._parse_from(joins=True), 802 exp.Group: lambda self: self._parse_group(), 803 exp.Having: lambda self: self._parse_having(), 804 exp.Hint: lambda self: self._parse_hint_body(), 805 exp.Identifier: lambda self: self._parse_id_var(), 806 exp.Join: lambda self: self._parse_join(), 807 exp.Lambda: lambda self: self._parse_lambda(), 808 exp.Lateral: lambda self: self._parse_lateral(), 809 exp.Limit: lambda self: self._parse_limit(), 810 exp.Offset: lambda self: self._parse_offset(), 811 exp.Order: lambda self: self._parse_order(), 812 exp.Ordered: lambda self: self._parse_ordered(), 813 exp.Properties: lambda self: self._parse_properties(), 814 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 815 exp.Qualify: lambda self: self._parse_qualify(), 816 exp.Returning: lambda self: self._parse_returning(), 817 exp.Select: lambda self: self._parse_select(), 818 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 819 exp.Table: lambda self: self._parse_table_parts(), 820 exp.TableAlias: lambda self: self._parse_table_alias(), 821 exp.Tuple: lambda self: self._parse_value(values=False), 822 exp.Whens: lambda self: self._parse_when_matched(), 823 exp.Where: lambda self: self._parse_where(), 824 exp.Window: lambda self: self._parse_named_window(), 825 exp.With: lambda self: self._parse_with(), 826 "JOIN_TYPE": lambda self: self._parse_join_parts(), 827 } 828 829 STATEMENT_PARSERS = { 830 TokenType.ALTER: lambda self: self._parse_alter(), 831 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 832 TokenType.BEGIN: lambda self: self._parse_transaction(), 833 TokenType.CACHE: lambda self: self._parse_cache(), 834 TokenType.COMMENT: lambda self: self._parse_comment(), 835 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 836 TokenType.COPY: lambda self: self._parse_copy(), 837 TokenType.CREATE: lambda self: self._parse_create(), 838 TokenType.DELETE: lambda self: self._parse_delete(), 839 TokenType.DESC: lambda self: self._parse_describe(), 840 TokenType.DESCRIBE: lambda self: self._parse_describe(), 841 TokenType.DROP: lambda self: self._parse_drop(), 842 TokenType.GRANT: lambda self: self._parse_grant(), 843 TokenType.INSERT: lambda self: self._parse_insert(), 844 TokenType.KILL: lambda self: self._parse_kill(), 845 TokenType.LOAD: lambda self: self._parse_load(), 846 TokenType.MERGE: lambda self: self._parse_merge(), 847 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 848 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 849 TokenType.REFRESH: lambda self: self._parse_refresh(), 850 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 851 TokenType.SET: lambda self: self._parse_set(), 852 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 853 TokenType.UNCACHE: lambda self: self._parse_uncache(), 854 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 855 TokenType.UPDATE: lambda self: self._parse_update(), 856 TokenType.USE: lambda self: self._parse_use(), 857 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 858 } 859 860 UNARY_PARSERS = { 861 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 862 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 863 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 864 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 865 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 866 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 867 } 868 869 STRING_PARSERS = { 870 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 871 exp.RawString, this=token.text 872 ), 873 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 874 exp.National, this=token.text 875 ), 876 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 877 TokenType.STRING: lambda self, token: self.expression( 878 exp.Literal, this=token.text, is_string=True 879 ), 880 TokenType.UNICODE_STRING: lambda self, token: self.expression( 881 exp.UnicodeString, 882 this=token.text, 883 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 884 ), 885 } 886 887 NUMERIC_PARSERS = { 888 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 889 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 890 TokenType.HEX_STRING: lambda self, token: self.expression( 891 exp.HexString, 892 this=token.text, 893 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 894 ), 895 TokenType.NUMBER: lambda self, token: self.expression( 896 exp.Literal, this=token.text, is_string=False 897 ), 898 } 899 900 PRIMARY_PARSERS = { 901 **STRING_PARSERS, 902 **NUMERIC_PARSERS, 903 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 904 TokenType.NULL: lambda self, _: self.expression(exp.Null), 905 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 906 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 907 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 908 TokenType.STAR: lambda self, _: self._parse_star_ops(), 909 } 910 911 PLACEHOLDER_PARSERS = { 912 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 913 TokenType.PARAMETER: lambda self: self._parse_parameter(), 914 TokenType.COLON: lambda self: ( 915 self.expression(exp.Placeholder, this=self._prev.text) 916 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 917 else None 918 ), 919 } 920 921 RANGE_PARSERS = { 922 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 923 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 924 TokenType.GLOB: binary_range_parser(exp.Glob), 925 TokenType.ILIKE: binary_range_parser(exp.ILike), 926 TokenType.IN: lambda self, this: self._parse_in(this), 927 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 928 TokenType.IS: lambda self, this: self._parse_is(this), 929 TokenType.LIKE: binary_range_parser(exp.Like), 930 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 931 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 932 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 933 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 934 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 935 } 936 937 PIPE_SYNTAX_TRANSFORM_PARSERS = { 938 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 939 "AS": lambda self, query: self._build_pipe_cte( 940 query, [exp.Star()], self._parse_table_alias() 941 ), 942 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 943 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 944 "ORDER BY": lambda self, query: query.order_by( 945 self._parse_order(), append=False, copy=False 946 ), 947 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 948 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 949 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 950 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 951 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 952 } 953 954 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 955 "ALLOWED_VALUES": lambda self: self.expression( 956 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 957 ), 958 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 959 "AUTO": lambda self: self._parse_auto_property(), 960 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 961 "BACKUP": lambda self: self.expression( 962 exp.BackupProperty, this=self._parse_var(any_token=True) 963 ), 964 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 965 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 966 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 967 "CHECKSUM": lambda self: self._parse_checksum(), 968 "CLUSTER BY": lambda self: self._parse_cluster(), 969 "CLUSTERED": lambda self: self._parse_clustered_by(), 970 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 971 exp.CollateProperty, **kwargs 972 ), 973 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 974 "CONTAINS": lambda self: self._parse_contains_property(), 975 "COPY": 
lambda self: self._parse_copy_property(), 976 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 977 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 978 "DEFINER": lambda self: self._parse_definer(), 979 "DETERMINISTIC": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 983 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 984 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 985 "DISTKEY": lambda self: self._parse_distkey(), 986 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 987 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 988 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 989 "ENVIRONMENT": lambda self: self.expression( 990 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 991 ), 992 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 993 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 994 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 995 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 996 "FREESPACE": lambda self: self._parse_freespace(), 997 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 998 "HEAP": lambda self: self.expression(exp.HeapProperty), 999 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1000 "IMMUTABLE": lambda self: self.expression( 1001 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1002 ), 1003 "INHERITS": lambda self: self.expression( 1004 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1005 ), 1006 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1007 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1008 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1009 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1010 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1011 "LIKE": lambda self: self._parse_create_like(), 1012 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1013 "LOCK": lambda self: self._parse_locking(), 1014 "LOCKING": lambda self: self._parse_locking(), 1015 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1016 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1017 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1018 "MODIFIES": lambda self: self._parse_modifies_property(), 1019 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1020 "NO": lambda self: self._parse_no_property(), 1021 "ON": lambda self: self._parse_on_property(), 1022 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1023 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1024 "PARTITION": lambda self: self._parse_partitioned_of(), 1025 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1026 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1027 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1028 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1029 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1030 "READS": lambda self: self._parse_reads_property(), 1031 
"REMOTE": lambda self: self._parse_remote_with_connection(), 1032 "RETURNS": lambda self: self._parse_returns(), 1033 "STRICT": lambda self: self.expression(exp.StrictProperty), 1034 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1035 "ROW": lambda self: self._parse_row(), 1036 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1037 "SAMPLE": lambda self: self.expression( 1038 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1039 ), 1040 "SECURE": lambda self: self.expression(exp.SecureProperty), 1041 "SECURITY": lambda self: self._parse_security(), 1042 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1043 "SETTINGS": lambda self: self._parse_settings_property(), 1044 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1045 "SORTKEY": lambda self: self._parse_sortkey(), 1046 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1047 "STABLE": lambda self: self.expression( 1048 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1049 ), 1050 "STORED": lambda self: self._parse_stored(), 1051 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1052 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1053 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1054 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1055 "TO": lambda self: self._parse_to_table(), 1056 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1057 "TRANSFORM": lambda self: self.expression( 1058 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1059 ), 1060 "TTL": lambda self: self._parse_ttl(), 1061 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1062 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1063 "VOLATILE": lambda self: self._parse_volatile_property(), 1064 "WITH": lambda self: self._parse_with_property(), 1065 } 1066 1067 CONSTRAINT_PARSERS = { 1068 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1069 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1070 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1071 "CHARACTER SET": lambda self: self.expression( 1072 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1073 ), 1074 "CHECK": lambda self: self.expression( 1075 exp.CheckColumnConstraint, 1076 this=self._parse_wrapped(self._parse_assignment), 1077 enforced=self._match_text_seq("ENFORCED"), 1078 ), 1079 "COLLATE": lambda self: self.expression( 1080 exp.CollateColumnConstraint, 1081 this=self._parse_identifier() or self._parse_column(), 1082 ), 1083 "COMMENT": lambda self: self.expression( 1084 exp.CommentColumnConstraint, this=self._parse_string() 1085 ), 1086 "COMPRESS": lambda self: self._parse_compress(), 1087 "CLUSTERED": lambda self: self.expression( 1088 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1089 ), 1090 "NONCLUSTERED": lambda self: self.expression( 1091 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1092 ), 1093 "DEFAULT": lambda self: self.expression( 1094 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1095 ), 1096 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1097 "EPHEMERAL": lambda self: self.expression( 1098 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1099 ), 1100 
"EXCLUDE": lambda self: self.expression( 1101 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1102 ), 1103 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1104 "FORMAT": lambda self: self.expression( 1105 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1106 ), 1107 "GENERATED": lambda self: self._parse_generated_as_identity(), 1108 "IDENTITY": lambda self: self._parse_auto_increment(), 1109 "INLINE": lambda self: self._parse_inline(), 1110 "LIKE": lambda self: self._parse_create_like(), 1111 "NOT": lambda self: self._parse_not_constraint(), 1112 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1113 "ON": lambda self: ( 1114 self._match(TokenType.UPDATE) 1115 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1116 ) 1117 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1118 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1119 "PERIOD": lambda self: self._parse_period_for_system_time(), 1120 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1121 "REFERENCES": lambda self: self._parse_references(match=False), 1122 "TITLE": lambda self: self.expression( 1123 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1124 ), 1125 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1126 "UNIQUE": lambda self: self._parse_unique(), 1127 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1128 "WATERMARK": lambda self: self.expression( 1129 exp.WatermarkColumnConstraint, 1130 this=self._match(TokenType.FOR) and self._parse_column(), 1131 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1132 ), 1133 "WITH": lambda self: self.expression( 1134 exp.Properties, expressions=self._parse_wrapped_properties() 1135 ), 1136 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1137 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1138 } 1139 1140 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1141 klass = ( 1142 exp.PartitionedByBucket 1143 if self._prev.text.upper() == "BUCKET" 1144 else exp.PartitionByTruncate 1145 ) 1146 1147 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1148 this, expression = seq_get(args, 0), seq_get(args, 1) 1149 1150 if isinstance(this, exp.Literal): 1151 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1152 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1153 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1154 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1155 # 1156 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1157 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1158 this, expression = expression, this 1159 1160 return self.expression(klass, this=this, expression=expression) 1161 1162 ALTER_PARSERS = { 1163 "ADD": lambda self: self._parse_alter_table_add(), 1164 "AS": lambda self: self._parse_select(), 1165 "ALTER": lambda self: self._parse_alter_table_alter(), 1166 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1167 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 1168 "DROP": lambda self: self._parse_alter_table_drop(), 1169 "RENAME": lambda self: self._parse_alter_table_rename(), 1170 "SET": lambda self: self._parse_alter_table_set(), 1171 "SWAP": lambda self: self.expression( 1172 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1173 ), 1174 } 1175 1176 ALTER_ALTER_PARSERS = { 1177 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1178 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1179 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1180 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1181 } 1182 1183 SCHEMA_UNNAMED_CONSTRAINTS = { 1184 "CHECK", 1185 "EXCLUDE", 1186 "FOREIGN KEY", 1187 "LIKE", 1188 "PERIOD", 1189 "PRIMARY KEY", 1190 "UNIQUE", 1191 "WATERMARK", 1192 "BUCKET", 1193 "TRUNCATE", 1194 } 1195 1196 NO_PAREN_FUNCTION_PARSERS = { 1197 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1198 "CASE": lambda self: self._parse_case(), 1199 "CONNECT_BY_ROOT": lambda self: self.expression( 1200 exp.ConnectByRoot, this=self._parse_column() 1201 ), 1202 "IF": lambda self: self._parse_if(), 1203 } 1204 1205 INVALID_FUNC_NAME_TOKENS = { 1206 TokenType.IDENTIFIER, 1207 TokenType.STRING, 1208 } 1209 1210 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1211 1212 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1213 1214 FUNCTION_PARSERS = { 1215 **{ 1216 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1217 }, 1218 **{ 1219 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1220 }, 1221 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1222 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1223 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1224 "DECODE": lambda self: self._parse_decode(), 1225 "EXTRACT": lambda self: self._parse_extract(), 1226 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1227 "GAP_FILL": lambda self: self._parse_gap_fill(), 1228 "JSON_OBJECT": lambda self: self._parse_json_object(), 1229 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1230 "JSON_TABLE": lambda self: self._parse_json_table(), 1231 "MATCH": lambda self: self._parse_match_against(), 1232 "NORMALIZE": lambda self: self._parse_normalize(), 1233 "OPENJSON": lambda self: self._parse_open_json(), 1234 "OVERLAY": lambda self: self._parse_overlay(), 1235 "POSITION": lambda self: self._parse_position(), 1236 "PREDICT": lambda self: self._parse_predict(), 1237 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1238 "STRING_AGG": lambda self: self._parse_string_agg(), 1239 "SUBSTRING": lambda self: self._parse_substring(), 1240 "TRIM": lambda self: self._parse_trim(), 1241 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1242 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1243 "XMLELEMENT": lambda self: self.expression( 1244 exp.XMLElement, 1245 this=self._match_text_seq("NAME") and self._parse_id_var(), 1246 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1247 ), 1248 "XMLTABLE": lambda self: self._parse_xml_table(), 1249 } 1250 1251 QUERY_MODIFIER_PARSERS = { 1252 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1253 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1254 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1255 TokenType.GROUP_BY: lambda self: 
("group", self._parse_group()), 1256 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1257 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1258 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1259 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1260 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1261 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1262 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1263 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1264 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1265 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1266 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1267 TokenType.CLUSTER_BY: lambda self: ( 1268 "cluster", 1269 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1270 ), 1271 TokenType.DISTRIBUTE_BY: lambda self: ( 1272 "distribute", 1273 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1274 ), 1275 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1276 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1277 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1278 } 1279 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1280 1281 SET_PARSERS = { 1282 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1283 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1284 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1285 "TRANSACTION": lambda self: self._parse_set_transaction(), 1286 } 1287 1288 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1289 1290 TYPE_LITERAL_PARSERS = { 1291 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1292 } 1293 1294 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1295 1296 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1297 1298 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1299 1300 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1301 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1302 "ISOLATION": ( 1303 ("LEVEL", "REPEATABLE", "READ"), 1304 ("LEVEL", "READ", "COMMITTED"), 1305 ("LEVEL", "READ", "UNCOMITTED"), 1306 ("LEVEL", "SERIALIZABLE"), 1307 ), 1308 "READ": ("WRITE", "ONLY"), 1309 } 1310 1311 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1312 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1313 ) 1314 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1315 1316 CREATE_SEQUENCE: OPTIONS_TYPE = { 1317 "SCALE": ("EXTEND", "NOEXTEND"), 1318 "SHARD": ("EXTEND", "NOEXTEND"), 1319 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1320 **dict.fromkeys( 1321 ( 1322 "SESSION", 1323 "GLOBAL", 1324 "KEEP", 1325 "NOKEEP", 1326 "ORDER", 1327 "NOORDER", 1328 "NOCACHE", 1329 "CYCLE", 1330 "NOCYCLE", 1331 "NOMINVALUE", 1332 "NOMAXVALUE", 1333 "NOSCALE", 1334 "NOSHARD", 1335 ), 1336 tuple(), 1337 ), 1338 } 1339 1340 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1341 1342 USABLES: OPTIONS_TYPE = dict.fromkeys( 1343 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1344 ) 1345 1346 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1347 1348 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1349 "TYPE": ("EVOLUTION",), 
1350 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1351 } 1352 1353 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1354 1355 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1356 1357 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1358 "NOT": ("ENFORCED",), 1359 "MATCH": ( 1360 "FULL", 1361 "PARTIAL", 1362 "SIMPLE", 1363 ), 1364 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1365 "USING": ( 1366 "BTREE", 1367 "HASH", 1368 ), 1369 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1370 } 1371 1372 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1373 "NO": ("OTHERS",), 1374 "CURRENT": ("ROW",), 1375 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1376 } 1377 1378 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1379 1380 CLONE_KEYWORDS = {"CLONE", "COPY"} 1381 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1382 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1383 1384 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1385 1386 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1387 1388 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1389 1390 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1391 1392 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1393 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1394 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1395 1396 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1397 1398 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1399 1400 ADD_CONSTRAINT_TOKENS = { 1401 TokenType.CONSTRAINT, 1402 TokenType.FOREIGN_KEY, 1403 TokenType.INDEX, 1404 TokenType.KEY, 1405 TokenType.PRIMARY_KEY, 1406 TokenType.UNIQUE, 1407 } 1408 1409 DISTINCT_TOKENS = {TokenType.DISTINCT} 1410 1411 NULL_TOKENS = {TokenType.NULL} 1412 1413 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1414 1415 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1416 1417 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1418 1419 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1420 1421 ODBC_DATETIME_LITERALS = { 1422 "d": exp.Date, 1423 "t": exp.Time, 1424 "ts": exp.Timestamp, 1425 } 1426 1427 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1428 1429 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1430 1431 # The style options for the DESCRIBE statement 1432 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1433 1434 # The style options for the ANALYZE statement 1435 ANALYZE_STYLES = { 1436 "BUFFER_USAGE_LIMIT", 1437 "FULL", 1438 "LOCAL", 1439 "NO_WRITE_TO_BINLOG", 1440 "SAMPLE", 1441 "SKIP_LOCKED", 1442 "VERBOSE", 1443 } 1444 1445 ANALYZE_EXPRESSION_PARSERS = { 1446 "ALL": lambda self: self._parse_analyze_columns(), 1447 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1448 "DELETE": lambda self: self._parse_analyze_delete(), 1449 "DROP": lambda self: self._parse_analyze_histogram(), 1450 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1451 "LIST": lambda self: self._parse_analyze_list(), 1452 "PREDICATE": lambda self: self._parse_analyze_columns(), 1453 "UPDATE": lambda self: self._parse_analyze_histogram(), 1454 "VALIDATE": lambda self: self._parse_analyze_validate(), 1455 } 1456 1457 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1458 1459 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, 
TokenType.OFFSET) 1460 1461 OPERATION_MODIFIERS: t.Set[str] = set() 1462 1463 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1464 1465 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1466 1467 STRICT_CAST = True 1468 1469 PREFIXED_PIVOT_COLUMNS = False 1470 IDENTIFY_PIVOT_STRINGS = False 1471 1472 LOG_DEFAULTS_TO_LN = False 1473 1474 # Whether the table sample clause expects CSV syntax 1475 TABLESAMPLE_CSV = False 1476 1477 # The default method used for table sampling 1478 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1479 1480 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1481 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1482 1483 # Whether the TRIM function expects the characters to trim as its first argument 1484 TRIM_PATTERN_FIRST = False 1485 1486 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1487 STRING_ALIASES = False 1488 1489 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1490 MODIFIERS_ATTACHED_TO_SET_OP = True 1491 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1492 1493 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1494 NO_PAREN_IF_COMMANDS = True 1495 1496 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1497 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1498 1499 # Whether the `:` operator is used to extract a value from a VARIANT column 1500 COLON_IS_VARIANT_EXTRACT = False 1501 1502 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1503 # If this is True and '(' is not found, the keyword will be treated as an identifier 1504 VALUES_FOLLOWED_BY_PAREN = True 1505 1506 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1507 SUPPORTS_IMPLICIT_UNNEST = False 1508 1509 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1510 INTERVAL_SPANS = True 1511 1512 # Whether a PARTITION clause can follow a table reference 1513 SUPPORTS_PARTITION_SELECTION = False 1514 1515 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1516 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1517 1518 # Whether the 'AS' keyword is optional in the CTE definition syntax 1519 OPTIONAL_ALIAS_TOKEN_CTE = True 1520 1521 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1522 ALTER_RENAME_REQUIRES_COLUMN = True 1523 1524 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1525 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1526 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1527 # as BigQuery, where all joins have the same precedence. 1528 JOINS_HAVE_EQUAL_PRECEDENCE = False 1529 1530 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1531 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1532 1533 # Whether map literals support arbitrary expressions as keys. 1534 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1535 # When False, keys are typically restricted to identifiers. 
1536 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1537 1538 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1539 # is true for Snowflake but not for BigQuery which can also process strings 1540 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1541 1542 __slots__ = ( 1543 "error_level", 1544 "error_message_context", 1545 "max_errors", 1546 "dialect", 1547 "sql", 1548 "errors", 1549 "_tokens", 1550 "_index", 1551 "_curr", 1552 "_next", 1553 "_prev", 1554 "_prev_comments", 1555 "_pipe_cte_counter", 1556 ) 1557 1558 # Autofilled 1559 SHOW_TRIE: t.Dict = {} 1560 SET_TRIE: t.Dict = {} 1561 1562 def __init__( 1563 self, 1564 error_level: t.Optional[ErrorLevel] = None, 1565 error_message_context: int = 100, 1566 max_errors: int = 3, 1567 dialect: DialectType = None, 1568 ): 1569 from sqlglot.dialects import Dialect 1570 1571 self.error_level = error_level or ErrorLevel.IMMEDIATE 1572 self.error_message_context = error_message_context 1573 self.max_errors = max_errors 1574 self.dialect = Dialect.get_or_raise(dialect) 1575 self.reset() 1576 1577 def reset(self): 1578 self.sql = "" 1579 self.errors = [] 1580 self._tokens = [] 1581 self._index = 0 1582 self._curr = None 1583 self._next = None 1584 self._prev = None 1585 self._prev_comments = None 1586 self._pipe_cte_counter = 0 1587 1588 def parse( 1589 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1590 ) -> t.List[t.Optional[exp.Expression]]: 1591 """ 1592 Parses a list of tokens and returns a list of syntax trees, one tree 1593 per parsed SQL statement. 1594 1595 Args: 1596 raw_tokens: The list of tokens. 1597 sql: The original SQL string, used to produce helpful debug messages. 1598 1599 Returns: 1600 The list of the produced syntax trees. 1601 """ 1602 return self._parse( 1603 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1604 ) 1605 1606 def parse_into( 1607 self, 1608 expression_types: exp.IntoType, 1609 raw_tokens: t.List[Token], 1610 sql: t.Optional[str] = None, 1611 ) -> t.List[t.Optional[exp.Expression]]: 1612 """ 1613 Parses a list of tokens into a given Expression type. If a collection of Expression 1614 types is given instead, this method will try to parse the token list into each one 1615 of them, stopping at the first for which the parsing succeeds. 1616 1617 Args: 1618 expression_types: The expression type(s) to try and parse the token list into. 1619 raw_tokens: The list of tokens. 1620 sql: The original SQL string, used to produce helpful debug messages. 1621 1622 Returns: 1623 The target Expression. 
1624 """ 1625 errors = [] 1626 for expression_type in ensure_list(expression_types): 1627 parser = self.EXPRESSION_PARSERS.get(expression_type) 1628 if not parser: 1629 raise TypeError(f"No parser registered for {expression_type}") 1630 1631 try: 1632 return self._parse(parser, raw_tokens, sql) 1633 except ParseError as e: 1634 e.errors[0]["into_expression"] = expression_type 1635 errors.append(e) 1636 1637 raise ParseError( 1638 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1639 errors=merge_errors(errors), 1640 ) from errors[-1] 1641 1642 def _parse( 1643 self, 1644 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1645 raw_tokens: t.List[Token], 1646 sql: t.Optional[str] = None, 1647 ) -> t.List[t.Optional[exp.Expression]]: 1648 self.reset() 1649 self.sql = sql or "" 1650 1651 total = len(raw_tokens) 1652 chunks: t.List[t.List[Token]] = [[]] 1653 1654 for i, token in enumerate(raw_tokens): 1655 if token.token_type == TokenType.SEMICOLON: 1656 if token.comments: 1657 chunks.append([token]) 1658 1659 if i < total - 1: 1660 chunks.append([]) 1661 else: 1662 chunks[-1].append(token) 1663 1664 expressions = [] 1665 1666 for tokens in chunks: 1667 self._index = -1 1668 self._tokens = tokens 1669 self._advance() 1670 1671 expressions.append(parse_method(self)) 1672 1673 if self._index < len(self._tokens): 1674 self.raise_error("Invalid expression / Unexpected token") 1675 1676 self.check_errors() 1677 1678 return expressions 1679 1680 def check_errors(self) -> None: 1681 """Logs or raises any found errors, depending on the chosen error level setting.""" 1682 if self.error_level == ErrorLevel.WARN: 1683 for error in self.errors: 1684 logger.error(str(error)) 1685 elif self.error_level == ErrorLevel.RAISE and self.errors: 1686 raise ParseError( 1687 concat_messages(self.errors, self.max_errors), 1688 errors=merge_errors(self.errors), 1689 ) 1690 1691 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1692 """ 1693 Appends an error in the list of recorded errors or raises it, depending on the chosen 1694 error level setting. 1695 """ 1696 token = token or self._curr or self._prev or Token.string("") 1697 start = token.start 1698 end = token.end + 1 1699 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1700 highlight = self.sql[start:end] 1701 end_context = self.sql[end : end + self.error_message_context] 1702 1703 error = ParseError.new( 1704 f"{message}. Line {token.line}, Col: {token.col}.\n" 1705 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1706 description=message, 1707 line=token.line, 1708 col=token.col, 1709 start_context=start_context, 1710 highlight=highlight, 1711 end_context=end_context, 1712 ) 1713 1714 if self.error_level == ErrorLevel.IMMEDIATE: 1715 raise error 1716 1717 self.errors.append(error) 1718 1719 def expression( 1720 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1721 ) -> E: 1722 """ 1723 Creates a new, validated Expression. 1724 1725 Args: 1726 exp_class: The expression class to instantiate. 1727 comments: An optional list of comments to attach to the expression. 1728 kwargs: The arguments to set for the expression along with their respective values. 1729 1730 Returns: 1731 The target expression. 
1732 """ 1733 instance = exp_class(**kwargs) 1734 instance.add_comments(comments) if comments else self._add_comments(instance) 1735 return self.validate_expression(instance) 1736 1737 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1738 if expression and self._prev_comments: 1739 expression.add_comments(self._prev_comments) 1740 self._prev_comments = None 1741 1742 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1743 """ 1744 Validates an Expression, making sure that all its mandatory arguments are set. 1745 1746 Args: 1747 expression: The expression to validate. 1748 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1749 1750 Returns: 1751 The validated expression. 1752 """ 1753 if self.error_level != ErrorLevel.IGNORE: 1754 for error_message in expression.error_messages(args): 1755 self.raise_error(error_message) 1756 1757 return expression 1758 1759 def _find_sql(self, start: Token, end: Token) -> str: 1760 return self.sql[start.start : end.end + 1] 1761 1762 def _is_connected(self) -> bool: 1763 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1764 1765 def _advance(self, times: int = 1) -> None: 1766 self._index += times 1767 self._curr = seq_get(self._tokens, self._index) 1768 self._next = seq_get(self._tokens, self._index + 1) 1769 1770 if self._index > 0: 1771 self._prev = self._tokens[self._index - 1] 1772 self._prev_comments = self._prev.comments 1773 else: 1774 self._prev = None 1775 self._prev_comments = None 1776 1777 def _retreat(self, index: int) -> None: 1778 if index != self._index: 1779 self._advance(index - self._index) 1780 1781 def _warn_unsupported(self) -> None: 1782 if len(self._tokens) <= 1: 1783 return 1784 1785 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1786 # interested in emitting a warning for the one being currently processed. 1787 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1788 1789 logger.warning( 1790 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1791 ) 1792 1793 def _parse_command(self) -> exp.Command: 1794 self._warn_unsupported() 1795 return self.expression( 1796 exp.Command, 1797 comments=self._prev_comments, 1798 this=self._prev.text.upper(), 1799 expression=self._parse_string(), 1800 ) 1801 1802 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1803 """ 1804 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1806 solve this by setting & resetting the parser state accordingly. 1807 """ 1808 index = self._index 1809 error_level = self.error_level 1810 1811 self.error_level = ErrorLevel.IMMEDIATE 1812 try: 1813 this = parse_method() 1814 except ParseError: 1815 this = None 1816 finally: 1817 if not this or retreat: 1818 self._retreat(index) 1819 self.error_level = error_level 1820 1821 return this 1822 1823 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1824 start = self._prev 1825 exists = self._parse_exists() if allow_exists else None 1826 1827 self._match(TokenType.ON) 1828 1829 materialized = self._match_text_seq("MATERIALIZED") 1830 kind = self._match_set(self.CREATABLES) and self._prev 1831 if not kind: 1832 return self._parse_as_command(start) 1833 1834 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1835 this = self._parse_user_defined_function(kind=kind.token_type) 1836 elif kind.token_type == TokenType.TABLE: 1837 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1838 elif kind.token_type == TokenType.COLUMN: 1839 this = self._parse_column() 1840 else: 1841 this = self._parse_id_var() 1842 1843 self._match(TokenType.IS) 1844 1845 return self.expression( 1846 exp.Comment, 1847 this=this, 1848 kind=kind.text, 1849 expression=self._parse_string(), 1850 exists=exists, 1851 materialized=materialized, 1852 ) 1853 1854 def _parse_to_table( 1855 self, 1856 ) -> exp.ToTableProperty: 1857 table = self._parse_table_parts(schema=True) 1858 return self.expression(exp.ToTableProperty, this=table) 1859 1860 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1861 def _parse_ttl(self) -> exp.Expression: 1862 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1863 this = self._parse_bitwise() 1864 1865 if self._match_text_seq("DELETE"): 1866 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1867 if self._match_text_seq("RECOMPRESS"): 1868 return self.expression( 1869 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1870 ) 1871 if self._match_text_seq("TO", "DISK"): 1872 return self.expression( 1873 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1874 ) 1875 if self._match_text_seq("TO", "VOLUME"): 1876 return self.expression( 1877 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1878 ) 1879 1880 return this 1881 1882 expressions = self._parse_csv(_parse_ttl_action) 1883 where = self._parse_where() 1884 group = self._parse_group() 1885 1886 aggregates = None 1887 if group and self._match(TokenType.SET): 1888 aggregates = self._parse_csv(self._parse_set_item) 1889 1890 return self.expression( 1891 exp.MergeTreeTTL, 1892 expressions=expressions, 1893 where=where, 1894 group=group, 1895 aggregates=aggregates, 1896 ) 1897 1898 def _parse_statement(self) -> t.Optional[exp.Expression]: 1899 if self._curr is None: 1900 return None 1901 1902 if self._match_set(self.STATEMENT_PARSERS): 1903 comments = self._prev_comments 1904 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1905 stmt.add_comments(comments, prepend=True) 1906 return stmt 1907 1908 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1909 return self._parse_command() 1910 1911 expression = self._parse_expression() 1912 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1913 return
self._parse_query_modifiers(expression) 1914 1915 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1916 start = self._prev 1917 temporary = self._match(TokenType.TEMPORARY) 1918 materialized = self._match_text_seq("MATERIALIZED") 1919 1920 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1921 if not kind: 1922 return self._parse_as_command(start) 1923 1924 concurrently = self._match_text_seq("CONCURRENTLY") 1925 if_exists = exists or self._parse_exists() 1926 1927 if kind == "COLUMN": 1928 this = self._parse_column() 1929 else: 1930 this = self._parse_table_parts( 1931 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1932 ) 1933 1934 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1935 1936 if self._match(TokenType.L_PAREN, advance=False): 1937 expressions = self._parse_wrapped_csv(self._parse_types) 1938 else: 1939 expressions = None 1940 1941 return self.expression( 1942 exp.Drop, 1943 exists=if_exists, 1944 this=this, 1945 expressions=expressions, 1946 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1947 temporary=temporary, 1948 materialized=materialized, 1949 cascade=self._match_text_seq("CASCADE"), 1950 constraints=self._match_text_seq("CONSTRAINTS"), 1951 purge=self._match_text_seq("PURGE"), 1952 cluster=cluster, 1953 concurrently=concurrently, 1954 ) 1955 1956 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1957 return ( 1958 self._match_text_seq("IF") 1959 and (not not_ or self._match(TokenType.NOT)) 1960 and self._match(TokenType.EXISTS) 1961 ) 1962 1963 def _parse_create(self) -> exp.Create | exp.Command: 1964 # Note: this can't be None because we've matched a statement parser 1965 start = self._prev 1966 1967 replace = ( 1968 start.token_type == TokenType.REPLACE 1969 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1970 or self._match_pair(TokenType.OR, TokenType.ALTER) 1971 ) 1972 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1973 1974 unique = self._match(TokenType.UNIQUE) 1975 1976 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1977 clustered = True 1978 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1979 "COLUMNSTORE" 1980 ): 1981 clustered = False 1982 else: 1983 clustered = None 1984 1985 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1986 self._advance() 1987 1988 properties = None 1989 create_token = self._match_set(self.CREATABLES) and self._prev 1990 1991 if not create_token: 1992 # exp.Properties.Location.POST_CREATE 1993 properties = self._parse_properties() 1994 create_token = self._match_set(self.CREATABLES) and self._prev 1995 1996 if not properties or not create_token: 1997 return self._parse_as_command(start) 1998 1999 concurrently = self._match_text_seq("CONCURRENTLY") 2000 exists = self._parse_exists(not_=True) 2001 this = None 2002 expression: t.Optional[exp.Expression] = None 2003 indexes = None 2004 no_schema_binding = None 2005 begin = None 2006 end = None 2007 clone = None 2008 2009 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2010 nonlocal properties 2011 if properties and temp_props: 2012 properties.expressions.extend(temp_props.expressions) 2013 elif temp_props: 2014 properties = temp_props 2015 2016 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2017 this = self._parse_user_defined_function(kind=create_token.token_type) 2018 2019 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2020 extend_props(self._parse_properties()) 2021 2022 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2023 extend_props(self._parse_properties()) 2024 2025 if not expression: 2026 if self._match(TokenType.COMMAND): 2027 expression = self._parse_as_command(self._prev) 2028 else: 2029 begin = self._match(TokenType.BEGIN) 2030 return_ = self._match_text_seq("RETURN") 2031 2032 if self._match(TokenType.STRING, advance=False): 2033 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2034 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2035 expression = self._parse_string() 2036 extend_props(self._parse_properties()) 2037 else: 2038 expression = self._parse_user_defined_function_expression() 2039 2040 end = self._match_text_seq("END") 2041 2042 if return_: 2043 expression = self.expression(exp.Return, this=expression) 2044 elif create_token.token_type == TokenType.INDEX: 2045 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2046 if not self._match(TokenType.ON): 2047 index = self._parse_id_var() 2048 anonymous = False 2049 else: 2050 index = None 2051 anonymous = True 2052 2053 this = self._parse_index(index=index, anonymous=anonymous) 2054 elif create_token.token_type in self.DB_CREATABLES: 2055 table_parts = self._parse_table_parts( 2056 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2057 ) 2058 2059 # exp.Properties.Location.POST_NAME 2060 self._match(TokenType.COMMA) 2061 extend_props(self._parse_properties(before=True)) 2062 2063 this = self._parse_schema(this=table_parts) 2064 2065 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2066 extend_props(self._parse_properties()) 2067 2068 has_alias = self._match(TokenType.ALIAS) 2069 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2070 # exp.Properties.Location.POST_ALIAS 2071 extend_props(self._parse_properties()) 2072 2073 if create_token.token_type == TokenType.SEQUENCE: 2074 expression = self._parse_types() 2075 extend_props(self._parse_properties()) 2076 else: 2077 expression = self._parse_ddl_select() 2078 2079 # Some dialects also support using a table as an alias instead of a SELECT. 2080 # Here we fall back to this as an alternative.
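        # Illustrative (hypothetical) shape: CREATE TABLE t2 AS t1, where t1 is a bare
        # table name rather than a subquery.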
2081 if not expression and has_alias: 2082 expression = self._try_parse(self._parse_table_parts) 2083 2084 if create_token.token_type == TokenType.TABLE: 2085 # exp.Properties.Location.POST_EXPRESSION 2086 extend_props(self._parse_properties()) 2087 2088 indexes = [] 2089 while True: 2090 index = self._parse_index() 2091 2092 # exp.Properties.Location.POST_INDEX 2093 extend_props(self._parse_properties()) 2094 if not index: 2095 break 2096 else: 2097 self._match(TokenType.COMMA) 2098 indexes.append(index) 2099 elif create_token.token_type == TokenType.VIEW: 2100 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2101 no_schema_binding = True 2102 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2103 extend_props(self._parse_properties()) 2104 2105 shallow = self._match_text_seq("SHALLOW") 2106 2107 if self._match_texts(self.CLONE_KEYWORDS): 2108 copy = self._prev.text.lower() == "copy" 2109 clone = self.expression( 2110 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2111 ) 2112 2113 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2114 return self._parse_as_command(start) 2115 2116 create_kind_text = create_token.text.upper() 2117 return self.expression( 2118 exp.Create, 2119 this=this, 2120 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2121 replace=replace, 2122 refresh=refresh, 2123 unique=unique, 2124 expression=expression, 2125 exists=exists, 2126 properties=properties, 2127 indexes=indexes, 2128 no_schema_binding=no_schema_binding, 2129 begin=begin, 2130 end=end, 2131 clone=clone, 2132 concurrently=concurrently, 2133 clustered=clustered, 2134 ) 2135 2136 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2137 seq = exp.SequenceProperties() 2138 2139 options = [] 2140 index = self._index 2141 2142 while self._curr: 2143 self._match(TokenType.COMMA) 2144 if self._match_text_seq("INCREMENT"): 2145 self._match_text_seq("BY") 2146 self._match_text_seq("=") 2147 seq.set("increment", self._parse_term()) 2148 elif self._match_text_seq("MINVALUE"): 2149 seq.set("minvalue", self._parse_term()) 2150 elif self._match_text_seq("MAXVALUE"): 2151 seq.set("maxvalue", self._parse_term()) 2152 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2153 self._match_text_seq("=") 2154 seq.set("start", self._parse_term()) 2155 elif self._match_text_seq("CACHE"): 2156 # T-SQL allows empty CACHE which is initialized dynamically 2157 seq.set("cache", self._parse_number() or True) 2158 elif self._match_text_seq("OWNED", "BY"): 2159 # "OWNED BY NONE" is the default 2160 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2161 else: 2162 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2163 if opt: 2164 options.append(opt) 2165 else: 2166 break 2167 2168 seq.set("options", options if options else None) 2169 return None if self._index == index else seq 2170 2171 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2172 # only used for teradata currently 2173 self._match(TokenType.COMMA) 2174 2175 kwargs = { 2176 "no": self._match_text_seq("NO"), 2177 "dual": self._match_text_seq("DUAL"), 2178 "before": self._match_text_seq("BEFORE"), 2179 "default": self._match_text_seq("DEFAULT"), 2180 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2181 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2182 "after": self._match_text_seq("AFTER"), 2183 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2184 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2185 } 2186 2187 if self._match_texts(self.PROPERTY_PARSERS): 2188 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2189 try: 2190 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2191 except TypeError: 2192 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2193 2194 return None 2195 2196 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2197 return self._parse_wrapped_csv(self._parse_property) 2198 2199 def _parse_property(self) -> t.Optional[exp.Expression]: 2200 if self._match_texts(self.PROPERTY_PARSERS): 2201 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2202 2203 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2204 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2205 2206 if self._match_text_seq("COMPOUND", "SORTKEY"): 2207 return self._parse_sortkey(compound=True) 2208 2209 if self._match_text_seq("SQL", "SECURITY"): 2210 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2211 2212 index = self._index 2213 key = self._parse_column() 2214 2215 if not self._match(TokenType.EQ): 2216 self._retreat(index) 2217 return self._parse_sequence_properties() 2218 2219 # Transform the key into an exp.Dot if it is a dotted identifier wrapped in an exp.Column, or into an exp.Var otherwise 2220 if isinstance(key, exp.Column): 2221 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2222 2223 value = self._parse_bitwise() or self._parse_var(any_token=True) 2224 2225 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2226 if isinstance(value, exp.Column): 2227 value = exp.var(value.name) 2228 2229 return self.expression(exp.Property, this=key, value=value) 2230 2231 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2232 if self._match_text_seq("BY"): 2233 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2234 2235 self._match(TokenType.ALIAS) 2236 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2237 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2238 2239 return self.expression( 2240 exp.FileFormatProperty, 2241 this=( 2242 self.expression( 2243 exp.InputOutputFormat, 2244 input_format=input_format, 2245 output_format=output_format, 2246 ) 2247 if input_format or output_format 2248 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2249 ), 2250 hive_format=True, 2251 ) 2252 2253 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2254 field = self._parse_field() 2255 if isinstance(field, exp.Identifier) and not field.quoted: 2256 field = exp.var(field) 2257 2258 return field 2259 2260 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2261 self._match(TokenType.EQ) 2262 self._match(TokenType.ALIAS) 2263 2264 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2265 2266 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2267 properties = [] 2268 while True: 2269 if before: 2270 prop = self._parse_property_before() 2271 else: 2272 prop = self._parse_property() 2273 if not prop: 2274 break 2275 for p in ensure_list(prop): 2276 properties.append(p) 2277 2278 if properties: 2279 return self.expression(exp.Properties, expressions=properties) 2280
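        # Nothing was parsed: return None so callers can distinguish the absence of
        # properties from an empty exp.Properties node.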
2281 return None 2282 2283 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2284 return self.expression( 2285 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2286 ) 2287 2288 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2289 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2290 security_specifier = self._prev.text.upper() 2291 return self.expression(exp.SecurityProperty, this=security_specifier) 2292 return None 2293 2294 def _parse_settings_property(self) -> exp.SettingsProperty: 2295 return self.expression( 2296 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2297 ) 2298 2299 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2300 if self._index >= 2: 2301 pre_volatile_token = self._tokens[self._index - 2] 2302 else: 2303 pre_volatile_token = None 2304 2305 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2306 return exp.VolatileProperty() 2307 2308 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2309 2310 def _parse_retention_period(self) -> exp.Var: 2311 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2312 number = self._parse_number() 2313 number_str = f"{number} " if number else "" 2314 unit = self._parse_var(any_token=True) 2315 return exp.var(f"{number_str}{unit}") 2316 2317 def _parse_system_versioning_property( 2318 self, with_: bool = False 2319 ) -> exp.WithSystemVersioningProperty: 2320 self._match(TokenType.EQ) 2321 prop = self.expression( 2322 exp.WithSystemVersioningProperty, 2323 **{ # type: ignore 2324 "on": True, 2325 "with": with_, 2326 }, 2327 ) 2328 2329 if self._match_text_seq("OFF"): 2330 prop.set("on", False) 2331 return prop 2332 2333 self._match(TokenType.ON) 2334 if self._match(TokenType.L_PAREN): 2335 while self._curr and not self._match(TokenType.R_PAREN): 2336 if self._match_text_seq("HISTORY_TABLE", "="): 2337 prop.set("this", self._parse_table_parts()) 2338 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2339 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2340 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2341 prop.set("retention_period", self._parse_retention_period()) 2342 2343 self._match(TokenType.COMMA) 2344 2345 return prop 2346 2347 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2348 self._match(TokenType.EQ) 2349 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2350 prop = self.expression(exp.DataDeletionProperty, on=on) 2351 2352 if self._match(TokenType.L_PAREN): 2353 while self._curr and not self._match(TokenType.R_PAREN): 2354 if self._match_text_seq("FILTER_COLUMN", "="): 2355 prop.set("filter_column", self._parse_column()) 2356 elif self._match_text_seq("RETENTION_PERIOD", "="): 2357 prop.set("retention_period", self._parse_retention_period()) 2358 2359 self._match(TokenType.COMMA) 2360 2361 return prop 2362 2363 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2364 kind = "HASH" 2365 expressions: t.Optional[t.List[exp.Expression]] = None 2366 if self._match_text_seq("BY", "HASH"): 2367 expressions = self._parse_wrapped_csv(self._parse_id_var) 2368 elif self._match_text_seq("BY", "RANDOM"): 2369 kind = "RANDOM" 2370 2371 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2372 buckets: t.Optional[exp.Expression] = None 2373 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2374 buckets = self._parse_number() 2375 2376 return self.expression( 2377 exp.DistributedByProperty, 2378 expressions=expressions, 2379 kind=kind, 2380 buckets=buckets, 2381 order=self._parse_order(), 2382 ) 2383 2384 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2385 self._match_text_seq("KEY") 2386 expressions = self._parse_wrapped_id_vars() 2387 return self.expression(expr_type, expressions=expressions) 2388 2389 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2390 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2391 prop = self._parse_system_versioning_property(with_=True) 2392 self._match_r_paren() 2393 return prop 2394 2395 if self._match(TokenType.L_PAREN, advance=False): 2396 return self._parse_wrapped_properties() 2397 2398 if self._match_text_seq("JOURNAL"): 2399 return self._parse_withjournaltable() 2400 2401 if self._match_texts(self.VIEW_ATTRIBUTES): 2402 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2403 2404 if self._match_text_seq("DATA"): 2405 return self._parse_withdata(no=False) 2406 elif self._match_text_seq("NO", "DATA"): 2407 return self._parse_withdata(no=True) 2408 2409 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2410 return self._parse_serde_properties(with_=True) 2411 2412 if self._match(TokenType.SCHEMA): 2413 return self.expression( 2414 exp.WithSchemaBindingProperty, 2415 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2416 ) 2417 2418 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2419 return self.expression( 2420 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2421 ) 2422 2423 if not self._next: 2424 return None 2425 2426 return self._parse_withisolatedloading() 2427 2428 def _parse_procedure_option(self) -> exp.Expression | None: 2429 if self._match_text_seq("EXECUTE", "AS"): 2430 return self.expression( 2431 exp.ExecuteAsProperty, 2432 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2433 or self._parse_string(), 2434 ) 2435 2436 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2437 2438 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2439 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2440 self._match(TokenType.EQ) 2441 2442 user = self._parse_id_var() 2443 self._match(TokenType.PARAMETER) 2444 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2445 2446 if not user or not host: 2447 return None 2448 2449 return exp.DefinerProperty(this=f"{user}@{host}") 2450 2451 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2452 self._match(TokenType.TABLE) 2453 self._match(TokenType.EQ) 2454 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2455 2456 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2457 return self.expression(exp.LogProperty, no=no) 2458 2459 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2460 return self.expression(exp.JournalProperty, **kwargs) 2461 2462 def _parse_checksum(self) -> exp.ChecksumProperty: 2463 self._match(TokenType.EQ) 2464 2465 on = None 2466 if self._match(TokenType.ON): 2467 on = True 2468 elif self._match_text_seq("OFF"): 2469 on = False 2470 2471 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2472 2473 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2474 return self.expression( 2475 
exp.Cluster, 2476 expressions=( 2477 self._parse_wrapped_csv(self._parse_ordered) 2478 if wrapped 2479 else self._parse_csv(self._parse_ordered) 2480 ), 2481 ) 2482 2483 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2484 self._match_text_seq("BY") 2485 2486 self._match_l_paren() 2487 expressions = self._parse_csv(self._parse_column) 2488 self._match_r_paren() 2489 2490 if self._match_text_seq("SORTED", "BY"): 2491 self._match_l_paren() 2492 sorted_by = self._parse_csv(self._parse_ordered) 2493 self._match_r_paren() 2494 else: 2495 sorted_by = None 2496 2497 self._match(TokenType.INTO) 2498 buckets = self._parse_number() 2499 self._match_text_seq("BUCKETS") 2500 2501 return self.expression( 2502 exp.ClusteredByProperty, 2503 expressions=expressions, 2504 sorted_by=sorted_by, 2505 buckets=buckets, 2506 ) 2507 2508 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2509 if not self._match_text_seq("GRANTS"): 2510 self._retreat(self._index - 1) 2511 return None 2512 2513 return self.expression(exp.CopyGrantsProperty) 2514 2515 def _parse_freespace(self) -> exp.FreespaceProperty: 2516 self._match(TokenType.EQ) 2517 return self.expression( 2518 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2519 ) 2520 2521 def _parse_mergeblockratio( 2522 self, no: bool = False, default: bool = False 2523 ) -> exp.MergeBlockRatioProperty: 2524 if self._match(TokenType.EQ): 2525 return self.expression( 2526 exp.MergeBlockRatioProperty, 2527 this=self._parse_number(), 2528 percent=self._match(TokenType.PERCENT), 2529 ) 2530 2531 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2532 2533 def _parse_datablocksize( 2534 self, 2535 default: t.Optional[bool] = None, 2536 minimum: t.Optional[bool] = None, 2537 maximum: t.Optional[bool] = None, 2538 ) -> exp.DataBlocksizeProperty: 2539 self._match(TokenType.EQ) 2540 size = self._parse_number() 2541 2542 units = None 2543 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2544 units = self._prev.text 2545 2546 return self.expression( 2547 exp.DataBlocksizeProperty, 2548 size=size, 2549 units=units, 2550 default=default, 2551 minimum=minimum, 2552 maximum=maximum, 2553 ) 2554 2555 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2556 self._match(TokenType.EQ) 2557 always = self._match_text_seq("ALWAYS") 2558 manual = self._match_text_seq("MANUAL") 2559 never = self._match_text_seq("NEVER") 2560 default = self._match_text_seq("DEFAULT") 2561 2562 autotemp = None 2563 if self._match_text_seq("AUTOTEMP"): 2564 autotemp = self._parse_schema() 2565 2566 return self.expression( 2567 exp.BlockCompressionProperty, 2568 always=always, 2569 manual=manual, 2570 never=never, 2571 default=default, 2572 autotemp=autotemp, 2573 ) 2574 2575 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2576 index = self._index 2577 no = self._match_text_seq("NO") 2578 concurrent = self._match_text_seq("CONCURRENT") 2579 2580 if not self._match_text_seq("ISOLATED", "LOADING"): 2581 self._retreat(index) 2582 return None 2583 2584 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2585 return self.expression( 2586 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2587 ) 2588 2589 def _parse_locking(self) -> exp.LockingProperty: 2590 if self._match(TokenType.TABLE): 2591 kind = "TABLE" 2592 elif self._match(TokenType.VIEW): 2593 kind = "VIEW" 2594 elif self._match(TokenType.ROW): 2595 kind 
= "ROW" 2596 elif self._match_text_seq("DATABASE"): 2597 kind = "DATABASE" 2598 else: 2599 kind = None 2600 2601 if kind in ("DATABASE", "TABLE", "VIEW"): 2602 this = self._parse_table_parts() 2603 else: 2604 this = None 2605 2606 if self._match(TokenType.FOR): 2607 for_or_in = "FOR" 2608 elif self._match(TokenType.IN): 2609 for_or_in = "IN" 2610 else: 2611 for_or_in = None 2612 2613 if self._match_text_seq("ACCESS"): 2614 lock_type = "ACCESS" 2615 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2616 lock_type = "EXCLUSIVE" 2617 elif self._match_text_seq("SHARE"): 2618 lock_type = "SHARE" 2619 elif self._match_text_seq("READ"): 2620 lock_type = "READ" 2621 elif self._match_text_seq("WRITE"): 2622 lock_type = "WRITE" 2623 elif self._match_text_seq("CHECKSUM"): 2624 lock_type = "CHECKSUM" 2625 else: 2626 lock_type = None 2627 2628 override = self._match_text_seq("OVERRIDE") 2629 2630 return self.expression( 2631 exp.LockingProperty, 2632 this=this, 2633 kind=kind, 2634 for_or_in=for_or_in, 2635 lock_type=lock_type, 2636 override=override, 2637 ) 2638 2639 def _parse_partition_by(self) -> t.List[exp.Expression]: 2640 if self._match(TokenType.PARTITION_BY): 2641 return self._parse_csv(self._parse_assignment) 2642 return [] 2643 2644 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2645 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2646 if self._match_text_seq("MINVALUE"): 2647 return exp.var("MINVALUE") 2648 if self._match_text_seq("MAXVALUE"): 2649 return exp.var("MAXVALUE") 2650 return self._parse_bitwise() 2651 2652 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2653 expression = None 2654 from_expressions = None 2655 to_expressions = None 2656 2657 if self._match(TokenType.IN): 2658 this = self._parse_wrapped_csv(self._parse_bitwise) 2659 elif self._match(TokenType.FROM): 2660 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2661 self._match_text_seq("TO") 2662 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2663 elif self._match_text_seq("WITH", "(", "MODULUS"): 2664 this = self._parse_number() 2665 self._match_text_seq(",", "REMAINDER") 2666 expression = self._parse_number() 2667 self._match_r_paren() 2668 else: 2669 self.raise_error("Failed to parse partition bound spec.") 2670 2671 return self.expression( 2672 exp.PartitionBoundSpec, 2673 this=this, 2674 expression=expression, 2675 from_expressions=from_expressions, 2676 to_expressions=to_expressions, 2677 ) 2678 2679 # https://www.postgresql.org/docs/current/sql-createtable.html 2680 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2681 if not self._match_text_seq("OF"): 2682 self._retreat(self._index - 1) 2683 return None 2684 2685 this = self._parse_table(schema=True) 2686 2687 if self._match(TokenType.DEFAULT): 2688 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2689 elif self._match_text_seq("FOR", "VALUES"): 2690 expression = self._parse_partition_bound_spec() 2691 else: 2692 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2693 2694 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2695 2696 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2697 self._match(TokenType.EQ) 2698 return self.expression( 2699 exp.PartitionedByProperty, 2700 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2701 ) 2702 2703 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2704 if self._match_text_seq("AND", 
"STATISTICS"): 2705 statistics = True 2706 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2707 statistics = False 2708 else: 2709 statistics = None 2710 2711 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2712 2713 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2714 if self._match_text_seq("SQL"): 2715 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2716 return None 2717 2718 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2719 if self._match_text_seq("SQL", "DATA"): 2720 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2721 return None 2722 2723 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2724 if self._match_text_seq("PRIMARY", "INDEX"): 2725 return exp.NoPrimaryIndexProperty() 2726 if self._match_text_seq("SQL"): 2727 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2728 return None 2729 2730 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2731 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2732 return exp.OnCommitProperty() 2733 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2734 return exp.OnCommitProperty(delete=True) 2735 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2736 2737 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2738 if self._match_text_seq("SQL", "DATA"): 2739 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2740 return None 2741 2742 def _parse_distkey(self) -> exp.DistKeyProperty: 2743 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2744 2745 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2746 table = self._parse_table(schema=True) 2747 2748 options = [] 2749 while self._match_texts(("INCLUDING", "EXCLUDING")): 2750 this = self._prev.text.upper() 2751 2752 id_var = self._parse_id_var() 2753 if not id_var: 2754 return None 2755 2756 options.append( 2757 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2758 ) 2759 2760 return self.expression(exp.LikeProperty, this=table, expressions=options) 2761 2762 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2763 return self.expression( 2764 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2765 ) 2766 2767 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2768 self._match(TokenType.EQ) 2769 return self.expression( 2770 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2771 ) 2772 2773 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2774 self._match_text_seq("WITH", "CONNECTION") 2775 return self.expression( 2776 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2777 ) 2778 2779 def _parse_returns(self) -> exp.ReturnsProperty: 2780 value: t.Optional[exp.Expression] 2781 null = None 2782 is_table = self._match(TokenType.TABLE) 2783 2784 if is_table: 2785 if self._match(TokenType.LT): 2786 value = self.expression( 2787 exp.Schema, 2788 this="TABLE", 2789 expressions=self._parse_csv(self._parse_struct_types), 2790 ) 2791 if not self._match(TokenType.GT): 2792 self.raise_error("Expecting >") 2793 else: 2794 value = self._parse_schema(exp.var("TABLE")) 2795 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2796 null = True 2797 value = None 2798 else: 2799 value = self._parse_types() 2800 2801 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2802 2803 def _parse_describe(self) -> exp.Describe: 2804 kind = self._match_set(self.CREATABLES) and self._prev.text 2805 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2806 if self._match(TokenType.DOT): 2807 style = None 2808 self._retreat(self._index - 2) 2809 2810 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2811 2812 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2813 this = self._parse_statement() 2814 else: 2815 this = self._parse_table(schema=True) 2816 2817 properties = self._parse_properties() 2818 expressions = properties.expressions if properties else None 2819 partition = self._parse_partition() 2820 return self.expression( 2821 exp.Describe, 2822 this=this, 2823 style=style, 2824 kind=kind, 2825 expressions=expressions, 2826 partition=partition, 2827 format=format, 2828 ) 2829 2830 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2831 kind = self._prev.text.upper() 2832 expressions = [] 2833 2834 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2835 if self._match(TokenType.WHEN): 2836 expression = self._parse_disjunction() 2837 self._match(TokenType.THEN) 2838 else: 2839 expression = None 2840 2841 else_ = self._match(TokenType.ELSE) 2842 2843 if not self._match(TokenType.INTO): 2844 return None 2845 2846 return self.expression( 2847 exp.ConditionalInsert, 2848 this=self.expression( 2849 exp.Insert, 2850 this=self._parse_table(schema=True), 2851 expression=self._parse_derived_table_values(), 2852 ), 2853 expression=expression, 2854 else_=else_, 2855 ) 2856 2857 expression = parse_conditional_insert() 2858 while expression is not None: 2859 expressions.append(expression) 2860 expression = parse_conditional_insert() 2861 2862 return self.expression( 2863 exp.MultitableInserts, 2864 kind=kind, 2865 comments=comments, 2866 expressions=expressions, 2867 source=self._parse_table(), 2868 ) 2869 2870 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2871 comments = [] 2872 hint = self._parse_hint() 2873 overwrite = self._match(TokenType.OVERWRITE) 2874 ignore = self._match(TokenType.IGNORE) 2875 local = self._match_text_seq("LOCAL") 2876 alternative = None 2877 is_function = None 2878 2879 if self._match_text_seq("DIRECTORY"): 2880 this: t.Optional[exp.Expression] = self.expression( 2881 exp.Directory, 2882 this=self._parse_var_or_string(), 2883 local=local, 2884 row_format=self._parse_row_format(match_row=True), 2885 ) 2886 else: 2887 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2888 comments += ensure_list(self._prev_comments) 2889 return self._parse_multitable_inserts(comments) 2890 2891 if self._match(TokenType.OR): 2892 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2893 2894 self._match(TokenType.INTO) 2895 comments += ensure_list(self._prev_comments) 2896 self._match(TokenType.TABLE) 2897 is_function = self._match(TokenType.FUNCTION) 2898 2899 this = ( 2900 self._parse_table(schema=True, parse_partition=True) 2901 if not is_function 2902 else self._parse_function() 2903 ) 2904 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2905 this.set("alias", self._parse_table_alias()) 2906 2907 returning = self._parse_returning() 2908 2909 return self.expression( 2910 exp.Insert, 2911 comments=comments, 2912 hint=hint, 2913 is_function=is_function, 2914 this=this, 
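            # Note: the keyword arguments below are evaluated in order and each parser
            # call consumes tokens as a side effect, so their ordering mirrors the grammar.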
2915 stored=self._match_text_seq("STORED") and self._parse_stored(), 2916 by_name=self._match_text_seq("BY", "NAME"), 2917 exists=self._parse_exists(), 2918 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2919 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2920 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2921 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2922 conflict=self._parse_on_conflict(), 2923 returning=returning or self._parse_returning(), 2924 overwrite=overwrite, 2925 alternative=alternative, 2926 ignore=ignore, 2927 source=self._match(TokenType.TABLE) and self._parse_table(), 2928 ) 2929 2930 def _parse_kill(self) -> exp.Kill: 2931 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2932 2933 return self.expression( 2934 exp.Kill, 2935 this=self._parse_primary(), 2936 kind=kind, 2937 ) 2938 2939 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2940 conflict = self._match_text_seq("ON", "CONFLICT") 2941 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2942 2943 if not conflict and not duplicate: 2944 return None 2945 2946 conflict_keys = None 2947 constraint = None 2948 2949 if conflict: 2950 if self._match_text_seq("ON", "CONSTRAINT"): 2951 constraint = self._parse_id_var() 2952 elif self._match(TokenType.L_PAREN): 2953 conflict_keys = self._parse_csv(self._parse_id_var) 2954 self._match_r_paren() 2955 2956 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2957 if self._prev.token_type == TokenType.UPDATE: 2958 self._match(TokenType.SET) 2959 expressions = self._parse_csv(self._parse_equality) 2960 else: 2961 expressions = None 2962 2963 return self.expression( 2964 exp.OnConflict, 2965 duplicate=duplicate, 2966 expressions=expressions, 2967 action=action, 2968 conflict_keys=conflict_keys, 2969 constraint=constraint, 2970 where=self._parse_where(), 2971 ) 2972 2973 def _parse_returning(self) -> t.Optional[exp.Returning]: 2974 if not self._match(TokenType.RETURNING): 2975 return None 2976 return self.expression( 2977 exp.Returning, 2978 expressions=self._parse_csv(self._parse_expression), 2979 into=self._match(TokenType.INTO) and self._parse_table_part(), 2980 ) 2981 2982 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2983 if not self._match(TokenType.FORMAT): 2984 return None 2985 return self._parse_row_format() 2986 2987 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2988 index = self._index 2989 with_ = with_ or self._match_text_seq("WITH") 2990 2991 if not self._match(TokenType.SERDE_PROPERTIES): 2992 self._retreat(index) 2993 return None 2994 return self.expression( 2995 exp.SerdeProperties, 2996 **{ # type: ignore 2997 "expressions": self._parse_wrapped_properties(), 2998 "with": with_, 2999 }, 3000 ) 3001 3002 def _parse_row_format( 3003 self, match_row: bool = False 3004 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3005 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3006 return None 3007 3008 if self._match_text_seq("SERDE"): 3009 this = self._parse_string() 3010 3011 serde_properties = self._parse_serde_properties() 3012 3013 return self.expression( 3014 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3015 ) 3016 3017 self._match_text_seq("DELIMITED") 3018 3019 kwargs = {} 3020 3021 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3022 kwargs["fields"] = self._parse_string() 3023 if self._match_text_seq("ESCAPED", "BY"): 3024 kwargs["escaped"] = self._parse_string() 3025 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3026 kwargs["collection_items"] = self._parse_string() 3027 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3028 kwargs["map_keys"] = self._parse_string() 3029 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3030 kwargs["lines"] = self._parse_string() 3031 if self._match_text_seq("NULL", "DEFINED", "AS"): 3032 kwargs["null"] = self._parse_string() 3033 3034 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3035 3036 def _parse_load(self) -> exp.LoadData | exp.Command: 3037 if self._match_text_seq("DATA"): 3038 local = self._match_text_seq("LOCAL") 3039 self._match_text_seq("INPATH") 3040 inpath = self._parse_string() 3041 overwrite = self._match(TokenType.OVERWRITE) 3042 self._match_pair(TokenType.INTO, TokenType.TABLE) 3043 3044 return self.expression( 3045 exp.LoadData, 3046 this=self._parse_table(schema=True), 3047 local=local, 3048 overwrite=overwrite, 3049 inpath=inpath, 3050 partition=self._parse_partition(), 3051 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3052 serde=self._match_text_seq("SERDE") and self._parse_string(), 3053 ) 3054 return self._parse_as_command(self._prev) 3055 3056 def _parse_delete(self) -> exp.Delete: 3057 # This handles MySQL's "Multiple-Table Syntax" 3058 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3059 tables = None 3060 if not self._match(TokenType.FROM, advance=False): 3061 tables = self._parse_csv(self._parse_table) or None 3062 3063 returning = self._parse_returning() 3064 3065 return self.expression( 3066 exp.Delete, 3067 tables=tables, 3068 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3069 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3070 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3071 where=self._parse_where(), 3072 returning=returning or self._parse_returning(), 3073 limit=self._parse_limit(), 3074 ) 3075 3076 def _parse_update(self) -> exp.Update: 3077 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3078 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3079 returning = self._parse_returning() 3080 return self.expression( 3081 exp.Update, 3082 **{ # type: ignore 3083 "this": this, 3084 "expressions": expressions, 3085 "from": self._parse_from(joins=True), 3086 "where": self._parse_where(), 3087 "returning": returning or self._parse_returning(), 3088 "order": self._parse_order(), 3089 "limit": self._parse_limit(), 3090 }, 3091 ) 3092 3093 def _parse_use(self) -> exp.Use: 3094 return self.expression( 3095 exp.Use, 3096 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3097 this=self._parse_table(schema=False), 3098 ) 3099 3100 def _parse_uncache(self) -> exp.Uncache: 3101 if not self._match(TokenType.TABLE): 3102 self.raise_error("Expecting TABLE after UNCACHE") 3103 3104 return self.expression( 3105 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3106 ) 3107 3108 def _parse_cache(self) -> exp.Cache: 3109 lazy = self._match_text_seq("LAZY") 3110 self._match(TokenType.TABLE) 3111 table = self._parse_table(schema=True) 3112 3113 options = [] 3114 if self._match_text_seq("OPTIONS"): 3115 self._match_l_paren() 3116 k = 
self._parse_string() 3117 self._match(TokenType.EQ) 3118 v = self._parse_string() 3119 options = [k, v] 3120 self._match_r_paren() 3121 3122 self._match(TokenType.ALIAS) 3123 return self.expression( 3124 exp.Cache, 3125 this=table, 3126 lazy=lazy, 3127 options=options, 3128 expression=self._parse_select(nested=True), 3129 ) 3130 3131 def _parse_partition(self) -> t.Optional[exp.Partition]: 3132 if not self._match_texts(self.PARTITION_KEYWORDS): 3133 return None 3134 3135 return self.expression( 3136 exp.Partition, 3137 subpartition=self._prev.text.upper() == "SUBPARTITION", 3138 expressions=self._parse_wrapped_csv(self._parse_assignment), 3139 ) 3140 3141 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3142 def _parse_value_expression() -> t.Optional[exp.Expression]: 3143 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3144 return exp.var(self._prev.text.upper()) 3145 return self._parse_expression() 3146 3147 if self._match(TokenType.L_PAREN): 3148 expressions = self._parse_csv(_parse_value_expression) 3149 self._match_r_paren() 3150 return self.expression(exp.Tuple, expressions=expressions) 3151 3152 # In some dialects we can have VALUES 1, 2, which results in 1 column & 2 rows. 3153 expression = self._parse_expression() 3154 if expression: 3155 return self.expression(exp.Tuple, expressions=[expression]) 3156 return None 3157 3158 def _parse_projections(self) -> t.List[exp.Expression]: 3159 return self._parse_expressions() 3160 3161 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3162 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3163 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3164 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3165 ) 3166 elif self._match(TokenType.FROM): 3167 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3168 # Support parentheses for DuckDB's FROM-first syntax 3169 select = self._parse_select() 3170 if select: 3171 select.set("from", from_) 3172 this = select 3173 else: 3174 this = exp.select("*").from_(t.cast(exp.From, from_)) 3175 else: 3176 this = ( 3177 self._parse_table(consume_pipe=True) 3178 if table 3179 else self._parse_select(nested=True, parse_set_operation=False) 3180 ) 3181 3182 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3183 # in case a modifier (e.g. a join) follows 3184 if table and isinstance(this, exp.Values) and this.alias: 3185 alias = this.args["alias"].pop() 3186 this = exp.Table(this=this, alias=alias) 3187 3188 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3189 3190 return this 3191 3192 def _parse_select( 3193 self, 3194 nested: bool = False, 3195 table: bool = False, 3196 parse_subquery_alias: bool = True, 3197 parse_set_operation: bool = True, 3198 consume_pipe: bool = True, 3199 ) -> t.Optional[exp.Expression]: 3200 query = self._parse_select_query( 3201 nested=nested, 3202 table=table, 3203 parse_subquery_alias=parse_subquery_alias, 3204 parse_set_operation=parse_set_operation, 3205 ) 3206 3207 if ( 3208 consume_pipe 3209 and self._match(TokenType.PIPE_GT, advance=False) 3210 and isinstance(query, exp.Query) 3211 ): 3212 query = self._parse_pipe_syntax_query(query) 3213 query = query.subquery(copy=False) if query and table else query 3214 3215 return query 3216 3217 def _parse_select_query( 3218 self, 3219 nested: bool = False, 3220 table: bool = False, 3221 parse_subquery_alias: bool = True, 3222 parse_set_operation: bool = True, 3223 ) -> t.Optional[exp.Expression]: 3224 cte = self._parse_with() 3225 3226 if cte: 3227 this = self._parse_statement() 3228 3229 if not this: 3230 self.raise_error("Failed to parse any statement following CTE") 3231 return cte 3232 3233 if "with" in this.arg_types: 3234 this.set("with", cte) 3235 else: 3236 self.raise_error(f"{this.key} does not support CTE") 3237 this = cte 3238 3239 return this 3240 3241 # DuckDB supports a leading FROM clause, e.g. FROM x 3242 from_ = ( 3243 self._parse_from(consume_pipe=True) 3244 if self._match(TokenType.FROM, advance=False) 3245 else None 3246 ) 3247 3248 if self._match(TokenType.SELECT): 3249 comments = self._prev_comments 3250 3251 hint = self._parse_hint() 3252 3253 if self._next and not self._next.token_type == TokenType.DOT: 3254 all_ = self._match(TokenType.ALL) 3255 distinct = self._match_set(self.DISTINCT_TOKENS) 3256 else: 3257 all_, distinct = None, None 3258 3259 kind = ( 3260 self._match(TokenType.ALIAS) 3261 and self._match_texts(("STRUCT", "VALUE")) 3262 and self._prev.text.upper() 3263 ) 3264 3265 if distinct: 3266 distinct = self.expression( 3267 exp.Distinct, 3268 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3269 ) 3270 3271 if all_ and distinct: 3272 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3273 3274 operation_modifiers = [] 3275 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3276 operation_modifiers.append(exp.var(self._prev.text.upper())) 3277 3278 limit = self._parse_limit(top=True) 3279 projections = self._parse_projections() 3280 3281 this = self.expression( 3282 exp.Select, 3283 kind=kind, 3284 hint=hint, 3285 distinct=distinct, 3286 expressions=projections, 3287 limit=limit, 3288 operation_modifiers=operation_modifiers or None, 3289 ) 3290 this.comments = comments 3291 3292 into = self._parse_into() 3293 if into: 3294 this.set("into", into) 3295 3296 if not from_: 3297 from_ = self._parse_from() 3298 3299 if from_: 3300 this.set("from", from_) 3301 3302 this = self._parse_query_modifiers(this) 3303 elif (table or nested) and self._match(TokenType.L_PAREN): 3304 this = self._parse_wrapped_select(table=table) 3305 3306 # We return early here so that the UNION isn't attached to the subquery by the 3307 # following call to _parse_set_operations, but instead becomes the parent node 3308 self._match_r_paren() 3309 return self._parse_subquery(this,
parse_alias=parse_subquery_alias) 3310 elif self._match(TokenType.VALUES, advance=False): 3311 this = self._parse_derived_table_values() 3312 elif from_: 3313 this = exp.select("*").from_(from_.this, copy=False) 3314 elif self._match(TokenType.SUMMARIZE): 3315 table = self._match(TokenType.TABLE) 3316 this = self._parse_select() or self._parse_string() or self._parse_table() 3317 return self.expression(exp.Summarize, this=this, table=table) 3318 elif self._match(TokenType.DESCRIBE): 3319 this = self._parse_describe() 3320 elif self._match_text_seq("STREAM"): 3321 this = self._parse_function() 3322 if this: 3323 this = self.expression(exp.Stream, this=this) 3324 else: 3325 self._retreat(self._index - 1) 3326 else: 3327 this = None 3328 3329 return self._parse_set_operations(this) if parse_set_operation else this 3330 3331 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3332 self._match_text_seq("SEARCH") 3333 3334 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3335 3336 if not kind: 3337 return None 3338 3339 self._match_text_seq("FIRST", "BY") 3340 3341 return self.expression( 3342 exp.RecursiveWithSearch, 3343 kind=kind, 3344 this=self._parse_id_var(), 3345 expression=self._match_text_seq("SET") and self._parse_id_var(), 3346 using=self._match_text_seq("USING") and self._parse_id_var(), 3347 ) 3348 3349 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3350 if not skip_with_token and not self._match(TokenType.WITH): 3351 return None 3352 3353 comments = self._prev_comments 3354 recursive = self._match(TokenType.RECURSIVE) 3355 3356 last_comments = None 3357 expressions = [] 3358 while True: 3359 cte = self._parse_cte() 3360 if isinstance(cte, exp.CTE): 3361 expressions.append(cte) 3362 if last_comments: 3363 cte.add_comments(last_comments) 3364 3365 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3366 break 3367 else: 3368 self._match(TokenType.WITH) 3369 3370 last_comments = self._prev_comments 3371 3372 return self.expression( 3373 exp.With, 3374 comments=comments, 3375 expressions=expressions, 3376 recursive=recursive, 3377 search=self._parse_recursive_with_search(), 3378 ) 3379 3380 def _parse_cte(self) -> t.Optional[exp.CTE]: 3381 index = self._index 3382 3383 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3384 if not alias or not alias.this: 3385 self.raise_error("Expected CTE to have alias") 3386 3387 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3388 self._retreat(index) 3389 return None 3390 3391 comments = self._prev_comments 3392 3393 if self._match_text_seq("NOT", "MATERIALIZED"): 3394 materialized = False 3395 elif self._match_text_seq("MATERIALIZED"): 3396 materialized = True 3397 else: 3398 materialized = None 3399 3400 cte = self.expression( 3401 exp.CTE, 3402 this=self._parse_wrapped(self._parse_statement), 3403 alias=alias, 3404 materialized=materialized, 3405 comments=comments, 3406 ) 3407 3408 values = cte.this 3409 if isinstance(values, exp.Values): 3410 if values.alias: 3411 cte.set("this", exp.select("*").from_(values)) 3412 else: 3413 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3414 3415 return cte 3416 3417 def _parse_table_alias( 3418 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3419 ) -> t.Optional[exp.TableAlias]: 3420 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3421 # so this section tries to parse the clause 

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act both as identifiers and as keywords
        # (clauses), so we first try to parse the clause version; if that fails, the token
        # is treated as an identifier (alias) instead.
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for join in this.args.get("joins") or []:
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
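
    # A sketch of the LIMIT/OFFSET normalization above, assuming MySQL input, where
    # `LIMIT <offset>, <count>` is split into separate Limit and Offset nodes:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT * FROM t LIMIT 10, 5", read="mysql", write="postgres")[0]
    #   'SELECT * FROM t LIMIT 5 OFFSET 10'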

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
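
    # How JOIN ... USING surfaces in the AST (parsed by _parse_join below, together with
    # _parse_using_identifiers above); a sketch assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").find(exp.Join)
    #   >>> [ident.name for ident in join.args["using"]]
    #   ['id']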

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
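
    # Comma-separated tables are parsed as (cross) joins by _parse_join above; a sketch
    # assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> bool(sqlglot.parse_one("SELECT * FROM a, b").find(exp.Join))
    #   True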

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
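
    # Dotted names are decomposed into catalog/db/table parts by _parse_table_parts below;
    # a sketch assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> table = sqlglot.parse_one("SELECT * FROM c.d.tbl").find(exp.Table)
    #   >>> (table.catalog, table.db, table.name)
    #   ('c', 'd', 'tbl')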

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
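
    # A sketch of UNNEST parsing, assuming BigQuery (a dialect where UNNEST aliases are
    # column-only, i.e. UNNEST_COLUMN_ONLY is set):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "SELECT * FROM UNNEST([1, 2]) AS x"
    #   >>> unnest = sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest)
    #   >>> unnest.args["alias"].columns[0].name
    #   'x'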

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
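
    # A sketch of TABLESAMPLE parsing, assuming Snowflake syntax:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> query = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10)", read="snowflake")
    #   >>> bool(query.find(exp.TableSample))
    #   True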

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)
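
    # A sketch of PIVOT parsing (handled by _parse_pivot below), assuming Snowflake syntax:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR y IN ('a', 'b'))"
    #   >>> bool(sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot))
    #   True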

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns,
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

                pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )
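
    # A sketch of GROUP BY parsing (handled by _parse_group below), assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> bool(sqlglot.parse_one("SELECT a FROM t GROUP BY ROLLUP (a)").find(exp.Rollup))
    #   True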

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
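
    # A sketch of explicit null ordering in _parse_ordered below, assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> query = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS LAST")
    #   >>> query.find(exp.Ordered).args["nulls_first"]
    #   False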

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )
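
    # A sketch of LIMIT parsing, assuming the default dialect; the Limit node hangs off the
    # Select's "limit" arg, and literal values are stored as strings:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT x FROM t LIMIT 5").args["limit"].expression.this
    #   '5'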

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
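
    # A sketch of set operation parsing via parse_set_operation above, assuming the default
    # dialect; UNION ALL yields a non-distinct Union node:
    #
    #   >>> import sqlglot
    #   >>> union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    #   >>> type(union).__name__, union.args["distinct"]
    #   ('Union', False)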

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
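
    # The chain above (_parse_assignment -> disjunction -> ... -> comparison) implements
    # operator precedence by recursive descent; a sketch assuming the default dialect:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("1 + 2 * 3")).__name__
    #   'Add'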

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
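
    # A sketch of IS DISTINCT FROM parsing (see _parse_is above), assuming the default
    # dialect; it is canonicalized to a null-safe inequality:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("x IS DISTINCT FROM y")).__name__
    #   'NullSafeNEQ'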

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
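
    # A sketch of the INTERVAL canonicalization performed by _parse_interval above, assuming
    # the default dialect:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT INTERVAL '5 day'")[0]
    #   "SELECT INTERVAL '5' DAY"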

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
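
    # A sketch of the <type> <literal> constructor handled in _parse_type below, assuming
    # the default dialect; DATE '...' is canonicalized to a CAST:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT DATE '2020-01-01'")[0]
    #   "SELECT CAST('2020-01-01' AS DATE)"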
5216 if data_type.expressions and index2 - index > 1: 5217 self._retreat(index2) 5218 return self._parse_column_ops(data_type) 5219 5220 self._retreat(index) 5221 5222 if fallback_to_identifier: 5223 return self._parse_id_var() 5224 5225 this = self._parse_column() 5226 return this and self._parse_column_ops(this) 5227 5228 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5229 this = self._parse_type() 5230 if not this: 5231 return None 5232 5233 if isinstance(this, exp.Column) and not this.table: 5234 this = exp.var(this.name.upper()) 5235 5236 return self.expression( 5237 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5238 ) 5239 5240 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5241 type_name = identifier.name 5242 5243 while self._match(TokenType.DOT): 5244 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5245 5246 return exp.DataType.build(type_name, udt=True) 5247 5248 def _parse_types( 5249 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5250 ) -> t.Optional[exp.Expression]: 5251 index = self._index 5252 5253 this: t.Optional[exp.Expression] = None 5254 prefix = self._match_text_seq("SYSUDTLIB", ".") 5255 5256 if not self._match_set(self.TYPE_TOKENS): 5257 identifier = allow_identifiers and self._parse_id_var( 5258 any_token=False, tokens=(TokenType.VAR,) 5259 ) 5260 if isinstance(identifier, exp.Identifier): 5261 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5262 5263 if len(tokens) != 1: 5264 self.raise_error("Unexpected identifier", self._prev) 5265 5266 if tokens[0].token_type in self.TYPE_TOKENS: 5267 self._prev = tokens[0] 5268 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5269 this = self._parse_user_defined_type(identifier) 5270 else: 5271 self._retreat(self._index - 1) 5272 return None 5273 else: 5274 return None 5275 5276 type_token = self._prev.token_type 5277 5278 if type_token == TokenType.PSEUDO_TYPE: 5279 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5280 5281 if type_token == TokenType.OBJECT_IDENTIFIER: 5282 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5283 5284 # https://materialize.com/docs/sql/types/map/ 5285 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5286 key_type = self._parse_types( 5287 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5288 ) 5289 if not self._match(TokenType.FARROW): 5290 self._retreat(index) 5291 return None 5292 5293 value_type = self._parse_types( 5294 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5295 ) 5296 if not self._match(TokenType.R_BRACKET): 5297 self._retreat(index) 5298 return None 5299 5300 return exp.DataType( 5301 this=exp.DataType.Type.MAP, 5302 expressions=[key_type, value_type], 5303 nested=True, 5304 prefix=prefix, 5305 ) 5306 5307 nested = type_token in self.NESTED_TYPE_TOKENS 5308 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5309 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5310 expressions = None 5311 maybe_func = False 5312 5313 if self._match(TokenType.L_PAREN): 5314 if is_struct: 5315 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5316 elif nested: 5317 expressions = self._parse_csv( 5318 lambda: self._parse_types( 5319 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5320 ) 5321 ) 5322 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5323 this = expressions[0] 5324 this.set("nullable", True) 5325 self._match_r_paren() 5326 return this 5327 elif type_token in self.ENUM_TYPE_TOKENS: 5328 expressions = self._parse_csv(self._parse_equality) 5329 elif is_aggregate: 5330 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5331 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5332 ) 5333 if not func_or_ident: 5334 return None 5335 expressions = [func_or_ident] 5336 if self._match(TokenType.COMMA): 5337 expressions.extend( 5338 self._parse_csv( 5339 lambda: self._parse_types( 5340 check_func=check_func, 5341 schema=schema, 5342 allow_identifiers=allow_identifiers, 5343 ) 5344 ) 5345 ) 5346 else: 5347 expressions = self._parse_csv(self._parse_type_size) 5348 5349 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5350 if type_token == TokenType.VECTOR and len(expressions) == 2: 5351 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5352 5353 if not expressions or not self._match(TokenType.R_PAREN): 5354 self._retreat(index) 5355 return None 5356 5357 maybe_func = True 5358 5359 values: t.Optional[t.List[exp.Expression]] = None 5360 5361 if nested and self._match(TokenType.LT): 5362 if is_struct: 5363 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5364 else: 5365 expressions = self._parse_csv( 5366 lambda: self._parse_types( 5367 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5368 ) 5369 ) 5370 5371 if not self._match(TokenType.GT): 5372 self.raise_error("Expecting >") 5373 5374 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5375 values = self._parse_csv(self._parse_assignment) 5376 if not values and is_struct: 5377 values = None 5378 self._retreat(self._index - 1) 5379 else: 5380 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5381 5382 if type_token in self.TIMESTAMPS: 5383 if self._match_text_seq("WITH", "TIME", "ZONE"): 5384 maybe_func = False 5385 tz_type = ( 5386 exp.DataType.Type.TIMETZ 5387 if type_token in self.TIMES 5388 else exp.DataType.Type.TIMESTAMPTZ 5389 ) 5390 this = exp.DataType(this=tz_type, expressions=expressions) 5391 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5392 maybe_func = False 5393 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5394 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5395 maybe_func = False 5396 elif type_token == TokenType.INTERVAL: 5397 unit = self._parse_var(upper=True) 5398 if unit: 5399 if self._match_text_seq("TO"): 5400 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5401 5402 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5403 else: 5404 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5405 elif type_token == TokenType.VOID: 5406 this = exp.DataType(this=exp.DataType.Type.NULL) 5407 5408 if maybe_func and check_func: 5409 index2 = self._index 5410 peek = self._parse_string() 5411 5412 if not peek: 5413 self._retreat(index) 5414 return None 5415 5416 self._retreat(index2) 5417 5418 if not this: 5419 if self._match_text_seq("UNSIGNED"): 5420 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5421 if not unsigned_type_token: 5422 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5423 5424 type_token = unsigned_type_token or type_token 5425 5426 this = exp.DataType( 5427 this=exp.DataType.Type[type_token.value], 5428 expressions=expressions, 
5429                nested=nested,
5430                prefix=prefix,
5431            )
5432
5433            # Empty arrays/structs are allowed
5434            if values is not None:
5435                cls = exp.Struct if is_struct else exp.Array
5436                this = exp.cast(cls(expressions=values), this, copy=False)
5437
5438        elif expressions:
5439            this.set("expressions", expressions)
5440
5441        # https://materialize.com/docs/sql/types/list/#type-name
5442        while self._match(TokenType.LIST):
5443            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)
5444
5445        index = self._index
5446
5447        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
5448        matched_array = self._match(TokenType.ARRAY)
5449
5450        while self._curr:
5451            datatype_token = self._prev.token_type
5452            matched_l_bracket = self._match(TokenType.L_BRACKET)
5453
5454            if (not matched_l_bracket and not matched_array) or (
5455                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
5456            ):
5457                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
5458                # not to be confused with the fixed size array parsing
5459                break
5460
5461            matched_array = False
5462            values = self._parse_csv(self._parse_assignment) or None
5463            if (
5464                values
5465                and not schema
5466                and (
5467                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
5468                )
5469            ):
5470                # Retreating here means we should not parse the following values as part of the
5471                # data type; e.g. in DuckDB, ARRAY[1] should be parsed into exp.Array, in contrast
5472                # to INT[x][y], which denotes a fixed-size array data type
5473                self._retreat(index)
5474                break
5475
5476            this = exp.DataType(
5477                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
5478            )
5479            self._match(TokenType.R_BRACKET)
5480
5481        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
5482            converter = self.TYPE_CONVERTERS.get(this.this)
5483            if converter:
5484                this = converter(t.cast(exp.DataType, this))
5485
5486        return this
5487
5488    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
5489        index = self._index
5490
5491        if (
5492            self._curr
5493            and self._next
5494            and self._curr.token_type in self.TYPE_TOKENS
5495            and self._next.token_type in self.TYPE_TOKENS
5496        ):
5497            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
5498            # type token. Without this, the list will be parsed as a type and we'll eventually crash
5499            this = self._parse_id_var()
5500        else:
5501            this = (
5502                self._parse_type(parse_interval=False, fallback_to_identifier=True)
5503                or self._parse_id_var()
5504            )
5505
5506        self._match(TokenType.COLON)
5507
5508        if (
5509            type_required
5510            and not isinstance(this, exp.DataType)
5511            and not self._match_set(self.TYPE_TOKENS, advance=False)
5512        ):
5513            self._retreat(index)
5514            return self._parse_types()
5515
5516        return self._parse_column_def(this)
5517
5518    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
5519        if not self._match_text_seq("AT", "TIME", "ZONE"):
5520            return this
5521        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
5522
5523    def _parse_column(self) -> t.Optional[exp.Expression]:
5524        this = self._parse_column_reference()
5525        column = self._parse_column_ops(this) if this else self._parse_bracket(this)
5526
5527        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
5528            column.set("join_mark", self._match(TokenType.JOIN_MARKER))
5529
5530        return column
5531
5532    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
5533        this = self._parse_field()
5534        if (
5535            not this
5536            and self._match(TokenType.VALUES, advance=False)
5537            and self.VALUES_FOLLOWED_BY_PAREN
5538            and (not self._next or self._next.token_type != TokenType.L_PAREN)
5539        ):
5540            this = self._parse_id_var()
5541
5542        if isinstance(this, exp.Identifier):
5543            # We bubble up comments from the Identifier to the Column
5544            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)
5545
5546        return this
5547
5548    def _parse_colon_as_variant_extract(
5549        self, this: t.Optional[exp.Expression]
5550    ) -> t.Optional[exp.Expression]:
5551        casts = []
5552        json_path = []
5553        escape = None
5554
5555        while self._match(TokenType.COLON):
5556            start_index = self._index
5557
5558            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_token=True
5559            path = self._parse_column_ops(
5560                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
5561            )
5562
5563            # The cast :: operator has a lower precedence than the extraction operator :, so
5564            # we rearrange the AST appropriately to avoid casting the JSON path
5565            while isinstance(path, exp.Cast):
5566                casts.append(path.to)
5567                path = path.this
5568
5569            if casts:
5570                dcolon_offset = next(
5571                    i
5572                    for i, t in enumerate(self._tokens[start_index:])
5573                    if t.token_type == TokenType.DCOLON
5574                )
5575                end_token = self._tokens[start_index + dcolon_offset - 1]
5576            else:
5577                end_token = self._prev
5578
5579            if path:
5580                # Escape single quotes from Snowflake's colon extraction (e.g.
col:"a'b") as 5580 # it'll roundtrip to a string literal in GET_PATH 5581 if isinstance(path, exp.Identifier) and path.quoted: 5582 escape = True 5583 5584 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5585 5586 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5587 # Databricks transforms it back to the colon/dot notation 5588 if json_path: 5589 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5590 5591 if json_path_expr: 5592 json_path_expr.set("escape", escape) 5593 5594 this = self.expression( 5595 exp.JSONExtract, 5596 this=this, 5597 expression=json_path_expr, 5598 variant_extract=True, 5599 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5600 ) 5601 5602 while casts: 5603 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5604 5605 return this 5606 5607 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5608 return self._parse_types() 5609 5610 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5611 this = self._parse_bracket(this) 5612 5613 while self._match_set(self.COLUMN_OPERATORS): 5614 op_token = self._prev.token_type 5615 op = self.COLUMN_OPERATORS.get(op_token) 5616 5617 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5618 field = self._parse_dcolon() 5619 if not field: 5620 self.raise_error("Expected type") 5621 elif op and self._curr: 5622 field = self._parse_column_reference() or self._parse_bracket() 5623 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5624 field = self._parse_column_ops(field) 5625 else: 5626 field = self._parse_field(any_token=True, anonymous_func=True) 5627 5628 # Function calls can be qualified, e.g., x.y.FOO() 5629 # This converts the final AST to a series of Dots leading to the function call 5630 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5631 if isinstance(field, (exp.Func, exp.Window)) and this: 5632 this = this.transform( 5633 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5634 ) 5635 5636 if op: 5637 this = op(self, this, field) 5638 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5639 this = self.expression( 5640 exp.Column, 5641 comments=this.comments, 5642 this=field, 5643 table=this.this, 5644 db=this.args.get("table"), 5645 catalog=this.args.get("db"), 5646 ) 5647 elif isinstance(field, exp.Window): 5648 # Move the exp.Dot's to the window's function 5649 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5650 field.set("this", window_func) 5651 this = field 5652 else: 5653 this = self.expression(exp.Dot, this=this, expression=field) 5654 5655 if field and field.comments: 5656 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5657 5658 this = self._parse_bracket(this) 5659 5660 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5661 5662 def _parse_paren(self) -> t.Optional[exp.Expression]: 5663 if not self._match(TokenType.L_PAREN): 5664 return None 5665 5666 comments = self._prev_comments 5667 query = self._parse_select() 5668 5669 if query: 5670 expressions = [query] 5671 else: 5672 expressions = self._parse_expressions() 5673 5674 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5675 5676 if not this and self._match(TokenType.R_PAREN, advance=False): 5677 this = self.expression(exp.Tuple) 5678 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5679 this = self._parse_subquery(this=this, parse_alias=False) 5680 elif isinstance(this, exp.Subquery): 5681 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5682 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5683 this = self.expression(exp.Tuple, expressions=expressions) 5684 else: 5685 this = self.expression(exp.Paren, this=this) 5686 5687 if this: 5688 this.add_comments(comments) 5689 5690 self._match_r_paren(expression=this) 5691 return this 5692 5693 def _parse_primary(self) -> t.Optional[exp.Expression]: 5694 if self._match_set(self.PRIMARY_PARSERS): 5695 token_type = self._prev.token_type 5696 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5697 5698 if token_type == TokenType.STRING: 5699 expressions = [primary] 5700 while self._match(TokenType.STRING): 5701 expressions.append(exp.Literal.string(self._prev.text)) 5702 5703 if len(expressions) > 1: 5704 return self.expression(exp.Concat, expressions=expressions) 5705 5706 return primary 5707 5708 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5709 return exp.Literal.number(f"0.{self._prev.text}") 5710 5711 return self._parse_paren() 5712 5713 def _parse_field( 5714 self, 5715 any_token: bool = False, 5716 tokens: t.Optional[t.Collection[TokenType]] = None, 5717 anonymous_func: bool = False, 5718 ) -> t.Optional[exp.Expression]: 5719 if anonymous_func: 5720 field = ( 5721 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5722 or self._parse_primary() 5723 ) 5724 else: 5725 field = self._parse_primary() or self._parse_function( 5726 anonymous=anonymous_func, any_token=any_token 5727 ) 5728 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5729 5730 def _parse_function( 5731 self, 5732 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5733 anonymous: bool = False, 5734 optional_parens: bool = True, 5735 any_token: bool = False, 5736 ) -> t.Optional[exp.Expression]: 5737 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5738 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5739 fn_syntax = False 5740 if ( 5741 self._match(TokenType.L_BRACE, advance=False) 5742 and self._next 5743 and self._next.text.upper() == "FN" 5744 ): 5745 self._advance(2) 5746 fn_syntax = True 5747 5748 func = self._parse_function_call( 5749 functions=functions, 5750 anonymous=anonymous, 5751 optional_parens=optional_parens, 5752 any_token=any_token, 5753 ) 5754 5755 if fn_syntax: 5756 self._match(TokenType.R_BRACE) 5757 5758 return func 5759 5760 def _parse_function_call( 5761 self, 5762 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5763 anonymous: bool = False, 5764 optional_parens: bool = True, 5765 any_token: bool = False, 5766 ) -> t.Optional[exp.Expression]: 5767 if not self._curr: 5768 return None 5769 5770 comments = self._curr.comments 5771 token = self._curr 5772 token_type = self._curr.token_type 5773 this = self._curr.text 5774 upper = this.upper() 5775 5776 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5777 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5778 self._advance() 5779 return self._parse_window(parser(self)) 5780 5781 if not self._next or self._next.token_type != TokenType.L_PAREN: 5782 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5783 self._advance() 5784 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5785 5786 return None 5787 5788 if 
any_token: 5789 if token_type in self.RESERVED_TOKENS: 5790 return None 5791 elif token_type not in self.FUNC_TOKENS: 5792 return None 5793 5794 self._advance(2) 5795 5796 parser = self.FUNCTION_PARSERS.get(upper) 5797 if parser and not anonymous: 5798 this = parser(self) 5799 else: 5800 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5801 5802 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5803 this = self.expression( 5804 subquery_predicate, comments=comments, this=self._parse_select() 5805 ) 5806 self._match_r_paren() 5807 return this 5808 5809 if functions is None: 5810 functions = self.FUNCTIONS 5811 5812 function = functions.get(upper) 5813 known_function = function and not anonymous 5814 5815 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5816 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5817 5818 post_func_comments = self._curr and self._curr.comments 5819 if known_function and post_func_comments: 5820 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5821 # call we'll construct it as exp.Anonymous, even if it's "known" 5822 if any( 5823 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5824 for comment in post_func_comments 5825 ): 5826 known_function = False 5827 5828 if alias and known_function: 5829 args = self._kv_to_prop_eq(args) 5830 5831 if known_function: 5832 func_builder = t.cast(t.Callable, function) 5833 5834 if "dialect" in func_builder.__code__.co_varnames: 5835 func = func_builder(args, dialect=self.dialect) 5836 else: 5837 func = func_builder(args) 5838 5839 func = self.validate_expression(func, args) 5840 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5841 func.meta["name"] = this 5842 5843 this = func 5844 else: 5845 if token_type == TokenType.IDENTIFIER: 5846 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5847 5848 this = self.expression(exp.Anonymous, this=this, expressions=args) 5849 this = this.update_positions(token) 5850 5851 if isinstance(this, exp.Expression): 5852 this.add_comments(comments) 5853 5854 self._match_r_paren(this) 5855 return self._parse_window(this) 5856 5857 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5858 return expression 5859 5860 def _kv_to_prop_eq( 5861 self, expressions: t.List[exp.Expression], parse_map: bool = False 5862 ) -> t.List[exp.Expression]: 5863 transformed = [] 5864 5865 for index, e in enumerate(expressions): 5866 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5867 if isinstance(e, exp.Alias): 5868 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5869 5870 if not isinstance(e, exp.PropertyEQ): 5871 e = self.expression( 5872 exp.PropertyEQ, 5873 this=e.this if parse_map else exp.to_identifier(e.this.name), 5874 expression=e.expression, 5875 ) 5876 5877 if isinstance(e.this, exp.Column): 5878 e.this.replace(e.this.this) 5879 else: 5880 e = self._to_prop_eq(e, index) 5881 5882 transformed.append(e) 5883 5884 return transformed 5885 5886 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5887 return self._parse_statement() 5888 5889 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5890 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5891 5892 def _parse_user_defined_function( 5893 self, kind: t.Optional[TokenType] = None 5894 ) -> t.Optional[exp.Expression]: 5895 this = self._parse_table_parts(schema=True) 5896 5897 if not 
self._match(TokenType.L_PAREN): 5898 return this 5899 5900 expressions = self._parse_csv(self._parse_function_parameter) 5901 self._match_r_paren() 5902 return self.expression( 5903 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5904 ) 5905 5906 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5907 literal = self._parse_primary() 5908 if literal: 5909 return self.expression(exp.Introducer, this=token.text, expression=literal) 5910 5911 return self._identifier_expression(token) 5912 5913 def _parse_session_parameter(self) -> exp.SessionParameter: 5914 kind = None 5915 this = self._parse_id_var() or self._parse_primary() 5916 5917 if this and self._match(TokenType.DOT): 5918 kind = this.name 5919 this = self._parse_var() or self._parse_primary() 5920 5921 return self.expression(exp.SessionParameter, this=this, kind=kind) 5922 5923 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5924 return self._parse_id_var() 5925 5926 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5927 index = self._index 5928 5929 if self._match(TokenType.L_PAREN): 5930 expressions = t.cast( 5931 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5932 ) 5933 5934 if not self._match(TokenType.R_PAREN): 5935 self._retreat(index) 5936 else: 5937 expressions = [self._parse_lambda_arg()] 5938 5939 if self._match_set(self.LAMBDAS): 5940 return self.LAMBDAS[self._prev.token_type](self, expressions) 5941 5942 self._retreat(index) 5943 5944 this: t.Optional[exp.Expression] 5945 5946 if self._match(TokenType.DISTINCT): 5947 this = self.expression( 5948 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5949 ) 5950 else: 5951 this = self._parse_select_or_expression(alias=alias) 5952 5953 return self._parse_limit( 5954 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5955 ) 5956 5957 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5958 index = self._index 5959 if not self._match(TokenType.L_PAREN): 5960 return this 5961 5962 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5963 # expr can be of both types 5964 if self._match_set(self.SELECT_START_TOKENS): 5965 self._retreat(index) 5966 return this 5967 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5968 self._match_r_paren() 5969 return self.expression(exp.Schema, this=this, expressions=args) 5970 5971 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5972 return self._parse_column_def(self._parse_field(any_token=True)) 5973 5974 def _parse_column_def( 5975 self, this: t.Optional[exp.Expression], computed_column: bool = True 5976 ) -> t.Optional[exp.Expression]: 5977 # column defs are not really columns, they're identifiers 5978 if isinstance(this, exp.Column): 5979 this = this.this 5980 5981 if not computed_column: 5982 self._match(TokenType.ALIAS) 5983 5984 kind = self._parse_types(schema=True) 5985 5986 if self._match_text_seq("FOR", "ORDINALITY"): 5987 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5988 5989 constraints: t.List[exp.Expression] = [] 5990 5991 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5992 ("ALIAS", "MATERIALIZED") 5993 ): 5994 persisted = self._prev.text.upper() == "MATERIALIZED" 5995 constraint_kind = exp.ComputedColumnConstraint( 5996 this=self._parse_assignment(), 5997 persisted=persisted or self._match_text_seq("PERSISTED"), 5998 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5999 ) 6000 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6001 elif ( 6002 kind 6003 and self._match(TokenType.ALIAS, advance=False) 6004 and ( 6005 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6006 or (self._next and self._next.token_type == TokenType.L_PAREN) 6007 ) 6008 ): 6009 self._advance() 6010 constraints.append( 6011 self.expression( 6012 exp.ColumnConstraint, 6013 kind=exp.ComputedColumnConstraint( 6014 this=self._parse_disjunction(), 6015 persisted=self._match_texts(("STORED", "VIRTUAL")) 6016 and self._prev.text.upper() == "STORED", 6017 ), 6018 ) 6019 ) 6020 6021 while True: 6022 constraint = self._parse_column_constraint() 6023 if not constraint: 6024 break 6025 constraints.append(constraint) 6026 6027 if not kind and not constraints: 6028 return this 6029 6030 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6031 6032 def _parse_auto_increment( 6033 self, 6034 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6035 start = None 6036 increment = None 6037 order = None 6038 6039 if self._match(TokenType.L_PAREN, advance=False): 6040 args = self._parse_wrapped_csv(self._parse_bitwise) 6041 start = seq_get(args, 0) 6042 increment = seq_get(args, 1) 6043 elif self._match_text_seq("START"): 6044 start = self._parse_bitwise() 6045 self._match_text_seq("INCREMENT") 6046 increment = self._parse_bitwise() 6047 if self._match_text_seq("ORDER"): 6048 order = True 6049 elif self._match_text_seq("NOORDER"): 6050 order = False 6051 6052 if start and increment: 6053 return exp.GeneratedAsIdentityColumnConstraint( 6054 start=start, increment=increment, this=False, order=order 6055 ) 6056 6057 return exp.AutoIncrementColumnConstraint() 6058 6059 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6060 if not self._match_text_seq("REFRESH"): 6061 self._retreat(self._index - 1) 6062 return None 6063 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6064 6065 def _parse_compress(self) -> exp.CompressColumnConstraint: 6066 if 
self._match(TokenType.L_PAREN, advance=False): 6067 return self.expression( 6068 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6069 ) 6070 6071 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6072 6073 def _parse_generated_as_identity( 6074 self, 6075 ) -> ( 6076 exp.GeneratedAsIdentityColumnConstraint 6077 | exp.ComputedColumnConstraint 6078 | exp.GeneratedAsRowColumnConstraint 6079 ): 6080 if self._match_text_seq("BY", "DEFAULT"): 6081 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6082 this = self.expression( 6083 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6084 ) 6085 else: 6086 self._match_text_seq("ALWAYS") 6087 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6088 6089 self._match(TokenType.ALIAS) 6090 6091 if self._match_text_seq("ROW"): 6092 start = self._match_text_seq("START") 6093 if not start: 6094 self._match(TokenType.END) 6095 hidden = self._match_text_seq("HIDDEN") 6096 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6097 6098 identity = self._match_text_seq("IDENTITY") 6099 6100 if self._match(TokenType.L_PAREN): 6101 if self._match(TokenType.START_WITH): 6102 this.set("start", self._parse_bitwise()) 6103 if self._match_text_seq("INCREMENT", "BY"): 6104 this.set("increment", self._parse_bitwise()) 6105 if self._match_text_seq("MINVALUE"): 6106 this.set("minvalue", self._parse_bitwise()) 6107 if self._match_text_seq("MAXVALUE"): 6108 this.set("maxvalue", self._parse_bitwise()) 6109 6110 if self._match_text_seq("CYCLE"): 6111 this.set("cycle", True) 6112 elif self._match_text_seq("NO", "CYCLE"): 6113 this.set("cycle", False) 6114 6115 if not identity: 6116 this.set("expression", self._parse_range()) 6117 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6118 args = self._parse_csv(self._parse_bitwise) 6119 this.set("start", seq_get(args, 0)) 6120 this.set("increment", seq_get(args, 1)) 6121 6122 self._match_r_paren() 6123 6124 return this 6125 6126 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6127 self._match_text_seq("LENGTH") 6128 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6129 6130 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6131 if self._match_text_seq("NULL"): 6132 return self.expression(exp.NotNullColumnConstraint) 6133 if self._match_text_seq("CASESPECIFIC"): 6134 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6135 if self._match_text_seq("FOR", "REPLICATION"): 6136 return self.expression(exp.NotForReplicationColumnConstraint) 6137 6138 # Unconsume the `NOT` token 6139 self._retreat(self._index - 1) 6140 return None 6141 6142 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6143 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6144 6145 procedure_option_follows = ( 6146 self._match(TokenType.WITH, advance=False) 6147 and self._next 6148 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6149 ) 6150 6151 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6152 return self.expression( 6153 exp.ColumnConstraint, 6154 this=this, 6155 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6156 ) 6157 6158 return this 6159 6160 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6161 if not self._match(TokenType.CONSTRAINT): 6162 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6163 6164 return self.expression( 6165 exp.Constraint, 6166 this=self._parse_id_var(), 6167 expressions=self._parse_unnamed_constraints(), 6168 ) 6169 6170 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6171 constraints = [] 6172 while True: 6173 constraint = self._parse_unnamed_constraint() or self._parse_function() 6174 if not constraint: 6175 break 6176 constraints.append(constraint) 6177 6178 return constraints 6179 6180 def _parse_unnamed_constraint( 6181 self, constraints: t.Optional[t.Collection[str]] = None 6182 ) -> t.Optional[exp.Expression]: 6183 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6184 constraints or self.CONSTRAINT_PARSERS 6185 ): 6186 return None 6187 6188 constraint = self._prev.text.upper() 6189 if constraint not in self.CONSTRAINT_PARSERS: 6190 self.raise_error(f"No parser found for schema constraint {constraint}.") 6191 6192 return self.CONSTRAINT_PARSERS[constraint](self) 6193 6194 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6195 return self._parse_id_var(any_token=False) 6196 6197 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6198 self._match_text_seq("KEY") 6199 return self.expression( 6200 exp.UniqueColumnConstraint, 6201 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6202 this=self._parse_schema(self._parse_unique_key()), 6203 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6204 on_conflict=self._parse_on_conflict(), 6205 options=self._parse_key_constraint_options(), 6206 ) 6207 6208 def _parse_key_constraint_options(self) -> t.List[str]: 6209 options = [] 6210 while True: 6211 if not self._curr: 6212 break 6213 6214 if self._match(TokenType.ON): 6215 action = None 6216 on = self._advance_any() and self._prev.text 6217 6218 if self._match_text_seq("NO", "ACTION"): 6219 action = "NO ACTION" 6220 elif self._match_text_seq("CASCADE"): 6221 action = "CASCADE" 6222 elif self._match_text_seq("RESTRICT"): 6223 action = "RESTRICT" 6224 elif self._match_pair(TokenType.SET, TokenType.NULL): 6225 action = "SET NULL" 6226 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6227 action = "SET DEFAULT" 6228 else: 6229 self.raise_error("Invalid key constraint") 6230 6231 options.append(f"ON {on} {action}") 6232 else: 6233 var = self._parse_var_from_options( 6234 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6235 ) 6236 if not var: 6237 break 6238 options.append(var.name) 6239 6240 return options 6241 6242 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6243 if match and not self._match(TokenType.REFERENCES): 6244 return None 6245 6246 expressions = None 6247 this = self._parse_table(schema=True) 6248 options = self._parse_key_constraint_options() 6249 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6250 6251 def _parse_foreign_key(self) -> exp.ForeignKey: 6252 expressions = ( 6253 self._parse_wrapped_id_vars() 6254 if not self._match(TokenType.REFERENCES, advance=False) 6255 else None 6256 ) 6257 reference = self._parse_references() 6258 on_options = {} 6259 6260 while self._match(TokenType.ON): 6261 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6262 self.raise_error("Expected DELETE or UPDATE") 6263 6264 kind = self._prev.text.lower() 6265 6266 if self._match_text_seq("NO", "ACTION"): 6267 action = "NO ACTION" 6268 elif self._match(TokenType.SET): 6269 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6270 action = "SET " + self._prev.text.upper() 6271 else: 6272 self._advance() 6273 action = self._prev.text.upper() 6274 6275 on_options[kind] = action 6276 6277 return self.expression( 6278 exp.ForeignKey, 6279 expressions=expressions, 6280 reference=reference, 6281 options=self._parse_key_constraint_options(), 6282 **on_options, # type: ignore 6283 ) 6284 6285 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6286 return self._parse_ordered() or self._parse_field() 6287 6288 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6289 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6290 self._retreat(self._index - 1) 6291 return None 6292 6293 id_vars = self._parse_wrapped_id_vars() 6294 return self.expression( 6295 exp.PeriodForSystemTimeConstraint, 6296 this=seq_get(id_vars, 0), 6297 expression=seq_get(id_vars, 1), 6298 ) 6299 6300 def _parse_primary_key( 6301 self, wrapped_optional: bool = False, in_props: bool = False 6302 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6303 desc = ( 6304 self._match_set((TokenType.ASC, TokenType.DESC)) 6305 and self._prev.token_type == TokenType.DESC 6306 ) 6307 6308 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6309 return self.expression( 6310 exp.PrimaryKeyColumnConstraint, 6311 desc=desc, 6312 options=self._parse_key_constraint_options(), 6313 ) 6314 6315 expressions = self._parse_wrapped_csv( 6316 self._parse_primary_key_part, optional=wrapped_optional 6317 ) 6318 6319 return self.expression( 6320 exp.PrimaryKey, 6321 expressions=expressions, 6322 include=self._parse_index_params(), 6323 options=self._parse_key_constraint_options(), 6324 ) 6325 6326 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6327 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6328 6329 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6330 """ 6331 Parses a datetime column in ODBC format. We parse the column into the corresponding 6332 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6333 same as we did for `DATE('yyyy-mm-dd')`. 
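
        Illustrative example (not part of the original docstring): with the default
        dialect, `sqlglot.parse_one("SELECT {d '2024-01-01'}")` should produce the
        same AST as `sqlglot.parse_one("SELECT DATE('2024-01-01')")`.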
6334 6335 Reference: 6336 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6337 """ 6338 self._match(TokenType.VAR) 6339 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6340 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6341 if not self._match(TokenType.R_BRACE): 6342 self.raise_error("Expected }") 6343 return expression 6344 6345 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6346 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6347 return this 6348 6349 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6350 map_token = seq_get(self._tokens, self._index - 2) 6351 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6352 else: 6353 parse_map = False 6354 6355 bracket_kind = self._prev.token_type 6356 if ( 6357 bracket_kind == TokenType.L_BRACE 6358 and self._curr 6359 and self._curr.token_type == TokenType.VAR 6360 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6361 ): 6362 return self._parse_odbc_datetime_literal() 6363 6364 expressions = self._parse_csv( 6365 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6366 ) 6367 6368 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6369 self.raise_error("Expected ]") 6370 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6371 self.raise_error("Expected }") 6372 6373 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6374 if bracket_kind == TokenType.L_BRACE: 6375 this = self.expression( 6376 exp.Struct, 6377 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6378 ) 6379 elif not this: 6380 this = build_array_constructor( 6381 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6382 ) 6383 else: 6384 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6385 if constructor_type: 6386 return build_array_constructor( 6387 constructor_type, 6388 args=expressions, 6389 bracket_kind=bracket_kind, 6390 dialect=self.dialect, 6391 ) 6392 6393 expressions = apply_index_offset( 6394 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6395 ) 6396 this = self.expression( 6397 exp.Bracket, 6398 this=this, 6399 expressions=expressions, 6400 comments=this.pop_comments(), 6401 ) 6402 6403 self._add_comments(this) 6404 return self._parse_bracket(this) 6405 6406 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6407 if self._match(TokenType.COLON): 6408 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6409 return this 6410 6411 def _parse_case(self) -> t.Optional[exp.Expression]: 6412 ifs = [] 6413 default = None 6414 6415 comments = self._prev_comments 6416 expression = self._parse_assignment() 6417 6418 while self._match(TokenType.WHEN): 6419 this = self._parse_assignment() 6420 self._match(TokenType.THEN) 6421 then = self._parse_assignment() 6422 ifs.append(self.expression(exp.If, this=this, true=then)) 6423 6424 if self._match(TokenType.ELSE): 6425 default = self._parse_assignment() 6426 6427 if not self._match(TokenType.END): 6428 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6429 default = exp.column("interval") 6430 else: 6431 self.raise_error("Expected END after CASE", self._prev) 6432 6433 return self.expression( 6434 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6435 ) 6436 6437 def _parse_if(self) -> t.Optional[exp.Expression]: 6438 if self._match(TokenType.L_PAREN): 6439 args = self._parse_csv( 6440 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6441 ) 6442 this = self.validate_expression(exp.If.from_arg_list(args), args) 6443 self._match_r_paren() 6444 else: 6445 index = self._index - 1 6446 6447 if self.NO_PAREN_IF_COMMANDS and index == 0: 6448 return self._parse_as_command(self._prev) 6449 6450 condition = self._parse_assignment() 6451 6452 if not condition: 6453 self._retreat(index) 6454 return None 6455 6456 self._match(TokenType.THEN) 6457 true = self._parse_assignment() 6458 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6459 self._match(TokenType.END) 6460 this = self.expression(exp.If, this=condition, true=true, false=false) 6461 6462 return this 6463 6464 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6465 if not self._match_text_seq("VALUE", "FOR"): 6466 self._retreat(self._index - 1) 6467 return None 6468 6469 return self.expression( 6470 exp.NextValueFor, 6471 this=self._parse_column(), 6472 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6473 ) 6474 6475 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6476 this = self._parse_function() or self._parse_var_or_string(upper=True) 6477 6478 if self._match(TokenType.FROM): 6479 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6480 6481 if not self._match(TokenType.COMMA): 6482 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6483 6484 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6485 6486 def _parse_gap_fill(self) -> exp.GapFill: 6487 self._match(TokenType.TABLE) 6488 this = self._parse_table() 6489 6490 self._match(TokenType.COMMA) 6491 args = [this, *self._parse_csv(self._parse_lambda)] 6492 6493 gap_fill = exp.GapFill.from_arg_list(args) 6494 return self.validate_expression(gap_fill, args) 6495 6496 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6497 this = self._parse_assignment() 6498 6499 if not self._match(TokenType.ALIAS): 6500 if self._match(TokenType.COMMA): 6501 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6502 6503 self.raise_error("Expected AS after CAST") 6504 6505 fmt = None 6506 to = self._parse_types() 6507 6508 default = self._match(TokenType.DEFAULT) 6509 if default: 6510 default = self._parse_bitwise() 6511 self._match_text_seq("ON", "CONVERSION", "ERROR") 6512 6513 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6514 fmt_string = self._parse_string() 6515 fmt = self._parse_at_time_zone(fmt_string) 6516 6517 if not to: 6518 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6519 if to.this in exp.DataType.TEMPORAL_TYPES: 6520 this = self.expression( 6521 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6522 this=this, 6523 format=exp.Literal.string( 6524 format_time( 6525 fmt_string.this if fmt_string else "", 6526 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6527 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6528 ) 6529 ), 6530 safe=safe, 6531 ) 6532 6533 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6534 this.set("zone", fmt.args["zone"]) 6535 return this 6536 elif not to: 6537 self.raise_error("Expected TYPE after CAST") 6538 elif isinstance(to, exp.Identifier): 6539 to = exp.DataType.build(to.name, udt=True) 6540 
elif to.this == exp.DataType.Type.CHAR: 6541 if self._match(TokenType.CHARACTER_SET): 6542 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6543 6544 return self.build_cast( 6545 strict=strict, 6546 this=this, 6547 to=to, 6548 format=fmt, 6549 safe=safe, 6550 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6551 default=default, 6552 ) 6553 6554 def _parse_string_agg(self) -> exp.GroupConcat: 6555 if self._match(TokenType.DISTINCT): 6556 args: t.List[t.Optional[exp.Expression]] = [ 6557 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6558 ] 6559 if self._match(TokenType.COMMA): 6560 args.extend(self._parse_csv(self._parse_assignment)) 6561 else: 6562 args = self._parse_csv(self._parse_assignment) # type: ignore 6563 6564 if self._match_text_seq("ON", "OVERFLOW"): 6565 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6566 if self._match_text_seq("ERROR"): 6567 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6568 else: 6569 self._match_text_seq("TRUNCATE") 6570 on_overflow = self.expression( 6571 exp.OverflowTruncateBehavior, 6572 this=self._parse_string(), 6573 with_count=( 6574 self._match_text_seq("WITH", "COUNT") 6575 or not self._match_text_seq("WITHOUT", "COUNT") 6576 ), 6577 ) 6578 else: 6579 on_overflow = None 6580 6581 index = self._index 6582 if not self._match(TokenType.R_PAREN) and args: 6583 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6584 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6585 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6586 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6587 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6588 6589 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6590 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6591 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
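        #
        # Illustrative example (not from the source): assuming the top-level API, this
        # canonicalization is what makes a round trip such as
        #
        #     sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")
        #
        # expected to come out as "SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t".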
6592 if not self._match_text_seq("WITHIN", "GROUP"): 6593 self._retreat(index) 6594 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6595 6596 # The corresponding match_r_paren will be called in parse_function (caller) 6597 self._match_l_paren() 6598 6599 return self.expression( 6600 exp.GroupConcat, 6601 this=self._parse_order(this=seq_get(args, 0)), 6602 separator=seq_get(args, 1), 6603 on_overflow=on_overflow, 6604 ) 6605 6606 def _parse_convert( 6607 self, strict: bool, safe: t.Optional[bool] = None 6608 ) -> t.Optional[exp.Expression]: 6609 this = self._parse_bitwise() 6610 6611 if self._match(TokenType.USING): 6612 to: t.Optional[exp.Expression] = self.expression( 6613 exp.CharacterSet, this=self._parse_var() 6614 ) 6615 elif self._match(TokenType.COMMA): 6616 to = self._parse_types() 6617 else: 6618 to = None 6619 6620 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6621 6622 def _parse_xml_table(self) -> exp.XMLTable: 6623 namespaces = None 6624 passing = None 6625 columns = None 6626 6627 if self._match_text_seq("XMLNAMESPACES", "("): 6628 namespaces = self._parse_xml_namespace() 6629 self._match_text_seq(")", ",") 6630 6631 this = self._parse_string() 6632 6633 if self._match_text_seq("PASSING"): 6634 # The BY VALUE keywords are optional and are provided for semantic clarity 6635 self._match_text_seq("BY", "VALUE") 6636 passing = self._parse_csv(self._parse_column) 6637 6638 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6639 6640 if self._match_text_seq("COLUMNS"): 6641 columns = self._parse_csv(self._parse_field_def) 6642 6643 return self.expression( 6644 exp.XMLTable, 6645 this=this, 6646 namespaces=namespaces, 6647 passing=passing, 6648 columns=columns, 6649 by_ref=by_ref, 6650 ) 6651 6652 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6653 namespaces = [] 6654 6655 while True: 6656 if self._match(TokenType.DEFAULT): 6657 uri = self._parse_string() 6658 else: 6659 uri = self._parse_alias(self._parse_string()) 6660 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6661 if not self._match(TokenType.COMMA): 6662 break 6663 6664 return namespaces 6665 6666 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6667 args = self._parse_csv(self._parse_assignment) 6668 6669 if len(args) < 3: 6670 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6671 6672 return self.expression(exp.DecodeCase, expressions=args) 6673 6674 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6675 self._match_text_seq("KEY") 6676 key = self._parse_column() 6677 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6678 self._match_text_seq("VALUE") 6679 value = self._parse_bitwise() 6680 6681 if not key and not value: 6682 return None 6683 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6684 6685 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6686 if not this or not self._match_text_seq("FORMAT", "JSON"): 6687 return this 6688 6689 return self.expression(exp.FormatJson, this=this) 6690 6691 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6692 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS)
6693        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
6694            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6695            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6696        else:
6697            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6698            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6699
6700        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)
6701
6702        if not empty and not error and not null:
6703            return None
6704
6705        return self.expression(
6706            exp.OnCondition,
6707            empty=empty,
6708            error=error,
6709            null=null,
6710        )
6711
6712    def _parse_on_handling(
6713        self, on: str, *values: str
6714    ) -> t.Optional[str] | t.Optional[exp.Expression]:
6715        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
6716        for value in values:
6717            if self._match_text_seq(value, "ON", on):
6718                return f"{value} ON {on}"
6719
6720        index = self._index
6721        if self._match(TokenType.DEFAULT):
6722            default_value = self._parse_bitwise()
6723            if self._match_text_seq("ON", on):
6724                return default_value
6725
6726        self._retreat(index)
6727
6728        return None
6729
6730    @t.overload
6731    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
6732
6733    @t.overload
6734    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
6735
6736    def _parse_json_object(self, agg=False):
6737        star = self._parse_star()
6738        expressions = (
6739            [star]
6740            if star
6741            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
6742        )
6743        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")
6744
6745        unique_keys = None
6746        if self._match_text_seq("WITH", "UNIQUE"):
6747            unique_keys = True
6748        elif self._match_text_seq("WITHOUT", "UNIQUE"):
6749            unique_keys = False
6750
6751        self._match_text_seq("KEYS")
6752
6753        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
6754            self._parse_type()
6755        )
6756        encoding = self._match_text_seq("ENCODING") and self._parse_var()
6757
6758        return self.expression(
6759            exp.JSONObjectAgg if agg else exp.JSONObject,
6760            expressions=expressions,
6761            null_handling=null_handling,
6762            unique_keys=unique_keys,
6763            return_type=return_type,
6764            encoding=encoding,
6765        )
6766
6767    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
6768    def _parse_json_column_def(self) -> exp.JSONColumnDef:
6769        if not self._match_text_seq("NESTED"):
6770            this = self._parse_id_var()
6771            kind = self._parse_types(allow_identifiers=False)
6772            nested = None
6773        else:
6774            this = None
6775            kind = None
6776            nested = True
6777
6778        path = self._match_text_seq("PATH") and self._parse_string()
6779        nested_schema = nested and self._parse_json_schema()
6780
6781        return self.expression(
6782            exp.JSONColumnDef,
6783            this=this,
6784            kind=kind,
6785            path=path,
6786            nested_schema=nested_schema,
6787        )
6788
6789    def _parse_json_schema(self) -> exp.JSONSchema:
6790        self._match_text_seq("COLUMNS")
6791        return self.expression(
6792            exp.JSONSchema,
6793            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
6794        )
6795
6796    def _parse_json_table(self) -> exp.JSONTable:
6797        this = self._parse_format_json(self._parse_bitwise())
6798        path = self._match(TokenType.COMMA) and self._parse_string()
6799        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
6800        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
6801        schema =
self._parse_json_schema() 6802 6803 return exp.JSONTable( 6804 this=this, 6805 schema=schema, 6806 path=path, 6807 error_handling=error_handling, 6808 empty_handling=empty_handling, 6809 ) 6810 6811 def _parse_match_against(self) -> exp.MatchAgainst: 6812 expressions = self._parse_csv(self._parse_column) 6813 6814 self._match_text_seq(")", "AGAINST", "(") 6815 6816 this = self._parse_string() 6817 6818 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6819 modifier = "IN NATURAL LANGUAGE MODE" 6820 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6821 modifier = f"{modifier} WITH QUERY EXPANSION" 6822 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6823 modifier = "IN BOOLEAN MODE" 6824 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6825 modifier = "WITH QUERY EXPANSION" 6826 else: 6827 modifier = None 6828 6829 return self.expression( 6830 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6831 ) 6832 6833 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6834 def _parse_open_json(self) -> exp.OpenJSON: 6835 this = self._parse_bitwise() 6836 path = self._match(TokenType.COMMA) and self._parse_string() 6837 6838 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6839 this = self._parse_field(any_token=True) 6840 kind = self._parse_types() 6841 path = self._parse_string() 6842 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6843 6844 return self.expression( 6845 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6846 ) 6847 6848 expressions = None 6849 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6850 self._match_l_paren() 6851 expressions = self._parse_csv(_parse_open_json_column_def) 6852 6853 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6854 6855 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6856 args = self._parse_csv(self._parse_bitwise) 6857 6858 if self._match(TokenType.IN): 6859 return self.expression( 6860 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6861 ) 6862 6863 if haystack_first: 6864 haystack = seq_get(args, 0) 6865 needle = seq_get(args, 1) 6866 else: 6867 haystack = seq_get(args, 1) 6868 needle = seq_get(args, 0) 6869 6870 return self.expression( 6871 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6872 ) 6873 6874 def _parse_predict(self) -> exp.Predict: 6875 self._match_text_seq("MODEL") 6876 this = self._parse_table() 6877 6878 self._match(TokenType.COMMA) 6879 self._match_text_seq("TABLE") 6880 6881 return self.expression( 6882 exp.Predict, 6883 this=this, 6884 expression=self._parse_table(), 6885 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6886 ) 6887 6888 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6889 args = self._parse_csv(self._parse_table) 6890 return exp.JoinHint(this=func_name.upper(), expressions=args) 6891 6892 def _parse_substring(self) -> exp.Substring: 6893 # Postgres supports the form: substring(string [from int] [for int]) 6894 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6895 6896 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6897 6898 if self._match(TokenType.FROM): 6899 args.append(self._parse_bitwise()) 6900 if self._match(TokenType.FOR): 6901 if len(args) == 1: 6902 args.append(exp.Literal.number(1)) 6903 args.append(self._parse_bitwise()) 6904 6905 return 

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
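
    # Illustrative sketch (not part of the sqlglot source): the TRIM grammar
    # above accepts both argument orders, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").find(exp.Trim)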

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
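
    # Illustrative sketch (not part of the sqlglot source): both placements of
    # IGNORE/RESPECT NULLS discussed above should parse; the Snowflake example
    # is an assumption:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
    #         read="snowflake",
    #     )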

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression
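
    # Illustrative sketch (not part of the sqlglot source): the FIRST/AFTER
    # column position handled by _parse_add_column appears in, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")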

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
self._match_texts(("ALL", "EVEN", "AUTO")): 7467 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7468 7469 self._match_text_seq("KEY", "DISTKEY") 7470 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7471 7472 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7473 if compound: 7474 self._match_text_seq("SORTKEY") 7475 7476 if self._match(TokenType.L_PAREN, advance=False): 7477 return self.expression( 7478 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7479 ) 7480 7481 self._match_texts(("AUTO", "NONE")) 7482 return self.expression( 7483 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7484 ) 7485 7486 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7487 index = self._index - 1 7488 7489 partition_exists = self._parse_exists() 7490 if self._match(TokenType.PARTITION, advance=False): 7491 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7492 7493 self._retreat(index) 7494 return self._parse_csv(self._parse_drop_column) 7495 7496 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7497 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7498 exists = self._parse_exists() 7499 old_column = self._parse_column() 7500 to = self._match_text_seq("TO") 7501 new_column = self._parse_column() 7502 7503 if old_column is None or to is None or new_column is None: 7504 return None 7505 7506 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7507 7508 self._match_text_seq("TO") 7509 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7510 7511 def _parse_alter_table_set(self) -> exp.AlterSet: 7512 alter_set = self.expression(exp.AlterSet) 7513 7514 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7515 "TABLE", "PROPERTIES" 7516 ): 7517 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7518 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7519 alter_set.set("expressions", [self._parse_assignment()]) 7520 elif self._match_texts(("LOGGED", "UNLOGGED")): 7521 alter_set.set("option", exp.var(self._prev.text.upper())) 7522 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7523 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7524 elif self._match_text_seq("LOCATION"): 7525 alter_set.set("location", self._parse_field()) 7526 elif self._match_text_seq("ACCESS", "METHOD"): 7527 alter_set.set("access_method", self._parse_field()) 7528 elif self._match_text_seq("TABLESPACE"): 7529 alter_set.set("tablespace", self._parse_field()) 7530 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7531 alter_set.set("file_format", [self._parse_field()]) 7532 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7533 alter_set.set("file_format", self._parse_wrapped_options()) 7534 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7535 alter_set.set("copy_options", self._parse_wrapped_options()) 7536 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7537 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7538 else: 7539 if self._match_text_seq("SERDE"): 7540 alter_set.set("serde", self._parse_field()) 7541 7542 properties = self._parse_wrapped(self._parse_properties, optional=True) 7543 alter_set.set("expressions", [properties]) 7544 7545 return alter_set 

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
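
    # Illustrative sketch (not part of the sqlglot source): a successful
    # _parse_alter produces an exp.Alter whose "actions" list holds the parsed
    # alterations, e.g.:
    #
    #     import sqlglot
    #     alter = sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")
    #     actions = alter.args["actions"]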

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
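
    # Illustrative sketch (not part of the sqlglot source):
    #
    #     import sqlglot
    #     merge = sqlglot.parse_one(
    #         "MERGE INTO t USING s ON t.id = s.id "
    #         "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #         "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #     )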

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
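
    # Illustrative sketch (not part of the sqlglot source): dollar-quoted
    # (heredoc) strings as handled above appear in Postgres, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT $tag$hello$tag$", read="postgres")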

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
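
    # Illustrative sketch (not part of the sqlglot source): _replace_lambda
    # rewrites lambda parameters when parsing higher-order functions; the
    # DuckDB example is an assumption:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2, 3], x -> x + 1)", read="duckdb")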

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
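
    # Illustrative sketch (not part of the sqlglot source): the star modifiers
    # handled by _parse_star_ops above include BigQuery's EXCEPT/REPLACE, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT * EXCEPT (a) REPLACE (b + 1 AS b) FROM t", read="bigquery")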

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )
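
    # Illustrative sketch (not part of the sqlglot source); the Postgres GRANT
    # form below is an assumption:
    #
    #     import sqlglot
    #     sqlglot.parse_one("GRANT SELECT ON TABLE t TO bob WITH GRANT OPTION", read="postgres")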

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8595 8596 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8597 self._match_text_seq("EXTEND") 8598 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8599 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8600 8601 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8602 sample = self._parse_table_sample() 8603 8604 with_ = query.args.get("with") 8605 if with_: 8606 with_.expressions[-1].this.set("sample", sample) 8607 else: 8608 query.set("sample", sample) 8609 8610 return query 8611 8612 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8613 if isinstance(query, exp.Subquery): 8614 query = exp.select("*").from_(query, copy=False) 8615 8616 if not query.args.get("from"): 8617 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8618 8619 while self._match(TokenType.PIPE_GT): 8620 start = self._curr 8621 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8622 if not parser: 8623 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8624 # keywords, making it tricky to disambiguate them without lookahead. The approach 8625 # here is to try and parse a set operation and if that fails, then try to parse a 8626 # join operator. If that fails as well, then the operator is not supported. 8627 parsed_query = self._parse_pipe_syntax_set_operator(query) 8628 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8629 if not parsed_query: 8630 self._retreat(start) 8631 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8632 break 8633 query = parsed_query 8634 else: 8635 query = parser(self, query) 8636 8637 return query 8638 8639 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8640 vars = self._parse_csv(self._parse_id_var) 8641 if not vars: 8642 return None 8643 8644 return self.expression( 8645 exp.DeclareItem, 8646 this=vars, 8647 kind=self._parse_types(), 8648 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8649 ) 8650 8651 def _parse_declare(self) -> exp.Declare | exp.Command: 8652 start = self._prev 8653 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8654 8655 if not expressions or self._curr: 8656 return self._parse_as_command(start) 8657 8658 return self.expression(exp.Declare, expressions=expressions) 8659 8660 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8661 exp_class = exp.Cast if strict else exp.TryCast 8662 8663 if exp_class == exp.TryCast: 8664 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8665 8666 return self.expression(exp_class, **kwargs)
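The pipe syntax parsers above rewrite each |> stage into a CTE via _build_pipe_cte, so a pipe query ultimately becomes an ordinary SELECT over generated __tmpN CTEs. A minimal sketch of driving this through the public API, assuming a dialect with pipe syntax support such as BigQuery (the exact generated CTE names are an internal detail):

import sqlglot

# Each |> stage is folded into a CTE, so the result is a plain SELECT
ast = sqlglot.parse_one("FROM t |> WHERE a > 1 |> SELECT a, b", read="bigquery")
print(ast.sql("bigquery"))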
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
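Parsers are usually obtained through a Dialect, but direct construction also works; a minimal sketch:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Dialect.get_or_raise resolves the string, so an unknown dialect name fails here
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="postgres")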
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
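For example, tokenizing and parsing a multi-statement string yields one tree per statement; a sketch using the DuckDB dialect:

from sqlglot.dialects import Dialect

dialect = Dialect.get_or_raise("duckdb")
sql = "SELECT 1; SELECT 2"
tokens = dialect.tokenize(sql)

trees = dialect.parser().parse(tokens, sql)  # one syntax tree per statement
assert len(trees) == 2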
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
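In practice this is most easily exercised through sqlglot.parse_one(..., into=...), which routes to parse_into; a sketch:

import sqlglot
from sqlglot import exp

# Parses the string as a condition rather than as a full statement
cond = sqlglot.parse_one("a = 1 AND b > 2", into=exp.Condition)
assert isinstance(cond, exp.And)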
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
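With ErrorLevel.RAISE, errors accumulate during parsing and check_errors (invoked at the end of _parse) raises them all at once; a sketch:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT foo( FROM bar", error_level=ErrorLevel.RAISE)
except ParseError as e:
    # The message concatenates up to max_errors errors; e.errors holds structured details
    print(e.errors)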
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
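Dialect parsers build AST nodes through this helper so that pending comments are attached and validation runs uniformly; a standalone sketch:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser(dialect="duckdb")
# Validation passes because 'this', the mandatory argument of exp.Column, is set
col = parser.expression(exp.Column, this=exp.to_identifier("a"))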
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
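Validation is skipped entirely under ErrorLevel.IGNORE, which can be useful when deliberately building partial trees; a sketch (assuming exp.In, whose 'this' argument is mandatory):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

lenient = Parser(error_level=ErrorLevel.IGNORE, dialect="duckdb")
# The missing mandatory 'this' argument goes unreported under IGNORE
node = lenient.validate_expression(exp.In())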
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
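A set operation round-trip shows the arguments captured here; for example, EXCEPT ALL sets distinct to False (a sketch in the default dialect):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT a FROM t EXCEPT ALL SELECT a FROM s")
assert isinstance(ast, exp.Except)
assert ast.args["distinct"] is False  # ALL was matched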