sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
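

# Illustrative example (editorial addition, not part of the original module).
# A minimal sketch of what build_mod's parenthesization achieves end to end,
# using sqlglot's public API:
def _example_build_mod_wrapping():
    import sqlglot

    # MOD's operands are wrapped when they are binary nodes, so the rendered
    # % expression keeps the original grouping.
    assert sqlglot.parse_one("MOD(a + 1, 7)").sql() == "(a + 1) % 7"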


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
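

# Illustrative example (editorial addition): the tries built by the metaclass
# above let the parser match multi-word commands such as "SHOW COLUMNS" one
# token at a time. A minimal sketch using sqlglot.trie directly:
def _example_keyword_trie():
    trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))

    # "SHOW" alone is only a prefix of a longer keyword sequence...
    result, _ = in_trie(trie, ["SHOW"])
    assert result == TrieResult.PREFIX

    # ...while the full two-word sequence is an exact match.
    result, _ = in_trie(trie, ["SHOW", "COLUMNS"])
    assert result == TrieResult.EXISTS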


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            interval=seq_get(args, 2)
            or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
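
    # Illustrative example (editorial addition): FUNCTIONS maps a function name,
    # as seen in the token stream, to a builder producing an AST node. E.g.
    # LOG2(x) is normalized into a two-argument exp.Log. A sketch via the
    # public API:
    def _example_functions_table():
        import sqlglot
        from sqlglot import exp

        log = sqlglot.parse_one("SELECT LOG2(x)").find(exp.Log)
        assert log is not None and log.this.name == "2"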

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
"ARRAY": exp.Array, 501 "LIST": exp.List, 502 } 503 504 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 505 506 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 507 508 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 509 510 FUNC_TOKENS = { 511 TokenType.COLLATE, 512 TokenType.COMMAND, 513 TokenType.CURRENT_DATE, 514 TokenType.CURRENT_DATETIME, 515 TokenType.CURRENT_TIMESTAMP, 516 TokenType.CURRENT_TIME, 517 TokenType.CURRENT_USER, 518 TokenType.FILTER, 519 TokenType.FIRST, 520 TokenType.FORMAT, 521 TokenType.GLOB, 522 TokenType.IDENTIFIER, 523 TokenType.INDEX, 524 TokenType.ISNULL, 525 TokenType.ILIKE, 526 TokenType.INSERT, 527 TokenType.LIKE, 528 TokenType.MERGE, 529 TokenType.OFFSET, 530 TokenType.PRIMARY_KEY, 531 TokenType.RANGE, 532 TokenType.REPLACE, 533 TokenType.RLIKE, 534 TokenType.ROW, 535 TokenType.UNNEST, 536 TokenType.VAR, 537 TokenType.LEFT, 538 TokenType.RIGHT, 539 TokenType.SEQUENCE, 540 TokenType.DATE, 541 TokenType.DATETIME, 542 TokenType.TABLE, 543 TokenType.TIMESTAMP, 544 TokenType.TIMESTAMPTZ, 545 TokenType.TRUNCATE, 546 TokenType.WINDOW, 547 TokenType.XOR, 548 *TYPE_TOKENS, 549 *SUBQUERY_PREDICATES, 550 } 551 552 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 553 TokenType.AND: exp.And, 554 } 555 556 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 557 TokenType.COLON_EQ: exp.PropertyEQ, 558 } 559 560 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 561 TokenType.OR: exp.Or, 562 } 563 564 EQUALITY = { 565 TokenType.EQ: exp.EQ, 566 TokenType.NEQ: exp.NEQ, 567 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 568 } 569 570 COMPARISON = { 571 TokenType.GT: exp.GT, 572 TokenType.GTE: exp.GTE, 573 TokenType.LT: exp.LT, 574 TokenType.LTE: exp.LTE, 575 } 576 577 BITWISE = { 578 TokenType.AMP: exp.BitwiseAnd, 579 TokenType.CARET: exp.BitwiseXor, 580 TokenType.PIPE: exp.BitwiseOr, 581 } 582 583 TERM = { 584 TokenType.DASH: exp.Sub, 585 TokenType.PLUS: exp.Add, 586 TokenType.MOD: exp.Mod, 587 TokenType.COLLATE: exp.Collate, 588 } 589 590 FACTOR = { 591 TokenType.DIV: exp.IntDiv, 592 TokenType.LR_ARROW: exp.Distance, 593 TokenType.SLASH: exp.Div, 594 TokenType.STAR: exp.Mul, 595 } 596 597 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 598 599 TIMES = { 600 TokenType.TIME, 601 TokenType.TIMETZ, 602 } 603 604 TIMESTAMPS = { 605 TokenType.TIMESTAMP, 606 TokenType.TIMESTAMPTZ, 607 TokenType.TIMESTAMPLTZ, 608 *TIMES, 609 } 610 611 SET_OPERATIONS = { 612 TokenType.UNION, 613 TokenType.INTERSECT, 614 TokenType.EXCEPT, 615 } 616 617 JOIN_METHODS = { 618 TokenType.ASOF, 619 TokenType.NATURAL, 620 TokenType.POSITIONAL, 621 } 622 623 JOIN_SIDES = { 624 TokenType.LEFT, 625 TokenType.RIGHT, 626 TokenType.FULL, 627 } 628 629 JOIN_KINDS = { 630 TokenType.ANTI, 631 TokenType.CROSS, 632 TokenType.INNER, 633 TokenType.OUTER, 634 TokenType.SEMI, 635 TokenType.STRAIGHT_JOIN, 636 } 637 638 JOIN_HINTS: t.Set[str] = set() 639 640 LAMBDAS = { 641 TokenType.ARROW: lambda self, expressions: self.expression( 642 exp.Lambda, 643 this=self._replace_lambda( 644 self._parse_assignment(), 645 expressions, 646 ), 647 expressions=expressions, 648 ), 649 TokenType.FARROW: lambda self, expressions: self.expression( 650 exp.Kwarg, 651 this=exp.var(expressions[0].name), 652 expression=self._parse_assignment(), 653 ), 654 } 655 656 COLUMN_OPERATORS = { 657 TokenType.DOT: None, 658 TokenType.DCOLON: lambda self, this, to: self.expression( 659 exp.Cast if self.STRICT_CAST else exp.TryCast, 660 this=this, 661 to=to, 662 ), 663 TokenType.ARROW: lambda self, this, 

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
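
    # Illustrative example (editorial addition): COLUMN_OPERATORS above handles
    # postfix operators on expressions, e.g. `::` becomes a cast and `->` a
    # JSON extraction. A sketch using the public API:
    def _example_column_operators():
        import sqlglot
        from sqlglot import exp

        assert isinstance(sqlglot.parse_one("x::INT"), exp.Cast)
        assert isinstance(sqlglot.parse_one("data -> 'a'", read="postgres"), exp.JSONExtract)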

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
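
    # Illustrative example (editorial addition): PLACEHOLDER_PARSERS is what
    # makes `?` and `:name` parameters parse into exp.Placeholder nodes. A
    # sketch:
    def _example_placeholders():
        import sqlglot
        from sqlglot import exp

        expr = sqlglot.parse_one("SELECT * FROM t WHERE id = :id")
        placeholder = expr.find(exp.Placeholder)
        assert placeholder is not None and placeholder.this == "id"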

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
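
    # Illustrative example (editorial addition): PROPERTY_PARSERS is keyed by
    # the property keyword seen in a DDL statement, e.g. MySQL's ENGINE=InnoDB
    # becomes an exp.EngineProperty. A sketch:
    def _example_property_parsers():
        import sqlglot
        from sqlglot import exp

        ddl = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
        assert ddl.find(exp.EngineProperty) is not None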

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }
KEY", 1030 "UNIQUE", 1031 } 1032 1033 NO_PAREN_FUNCTION_PARSERS = { 1034 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1035 "CASE": lambda self: self._parse_case(), 1036 "CONNECT_BY_ROOT": lambda self: self.expression( 1037 exp.ConnectByRoot, this=self._parse_column() 1038 ), 1039 "IF": lambda self: self._parse_if(), 1040 "NEXT": lambda self: self._parse_next_value_for(), 1041 } 1042 1043 INVALID_FUNC_NAME_TOKENS = { 1044 TokenType.IDENTIFIER, 1045 TokenType.STRING, 1046 } 1047 1048 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1049 1050 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1051 1052 FUNCTION_PARSERS = { 1053 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1054 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1055 "DECODE": lambda self: self._parse_decode(), 1056 "EXTRACT": lambda self: self._parse_extract(), 1057 "GAP_FILL": lambda self: self._parse_gap_fill(), 1058 "JSON_OBJECT": lambda self: self._parse_json_object(), 1059 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1060 "JSON_TABLE": lambda self: self._parse_json_table(), 1061 "MATCH": lambda self: self._parse_match_against(), 1062 "OPENJSON": lambda self: self._parse_open_json(), 1063 "POSITION": lambda self: self._parse_position(), 1064 "PREDICT": lambda self: self._parse_predict(), 1065 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1066 "STRING_AGG": lambda self: self._parse_string_agg(), 1067 "SUBSTRING": lambda self: self._parse_substring(), 1068 "TRIM": lambda self: self._parse_trim(), 1069 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1070 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1071 } 1072 1073 QUERY_MODIFIER_PARSERS = { 1074 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1075 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1076 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1077 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1078 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1079 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1080 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1081 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1082 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1083 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1084 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1085 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1086 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1087 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1088 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1089 TokenType.CLUSTER_BY: lambda self: ( 1090 "cluster", 1091 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1092 ), 1093 TokenType.DISTRIBUTE_BY: lambda self: ( 1094 "distribute", 1095 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1096 ), 1097 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1098 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1099 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1100 } 1101 1102 SET_PARSERS = { 1103 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1104 "LOCAL": lambda self: 
self._parse_set_item_assignment("LOCAL"), 1105 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1106 "TRANSACTION": lambda self: self._parse_set_transaction(), 1107 } 1108 1109 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1110 1111 TYPE_LITERAL_PARSERS = { 1112 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1113 } 1114 1115 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1116 1117 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1118 1119 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1120 1121 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1122 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1123 "ISOLATION": ( 1124 ("LEVEL", "REPEATABLE", "READ"), 1125 ("LEVEL", "READ", "COMMITTED"), 1126 ("LEVEL", "READ", "UNCOMITTED"), 1127 ("LEVEL", "SERIALIZABLE"), 1128 ), 1129 "READ": ("WRITE", "ONLY"), 1130 } 1131 1132 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1133 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1134 ) 1135 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1136 1137 CREATE_SEQUENCE: OPTIONS_TYPE = { 1138 "SCALE": ("EXTEND", "NOEXTEND"), 1139 "SHARD": ("EXTEND", "NOEXTEND"), 1140 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1141 **dict.fromkeys( 1142 ( 1143 "SESSION", 1144 "GLOBAL", 1145 "KEEP", 1146 "NOKEEP", 1147 "ORDER", 1148 "NOORDER", 1149 "NOCACHE", 1150 "CYCLE", 1151 "NOCYCLE", 1152 "NOMINVALUE", 1153 "NOMAXVALUE", 1154 "NOSCALE", 1155 "NOSHARD", 1156 ), 1157 tuple(), 1158 ), 1159 } 1160 1161 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1162 1163 USABLES: OPTIONS_TYPE = dict.fromkeys( 1164 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1165 ) 1166 1167 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1168 1169 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1170 "TYPE": ("EVOLUTION",), 1171 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1172 } 1173 1174 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1175 "NOT": ("ENFORCED",), 1176 "MATCH": ( 1177 "FULL", 1178 "PARTIAL", 1179 "SIMPLE", 1180 ), 1181 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1182 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1183 } 1184 1185 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1186 1187 CLONE_KEYWORDS = {"CLONE", "COPY"} 1188 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1189 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1190 1191 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1192 1193 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1194 1195 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1196 1197 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1198 1199 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1200 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1201 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1202 1203 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1204 1205 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1206 1207 ADD_CONSTRAINT_TOKENS = { 1208 TokenType.CONSTRAINT, 1209 TokenType.FOREIGN_KEY, 1210 TokenType.INDEX, 1211 TokenType.KEY, 1212 TokenType.PRIMARY_KEY, 1213 TokenType.UNIQUE, 1214 } 1215 1216 DISTINCT_TOKENS = {TokenType.DISTINCT} 1217 1218 NULL_TOKENS = {TokenType.NULL} 1219 1220 UNNEST_OFFSET_ALIAS_TOKENS = 

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
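
    # Illustrative example (editorial addition): the low-level flow that
    # sqlglot.parse wraps - tokenize first, then hand the tokens to a Parser.
    # A sketch:
    def _example_low_level_parse():
        from sqlglot.parser import Parser
        from sqlglot.tokens import Tokenizer

        sql = "SELECT a FROM t"
        expressions = Parser().parse(Tokenizer().tokenize(sql), sql)
        assert len(expressions) == 1 and expressions[0].sql() == "SELECT a FROM t"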

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
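
    # Illustrative example (editorial addition): with ErrorLevel.RAISE, errors
    # accumulate and surface as a single ParseError carrying structured entries
    # (line, col, highlight, ...). A sketch:
    def _example_error_levels():
        import sqlglot
        from sqlglot.errors import ErrorLevel, ParseError

        try:
            sqlglot.parse_one("SELECT * FROM", error_level=ErrorLevel.RAISE)
        except ParseError as e:
            assert {"line", "col", "highlight"} <= e.errors[0].keys()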

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
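
    # Illustrative example (editorial addition): _parse_statement dispatches on
    # the leading token via STATEMENT_PARSERS, and _parse splits the token
    # stream on semicolons, so multiple statements come back as multiple trees.
    # A sketch:
    def _example_statement_dispatch():
        import sqlglot
        from sqlglot import exp

        drop, update = sqlglot.parse("DROP TABLE a; UPDATE b SET x = 1")
        assert isinstance(drop, exp.Drop) and isinstance(update, exp.Update)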

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
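
    # Illustrative example (editorial addition): _parse_exists consumes an
    # optional IF [NOT] EXISTS, and the result lands on the statement's
    # `exists` arg. A sketch:
    def _example_if_exists():
        import sqlglot

        assert sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"] is True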
self._match(TokenType.BEGIN) 1755 return_ = self._match_text_seq("RETURN") 1756 1757 if self._match(TokenType.STRING, advance=False): 1758 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1759 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1760 expression = self._parse_string() 1761 extend_props(self._parse_properties()) 1762 else: 1763 expression = self._parse_statement() 1764 1765 end = self._match_text_seq("END") 1766 1767 if return_: 1768 expression = self.expression(exp.Return, this=expression) 1769 elif create_token.token_type == TokenType.INDEX: 1770 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1771 if not self._match(TokenType.ON): 1772 index = self._parse_id_var() 1773 anonymous = False 1774 else: 1775 index = None 1776 anonymous = True 1777 1778 this = self._parse_index(index=index, anonymous=anonymous) 1779 elif create_token.token_type in self.DB_CREATABLES: 1780 table_parts = self._parse_table_parts( 1781 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1782 ) 1783 1784 # exp.Properties.Location.POST_NAME 1785 self._match(TokenType.COMMA) 1786 extend_props(self._parse_properties(before=True)) 1787 1788 this = self._parse_schema(this=table_parts) 1789 1790 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1791 extend_props(self._parse_properties()) 1792 1793 self._match(TokenType.ALIAS) 1794 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1795 # exp.Properties.Location.POST_ALIAS 1796 extend_props(self._parse_properties()) 1797 1798 if create_token.token_type == TokenType.SEQUENCE: 1799 expression = self._parse_types() 1800 extend_props(self._parse_properties()) 1801 else: 1802 expression = self._parse_ddl_select() 1803 1804 if create_token.token_type == TokenType.TABLE: 1805 # exp.Properties.Location.POST_EXPRESSION 1806 extend_props(self._parse_properties()) 1807 1808 indexes = [] 1809 while True: 1810 index = self._parse_index() 1811 1812 # exp.Properties.Location.POST_INDEX 1813 extend_props(self._parse_properties()) 1814 if not index: 1815 break 1816 else: 1817 self._match(TokenType.COMMA) 1818 indexes.append(index) 1819 elif create_token.token_type == TokenType.VIEW: 1820 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1821 no_schema_binding = True 1822 1823 shallow = self._match_text_seq("SHALLOW") 1824 1825 if self._match_texts(self.CLONE_KEYWORDS): 1826 copy = self._prev.text.lower() == "copy" 1827 clone = self.expression( 1828 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1829 ) 1830 1831 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1832 return self._parse_as_command(start) 1833 1834 return self.expression( 1835 exp.Create, 1836 comments=comments, 1837 this=this, 1838 kind=create_token.text.upper(), 1839 replace=replace, 1840 unique=unique, 1841 expression=expression, 1842 exists=exists, 1843 properties=properties, 1844 indexes=indexes, 1845 no_schema_binding=no_schema_binding, 1846 begin=begin, 1847 end=end, 1848 clone=clone, 1849 concurrently=concurrently, 1850 clustered=clustered, 1851 ) 1852 1853 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1854 seq = exp.SequenceProperties() 1855 1856 options = [] 1857 index = self._index 1858 1859 while self._curr: 1860 self._match(TokenType.COMMA) 1861 if self._match_text_seq("INCREMENT"): 1862 self._match_text_seq("BY") 1863 
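# Both the ANSI spelling `INCREMENT BY <n>` and the `INCREMENT = <n>` variant are
# accepted here, since "BY" and "=" are each matched optionally. Illustrative use
# of the public API (a sketch; the exact tree shape can vary across versions):
#
#     import sqlglot
#     ast = sqlglot.parse_one("CREATE SEQUENCE s INCREMENT BY 2 START WITH 10")
#     assert ast.find(sqlglot.exp.SequenceProperties) is not None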
self._match_text_seq("=") 1864 seq.set("increment", self._parse_term()) 1865 elif self._match_text_seq("MINVALUE"): 1866 seq.set("minvalue", self._parse_term()) 1867 elif self._match_text_seq("MAXVALUE"): 1868 seq.set("maxvalue", self._parse_term()) 1869 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1870 self._match_text_seq("=") 1871 seq.set("start", self._parse_term()) 1872 elif self._match_text_seq("CACHE"): 1873 # T-SQL allows empty CACHE which is initialized dynamically 1874 seq.set("cache", self._parse_number() or True) 1875 elif self._match_text_seq("OWNED", "BY"): 1876 # "OWNED BY NONE" is the default 1877 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1878 else: 1879 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1880 if opt: 1881 options.append(opt) 1882 else: 1883 break 1884 1885 seq.set("options", options if options else None) 1886 return None if self._index == index else seq 1887 1888 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1889 # only used for teradata currently 1890 self._match(TokenType.COMMA) 1891 1892 kwargs = { 1893 "no": self._match_text_seq("NO"), 1894 "dual": self._match_text_seq("DUAL"), 1895 "before": self._match_text_seq("BEFORE"), 1896 "default": self._match_text_seq("DEFAULT"), 1897 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1898 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1899 "after": self._match_text_seq("AFTER"), 1900 "minimum": self._match_texts(("MIN", "MINIMUM")), 1901 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1902 } 1903 1904 if self._match_texts(self.PROPERTY_PARSERS): 1905 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1906 try: 1907 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1908 except TypeError: 1909 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1910 1911 return None 1912 1913 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1914 return self._parse_wrapped_csv(self._parse_property) 1915 1916 def _parse_property(self) -> t.Optional[exp.Expression]: 1917 if self._match_texts(self.PROPERTY_PARSERS): 1918 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1919 1920 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1921 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1922 1923 if self._match_text_seq("COMPOUND", "SORTKEY"): 1924 return self._parse_sortkey(compound=True) 1925 1926 if self._match_text_seq("SQL", "SECURITY"): 1927 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1928 1929 index = self._index 1930 key = self._parse_column() 1931 1932 if not self._match(TokenType.EQ): 1933 self._retreat(index) 1934 return self._parse_sequence_properties() 1935 1936 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1937 if isinstance(key, exp.Column): 1938 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1939 1940 value = self._parse_bitwise() or self._parse_var(any_token=True) 1941 1942 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1943 if isinstance(value, exp.Column): 1944 value = exp.var(value.name) 1945 1946 return self.expression(exp.Property, this=key, value=value) 1947 1948 def _parse_stored(self) -> exp.FileFormatProperty: 1949 self._match(TokenType.ALIAS) 1950 1951 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") 
else None 1952 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1953 1954 return self.expression( 1955 exp.FileFormatProperty, 1956 this=( 1957 self.expression( 1958 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1959 ) 1960 if input_format or output_format 1961 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1962 ), 1963 ) 1964 1965 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1966 field = self._parse_field() 1967 if isinstance(field, exp.Identifier) and not field.quoted: 1968 field = exp.var(field) 1969 1970 return field 1971 1972 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1973 self._match(TokenType.EQ) 1974 self._match(TokenType.ALIAS) 1975 1976 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1977 1978 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1979 properties = [] 1980 while True: 1981 if before: 1982 prop = self._parse_property_before() 1983 else: 1984 prop = self._parse_property() 1985 if not prop: 1986 break 1987 for p in ensure_list(prop): 1988 properties.append(p) 1989 1990 if properties: 1991 return self.expression(exp.Properties, expressions=properties) 1992 1993 return None 1994 1995 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1996 return self.expression( 1997 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1998 ) 1999 2000 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2001 if self._index >= 2: 2002 pre_volatile_token = self._tokens[self._index - 2] 2003 else: 2004 pre_volatile_token = None 2005 2006 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2007 return exp.VolatileProperty() 2008 2009 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2010 2011 def _parse_retention_period(self) -> exp.Var: 2012 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2013 number = self._parse_number() 2014 number_str = f"{number} " if number else "" 2015 unit = self._parse_var(any_token=True) 2016 return exp.var(f"{number_str}{unit}") 2017 2018 def _parse_system_versioning_property( 2019 self, with_: bool = False 2020 ) -> exp.WithSystemVersioningProperty: 2021 self._match(TokenType.EQ) 2022 prop = self.expression( 2023 exp.WithSystemVersioningProperty, 2024 **{ # type: ignore 2025 "on": True, 2026 "with": with_, 2027 }, 2028 ) 2029 2030 if self._match_text_seq("OFF"): 2031 prop.set("on", False) 2032 return prop 2033 2034 self._match(TokenType.ON) 2035 if self._match(TokenType.L_PAREN): 2036 while self._curr and not self._match(TokenType.R_PAREN): 2037 if self._match_text_seq("HISTORY_TABLE", "="): 2038 prop.set("this", self._parse_table_parts()) 2039 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2040 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2041 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2042 prop.set("retention_period", self._parse_retention_period()) 2043 2044 self._match(TokenType.COMMA) 2045 2046 return prop 2047 2048 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2049 self._match(TokenType.EQ) 2050 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2051 prop = self.expression(exp.DataDeletionProperty, on=on) 2052 2053 if self._match(TokenType.L_PAREN): 2054 
while self._curr and not self._match(TokenType.R_PAREN): 2055 if self._match_text_seq("FILTER_COLUMN", "="): 2056 prop.set("filter_column", self._parse_column()) 2057 elif self._match_text_seq("RETENTION_PERIOD", "="): 2058 prop.set("retention_period", self._parse_retention_period()) 2059 2060 self._match(TokenType.COMMA) 2061 2062 return prop 2063 2064 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2065 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2066 prop = self._parse_system_versioning_property(with_=True) 2067 self._match_r_paren() 2068 return prop 2069 2070 if self._match(TokenType.L_PAREN, advance=False): 2071 return self._parse_wrapped_properties() 2072 2073 if self._match_text_seq("JOURNAL"): 2074 return self._parse_withjournaltable() 2075 2076 if self._match_texts(self.VIEW_ATTRIBUTES): 2077 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2078 2079 if self._match_text_seq("DATA"): 2080 return self._parse_withdata(no=False) 2081 elif self._match_text_seq("NO", "DATA"): 2082 return self._parse_withdata(no=True) 2083 2084 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2085 return self._parse_serde_properties(with_=True) 2086 2087 if self._match(TokenType.SCHEMA): 2088 return self.expression( 2089 exp.WithSchemaBindingProperty, 2090 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2091 ) 2092 2093 if not self._next: 2094 return None 2095 2096 return self._parse_withisolatedloading() 2097 2098 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2099 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2100 self._match(TokenType.EQ) 2101 2102 user = self._parse_id_var() 2103 self._match(TokenType.PARAMETER) 2104 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2105 2106 if not user or not host: 2107 return None 2108 2109 return exp.DefinerProperty(this=f"{user}@{host}") 2110 2111 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2112 self._match(TokenType.TABLE) 2113 self._match(TokenType.EQ) 2114 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2115 2116 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2117 return self.expression(exp.LogProperty, no=no) 2118 2119 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2120 return self.expression(exp.JournalProperty, **kwargs) 2121 2122 def _parse_checksum(self) -> exp.ChecksumProperty: 2123 self._match(TokenType.EQ) 2124 2125 on = None 2126 if self._match(TokenType.ON): 2127 on = True 2128 elif self._match_text_seq("OFF"): 2129 on = False 2130 2131 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2132 2133 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2134 return self.expression( 2135 exp.Cluster, 2136 expressions=( 2137 self._parse_wrapped_csv(self._parse_ordered) 2138 if wrapped 2139 else self._parse_csv(self._parse_ordered) 2140 ), 2141 ) 2142 2143 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2144 self._match_text_seq("BY") 2145 2146 self._match_l_paren() 2147 expressions = self._parse_csv(self._parse_column) 2148 self._match_r_paren() 2149 2150 if self._match_text_seq("SORTED", "BY"): 2151 self._match_l_paren() 2152 sorted_by = self._parse_csv(self._parse_ordered) 2153 self._match_r_paren() 2154 else: 2155 sorted_by = None 2156 2157 self._match(TokenType.INTO) 2158 buckets = self._parse_number() 2159 self._match_text_seq("BUCKETS") 2160 2161 
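# Everything needed for Hive-style bucketing has been consumed at this point,
# e.g. `CLUSTERED BY (c1) SORTED BY (c2) INTO 32 BUCKETS`. Illustrative usage
# (a sketch, assuming the bundled hive dialect):
#
#     import sqlglot
#     sqlglot.parse_one(
#         "CREATE TABLE t (c1 INT) CLUSTERED BY (c1) INTO 4 BUCKETS",
#         read="hive",
#     )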
return self.expression( 2162 exp.ClusteredByProperty, 2163 expressions=expressions, 2164 sorted_by=sorted_by, 2165 buckets=buckets, 2166 ) 2167 2168 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2169 if not self._match_text_seq("GRANTS"): 2170 self._retreat(self._index - 1) 2171 return None 2172 2173 return self.expression(exp.CopyGrantsProperty) 2174 2175 def _parse_freespace(self) -> exp.FreespaceProperty: 2176 self._match(TokenType.EQ) 2177 return self.expression( 2178 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2179 ) 2180 2181 def _parse_mergeblockratio( 2182 self, no: bool = False, default: bool = False 2183 ) -> exp.MergeBlockRatioProperty: 2184 if self._match(TokenType.EQ): 2185 return self.expression( 2186 exp.MergeBlockRatioProperty, 2187 this=self._parse_number(), 2188 percent=self._match(TokenType.PERCENT), 2189 ) 2190 2191 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2192 2193 def _parse_datablocksize( 2194 self, 2195 default: t.Optional[bool] = None, 2196 minimum: t.Optional[bool] = None, 2197 maximum: t.Optional[bool] = None, 2198 ) -> exp.DataBlocksizeProperty: 2199 self._match(TokenType.EQ) 2200 size = self._parse_number() 2201 2202 units = None 2203 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2204 units = self._prev.text 2205 2206 return self.expression( 2207 exp.DataBlocksizeProperty, 2208 size=size, 2209 units=units, 2210 default=default, 2211 minimum=minimum, 2212 maximum=maximum, 2213 ) 2214 2215 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2216 self._match(TokenType.EQ) 2217 always = self._match_text_seq("ALWAYS") 2218 manual = self._match_text_seq("MANUAL") 2219 never = self._match_text_seq("NEVER") 2220 default = self._match_text_seq("DEFAULT") 2221 2222 autotemp = None 2223 if self._match_text_seq("AUTOTEMP"): 2224 autotemp = self._parse_schema() 2225 2226 return self.expression( 2227 exp.BlockCompressionProperty, 2228 always=always, 2229 manual=manual, 2230 never=never, 2231 default=default, 2232 autotemp=autotemp, 2233 ) 2234 2235 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2236 index = self._index 2237 no = self._match_text_seq("NO") 2238 concurrent = self._match_text_seq("CONCURRENT") 2239 2240 if not self._match_text_seq("ISOLATED", "LOADING"): 2241 self._retreat(index) 2242 return None 2243 2244 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2245 return self.expression( 2246 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2247 ) 2248 2249 def _parse_locking(self) -> exp.LockingProperty: 2250 if self._match(TokenType.TABLE): 2251 kind = "TABLE" 2252 elif self._match(TokenType.VIEW): 2253 kind = "VIEW" 2254 elif self._match(TokenType.ROW): 2255 kind = "ROW" 2256 elif self._match_text_seq("DATABASE"): 2257 kind = "DATABASE" 2258 else: 2259 kind = None 2260 2261 if kind in ("DATABASE", "TABLE", "VIEW"): 2262 this = self._parse_table_parts() 2263 else: 2264 this = None 2265 2266 if self._match(TokenType.FOR): 2267 for_or_in = "FOR" 2268 elif self._match(TokenType.IN): 2269 for_or_in = "IN" 2270 else: 2271 for_or_in = None 2272 2273 if self._match_text_seq("ACCESS"): 2274 lock_type = "ACCESS" 2275 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2276 lock_type = "EXCLUSIVE" 2277 elif self._match_text_seq("SHARE"): 2278 lock_type = "SHARE" 2279 elif self._match_text_seq("READ"): 2280 lock_type = "READ" 2281 elif 
self._match_text_seq("WRITE"): 2282 lock_type = "WRITE" 2283 elif self._match_text_seq("CHECKSUM"): 2284 lock_type = "CHECKSUM" 2285 else: 2286 lock_type = None 2287 2288 override = self._match_text_seq("OVERRIDE") 2289 2290 return self.expression( 2291 exp.LockingProperty, 2292 this=this, 2293 kind=kind, 2294 for_or_in=for_or_in, 2295 lock_type=lock_type, 2296 override=override, 2297 ) 2298 2299 def _parse_partition_by(self) -> t.List[exp.Expression]: 2300 if self._match(TokenType.PARTITION_BY): 2301 return self._parse_csv(self._parse_assignment) 2302 return [] 2303 2304 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2305 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2306 if self._match_text_seq("MINVALUE"): 2307 return exp.var("MINVALUE") 2308 if self._match_text_seq("MAXVALUE"): 2309 return exp.var("MAXVALUE") 2310 return self._parse_bitwise() 2311 2312 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2313 expression = None 2314 from_expressions = None 2315 to_expressions = None 2316 2317 if self._match(TokenType.IN): 2318 this = self._parse_wrapped_csv(self._parse_bitwise) 2319 elif self._match(TokenType.FROM): 2320 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2321 self._match_text_seq("TO") 2322 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2323 elif self._match_text_seq("WITH", "(", "MODULUS"): 2324 this = self._parse_number() 2325 self._match_text_seq(",", "REMAINDER") 2326 expression = self._parse_number() 2327 self._match_r_paren() 2328 else: 2329 self.raise_error("Failed to parse partition bound spec.") 2330 2331 return self.expression( 2332 exp.PartitionBoundSpec, 2333 this=this, 2334 expression=expression, 2335 from_expressions=from_expressions, 2336 to_expressions=to_expressions, 2337 ) 2338 2339 # https://www.postgresql.org/docs/current/sql-createtable.html 2340 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2341 if not self._match_text_seq("OF"): 2342 self._retreat(self._index - 1) 2343 return None 2344 2345 this = self._parse_table(schema=True) 2346 2347 if self._match(TokenType.DEFAULT): 2348 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2349 elif self._match_text_seq("FOR", "VALUES"): 2350 expression = self._parse_partition_bound_spec() 2351 else: 2352 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2353 2354 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2355 2356 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2357 self._match(TokenType.EQ) 2358 return self.expression( 2359 exp.PartitionedByProperty, 2360 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2361 ) 2362 2363 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2364 if self._match_text_seq("AND", "STATISTICS"): 2365 statistics = True 2366 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2367 statistics = False 2368 else: 2369 statistics = None 2370 2371 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2372 2373 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2374 if self._match_text_seq("SQL"): 2375 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2376 return None 2377 2378 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2379 if self._match_text_seq("SQL", "DATA"): 2380 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2381 
return None 2382 2383 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2384 if self._match_text_seq("PRIMARY", "INDEX"): 2385 return exp.NoPrimaryIndexProperty() 2386 if self._match_text_seq("SQL"): 2387 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2388 return None 2389 2390 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2391 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2392 return exp.OnCommitProperty() 2393 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2394 return exp.OnCommitProperty(delete=True) 2395 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2396 2397 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2398 if self._match_text_seq("SQL", "DATA"): 2399 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2400 return None 2401 2402 def _parse_distkey(self) -> exp.DistKeyProperty: 2403 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2404 2405 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2406 table = self._parse_table(schema=True) 2407 2408 options = [] 2409 while self._match_texts(("INCLUDING", "EXCLUDING")): 2410 this = self._prev.text.upper() 2411 2412 id_var = self._parse_id_var() 2413 if not id_var: 2414 return None 2415 2416 options.append( 2417 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2418 ) 2419 2420 return self.expression(exp.LikeProperty, this=table, expressions=options) 2421 2422 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2423 return self.expression( 2424 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2425 ) 2426 2427 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2428 self._match(TokenType.EQ) 2429 return self.expression( 2430 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2431 ) 2432 2433 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2434 self._match_text_seq("WITH", "CONNECTION") 2435 return self.expression( 2436 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2437 ) 2438 2439 def _parse_returns(self) -> exp.ReturnsProperty: 2440 value: t.Optional[exp.Expression] 2441 null = None 2442 is_table = self._match(TokenType.TABLE) 2443 2444 if is_table: 2445 if self._match(TokenType.LT): 2446 value = self.expression( 2447 exp.Schema, 2448 this="TABLE", 2449 expressions=self._parse_csv(self._parse_struct_types), 2450 ) 2451 if not self._match(TokenType.GT): 2452 self.raise_error("Expecting >") 2453 else: 2454 value = self._parse_schema(exp.var("TABLE")) 2455 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2456 null = True 2457 value = None 2458 else: 2459 value = self._parse_types() 2460 2461 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2462 2463 def _parse_describe(self) -> exp.Describe: 2464 kind = self._match_set(self.CREATABLES) and self._prev.text 2465 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2466 if self._match(TokenType.DOT): 2467 style = None 2468 self._retreat(self._index - 2) 2469 this = self._parse_table(schema=True) 2470 properties = self._parse_properties() 2471 expressions = properties.expressions if properties else None 2472 return self.expression( 2473 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2474 ) 2475 2476 def 
_parse_insert(self) -> exp.Insert: 2477 comments = ensure_list(self._prev_comments) 2478 hint = self._parse_hint() 2479 overwrite = self._match(TokenType.OVERWRITE) 2480 ignore = self._match(TokenType.IGNORE) 2481 local = self._match_text_seq("LOCAL") 2482 alternative = None 2483 is_function = None 2484 2485 if self._match_text_seq("DIRECTORY"): 2486 this: t.Optional[exp.Expression] = self.expression( 2487 exp.Directory, 2488 this=self._parse_var_or_string(), 2489 local=local, 2490 row_format=self._parse_row_format(match_row=True), 2491 ) 2492 else: 2493 if self._match(TokenType.OR): 2494 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2495 2496 self._match(TokenType.INTO) 2497 comments += ensure_list(self._prev_comments) 2498 self._match(TokenType.TABLE) 2499 is_function = self._match(TokenType.FUNCTION) 2500 2501 this = ( 2502 self._parse_table(schema=True, parse_partition=True) 2503 if not is_function 2504 else self._parse_function() 2505 ) 2506 2507 returning = self._parse_returning() 2508 2509 return self.expression( 2510 exp.Insert, 2511 comments=comments, 2512 hint=hint, 2513 is_function=is_function, 2514 this=this, 2515 stored=self._match_text_seq("STORED") and self._parse_stored(), 2516 by_name=self._match_text_seq("BY", "NAME"), 2517 exists=self._parse_exists(), 2518 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2519 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2520 conflict=self._parse_on_conflict(), 2521 returning=returning or self._parse_returning(), 2522 overwrite=overwrite, 2523 alternative=alternative, 2524 ignore=ignore, 2525 ) 2526 2527 def _parse_kill(self) -> exp.Kill: 2528 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2529 2530 return self.expression( 2531 exp.Kill, 2532 this=self._parse_primary(), 2533 kind=kind, 2534 ) 2535 2536 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2537 conflict = self._match_text_seq("ON", "CONFLICT") 2538 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2539 2540 if not conflict and not duplicate: 2541 return None 2542 2543 conflict_keys = None 2544 constraint = None 2545 2546 if conflict: 2547 if self._match_text_seq("ON", "CONSTRAINT"): 2548 constraint = self._parse_id_var() 2549 elif self._match(TokenType.L_PAREN): 2550 conflict_keys = self._parse_csv(self._parse_id_var) 2551 self._match_r_paren() 2552 2553 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2554 if self._prev.token_type == TokenType.UPDATE: 2555 self._match(TokenType.SET) 2556 expressions = self._parse_csv(self._parse_equality) 2557 else: 2558 expressions = None 2559 2560 return self.expression( 2561 exp.OnConflict, 2562 duplicate=duplicate, 2563 expressions=expressions, 2564 action=action, 2565 conflict_keys=conflict_keys, 2566 constraint=constraint, 2567 ) 2568 2569 def _parse_returning(self) -> t.Optional[exp.Returning]: 2570 if not self._match(TokenType.RETURNING): 2571 return None 2572 return self.expression( 2573 exp.Returning, 2574 expressions=self._parse_csv(self._parse_expression), 2575 into=self._match(TokenType.INTO) and self._parse_table_part(), 2576 ) 2577 2578 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2579 if not self._match(TokenType.FORMAT): 2580 return None 2581 return self._parse_row_format() 2582 2583 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2584 index = self._index 2585 
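# Standard backtracking pattern used throughout this parser: remember the token
# position, optimistically consume WITH, and retreat below if SERDEPROPERTIES
# does not follow. The shape being handled is Hive's:
#
#     ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
#     WITH SERDEPROPERTIES ('separatorChar' = ',')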
with_ = with_ or self._match_text_seq("WITH") 2586 2587 if not self._match(TokenType.SERDE_PROPERTIES): 2588 self._retreat(index) 2589 return None 2590 return self.expression( 2591 exp.SerdeProperties, 2592 **{ # type: ignore 2593 "expressions": self._parse_wrapped_properties(), 2594 "with": with_, 2595 }, 2596 ) 2597 2598 def _parse_row_format( 2599 self, match_row: bool = False 2600 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2601 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2602 return None 2603 2604 if self._match_text_seq("SERDE"): 2605 this = self._parse_string() 2606 2607 serde_properties = self._parse_serde_properties() 2608 2609 return self.expression( 2610 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2611 ) 2612 2613 self._match_text_seq("DELIMITED") 2614 2615 kwargs = {} 2616 2617 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2618 kwargs["fields"] = self._parse_string() 2619 if self._match_text_seq("ESCAPED", "BY"): 2620 kwargs["escaped"] = self._parse_string() 2621 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2622 kwargs["collection_items"] = self._parse_string() 2623 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2624 kwargs["map_keys"] = self._parse_string() 2625 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2626 kwargs["lines"] = self._parse_string() 2627 if self._match_text_seq("NULL", "DEFINED", "AS"): 2628 kwargs["null"] = self._parse_string() 2629 2630 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2631 2632 def _parse_load(self) -> exp.LoadData | exp.Command: 2633 if self._match_text_seq("DATA"): 2634 local = self._match_text_seq("LOCAL") 2635 self._match_text_seq("INPATH") 2636 inpath = self._parse_string() 2637 overwrite = self._match(TokenType.OVERWRITE) 2638 self._match_pair(TokenType.INTO, TokenType.TABLE) 2639 2640 return self.expression( 2641 exp.LoadData, 2642 this=self._parse_table(schema=True), 2643 local=local, 2644 overwrite=overwrite, 2645 inpath=inpath, 2646 partition=self._parse_partition(), 2647 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2648 serde=self._match_text_seq("SERDE") and self._parse_string(), 2649 ) 2650 return self._parse_as_command(self._prev) 2651 2652 def _parse_delete(self) -> exp.Delete: 2653 # This handles MySQL's "Multiple-Table Syntax" 2654 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2655 tables = None 2656 comments = self._prev_comments 2657 if not self._match(TokenType.FROM, advance=False): 2658 tables = self._parse_csv(self._parse_table) or None 2659 2660 returning = self._parse_returning() 2661 2662 return self.expression( 2663 exp.Delete, 2664 comments=comments, 2665 tables=tables, 2666 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2667 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2668 where=self._parse_where(), 2669 returning=returning or self._parse_returning(), 2670 limit=self._parse_limit(), 2671 ) 2672 2673 def _parse_update(self) -> exp.Update: 2674 comments = self._prev_comments 2675 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2676 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2677 returning = self._parse_returning() 2678 return self.expression( 2679 exp.Update, 2680 comments=comments, 2681 **{ # type: ignore 2682 "this": this, 2683 "expressions": expressions, 2684 "from": 
self._parse_from(joins=True), 2685 "where": self._parse_where(), 2686 "returning": returning or self._parse_returning(), 2687 "order": self._parse_order(), 2688 "limit": self._parse_limit(), 2689 }, 2690 ) 2691 2692 def _parse_uncache(self) -> exp.Uncache: 2693 if not self._match(TokenType.TABLE): 2694 self.raise_error("Expecting TABLE after UNCACHE") 2695 2696 return self.expression( 2697 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2698 ) 2699 2700 def _parse_cache(self) -> exp.Cache: 2701 lazy = self._match_text_seq("LAZY") 2702 self._match(TokenType.TABLE) 2703 table = self._parse_table(schema=True) 2704 2705 options = [] 2706 if self._match_text_seq("OPTIONS"): 2707 self._match_l_paren() 2708 k = self._parse_string() 2709 self._match(TokenType.EQ) 2710 v = self._parse_string() 2711 options = [k, v] 2712 self._match_r_paren() 2713 2714 self._match(TokenType.ALIAS) 2715 return self.expression( 2716 exp.Cache, 2717 this=table, 2718 lazy=lazy, 2719 options=options, 2720 expression=self._parse_select(nested=True), 2721 ) 2722 2723 def _parse_partition(self) -> t.Optional[exp.Partition]: 2724 if not self._match(TokenType.PARTITION): 2725 return None 2726 2727 return self.expression( 2728 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2729 ) 2730 2731 def _parse_value(self) -> t.Optional[exp.Tuple]: 2732 if self._match(TokenType.L_PAREN): 2733 expressions = self._parse_csv(self._parse_expression) 2734 self._match_r_paren() 2735 return self.expression(exp.Tuple, expressions=expressions) 2736 2737 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2738 expression = self._parse_expression() 2739 if expression: 2740 return self.expression(exp.Tuple, expressions=[expression]) 2741 return None 2742 2743 def _parse_projections(self) -> t.List[exp.Expression]: 2744 return self._parse_expressions() 2745 2746 def _parse_select( 2747 self, 2748 nested: bool = False, 2749 table: bool = False, 2750 parse_subquery_alias: bool = True, 2751 parse_set_operation: bool = True, 2752 ) -> t.Optional[exp.Expression]: 2753 cte = self._parse_with() 2754 2755 if cte: 2756 this = self._parse_statement() 2757 2758 if not this: 2759 self.raise_error("Failed to parse any statement following CTE") 2760 return cte 2761 2762 if "with" in this.arg_types: 2763 this.set("with", cte) 2764 else: 2765 self.raise_error(f"{this.key} does not support CTE") 2766 this = cte 2767 2768 return this 2769 2770 # duckdb supports leading with FROM x 2771 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2772 2773 if self._match(TokenType.SELECT): 2774 comments = self._prev_comments 2775 2776 hint = self._parse_hint() 2777 2778 if self._next and not self._next.token_type == TokenType.DOT: 2779 all_ = self._match(TokenType.ALL) 2780 distinct = self._match_set(self.DISTINCT_TOKENS) 2781 else: 2782 all_, distinct = None, None 2783 2784 kind = ( 2785 self._match(TokenType.ALIAS) 2786 and self._match_texts(("STRUCT", "VALUE")) 2787 and self._prev.text.upper() 2788 ) 2789 2790 if distinct: 2791 distinct = self.expression( 2792 exp.Distinct, 2793 on=self._parse_value() if self._match(TokenType.ON) else None, 2794 ) 2795 2796 if all_ and distinct: 2797 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2798 2799 limit = self._parse_limit(top=True) 2800 projections = self._parse_projections() 2801 2802 this = self.expression( 2803 exp.Select, 2804 kind=kind, 2805 hint=hint, 2806 distinct=distinct, 2807 
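# `limit` was parsed before the projections (via _parse_limit(top=True) above)
# so that T-SQL's `SELECT TOP <n> ...` is handled in the same pass.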
expressions=projections, 2808 limit=limit, 2809 ) 2810 this.comments = comments 2811 2812 into = self._parse_into() 2813 if into: 2814 this.set("into", into) 2815 2816 if not from_: 2817 from_ = self._parse_from() 2818 2819 if from_: 2820 this.set("from", from_) 2821 2822 this = self._parse_query_modifiers(this) 2823 elif (table or nested) and self._match(TokenType.L_PAREN): 2824 if self._match(TokenType.PIVOT): 2825 this = self._parse_simplified_pivot() 2826 elif self._match(TokenType.FROM): 2827 this = exp.select("*").from_( 2828 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2829 ) 2830 else: 2831 this = ( 2832 self._parse_table() 2833 if table 2834 else self._parse_select(nested=True, parse_set_operation=False) 2835 ) 2836 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2837 2838 self._match_r_paren() 2839 2840 # We return early here so that the UNION isn't attached to the subquery by the 2841 # following call to _parse_set_operations, but instead becomes the parent node 2842 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2843 elif self._match(TokenType.VALUES, advance=False): 2844 this = self._parse_derived_table_values() 2845 elif from_: 2846 this = exp.select("*").from_(from_.this, copy=False) 2847 elif self._match(TokenType.SUMMARIZE): 2848 table = self._match(TokenType.TABLE) 2849 this = self._parse_select() or self._parse_string() or self._parse_table() 2850 return self.expression(exp.Summarize, this=this, table=table) 2851 elif self._match(TokenType.DESCRIBE): 2852 this = self._parse_describe() 2853 else: 2854 this = None 2855 2856 return self._parse_set_operations(this) if parse_set_operation else this 2857 2858 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2859 if not skip_with_token and not self._match(TokenType.WITH): 2860 return None 2861 2862 comments = self._prev_comments 2863 recursive = self._match(TokenType.RECURSIVE) 2864 2865 expressions = [] 2866 while True: 2867 expressions.append(self._parse_cte()) 2868 2869 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2870 break 2871 else: 2872 self._match(TokenType.WITH) 2873 2874 return self.expression( 2875 exp.With, comments=comments, expressions=expressions, recursive=recursive 2876 ) 2877 2878 def _parse_cte(self) -> exp.CTE: 2879 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2880 if not alias or not alias.this: 2881 self.raise_error("Expected CTE to have alias") 2882 2883 self._match(TokenType.ALIAS) 2884 comments = self._prev_comments 2885 2886 if self._match_text_seq("NOT", "MATERIALIZED"): 2887 materialized = False 2888 elif self._match_text_seq("MATERIALIZED"): 2889 materialized = True 2890 else: 2891 materialized = None 2892 2893 return self.expression( 2894 exp.CTE, 2895 this=self._parse_wrapped(self._parse_statement), 2896 alias=alias, 2897 materialized=materialized, 2898 comments=comments, 2899 ) 2900 2901 def _parse_table_alias( 2902 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2903 ) -> t.Optional[exp.TableAlias]: 2904 any_token = self._match(TokenType.ALIAS) 2905 alias = ( 2906 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2907 or self._parse_string_as_identifier() 2908 ) 2909 2910 index = self._index 2911 if self._match(TokenType.L_PAREN): 2912 columns = self._parse_csv(self._parse_function_parameter) 2913 self._match_r_paren() if columns else self._retreat(index) 2914 else: 2915 columns = None 2916 2917 if not alias and not columns: 
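# Neither an alias name nor a column list was found, so there is no alias to
# attach; signal that to the caller rather than building an empty TableAlias.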
2918 return None 2919 2920 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2921 2922 # We bubble up comments from the Identifier to the TableAlias 2923 if isinstance(alias, exp.Identifier): 2924 table_alias.add_comments(alias.pop_comments()) 2925 2926 return table_alias 2927 2928 def _parse_subquery( 2929 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2930 ) -> t.Optional[exp.Subquery]: 2931 if not this: 2932 return None 2933 2934 return self.expression( 2935 exp.Subquery, 2936 this=this, 2937 pivots=self._parse_pivots(), 2938 alias=self._parse_table_alias() if parse_alias else None, 2939 ) 2940 2941 def _implicit_unnests_to_explicit(self, this: E) -> E: 2942 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2943 2944 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2945 for i, join in enumerate(this.args.get("joins") or []): 2946 table = join.this 2947 normalized_table = table.copy() 2948 normalized_table.meta["maybe_column"] = True 2949 normalized_table = _norm(normalized_table, dialect=self.dialect) 2950 2951 if isinstance(table, exp.Table) and not join.args.get("on"): 2952 if normalized_table.parts[0].name in refs: 2953 table_as_column = table.to_column() 2954 unnest = exp.Unnest(expressions=[table_as_column]) 2955 2956 # Table.to_column creates a parent Alias node that we want to convert to 2957 # a TableAlias and attach to the Unnest, so it matches the parser's output 2958 if isinstance(table.args.get("alias"), exp.TableAlias): 2959 table_as_column.replace(table_as_column.this) 2960 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2961 2962 table.replace(unnest) 2963 2964 refs.add(normalized_table.alias_or_name) 2965 2966 return this 2967 2968 def _parse_query_modifiers( 2969 self, this: t.Optional[exp.Expression] 2970 ) -> t.Optional[exp.Expression]: 2971 if isinstance(this, (exp.Query, exp.Table)): 2972 for join in self._parse_joins(): 2973 this.append("joins", join) 2974 for lateral in iter(self._parse_lateral, None): 2975 this.append("laterals", lateral) 2976 2977 while True: 2978 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2979 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2980 key, expression = parser(self) 2981 2982 if expression: 2983 this.set(key, expression) 2984 if key == "limit": 2985 offset = expression.args.pop("offset", None) 2986 2987 if offset: 2988 offset = exp.Offset(expression=offset) 2989 this.set("offset", offset) 2990 2991 limit_by_expressions = expression.expressions 2992 expression.set("expressions", None) 2993 offset.set("expressions", limit_by_expressions) 2994 continue 2995 break 2996 2997 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2998 this = self._implicit_unnests_to_explicit(this) 2999 3000 return this 3001 3002 def _parse_hint(self) -> t.Optional[exp.Hint]: 3003 if self._match(TokenType.HINT): 3004 hints = [] 3005 for hint in iter( 3006 lambda: self._parse_csv( 3007 lambda: self._parse_function() or self._parse_var(upper=True) 3008 ), 3009 [], 3010 ): 3011 hints.extend(hint) 3012 3013 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3014 self.raise_error("Expected */ after HINT") 3015 3016 return self.expression(exp.Hint, expressions=hints) 3017 3018 return None 3019 3020 def _parse_into(self) -> t.Optional[exp.Into]: 3021 if not self._match(TokenType.INTO): 3022 return None 3023 3024 temp = self._match(TokenType.TEMPORARY) 3025 unlogged = 
self._match_text_seq("UNLOGGED") 3026 self._match(TokenType.TABLE) 3027 3028 return self.expression( 3029 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3030 ) 3031 3032 def _parse_from( 3033 self, joins: bool = False, skip_from_token: bool = False 3034 ) -> t.Optional[exp.From]: 3035 if not skip_from_token and not self._match(TokenType.FROM): 3036 return None 3037 3038 return self.expression( 3039 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3040 ) 3041 3042 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3043 return self.expression( 3044 exp.MatchRecognizeMeasure, 3045 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3046 this=self._parse_expression(), 3047 ) 3048 3049 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3050 if not self._match(TokenType.MATCH_RECOGNIZE): 3051 return None 3052 3053 self._match_l_paren() 3054 3055 partition = self._parse_partition_by() 3056 order = self._parse_order() 3057 3058 measures = ( 3059 self._parse_csv(self._parse_match_recognize_measure) 3060 if self._match_text_seq("MEASURES") 3061 else None 3062 ) 3063 3064 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3065 rows = exp.var("ONE ROW PER MATCH") 3066 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3067 text = "ALL ROWS PER MATCH" 3068 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3069 text += " SHOW EMPTY MATCHES" 3070 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3071 text += " OMIT EMPTY MATCHES" 3072 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3073 text += " WITH UNMATCHED ROWS" 3074 rows = exp.var(text) 3075 else: 3076 rows = None 3077 3078 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3079 text = "AFTER MATCH SKIP" 3080 if self._match_text_seq("PAST", "LAST", "ROW"): 3081 text += " PAST LAST ROW" 3082 elif self._match_text_seq("TO", "NEXT", "ROW"): 3083 text += " TO NEXT ROW" 3084 elif self._match_text_seq("TO", "FIRST"): 3085 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3086 elif self._match_text_seq("TO", "LAST"): 3087 text += f" TO LAST {self._advance_any().text}" # type: ignore 3088 after = exp.var(text) 3089 else: 3090 after = None 3091 3092 if self._match_text_seq("PATTERN"): 3093 self._match_l_paren() 3094 3095 if not self._curr: 3096 self.raise_error("Expecting )", self._curr) 3097 3098 paren = 1 3099 start = self._curr 3100 3101 while self._curr and paren > 0: 3102 if self._curr.token_type == TokenType.L_PAREN: 3103 paren += 1 3104 if self._curr.token_type == TokenType.R_PAREN: 3105 paren -= 1 3106 3107 end = self._prev 3108 self._advance() 3109 3110 if paren > 0: 3111 self.raise_error("Expecting )", self._curr) 3112 3113 pattern = exp.var(self._find_sql(start, end)) 3114 else: 3115 pattern = None 3116 3117 define = ( 3118 self._parse_csv(self._parse_name_as_expression) 3119 if self._match_text_seq("DEFINE") 3120 else None 3121 ) 3122 3123 self._match_r_paren() 3124 3125 return self.expression( 3126 exp.MatchRecognize, 3127 partition_by=partition, 3128 order=order, 3129 measures=measures, 3130 rows=rows, 3131 after=after, 3132 pattern=pattern, 3133 define=define, 3134 alias=self._parse_table_alias(), 3135 ) 3136 3137 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3138 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3139 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3140 cross_apply = False 3141 3142 if cross_apply is not None: 
3143 this = self._parse_select(table=True) 3144 view = None 3145 outer = None 3146 elif self._match(TokenType.LATERAL): 3147 this = self._parse_select(table=True) 3148 view = self._match(TokenType.VIEW) 3149 outer = self._match(TokenType.OUTER) 3150 else: 3151 return None 3152 3153 if not this: 3154 this = ( 3155 self._parse_unnest() 3156 or self._parse_function() 3157 or self._parse_id_var(any_token=False) 3158 ) 3159 3160 while self._match(TokenType.DOT): 3161 this = exp.Dot( 3162 this=this, 3163 expression=self._parse_function() or self._parse_id_var(any_token=False), 3164 ) 3165 3166 if view: 3167 table = self._parse_id_var(any_token=False) 3168 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3169 table_alias: t.Optional[exp.TableAlias] = self.expression( 3170 exp.TableAlias, this=table, columns=columns 3171 ) 3172 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3173 # We move the alias from the lateral's child node to the lateral itself 3174 table_alias = this.args["alias"].pop() 3175 else: 3176 table_alias = self._parse_table_alias() 3177 3178 return self.expression( 3179 exp.Lateral, 3180 this=this, 3181 view=view, 3182 outer=outer, 3183 alias=table_alias, 3184 cross_apply=cross_apply, 3185 ) 3186 3187 def _parse_join_parts( 3188 self, 3189 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3190 return ( 3191 self._match_set(self.JOIN_METHODS) and self._prev, 3192 self._match_set(self.JOIN_SIDES) and self._prev, 3193 self._match_set(self.JOIN_KINDS) and self._prev, 3194 ) 3195 3196 def _parse_join( 3197 self, skip_join_token: bool = False, parse_bracket: bool = False 3198 ) -> t.Optional[exp.Join]: 3199 if self._match(TokenType.COMMA): 3200 return self.expression(exp.Join, this=self._parse_table()) 3201 3202 index = self._index 3203 method, side, kind = self._parse_join_parts() 3204 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3205 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3206 3207 if not skip_join_token and not join: 3208 self._retreat(index) 3209 kind = None 3210 method = None 3211 side = None 3212 3213 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3214 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3215 3216 if not skip_join_token and not join and not outer_apply and not cross_apply: 3217 return None 3218 3219 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3220 3221 if method: 3222 kwargs["method"] = method.text 3223 if side: 3224 kwargs["side"] = side.text 3225 if kind: 3226 kwargs["kind"] = kind.text 3227 if hint: 3228 kwargs["hint"] = hint 3229 3230 if self._match(TokenType.MATCH_CONDITION): 3231 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3232 3233 if self._match(TokenType.ON): 3234 kwargs["on"] = self._parse_assignment() 3235 elif self._match(TokenType.USING): 3236 kwargs["using"] = self._parse_wrapped_id_vars() 3237 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3238 kind and kind.token_type == TokenType.CROSS 3239 ): 3240 index = self._index 3241 joins: t.Optional[list] = list(self._parse_joins()) 3242 3243 if joins and self._match(TokenType.ON): 3244 kwargs["on"] = self._parse_assignment() 3245 elif joins and self._match(TokenType.USING): 3246 kwargs["using"] = self._parse_wrapped_id_vars() 3247 else: 3248 joins = None 3249 self._retreat(index) 3250 3251 kwargs["this"].set("joins", joins if 
joins else None) 3252 3253 comments = [c for token in (method, side, kind) if token for c in token.comments] 3254 return self.expression(exp.Join, comments=comments, **kwargs) 3255 3256 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3257 this = self._parse_assignment() 3258 3259 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3260 return this 3261 3262 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3263 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3264 3265 return this 3266 3267 def _parse_index_params(self) -> exp.IndexParameters: 3268 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3269 3270 if self._match(TokenType.L_PAREN, advance=False): 3271 columns = self._parse_wrapped_csv(self._parse_with_operator) 3272 else: 3273 columns = None 3274 3275 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3276 partition_by = self._parse_partition_by() 3277 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3278 tablespace = ( 3279 self._parse_var(any_token=True) 3280 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3281 else None 3282 ) 3283 where = self._parse_where() 3284 3285 on = self._parse_field() if self._match(TokenType.ON) else None 3286 3287 return self.expression( 3288 exp.IndexParameters, 3289 using=using, 3290 columns=columns, 3291 include=include, 3292 partition_by=partition_by, 3293 where=where, 3294 with_storage=with_storage, 3295 tablespace=tablespace, 3296 on=on, 3297 ) 3298 3299 def _parse_index( 3300 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3301 ) -> t.Optional[exp.Index]: 3302 if index or anonymous: 3303 unique = None 3304 primary = None 3305 amp = None 3306 3307 self._match(TokenType.ON) 3308 self._match(TokenType.TABLE) # hive 3309 table = self._parse_table_parts(schema=True) 3310 else: 3311 unique = self._match(TokenType.UNIQUE) 3312 primary = self._match_text_seq("PRIMARY") 3313 amp = self._match_text_seq("AMP") 3314 3315 if not self._match(TokenType.INDEX): 3316 return None 3317 3318 index = self._parse_id_var() 3319 table = None 3320 3321 params = self._parse_index_params() 3322 3323 return self.expression( 3324 exp.Index, 3325 this=index, 3326 table=table, 3327 unique=unique, 3328 primary=primary, 3329 amp=amp, 3330 params=params, 3331 ) 3332 3333 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3334 hints: t.List[exp.Expression] = [] 3335 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3336 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3337 hints.append( 3338 self.expression( 3339 exp.WithTableHint, 3340 expressions=self._parse_csv( 3341 lambda: self._parse_function() or self._parse_var(any_token=True) 3342 ), 3343 ) 3344 ) 3345 self._match_r_paren() 3346 else: 3347 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3348 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3349 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3350 3351 self._match_set((TokenType.INDEX, TokenType.KEY)) 3352 if self._match(TokenType.FOR): 3353 hint.set("target", self._advance_any() and self._prev.text.upper()) 3354 3355 hint.set("expressions", self._parse_wrapped_id_vars()) 3356 hints.append(hint) 3357 3358 return hints or None 3359 3360 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3361 return ( 3362 (not schema and 
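# in a schema context (e.g. a column definition list) a bare identifier must
# not be swallowed as a function call, hence the `not schema` guard: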
self._parse_function(optional_parens=False)) 3363 or self._parse_id_var(any_token=False) 3364 or self._parse_string_as_identifier() 3365 or self._parse_placeholder() 3366 ) 3367 3368 def _parse_table_parts( 3369 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3370 ) -> exp.Table: 3371 catalog = None 3372 db = None 3373 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3374 3375 while self._match(TokenType.DOT): 3376 if catalog: 3377 # This allows nesting the table in arbitrarily many dot expressions if needed 3378 table = self.expression( 3379 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3380 ) 3381 else: 3382 catalog = db 3383 db = table 3384 # "" used for tsql FROM a..b case 3385 table = self._parse_table_part(schema=schema) or "" 3386 3387 if ( 3388 wildcard 3389 and self._is_connected() 3390 and (isinstance(table, exp.Identifier) or not table) 3391 and self._match(TokenType.STAR) 3392 ): 3393 if isinstance(table, exp.Identifier): 3394 table.args["this"] += "*" 3395 else: 3396 table = exp.Identifier(this="*") 3397 3398 # We bubble up comments from the Identifier to the Table 3399 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3400 3401 if is_db_reference: 3402 catalog = db 3403 db = table 3404 table = None 3405 3406 if not table and not is_db_reference: 3407 self.raise_error(f"Expected table name but got {self._curr}") 3408 if not db and is_db_reference: 3409 self.raise_error(f"Expected database name but got {self._curr}") 3410 3411 table = self.expression( 3412 exp.Table, 3413 comments=comments, 3414 this=table, 3415 db=db, 3416 catalog=catalog, 3417 ) 3418 3419 changes = self._parse_changes() 3420 if changes: 3421 table.set("changes", changes) 3422 3423 at_before = self._parse_historical_data() 3424 if at_before: 3425 table.set("when", at_before) 3426 3427 pivots = self._parse_pivots() 3428 if pivots: 3429 table.set("pivots", pivots) 3430 3431 return table 3432 3433 def _parse_table( 3434 self, 3435 schema: bool = False, 3436 joins: bool = False, 3437 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3438 parse_bracket: bool = False, 3439 is_db_reference: bool = False, 3440 parse_partition: bool = False, 3441 ) -> t.Optional[exp.Expression]: 3442 lateral = self._parse_lateral() 3443 if lateral: 3444 return lateral 3445 3446 unnest = self._parse_unnest() 3447 if unnest: 3448 return unnest 3449 3450 values = self._parse_derived_table_values() 3451 if values: 3452 return values 3453 3454 subquery = self._parse_select(table=True) 3455 if subquery: 3456 if not subquery.args.get("pivots"): 3457 subquery.set("pivots", self._parse_pivots()) 3458 return subquery 3459 3460 bracket = parse_bracket and self._parse_bracket(None) 3461 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3462 3463 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3464 self._parse_table 3465 ) 3466 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3467 3468 only = self._match(TokenType.ONLY) 3469 3470 this = t.cast( 3471 exp.Expression, 3472 bracket 3473 or rows_from 3474 or self._parse_bracket( 3475 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3476 ), 3477 ) 3478 3479 if only: 3480 this.set("only", only) 3481 3482 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3483 self._match_text_seq("*") 3484 3485 parse_partition = parse_partition or 
self.SUPPORTS_PARTITION_SELECTION 3486 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3487 this.set("partition", self._parse_partition()) 3488 3489 if schema: 3490 return self._parse_schema(this=this) 3491 3492 version = self._parse_version() 3493 3494 if version: 3495 this.set("version", version) 3496 3497 if self.dialect.ALIAS_POST_TABLESAMPLE: 3498 table_sample = self._parse_table_sample() 3499 3500 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3501 if alias: 3502 this.set("alias", alias) 3503 3504 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3505 return self.expression( 3506 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3507 ) 3508 3509 this.set("hints", self._parse_table_hints()) 3510 3511 if not this.args.get("pivots"): 3512 this.set("pivots", self._parse_pivots()) 3513 3514 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3515 table_sample = self._parse_table_sample() 3516 3517 if table_sample: 3518 table_sample.set("this", this) 3519 this = table_sample 3520 3521 if joins: 3522 for join in self._parse_joins(): 3523 this.append("joins", join) 3524 3525 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3526 this.set("ordinality", True) 3527 this.set("alias", self._parse_table_alias()) 3528 3529 return this 3530 3531 def _parse_version(self) -> t.Optional[exp.Version]: 3532 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3533 this = "TIMESTAMP" 3534 elif self._match(TokenType.VERSION_SNAPSHOT): 3535 this = "VERSION" 3536 else: 3537 return None 3538 3539 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3540 kind = self._prev.text.upper() 3541 start = self._parse_bitwise() 3542 self._match_texts(("TO", "AND")) 3543 end = self._parse_bitwise() 3544 expression: t.Optional[exp.Expression] = self.expression( 3545 exp.Tuple, expressions=[start, end] 3546 ) 3547 elif self._match_text_seq("CONTAINED", "IN"): 3548 kind = "CONTAINED IN" 3549 expression = self.expression( 3550 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3551 ) 3552 elif self._match(TokenType.ALL): 3553 kind = "ALL" 3554 expression = None 3555 else: 3556 self._match_text_seq("AS", "OF") 3557 kind = "AS OF" 3558 expression = self._parse_type() 3559 3560 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3561 3562 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3563 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3564 index = self._index 3565 historical_data = None 3566 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3567 this = self._prev.text.upper() 3568 kind = ( 3569 self._match(TokenType.L_PAREN) 3570 and self._match_texts(self.HISTORICAL_DATA_KIND) 3571 and self._prev.text.upper() 3572 ) 3573 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3574 3575 if expression: 3576 self._match_r_paren() 3577 historical_data = self.expression( 3578 exp.HistoricalData, this=this, kind=kind, expression=expression 3579 ) 3580 else: 3581 self._retreat(index) 3582 3583 return historical_data 3584 3585 def _parse_changes(self) -> t.Optional[exp.Changes]: 3586 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3587 return None 3588 3589 information = self._parse_var(any_token=True) 3590 self._match_r_paren() 3591 3592 return self.expression( 3593 exp.Changes, 3594 information=information, 3595 at_before=self._parse_historical_data(), 3596 end=self._parse_historical_data(), 3597 ) 3598 3599 def 
_parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3600 if not self._match(TokenType.UNNEST): 3601 return None 3602 3603 expressions = self._parse_wrapped_csv(self._parse_equality) 3604 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3605 3606 alias = self._parse_table_alias() if with_alias else None 3607 3608 if alias: 3609 if self.dialect.UNNEST_COLUMN_ONLY: 3610 if alias.args.get("columns"): 3611 self.raise_error("Unexpected extra column alias in unnest.") 3612 3613 alias.set("columns", [alias.this]) 3614 alias.set("this", None) 3615 3616 columns = alias.args.get("columns") or [] 3617 if offset and len(expressions) < len(columns): 3618 offset = columns.pop() 3619 3620 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3621 self._match(TokenType.ALIAS) 3622 offset = self._parse_id_var( 3623 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3624 ) or exp.to_identifier("offset") 3625 3626 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3627 3628 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3629 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3630 if not is_derived and not self._match_text_seq("VALUES"): 3631 return None 3632 3633 expressions = self._parse_csv(self._parse_value) 3634 alias = self._parse_table_alias() 3635 3636 if is_derived: 3637 self._match_r_paren() 3638 3639 return self.expression( 3640 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3641 ) 3642 3643 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3644 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3645 as_modifier and self._match_text_seq("USING", "SAMPLE") 3646 ): 3647 return None 3648 3649 bucket_numerator = None 3650 bucket_denominator = None 3651 bucket_field = None 3652 percent = None 3653 size = None 3654 seed = None 3655 3656 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3657 matched_l_paren = self._match(TokenType.L_PAREN) 3658 3659 if self.TABLESAMPLE_CSV: 3660 num = None 3661 expressions = self._parse_csv(self._parse_primary) 3662 else: 3663 expressions = None 3664 num = ( 3665 self._parse_factor() 3666 if self._match(TokenType.NUMBER, advance=False) 3667 else self._parse_primary() or self._parse_placeholder() 3668 ) 3669 3670 if self._match_text_seq("BUCKET"): 3671 bucket_numerator = self._parse_number() 3672 self._match_text_seq("OUT", "OF") 3673 bucket_denominator = self._parse_number() 3674 self._match(TokenType.ON) 3675 bucket_field = self._parse_field() 3676 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3677 percent = num 3678 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3679 size = num 3680 else: 3681 percent = num 3682 3683 if matched_l_paren: 3684 self._match_r_paren() 3685 3686 if self._match(TokenType.L_PAREN): 3687 method = self._parse_var(upper=True) 3688 seed = self._match(TokenType.COMMA) and self._parse_number() 3689 self._match_r_paren() 3690 elif self._match_texts(("SEED", "REPEATABLE")): 3691 seed = self._parse_wrapped(self._parse_number) 3692 3693 if not method and self.DEFAULT_SAMPLING_METHOD: 3694 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3695 3696 return self.expression( 3697 exp.TableSample, 3698 expressions=expressions, 3699 method=method, 3700 bucket_numerator=bucket_numerator, 3701 bucket_denominator=bucket_denominator, 3702 bucket_field=bucket_field, 3703 percent=percent, 3704
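# --- Illustrative aside (not part of the parser source) ---------------------
# _parse_table_sample above assembles an exp.TableSample node from a
# TABLESAMPLE clause. A minimal sketch of the observable behavior via the
# public API, assuming the default dialect accepts the percent form:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> select = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
#     >>> isinstance(select.find(exp.TableSample), exp.TableSample)
#     True
# ----------------------------------------------------------------------------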
size=size, 3705 seed=seed, 3706 ) 3707 3708 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3709 return list(iter(self._parse_pivot, None)) or None 3710 3711 def _parse_joins(self) -> t.Iterator[exp.Join]: 3712 return iter(self._parse_join, None) 3713 3714 # https://duckdb.org/docs/sql/statements/pivot 3715 def _parse_simplified_pivot(self) -> exp.Pivot: 3716 def _parse_on() -> t.Optional[exp.Expression]: 3717 this = self._parse_bitwise() 3718 return self._parse_in(this) if self._match(TokenType.IN) else this 3719 3720 this = self._parse_table() 3721 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3722 using = self._match(TokenType.USING) and self._parse_csv( 3723 lambda: self._parse_alias(self._parse_function()) 3724 ) 3725 group = self._parse_group() 3726 return self.expression( 3727 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3728 ) 3729 3730 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3731 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3732 this = self._parse_select_or_expression() 3733 3734 self._match(TokenType.ALIAS) 3735 alias = self._parse_field() 3736 if alias: 3737 return self.expression(exp.PivotAlias, this=this, alias=alias) 3738 3739 return this 3740 3741 value = self._parse_column() 3742 3743 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3744 self.raise_error("Expecting IN (") 3745 3746 if self._match(TokenType.ANY): 3747 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3748 else: 3749 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3750 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3751 3752 self._match_r_paren() 3753 return expr 3754 3755 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3756 index = self._index 3757 include_nulls = None 3758 3759 if self._match(TokenType.PIVOT): 3760 unpivot = False 3761 elif self._match(TokenType.UNPIVOT): 3762 unpivot = True 3763 3764 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3765 if self._match_text_seq("INCLUDE", "NULLS"): 3766 include_nulls = True 3767 elif self._match_text_seq("EXCLUDE", "NULLS"): 3768 include_nulls = False 3769 else: 3770 return None 3771 3772 expressions = [] 3773 3774 if not self._match(TokenType.L_PAREN): 3775 self._retreat(index) 3776 return None 3777 3778 if unpivot: 3779 expressions = self._parse_csv(self._parse_column) 3780 else: 3781 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3782 3783 if not expressions: 3784 self.raise_error("Failed to parse PIVOT's aggregation list") 3785 3786 if not self._match(TokenType.FOR): 3787 self.raise_error("Expecting FOR") 3788 3789 field = self._parse_pivot_in() 3790 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3791 self._parse_bitwise 3792 ) 3793 3794 self._match_r_paren() 3795 3796 pivot = self.expression( 3797 exp.Pivot, 3798 expressions=expressions, 3799 field=field, 3800 unpivot=unpivot, 3801 include_nulls=include_nulls, 3802 default_on_null=default_on_null, 3803 ) 3804 3805 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3806 pivot.set("alias", self._parse_table_alias()) 3807 3808 if not unpivot: 3809 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3810 3811 columns: t.List[exp.Expression] = [] 3812 for fld in pivot.args["field"].expressions: 3813 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS 
else fld.alias_or_name 3814 for name in names: 3815 if self.PREFIXED_PIVOT_COLUMNS: 3816 name = f"{name}_{field_name}" if name else field_name 3817 else: 3818 name = f"{field_name}_{name}" if name else field_name 3819 3820 columns.append(exp.to_identifier(name)) 3821 3822 pivot.set("columns", columns) 3823 3824 return pivot 3825 3826 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3827 return [agg.alias for agg in aggregations] 3828 3829 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3830 if not skip_where_token and not self._match(TokenType.PREWHERE): 3831 return None 3832 3833 return self.expression( 3834 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3835 ) 3836 3837 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3838 if not skip_where_token and not self._match(TokenType.WHERE): 3839 return None 3840 3841 return self.expression( 3842 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3843 ) 3844 3845 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3846 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3847 return None 3848 3849 elements: t.Dict[str, t.Any] = defaultdict(list) 3850 3851 if self._match(TokenType.ALL): 3852 elements["all"] = True 3853 elif self._match(TokenType.DISTINCT): 3854 elements["all"] = False 3855 3856 while True: 3857 expressions = self._parse_csv( 3858 lambda: None 3859 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3860 else self._parse_assignment() 3861 ) 3862 if expressions: 3863 elements["expressions"].extend(expressions) 3864 3865 grouping_sets = self._parse_grouping_sets() 3866 if grouping_sets: 3867 elements["grouping_sets"].extend(grouping_sets) 3868 3869 rollup = None 3870 cube = None 3871 totals = None 3872 3873 index = self._index 3874 with_ = self._match(TokenType.WITH) 3875 if self._match(TokenType.ROLLUP): 3876 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3877 elements["rollup"].extend(ensure_list(rollup)) 3878 3879 if self._match(TokenType.CUBE): 3880 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3881 elements["cube"].extend(ensure_list(cube)) 3882 3883 if self._match_text_seq("TOTALS"): 3884 totals = True 3885 elements["totals"] = True # type: ignore 3886 3887 if not (grouping_sets or rollup or cube or totals): 3888 if with_: 3889 self._retreat(index) 3890 break 3891 3892 return self.expression(exp.Group, **elements) # type: ignore 3893 3894 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3895 if not self._match(TokenType.GROUPING_SETS): 3896 return None 3897 3898 return self._parse_wrapped_csv(self._parse_grouping_set) 3899 3900 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3901 if self._match(TokenType.L_PAREN): 3902 grouping_set = self._parse_csv(self._parse_column) 3903 self._match_r_paren() 3904 return self.expression(exp.Tuple, expressions=grouping_set) 3905 3906 return self._parse_column() 3907 3908 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3909 if not skip_having_token and not self._match(TokenType.HAVING): 3910 return None 3911 return self.expression(exp.Having, this=self._parse_assignment()) 3912 3913 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3914 if not self._match(TokenType.QUALIFY): 3915 return None 3916 return self.expression(exp.Qualify, this=self._parse_assignment()) 3917 3918 def 
_parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3919 if skip_start_token: 3920 start = None 3921 elif self._match(TokenType.START_WITH): 3922 start = self._parse_assignment() 3923 else: 3924 return None 3925 3926 self._match(TokenType.CONNECT_BY) 3927 nocycle = self._match_text_seq("NOCYCLE") 3928 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3929 exp.Prior, this=self._parse_bitwise() 3930 ) 3931 connect = self._parse_assignment() 3932 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3933 3934 if not start and self._match(TokenType.START_WITH): 3935 start = self._parse_assignment() 3936 3937 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3938 3939 def _parse_name_as_expression(self) -> exp.Alias: 3940 return self.expression( 3941 exp.Alias, 3942 alias=self._parse_id_var(any_token=True), 3943 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3944 ) 3945 3946 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3947 if self._match_text_seq("INTERPOLATE"): 3948 return self._parse_wrapped_csv(self._parse_name_as_expression) 3949 return None 3950 3951 def _parse_order( 3952 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3953 ) -> t.Optional[exp.Expression]: 3954 siblings = None 3955 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3956 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3957 return this 3958 3959 siblings = True 3960 3961 return self.expression( 3962 exp.Order, 3963 this=this, 3964 expressions=self._parse_csv(self._parse_ordered), 3965 interpolate=self._parse_interpolate(), 3966 siblings=siblings, 3967 ) 3968 3969 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3970 if not self._match(token): 3971 return None 3972 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3973 3974 def _parse_ordered( 3975 self, parse_method: t.Optional[t.Callable] = None 3976 ) -> t.Optional[exp.Ordered]: 3977 this = parse_method() if parse_method else self._parse_assignment() 3978 if not this: 3979 return None 3980 3981 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 3982 this = exp.var("ALL") 3983 3984 asc = self._match(TokenType.ASC) 3985 desc = self._match(TokenType.DESC) or (asc and False) 3986 3987 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3988 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3989 3990 nulls_first = is_nulls_first or False 3991 explicitly_null_ordered = is_nulls_first or is_nulls_last 3992 3993 if ( 3994 not explicitly_null_ordered 3995 and ( 3996 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3997 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3998 ) 3999 and self.dialect.NULL_ORDERING != "nulls_are_last" 4000 ): 4001 nulls_first = True 4002 4003 if self._match_text_seq("WITH", "FILL"): 4004 with_fill = self.expression( 4005 exp.WithFill, 4006 **{ # type: ignore 4007 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4008 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4009 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4010 }, 4011 ) 4012 else: 4013 with_fill = None 4014 4015 return self.expression( 4016 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4017 ) 4018 4019 def _parse_limit( 4020 self, 4021 this: t.Optional[exp.Expression] = None, 4022 top: bool = False, 4023 skip_limit_token: bool = False, 4024 ) -> 
t.Optional[exp.Expression]: 4025 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4026 comments = self._prev_comments 4027 if top: 4028 limit_paren = self._match(TokenType.L_PAREN) 4029 expression = self._parse_term() if limit_paren else self._parse_number() 4030 4031 if limit_paren: 4032 self._match_r_paren() 4033 else: 4034 expression = self._parse_term() 4035 4036 if self._match(TokenType.COMMA): 4037 offset = expression 4038 expression = self._parse_term() 4039 else: 4040 offset = None 4041 4042 limit_exp = self.expression( 4043 exp.Limit, 4044 this=this, 4045 expression=expression, 4046 offset=offset, 4047 comments=comments, 4048 expressions=self._parse_limit_by(), 4049 ) 4050 4051 return limit_exp 4052 4053 if self._match(TokenType.FETCH): 4054 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4055 direction = self._prev.text.upper() if direction else "FIRST" 4056 4057 count = self._parse_field(tokens=self.FETCH_TOKENS) 4058 percent = self._match(TokenType.PERCENT) 4059 4060 self._match_set((TokenType.ROW, TokenType.ROWS)) 4061 4062 only = self._match_text_seq("ONLY") 4063 with_ties = self._match_text_seq("WITH", "TIES") 4064 4065 if only and with_ties: 4066 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4067 4068 return self.expression( 4069 exp.Fetch, 4070 direction=direction, 4071 count=count, 4072 percent=percent, 4073 with_ties=with_ties, 4074 ) 4075 4076 return this 4077 4078 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4079 if not self._match(TokenType.OFFSET): 4080 return this 4081 4082 count = self._parse_term() 4083 self._match_set((TokenType.ROW, TokenType.ROWS)) 4084 4085 return self.expression( 4086 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4087 ) 4088 4089 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4090 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4091 4092 def _parse_locks(self) -> t.List[exp.Lock]: 4093 locks = [] 4094 while True: 4095 if self._match_text_seq("FOR", "UPDATE"): 4096 update = True 4097 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4098 "LOCK", "IN", "SHARE", "MODE" 4099 ): 4100 update = False 4101 else: 4102 break 4103 4104 expressions = None 4105 if self._match_text_seq("OF"): 4106 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4107 4108 wait: t.Optional[bool | exp.Expression] = None 4109 if self._match_text_seq("NOWAIT"): 4110 wait = True 4111 elif self._match_text_seq("WAIT"): 4112 wait = self._parse_primary() 4113 elif self._match_text_seq("SKIP", "LOCKED"): 4114 wait = False 4115 4116 locks.append( 4117 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4118 ) 4119 4120 return locks 4121 4122 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4123 while this and self._match_set(self.SET_OPERATIONS): 4124 token_type = self._prev.token_type 4125 4126 if token_type == TokenType.UNION: 4127 operation: t.Type[exp.SetOperation] = exp.Union 4128 elif token_type == TokenType.EXCEPT: 4129 operation = exp.Except 4130 else: 4131 operation = exp.Intersect 4132 4133 comments = self._prev.comments 4134 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4135 by_name = self._match_text_seq("BY", "NAME") 4136 expression = self._parse_select(nested=True, parse_set_operation=False) 4137 4138 this = self.expression( 4139 
operation, 4140 comments=comments, 4141 this=this, 4142 distinct=distinct, 4143 by_name=by_name, 4144 expression=expression, 4145 ) 4146 4147 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4148 expression = this.expression 4149 4150 if expression: 4151 for arg in self.SET_OP_MODIFIERS: 4152 expr = expression.args.get(arg) 4153 if expr: 4154 this.set(arg, expr.pop()) 4155 4156 return this 4157 4158 def _parse_expression(self) -> t.Optional[exp.Expression]: 4159 return self._parse_alias(self._parse_assignment()) 4160 4161 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4162 this = self._parse_disjunction() 4163 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4164 # This allows us to parse <non-identifier token> := <expr> 4165 this = exp.column( 4166 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4167 ) 4168 4169 while self._match_set(self.ASSIGNMENT): 4170 this = self.expression( 4171 self.ASSIGNMENT[self._prev.token_type], 4172 this=this, 4173 comments=self._prev_comments, 4174 expression=self._parse_assignment(), 4175 ) 4176 4177 return this 4178 4179 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4180 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4181 4182 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4183 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4184 4185 def _parse_equality(self) -> t.Optional[exp.Expression]: 4186 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4187 4188 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4189 return self._parse_tokens(self._parse_range, self.COMPARISON) 4190 4191 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4192 this = this or self._parse_bitwise() 4193 negate = self._match(TokenType.NOT) 4194 4195 if self._match_set(self.RANGE_PARSERS): 4196 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4197 if not expression: 4198 return this 4199 4200 this = expression 4201 elif self._match(TokenType.ISNULL): 4202 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4203 4204 # Postgres supports ISNULL and NOTNULL for conditions. 
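# --- Illustrative aside (not part of the parser source) ---------------------
# The ISNULL/NOTNULL handling around this point reduces both Postgres
# postfix operators to an exp.Is node, negated for NOTNULL. A minimal
# sketch, assuming the Postgres dialect; rendered output is approximate:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("x ISNULL", read="postgres").sql()
#     'x IS NULL'
#     >>> sqlglot.parse_one("x NOTNULL", read="postgres").sql()
#     'NOT x IS NULL'
# ----------------------------------------------------------------------------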
4205 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4206 if self._match(TokenType.NOTNULL): 4207 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4208 this = self.expression(exp.Not, this=this) 4209 4210 if negate: 4211 this = self.expression(exp.Not, this=this) 4212 4213 if self._match(TokenType.IS): 4214 this = self._parse_is(this) 4215 4216 return this 4217 4218 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4219 index = self._index - 1 4220 negate = self._match(TokenType.NOT) 4221 4222 if self._match_text_seq("DISTINCT", "FROM"): 4223 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4224 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4225 4226 expression = self._parse_null() or self._parse_boolean() 4227 if not expression: 4228 self._retreat(index) 4229 return None 4230 4231 this = self.expression(exp.Is, this=this, expression=expression) 4232 return self.expression(exp.Not, this=this) if negate else this 4233 4234 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4235 unnest = self._parse_unnest(with_alias=False) 4236 if unnest: 4237 this = self.expression(exp.In, this=this, unnest=unnest) 4238 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4239 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4240 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4241 4242 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4243 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4244 else: 4245 this = self.expression(exp.In, this=this, expressions=expressions) 4246 4247 if matched_l_paren: 4248 self._match_r_paren(this) 4249 elif not self._match(TokenType.R_BRACKET, expression=this): 4250 self.raise_error("Expecting ]") 4251 else: 4252 this = self.expression(exp.In, this=this, field=self._parse_field()) 4253 4254 return this 4255 4256 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4257 low = self._parse_bitwise() 4258 self._match(TokenType.AND) 4259 high = self._parse_bitwise() 4260 return self.expression(exp.Between, this=this, low=low, high=high) 4261 4262 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4263 if not self._match(TokenType.ESCAPE): 4264 return this 4265 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4266 4267 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4268 index = self._index 4269 4270 if not self._match(TokenType.INTERVAL) and match_interval: 4271 return None 4272 4273 if self._match(TokenType.STRING, advance=False): 4274 this = self._parse_primary() 4275 else: 4276 this = self._parse_term() 4277 4278 if not this or ( 4279 isinstance(this, exp.Column) 4280 and not this.table 4281 and not this.this.quoted 4282 and this.name.upper() == "IS" 4283 ): 4284 self._retreat(index) 4285 return None 4286 4287 unit = self._parse_function() or ( 4288 not self._match(TokenType.ALIAS, advance=False) 4289 and self._parse_var(any_token=True, upper=True) 4290 ) 4291 4292 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4293 # each INTERVAL expression into this canonical form so it's easy to transpile 4294 if this and this.is_number: 4295 this = exp.Literal.string(this.to_py()) 4296 elif this and this.is_string: 4297 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4298 if 
len(parts) == 1: 4299 if unit: 4300 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4301 self._retreat(self._index - 1) 4302 4303 this = exp.Literal.string(parts[0][0]) 4304 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4305 4306 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4307 unit = self.expression( 4308 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4309 ) 4310 4311 interval = self.expression(exp.Interval, this=this, unit=unit) 4312 4313 index = self._index 4314 self._match(TokenType.PLUS) 4315 4316 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4317 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4318 return self.expression( 4319 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4320 ) 4321 4322 self._retreat(index) 4323 return interval 4324 4325 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4326 this = self._parse_term() 4327 4328 while True: 4329 if self._match_set(self.BITWISE): 4330 this = self.expression( 4331 self.BITWISE[self._prev.token_type], 4332 this=this, 4333 expression=self._parse_term(), 4334 ) 4335 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4336 this = self.expression( 4337 exp.DPipe, 4338 this=this, 4339 expression=self._parse_term(), 4340 safe=not self.dialect.STRICT_STRING_CONCAT, 4341 ) 4342 elif self._match(TokenType.DQMARK): 4343 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4344 elif self._match_pair(TokenType.LT, TokenType.LT): 4345 this = self.expression( 4346 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4347 ) 4348 elif self._match_pair(TokenType.GT, TokenType.GT): 4349 this = self.expression( 4350 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4351 ) 4352 else: 4353 break 4354 4355 return this 4356 4357 def _parse_term(self) -> t.Optional[exp.Expression]: 4358 return self._parse_tokens(self._parse_factor, self.TERM) 4359 4360 def _parse_factor(self) -> t.Optional[exp.Expression]: 4361 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4362 this = parse_method() 4363 4364 while self._match_set(self.FACTOR): 4365 klass = self.FACTOR[self._prev.token_type] 4366 comments = self._prev_comments 4367 expression = parse_method() 4368 4369 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4370 self._retreat(self._index - 1) 4371 return this 4372 4373 this = self.expression(klass, this=this, comments=comments, expression=expression) 4374 4375 if isinstance(this, exp.Div): 4376 this.args["typed"] = self.dialect.TYPED_DIVISION 4377 this.args["safe"] = self.dialect.SAFE_DIVISION 4378 4379 return this 4380 4381 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4382 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4383 4384 def _parse_unary(self) -> t.Optional[exp.Expression]: 4385 if self._match_set(self.UNARY_PARSERS): 4386 return self.UNARY_PARSERS[self._prev.token_type](self) 4387 return self._parse_at_time_zone(self._parse_type()) 4388 4389 def _parse_type( 4390 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4391 ) -> t.Optional[exp.Expression]: 4392 interval = parse_interval and self._parse_interval() 4393 if interval: 4394 return interval 4395 4396 index = self._index 4397 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4398 4399 # parse_types() returns a Cast if we 
parsed BQ's inline constructor <type>(<values>) e.g. 4400 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4401 if isinstance(data_type, exp.Cast): 4402 # This constructor can contain ops directly after it, for instance struct unnesting: 4403 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4404 return self._parse_column_ops(data_type) 4405 4406 if data_type: 4407 index2 = self._index 4408 this = self._parse_primary() 4409 4410 if isinstance(this, exp.Literal): 4411 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4412 if parser: 4413 return parser(self, this, data_type) 4414 4415 return self.expression(exp.Cast, this=this, to=data_type) 4416 4417 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4418 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4419 # 4420 # If the index difference here is greater than 1, that means the parser itself must have 4421 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4422 # 4423 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4424 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4425 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4426 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4427 # 4428 # In these cases, we don't really want to return the converted type, but instead retreat 4429 # and try to parse a Column or Identifier in the section below. 4430 if data_type.expressions and index2 - index > 1: 4431 self._retreat(index2) 4432 return self._parse_column_ops(data_type) 4433 4434 self._retreat(index) 4435 4436 if fallback_to_identifier: 4437 return self._parse_id_var() 4438 4439 this = self._parse_column() 4440 return this and self._parse_column_ops(this) 4441 4442 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4443 this = self._parse_type() 4444 if not this: 4445 return None 4446 4447 if isinstance(this, exp.Column) and not this.table: 4448 this = exp.var(this.name.upper()) 4449 4450 return self.expression( 4451 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4452 ) 4453 4454 def _parse_types( 4455 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4456 ) -> t.Optional[exp.Expression]: 4457 index = self._index 4458 4459 this: t.Optional[exp.Expression] = None 4460 prefix = self._match_text_seq("SYSUDTLIB", ".") 4461 4462 if not self._match_set(self.TYPE_TOKENS): 4463 identifier = allow_identifiers and self._parse_id_var( 4464 any_token=False, tokens=(TokenType.VAR,) 4465 ) 4466 if isinstance(identifier, exp.Identifier): 4467 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4468 4469 if len(tokens) != 1: 4470 self.raise_error("Unexpected identifier", self._prev) 4471 4472 if tokens[0].token_type in self.TYPE_TOKENS: 4473 self._prev = tokens[0] 4474 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4475 type_name = identifier.name 4476 4477 while self._match(TokenType.DOT): 4478 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4479 4480 this = exp.DataType.build(type_name, udt=True) 4481 else: 4482 self._retreat(self._index - 1) 4483 return None 4484 else: 4485 return None 4486 4487 type_token = self._prev.token_type 4488 4489 if type_token == TokenType.PSEUDO_TYPE: 4490 return self.expression(exp.PseudoType,
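# --- Illustrative aside (not part of the parser source) ---------------------
# The retreat logic above matters for dialects whose TYPE_CONVERTERS expand
# bare type names, as the Snowflake DECIMAL example in the comment describes.
# A minimal sketch, assuming Snowflake behaves as that comment states:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("CAST(x AS DECIMAL)", read="snowflake").sql("snowflake")
#     'CAST(x AS DECIMAL(38, 0))'
# ----------------------------------------------------------------------------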
this=self._prev.text.upper()) 4491 4492 if type_token == TokenType.OBJECT_IDENTIFIER: 4493 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4494 4495 # https://materialize.com/docs/sql/types/map/ 4496 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4497 key_type = self._parse_types( 4498 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4499 ) 4500 if not self._match(TokenType.FARROW): 4501 self._retreat(index) 4502 return None 4503 4504 value_type = self._parse_types( 4505 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4506 ) 4507 if not self._match(TokenType.R_BRACKET): 4508 self._retreat(index) 4509 return None 4510 4511 return exp.DataType( 4512 this=exp.DataType.Type.MAP, 4513 expressions=[key_type, value_type], 4514 nested=True, 4515 prefix=prefix, 4516 ) 4517 4518 nested = type_token in self.NESTED_TYPE_TOKENS 4519 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4520 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4521 expressions = None 4522 maybe_func = False 4523 4524 if self._match(TokenType.L_PAREN): 4525 if is_struct: 4526 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4527 elif nested: 4528 expressions = self._parse_csv( 4529 lambda: self._parse_types( 4530 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4531 ) 4532 ) 4533 elif type_token in self.ENUM_TYPE_TOKENS: 4534 expressions = self._parse_csv(self._parse_equality) 4535 elif is_aggregate: 4536 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4537 any_token=False, tokens=(TokenType.VAR,) 4538 ) 4539 if not func_or_ident or not self._match(TokenType.COMMA): 4540 return None 4541 expressions = self._parse_csv( 4542 lambda: self._parse_types( 4543 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4544 ) 4545 ) 4546 expressions.insert(0, func_or_ident) 4547 else: 4548 expressions = self._parse_csv(self._parse_type_size) 4549 4550 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4551 if type_token == TokenType.VECTOR and len(expressions) == 2: 4552 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4553 4554 if not expressions or not self._match(TokenType.R_PAREN): 4555 self._retreat(index) 4556 return None 4557 4558 maybe_func = True 4559 4560 values: t.Optional[t.List[exp.Expression]] = None 4561 4562 if nested and self._match(TokenType.LT): 4563 if is_struct: 4564 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4565 else: 4566 expressions = self._parse_csv( 4567 lambda: self._parse_types( 4568 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4569 ) 4570 ) 4571 4572 if not self._match(TokenType.GT): 4573 self.raise_error("Expecting >") 4574 4575 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4576 values = self._parse_csv(self._parse_assignment) 4577 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4578 4579 if type_token in self.TIMESTAMPS: 4580 if self._match_text_seq("WITH", "TIME", "ZONE"): 4581 maybe_func = False 4582 tz_type = ( 4583 exp.DataType.Type.TIMETZ 4584 if type_token in self.TIMES 4585 else exp.DataType.Type.TIMESTAMPTZ 4586 ) 4587 this = exp.DataType(this=tz_type, expressions=expressions) 4588 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4589 maybe_func = False 4590 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4591 
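# --- Illustrative aside (not part of the parser source) ---------------------
# The branches above fold "WITH [LOCAL] TIME ZONE" into the corresponding
# *TZ/*LTZ type variants. A minimal sketch, assuming the default dialect;
# rendered output is approximate:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("CAST(x AS TIMESTAMP WITH TIME ZONE)").sql()
#     'CAST(x AS TIMESTAMPTZ)'
# ----------------------------------------------------------------------------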
elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4592 maybe_func = False 4593 elif type_token == TokenType.INTERVAL: 4594 unit = self._parse_var(upper=True) 4595 if unit: 4596 if self._match_text_seq("TO"): 4597 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4598 4599 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4600 else: 4601 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4602 4603 if maybe_func and check_func: 4604 index2 = self._index 4605 peek = self._parse_string() 4606 4607 if not peek: 4608 self._retreat(index) 4609 return None 4610 4611 self._retreat(index2) 4612 4613 if not this: 4614 if self._match_text_seq("UNSIGNED"): 4615 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4616 if not unsigned_type_token: 4617 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4618 4619 type_token = unsigned_type_token or type_token 4620 4621 this = exp.DataType( 4622 this=exp.DataType.Type[type_token.value], 4623 expressions=expressions, 4624 nested=nested, 4625 prefix=prefix, 4626 ) 4627 4628 # Empty arrays/structs are allowed 4629 if values is not None: 4630 cls = exp.Struct if is_struct else exp.Array 4631 this = exp.cast(cls(expressions=values), this, copy=False) 4632 4633 elif expressions: 4634 this.set("expressions", expressions) 4635 4636 # https://materialize.com/docs/sql/types/list/#type-name 4637 while self._match(TokenType.LIST): 4638 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4639 4640 index = self._index 4641 4642 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4643 matched_array = self._match(TokenType.ARRAY) 4644 4645 while self._curr: 4646 datatype_token = self._prev.token_type 4647 matched_l_bracket = self._match(TokenType.L_BRACKET) 4648 if not matched_l_bracket and not matched_array: 4649 break 4650 4651 matched_array = False 4652 values = self._parse_csv(self._parse_assignment) or None 4653 if ( 4654 values 4655 and not schema 4656 and ( 4657 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4658 ) 4659 ): 4660 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4661 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4662 self._retreat(index) 4663 break 4664 4665 this = exp.DataType( 4666 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4667 ) 4668 self._match(TokenType.R_BRACKET) 4669 4670 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4671 converter = self.TYPE_CONVERTERS.get(this.this) 4672 if converter: 4673 this = converter(t.cast(exp.DataType, this)) 4674 4675 return this 4676 4677 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4678 index = self._index 4679 4680 if ( 4681 self._curr 4682 and self._next 4683 and self._curr.token_type in self.TYPE_TOKENS 4684 and self._next.token_type in self.TYPE_TOKENS 4685 ): 4686 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4687 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4688 this = self._parse_id_var() 4689 else: 4690 this = ( 4691 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4692 or self._parse_id_var() 4693 ) 4694 4695 self._match(TokenType.COLON) 4696 4697 if ( 4698 type_required 4699 and not isinstance(this, exp.DataType) 4700 and not self._match_set(self.TYPE_TOKENS, advance=False) 4701 ): 4702 self._retreat(index) 4703 return self._parse_types() 4704 4705 return self._parse_column_def(this) 4706 4707 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4708 if not self._match_text_seq("AT", "TIME", "ZONE"): 4709 return this 4710 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4711 4712 def _parse_column(self) -> t.Optional[exp.Expression]: 4713 this = self._parse_column_reference() 4714 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4715 4716 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4717 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4718 4719 return column 4720 4721 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4722 this = self._parse_field() 4723 if ( 4724 not this 4725 and self._match(TokenType.VALUES, advance=False) 4726 and self.VALUES_FOLLOWED_BY_PAREN 4727 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4728 ): 4729 this = self._parse_id_var() 4730 4731 if isinstance(this, exp.Identifier): 4732 # We bubble up comments from the Identifier to the Column 4733 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4734 4735 return this 4736 4737 def _parse_colon_as_variant_extract( 4738 self, this: t.Optional[exp.Expression] 4739 ) -> t.Optional[exp.Expression]: 4740 casts = [] 4741 json_path = [] 4742 4743 while self._match(TokenType.COLON): 4744 start_index = self._index 4745 4746 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4747 path = self._parse_column_ops( 4748 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4749 ) 4750 4751 # The cast :: operator has a lower precedence than the extraction operator :, so 4752 # we rearrange the AST appropriately to avoid casting the JSON path 4753 while isinstance(path, exp.Cast): 4754 casts.append(path.to) 4755 path = path.this 4756 4757 if casts: 4758 dcolon_offset = next( 4759 i 4760 for i, t in enumerate(self._tokens[start_index:]) 4761 if t.token_type == TokenType.DCOLON 4762 ) 4763 end_token = self._tokens[start_index + dcolon_offset - 1] 4764 else: 4765 end_token = self._prev 4766 4767 if path: 4768 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4769 4770 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4771 # Databricks transforms it back to the colon/dot notation 4772 if json_path: 4773 this = self.expression( 4774 exp.JSONExtract, 4775 this=this, 4776 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4777 variant_extract=True, 4778 ) 4779 4780 while casts: 4781 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4782 4783 return this 4784 4785 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4786 return self._parse_types() 4787 4788 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4789 this = self._parse_bracket(this) 4790 4791 while 
self._match_set(self.COLUMN_OPERATORS): 4792 op_token = self._prev.token_type 4793 op = self.COLUMN_OPERATORS.get(op_token) 4794 4795 if op_token == TokenType.DCOLON: 4796 field = self._parse_dcolon() 4797 if not field: 4798 self.raise_error("Expected type") 4799 elif op and self._curr: 4800 field = self._parse_column_reference() 4801 else: 4802 field = self._parse_field(any_token=True, anonymous_func=True) 4803 4804 if isinstance(field, exp.Func) and this: 4805 # bigquery allows function calls like x.y.count(...) 4806 # SAFE.SUBSTR(...) 4807 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4808 this = exp.replace_tree( 4809 this, 4810 lambda n: ( 4811 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4812 if n.table 4813 else n.this 4814 ) 4815 if isinstance(n, exp.Column) 4816 else n, 4817 ) 4818 4819 if op: 4820 this = op(self, this, field) 4821 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4822 this = self.expression( 4823 exp.Column, 4824 this=field, 4825 table=this.this, 4826 db=this.args.get("table"), 4827 catalog=this.args.get("db"), 4828 ) 4829 else: 4830 this = self.expression(exp.Dot, this=this, expression=field) 4831 4832 this = self._parse_bracket(this) 4833 4834 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4835 4836 def _parse_primary(self) -> t.Optional[exp.Expression]: 4837 if self._match_set(self.PRIMARY_PARSERS): 4838 token_type = self._prev.token_type 4839 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4840 4841 if token_type == TokenType.STRING: 4842 expressions = [primary] 4843 while self._match(TokenType.STRING): 4844 expressions.append(exp.Literal.string(self._prev.text)) 4845 4846 if len(expressions) > 1: 4847 return self.expression(exp.Concat, expressions=expressions) 4848 4849 return primary 4850 4851 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4852 return exp.Literal.number(f"0.{self._prev.text}") 4853 4854 if self._match(TokenType.L_PAREN): 4855 comments = self._prev_comments 4856 query = self._parse_select() 4857 4858 if query: 4859 expressions = [query] 4860 else: 4861 expressions = self._parse_expressions() 4862 4863 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4864 4865 if not this and self._match(TokenType.R_PAREN, advance=False): 4866 this = self.expression(exp.Tuple) 4867 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4868 this = self._parse_subquery(this=this, parse_alias=False) 4869 elif isinstance(this, exp.Subquery): 4870 this = self._parse_subquery( 4871 this=self._parse_set_operations(this), parse_alias=False 4872 ) 4873 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4874 this = self.expression(exp.Tuple, expressions=expressions) 4875 else: 4876 this = self.expression(exp.Paren, this=this) 4877 4878 if this: 4879 this.add_comments(comments) 4880 4881 self._match_r_paren(expression=this) 4882 return this 4883 4884 return None 4885 4886 def _parse_field( 4887 self, 4888 any_token: bool = False, 4889 tokens: t.Optional[t.Collection[TokenType]] = None, 4890 anonymous_func: bool = False, 4891 ) -> t.Optional[exp.Expression]: 4892 if anonymous_func: 4893 field = ( 4894 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4895 or self._parse_primary() 4896 ) 4897 else: 4898 field = self._parse_primary() or self._parse_function( 4899 anonymous=anonymous_func, any_token=any_token 4900 ) 4901 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4902 4903 def _parse_function( 4904 self, 4905 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4906 anonymous: bool = False, 4907 optional_parens: bool = True, 4908 any_token: bool = False, 4909 ) -> t.Optional[exp.Expression]: 4910 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4911 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4912 fn_syntax = False 4913 if ( 4914 self._match(TokenType.L_BRACE, advance=False) 4915 and self._next 4916 and self._next.text.upper() == "FN" 4917 ): 4918 self._advance(2) 4919 fn_syntax = True 4920 4921 func = self._parse_function_call( 4922 functions=functions, 4923 anonymous=anonymous, 4924 optional_parens=optional_parens, 4925 any_token=any_token, 4926 ) 4927 4928 if fn_syntax: 4929 self._match(TokenType.R_BRACE) 4930 4931 return func 4932 4933 def _parse_function_call( 4934 self, 4935 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4936 anonymous: bool = False, 4937 optional_parens: bool = True, 4938 any_token: bool = False, 4939 ) -> t.Optional[exp.Expression]: 4940 if not self._curr: 4941 return None 4942 4943 comments = self._curr.comments 4944 token_type = self._curr.token_type 4945 this = self._curr.text 4946 upper = this.upper() 4947 4948 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4949 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4950 self._advance() 4951 return self._parse_window(parser(self)) 4952 4953 if not self._next or self._next.token_type != TokenType.L_PAREN: 4954 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4955 self._advance() 4956 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4957 4958 return None 4959 4960 if any_token: 4961 if token_type in self.RESERVED_TOKENS: 4962 return None 4963 elif token_type not in self.FUNC_TOKENS: 4964 return None 4965 4966 self._advance(2) 4967 4968 parser = self.FUNCTION_PARSERS.get(upper) 4969 if parser and not anonymous: 4970 this = parser(self) 4971 else: 4972 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4973 4974 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4975 this = self.expression(subquery_predicate, this=self._parse_select()) 4976 self._match_r_paren() 4977 return this 4978 4979 if functions is None: 4980 functions = self.FUNCTIONS 4981 4982 function = functions.get(upper) 4983 4984 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4985 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4986 4987 if alias: 4988 args = self._kv_to_prop_eq(args) 4989 4990 if function and not anonymous: 4991 if "dialect" in function.__code__.co_varnames: 4992 func = function(args, dialect=self.dialect) 4993 else: 4994 func = function(args) 4995 4996 func = self.validate_expression(func, args) 4997 if not self.dialect.NORMALIZE_FUNCTIONS: 4998 func.meta["name"] = this 4999 5000 this = func 5001 else: 5002 if token_type == TokenType.IDENTIFIER: 5003 this = exp.Identifier(this=this, quoted=True) 5004 this = self.expression(exp.Anonymous, this=this, expressions=args) 5005 5006 if isinstance(this, exp.Expression): 5007 this.add_comments(comments) 5008 5009 self._match_r_paren(this) 5010 return self._parse_window(this) 5011 5012 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5013 transformed = [] 5014 5015 for e in expressions: 5016 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5017 if isinstance(e, 
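# --- Illustrative aside (not part of the parser source) ---------------------
# _parse_function above strips the ODBC-style {fn ...} wrapper before
# dispatching to _parse_function_call, so the wrapped call parses like a
# plain function. A minimal sketch, assuming the MySQL dialect; rendered
# output is approximate:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql()
#     "SELECT CONCAT('a', 'b')"
# ----------------------------------------------------------------------------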
exp.Alias): 5018 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5019 5020 if not isinstance(e, exp.PropertyEQ): 5021 e = self.expression( 5022 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5023 ) 5024 5025 if isinstance(e.this, exp.Column): 5026 e.this.replace(e.this.this) 5027 5028 transformed.append(e) 5029 5030 return transformed 5031 5032 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5033 return self._parse_column_def(self._parse_id_var()) 5034 5035 def _parse_user_defined_function( 5036 self, kind: t.Optional[TokenType] = None 5037 ) -> t.Optional[exp.Expression]: 5038 this = self._parse_id_var() 5039 5040 while self._match(TokenType.DOT): 5041 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5042 5043 if not self._match(TokenType.L_PAREN): 5044 return this 5045 5046 expressions = self._parse_csv(self._parse_function_parameter) 5047 self._match_r_paren() 5048 return self.expression( 5049 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5050 ) 5051 5052 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5053 literal = self._parse_primary() 5054 if literal: 5055 return self.expression(exp.Introducer, this=token.text, expression=literal) 5056 5057 return self.expression(exp.Identifier, this=token.text) 5058 5059 def _parse_session_parameter(self) -> exp.SessionParameter: 5060 kind = None 5061 this = self._parse_id_var() or self._parse_primary() 5062 5063 if this and self._match(TokenType.DOT): 5064 kind = this.name 5065 this = self._parse_var() or self._parse_primary() 5066 5067 return self.expression(exp.SessionParameter, this=this, kind=kind) 5068 5069 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5070 return self._parse_id_var() 5071 5072 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5073 index = self._index 5074 5075 if self._match(TokenType.L_PAREN): 5076 expressions = t.cast( 5077 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5078 ) 5079 5080 if not self._match(TokenType.R_PAREN): 5081 self._retreat(index) 5082 else: 5083 expressions = [self._parse_lambda_arg()] 5084 5085 if self._match_set(self.LAMBDAS): 5086 return self.LAMBDAS[self._prev.token_type](self, expressions) 5087 5088 self._retreat(index) 5089 5090 this: t.Optional[exp.Expression] 5091 5092 if self._match(TokenType.DISTINCT): 5093 this = self.expression( 5094 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5095 ) 5096 else: 5097 this = self._parse_select_or_expression(alias=alias) 5098 5099 return self._parse_limit( 5100 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5101 ) 5102 5103 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5104 index = self._index 5105 if not self._match(TokenType.L_PAREN): 5106 return this 5107 5108 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5109 # expr can be of both types 5110 if self._match_set(self.SELECT_START_TOKENS): 5111 self._retreat(index) 5112 return this 5113 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5114 self._match_r_paren() 5115 return self.expression(exp.Schema, this=this, expressions=args) 5116 5117 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5118 return self._parse_column_def(self._parse_field(any_token=True)) 5119 5120 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5121 # column defs are not really columns, they're identifiers 5122 if isinstance(this, exp.Column): 5123 this = this.this 5124 5125 kind = self._parse_types(schema=True) 5126 5127 if self._match_text_seq("FOR", "ORDINALITY"): 5128 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5129 5130 constraints: t.List[exp.Expression] = [] 5131 5132 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5133 ("ALIAS", "MATERIALIZED") 5134 ): 5135 persisted = self._prev.text.upper() == "MATERIALIZED" 5136 constraints.append( 5137 self.expression( 5138 exp.ComputedColumnConstraint, 5139 this=self._parse_assignment(), 5140 persisted=persisted or self._match_text_seq("PERSISTED"), 5141 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5142 ) 5143 ) 5144 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5145 self._match(TokenType.ALIAS) 5146 constraints.append( 5147 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5148 ) 5149 5150 while True: 5151 constraint = self._parse_column_constraint() 5152 if not constraint: 5153 break 5154 constraints.append(constraint) 5155 5156 if not kind and not constraints: 5157 return this 5158 5159 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5160 5161 def _parse_auto_increment( 5162 self, 5163 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5164 start = None 5165 increment = None 5166 5167 if self._match(TokenType.L_PAREN, advance=False): 5168 args = self._parse_wrapped_csv(self._parse_bitwise) 5169 start = seq_get(args, 0) 5170 increment = seq_get(args, 1) 5171 elif self._match_text_seq("START"): 5172 start = self._parse_bitwise() 5173 self._match_text_seq("INCREMENT") 5174 increment = self._parse_bitwise() 5175 5176 if start and increment: 5177 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5178 5179 return exp.AutoIncrementColumnConstraint() 5180 5181 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5182 if not self._match_text_seq("REFRESH"): 5183 self._retreat(self._index - 1) 5184 return None 5185 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5186 5187 def _parse_compress(self) -> exp.CompressColumnConstraint: 5188 if self._match(TokenType.L_PAREN, advance=False): 5189 return self.expression( 5190 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5191 ) 5192 5193 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5194 5195 def _parse_generated_as_identity( 5196 self, 5197 ) -> ( 5198 exp.GeneratedAsIdentityColumnConstraint 5199 | exp.ComputedColumnConstraint 5200 | exp.GeneratedAsRowColumnConstraint 5201 ): 5202 if self._match_text_seq("BY", "DEFAULT"): 5203 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5204 this = self.expression( 5205 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5206 ) 5207 else: 5208 self._match_text_seq("ALWAYS") 5209 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5210 5211 self._match(TokenType.ALIAS) 5212 5213 if self._match_text_seq("ROW"): 5214 start = self._match_text_seq("START") 5215 if not start: 5216 self._match(TokenType.END) 5217 hidden = self._match_text_seq("HIDDEN") 5218 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5219 5220 identity = self._match_text_seq("IDENTITY") 5221 5222 if self._match(TokenType.L_PAREN): 5223 if self._match(TokenType.START_WITH): 5224 this.set("start", self._parse_bitwise()) 5225 if self._match_text_seq("INCREMENT", "BY"): 5226 this.set("increment", self._parse_bitwise()) 5227 if self._match_text_seq("MINVALUE"): 5228 this.set("minvalue", self._parse_bitwise()) 5229 if self._match_text_seq("MAXVALUE"): 5230 this.set("maxvalue", self._parse_bitwise()) 5231 5232 if self._match_text_seq("CYCLE"): 5233 this.set("cycle", True) 5234 elif self._match_text_seq("NO", "CYCLE"): 5235 this.set("cycle", False) 5236 5237 if not identity: 5238 this.set("expression", self._parse_range()) 5239 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5240 args = self._parse_csv(self._parse_bitwise) 5241 this.set("start", seq_get(args, 0)) 5242 this.set("increment", seq_get(args, 1)) 5243 5244 self._match_r_paren() 5245 5246 return this 5247 5248 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5249 self._match_text_seq("LENGTH") 5250 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5251 5252 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5253 if self._match_text_seq("NULL"): 5254 return self.expression(exp.NotNullColumnConstraint) 5255 if self._match_text_seq("CASESPECIFIC"): 5256 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5257 if self._match_text_seq("FOR", "REPLICATION"): 5258 return self.expression(exp.NotForReplicationColumnConstraint) 5259 return None 5260 5261 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5262 if self._match(TokenType.CONSTRAINT): 5263 this = self._parse_id_var() 5264 else: 5265 this = None 5266 5267 if self._match_texts(self.CONSTRAINT_PARSERS): 5268 return self.expression( 5269 exp.ColumnConstraint, 5270 this=this, 5271 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5272 ) 5273 5274 return this 5275 5276 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5277 if not self._match(TokenType.CONSTRAINT): 5278 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5279 5280 return self.expression( 5281 exp.Constraint, 5282 this=self._parse_id_var(), 5283 expressions=self._parse_unnamed_constraints(), 5284 ) 5285 5286 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5287 constraints = [] 5288 while True: 5289 constraint = self._parse_unnamed_constraint() or self._parse_function() 5290 if not constraint: 5291 break 5292 constraints.append(constraint) 5293 5294 return constraints 5295 5296 def _parse_unnamed_constraint( 5297 self, constraints: t.Optional[t.Collection[str]] = None 5298 ) -> t.Optional[exp.Expression]: 5299 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5300 constraints or self.CONSTRAINT_PARSERS 5301 ): 5302 return None 5303 5304 constraint = self._prev.text.upper() 5305 if constraint not in self.CONSTRAINT_PARSERS: 5306 
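# --- Illustrative aside (not part of the parser source) ---------------------
# CONSTRAINT_PARSERS drives how named and unnamed column/schema constraints
# are recognized, as in _parse_unnamed_constraint above. A minimal sketch,
# assuming the default dialect:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL, PRIMARY KEY (id))")
#     >>> isinstance(ddl.find(exp.PrimaryKey), exp.PrimaryKey)
#     True
# ----------------------------------------------------------------------------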
self.raise_error(f"No parser found for schema constraint {constraint}.") 5307 5308 return self.CONSTRAINT_PARSERS[constraint](self) 5309 5310 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5311 return self._parse_id_var(any_token=False) 5312 5313 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5314 self._match_text_seq("KEY") 5315 return self.expression( 5316 exp.UniqueColumnConstraint, 5317 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5318 this=self._parse_schema(self._parse_unique_key()), 5319 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5320 on_conflict=self._parse_on_conflict(), 5321 ) 5322 5323 def _parse_key_constraint_options(self) -> t.List[str]: 5324 options = [] 5325 while True: 5326 if not self._curr: 5327 break 5328 5329 if self._match(TokenType.ON): 5330 action = None 5331 on = self._advance_any() and self._prev.text 5332 5333 if self._match_text_seq("NO", "ACTION"): 5334 action = "NO ACTION" 5335 elif self._match_text_seq("CASCADE"): 5336 action = "CASCADE" 5337 elif self._match_text_seq("RESTRICT"): 5338 action = "RESTRICT" 5339 elif self._match_pair(TokenType.SET, TokenType.NULL): 5340 action = "SET NULL" 5341 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5342 action = "SET DEFAULT" 5343 else: 5344 self.raise_error("Invalid key constraint") 5345 5346 options.append(f"ON {on} {action}") 5347 else: 5348 var = self._parse_var_from_options( 5349 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5350 ) 5351 if not var: 5352 break 5353 options.append(var.name) 5354 5355 return options 5356 5357 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5358 if match and not self._match(TokenType.REFERENCES): 5359 return None 5360 5361 expressions = None 5362 this = self._parse_table(schema=True) 5363 options = self._parse_key_constraint_options() 5364 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5365 5366 def _parse_foreign_key(self) -> exp.ForeignKey: 5367 expressions = self._parse_wrapped_id_vars() 5368 reference = self._parse_references() 5369 options = {} 5370 5371 while self._match(TokenType.ON): 5372 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5373 self.raise_error("Expected DELETE or UPDATE") 5374 5375 kind = self._prev.text.lower() 5376 5377 if self._match_text_seq("NO", "ACTION"): 5378 action = "NO ACTION" 5379 elif self._match(TokenType.SET): 5380 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5381 action = "SET " + self._prev.text.upper() 5382 else: 5383 self._advance() 5384 action = self._prev.text.upper() 5385 5386 options[kind] = action 5387 5388 return self.expression( 5389 exp.ForeignKey, 5390 expressions=expressions, 5391 reference=reference, 5392 **options, # type: ignore 5393 ) 5394 5395 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5396 return self._parse_field() 5397 5398 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5399 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5400 self._retreat(self._index - 1) 5401 return None 5402 5403 id_vars = self._parse_wrapped_id_vars() 5404 return self.expression( 5405 exp.PeriodForSystemTimeConstraint, 5406 this=seq_get(id_vars, 0), 5407 expression=seq_get(id_vars, 1), 5408 ) 5409 5410 def _parse_primary_key( 5411 self, wrapped_optional: bool = False, in_props: bool = False 5412 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5413 desc = ( 5414 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5415 and self._prev.token_type == TokenType.DESC 5416 ) 5417 5418 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5419 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5420 5421 expressions = self._parse_wrapped_csv( 5422 self._parse_primary_key_part, optional=wrapped_optional 5423 ) 5424 options = self._parse_key_constraint_options() 5425 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5426 5427 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5428 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5429 5430 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5431 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5432 return this 5433 5434 bracket_kind = self._prev.token_type 5435 expressions = self._parse_csv( 5436 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5437 ) 5438 5439 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5440 self.raise_error("Expected ]") 5441 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5442 self.raise_error("Expected }") 5443 5444 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5445 if bracket_kind == TokenType.L_BRACE: 5446 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5447 elif not this: 5448 this = build_array_constructor( 5449 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5450 ) 5451 else: 5452 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5453 if constructor_type: 5454 return build_array_constructor( 5455 constructor_type, 5456 args=expressions, 5457 bracket_kind=bracket_kind, 5458 dialect=self.dialect, 5459 ) 5460 5461 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5462 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5463 5464 self._add_comments(this) 5465 return self._parse_bracket(this) 5466 5467 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5468 if self._match(TokenType.COLON): 5469 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5470 return this 5471 5472 def _parse_case(self) -> t.Optional[exp.Expression]: 5473 ifs = [] 5474 default = None 5475 5476 comments = self._prev_comments 5477 expression = self._parse_assignment() 5478 5479 while self._match(TokenType.WHEN): 5480 this = self._parse_assignment() 5481 self._match(TokenType.THEN) 5482 then = self._parse_assignment() 5483 ifs.append(self.expression(exp.If, this=this, true=then)) 5484 5485 if self._match(TokenType.ELSE): 5486 default = self._parse_assignment() 5487 5488 if not self._match(TokenType.END): 5489 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5490 default = exp.column("interval") 5491 else: 5492 self.raise_error("Expected END after CASE", self._prev) 5493 5494 return self.expression( 5495 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5496 ) 5497 5498 def _parse_if(self) -> t.Optional[exp.Expression]: 5499 if self._match(TokenType.L_PAREN): 5500 args = self._parse_csv(self._parse_assignment) 5501 this = self.validate_expression(exp.If.from_arg_list(args), args) 5502 self._match_r_paren() 5503 else: 5504 index = self._index - 1 5505 5506 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5507 return self._parse_as_command(self._prev) 5508 5509 condition = self._parse_assignment() 5510 5511 if not condition: 5512 self._retreat(index) 5513 return None 5514 5515 self._match(TokenType.THEN) 5516 true = self._parse_assignment() 5517 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5518 self._match(TokenType.END) 5519 this = self.expression(exp.If, this=condition, true=true, false=false) 5520 5521 return this 5522 5523 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5524 if not self._match_text_seq("VALUE", "FOR"): 5525 self._retreat(self._index - 1) 5526 return None 5527 5528 return self.expression( 5529 exp.NextValueFor, 5530 this=self._parse_column(), 5531 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5532 ) 5533 5534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5535 this = self._parse_function() or self._parse_var_or_string(upper=True) 5536 5537 if self._match(TokenType.FROM): 5538 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5539 5540 if not self._match(TokenType.COMMA): 5541 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5542 5543 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5544 5545 def _parse_gap_fill(self) -> exp.GapFill: 5546 self._match(TokenType.TABLE) 5547 this = self._parse_table() 5548 5549 self._match(TokenType.COMMA) 5550 args = [this, *self._parse_csv(self._parse_lambda)] 5551 5552 gap_fill = exp.GapFill.from_arg_list(args) 5553 return self.validate_expression(gap_fill, args) 5554 5555 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5556 this = self._parse_assignment() 5557 5558 if not self._match(TokenType.ALIAS): 5559 if self._match(TokenType.COMMA): 5560 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5561 5562 self.raise_error("Expected AS after CAST") 5563 5564 fmt = None 5565 to = self._parse_types() 5566 5567 if self._match(TokenType.FORMAT): 5568 fmt_string = self._parse_string() 5569 fmt = self._parse_at_time_zone(fmt_string) 5570 5571 if not to: 5572 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5573 if to.this in exp.DataType.TEMPORAL_TYPES: 5574 this = self.expression( 5575 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5576 this=this, 5577 format=exp.Literal.string( 5578 format_time( 5579 fmt_string.this if fmt_string else "", 5580 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5581 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5582 ) 5583 ), 5584 safe=safe, 5585 ) 5586 5587 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5588 this.set("zone", fmt.args["zone"]) 5589 return this 5590 elif not to: 5591 self.raise_error("Expected TYPE after CAST") 5592 elif isinstance(to, exp.Identifier): 5593 to = exp.DataType.build(to.name, udt=True) 5594 elif to.this == exp.DataType.Type.CHAR: 5595 if self._match(TokenType.CHARACTER_SET): 5596 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5597 5598 return self.expression( 5599 exp.Cast if strict else exp.TryCast, 5600 this=this, 5601 to=to, 5602 format=fmt, 5603 safe=safe, 5604 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5605 ) 5606 5607 def _parse_string_agg(self) -> exp.Expression: 5608 if self._match(TokenType.DISTINCT): 5609 args: t.List[t.Optional[exp.Expression]] = [ 5610 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5611 ] 5612 if self._match(TokenType.COMMA): 5613 args.extend(self._parse_csv(self._parse_assignment)) 5614 else: 5615 args = self._parse_csv(self._parse_assignment) # type: ignore 5616 5617 index = self._index 5618 if not self._match(TokenType.R_PAREN) and args: 5619 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5620 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5621 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5622 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5623 5624 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5625 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5626 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5627 if not self._match_text_seq("WITHIN", "GROUP"): 5628 self._retreat(index) 5629 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5630 5631 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5632 order = self._parse_order(this=seq_get(args, 0)) 5633 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5634 5635 def _parse_convert( 5636 self, strict: bool, safe: t.Optional[bool] = None 5637 ) -> t.Optional[exp.Expression]: 5638 this = self._parse_bitwise() 5639 5640 if self._match(TokenType.USING): 5641 to: t.Optional[exp.Expression] = self.expression( 5642 exp.CharacterSet, this=self._parse_var() 5643 ) 5644 elif self._match(TokenType.COMMA): 5645 to = self._parse_types() 5646 else: 5647 to = None 5648 5649 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5650 5651 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5652 """ 5653 There are generally two variants of the DECODE function: 5654 5655 - DECODE(bin, charset) 5656 - DECODE(expression, search, result [, search, result] ... [, default]) 5657 5658 The second variant will always be parsed into a CASE expression. Note that NULL 5659 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5660 instead of relying on pattern matching. 
5661 """ 5662 args = self._parse_csv(self._parse_assignment) 5663 5664 if len(args) < 3: 5665 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5666 5667 expression, *expressions = args 5668 if not expression: 5669 return None 5670 5671 ifs = [] 5672 for search, result in zip(expressions[::2], expressions[1::2]): 5673 if not search or not result: 5674 return None 5675 5676 if isinstance(search, exp.Literal): 5677 ifs.append( 5678 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5679 ) 5680 elif isinstance(search, exp.Null): 5681 ifs.append( 5682 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5683 ) 5684 else: 5685 cond = exp.or_( 5686 exp.EQ(this=expression.copy(), expression=search), 5687 exp.and_( 5688 exp.Is(this=expression.copy(), expression=exp.Null()), 5689 exp.Is(this=search.copy(), expression=exp.Null()), 5690 copy=False, 5691 ), 5692 copy=False, 5693 ) 5694 ifs.append(exp.If(this=cond, true=result)) 5695 5696 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5697 5698 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5699 self._match_text_seq("KEY") 5700 key = self._parse_column() 5701 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5702 self._match_text_seq("VALUE") 5703 value = self._parse_bitwise() 5704 5705 if not key and not value: 5706 return None 5707 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5708 5709 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5710 if not this or not self._match_text_seq("FORMAT", "JSON"): 5711 return this 5712 5713 return self.expression(exp.FormatJson, this=this) 5714 5715 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5716 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5717 for value in values: 5718 if self._match_text_seq(value, "ON", on): 5719 return f"{value} ON {on}" 5720 5721 return None 5722 5723 @t.overload 5724 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5725 5726 @t.overload 5727 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5728 5729 def _parse_json_object(self, agg=False): 5730 star = self._parse_star() 5731 expressions = ( 5732 [star] 5733 if star 5734 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5735 ) 5736 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5737 5738 unique_keys = None 5739 if self._match_text_seq("WITH", "UNIQUE"): 5740 unique_keys = True 5741 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5742 unique_keys = False 5743 5744 self._match_text_seq("KEYS") 5745 5746 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5747 self._parse_type() 5748 ) 5749 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5750 5751 return self.expression( 5752 exp.JSONObjectAgg if agg else exp.JSONObject, 5753 expressions=expressions, 5754 null_handling=null_handling, 5755 unique_keys=unique_keys, 5756 return_type=return_type, 5757 encoding=encoding, 5758 ) 5759 5760 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5761 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5762 if not self._match_text_seq("NESTED"): 5763 this = self._parse_id_var() 5764 kind = self._parse_types(allow_identifiers=False) 5765 nested = None 5766 else: 5767 this = None 5768 kind = None 5769 nested = True 5770 5771 path = self._match_text_seq("PATH") and self._parse_string() 5772 nested_schema = nested and self._parse_json_schema() 5773 5774 return self.expression( 5775 exp.JSONColumnDef, 5776 this=this, 5777 kind=kind, 5778 path=path, 5779 nested_schema=nested_schema, 5780 ) 5781 5782 def _parse_json_schema(self) -> exp.JSONSchema: 5783 self._match_text_seq("COLUMNS") 5784 return self.expression( 5785 exp.JSONSchema, 5786 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5787 ) 5788 5789 def _parse_json_table(self) -> exp.JSONTable: 5790 this = self._parse_format_json(self._parse_bitwise()) 5791 path = self._match(TokenType.COMMA) and self._parse_string() 5792 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5793 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5794 schema = self._parse_json_schema() 5795 5796 return exp.JSONTable( 5797 this=this, 5798 schema=schema, 5799 path=path, 5800 error_handling=error_handling, 5801 empty_handling=empty_handling, 5802 ) 5803 5804 def _parse_match_against(self) -> exp.MatchAgainst: 5805 expressions = self._parse_csv(self._parse_column) 5806 5807 self._match_text_seq(")", "AGAINST", "(") 5808 5809 this = self._parse_string() 5810 5811 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5812 modifier = "IN NATURAL LANGUAGE MODE" 5813 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5814 modifier = f"{modifier} WITH QUERY EXPANSION" 5815 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5816 modifier = "IN BOOLEAN MODE" 5817 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5818 modifier = "WITH QUERY EXPANSION" 5819 else: 5820 modifier = None 5821 5822 return self.expression( 5823 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5824 ) 5825 5826 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5827 def _parse_open_json(self) -> exp.OpenJSON: 5828 this = self._parse_bitwise() 5829 path = self._match(TokenType.COMMA) and self._parse_string() 5830 5831 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5832 this = self._parse_field(any_token=True) 5833 kind = self._parse_types() 5834 path = 
self._parse_string() 5835 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5836 5837 return self.expression( 5838 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5839 ) 5840 5841 expressions = None 5842 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5843 self._match_l_paren() 5844 expressions = self._parse_csv(_parse_open_json_column_def) 5845 5846 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5847 5848 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5849 args = self._parse_csv(self._parse_bitwise) 5850 5851 if self._match(TokenType.IN): 5852 return self.expression( 5853 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5854 ) 5855 5856 if haystack_first: 5857 haystack = seq_get(args, 0) 5858 needle = seq_get(args, 1) 5859 else: 5860 needle = seq_get(args, 0) 5861 haystack = seq_get(args, 1) 5862 5863 return self.expression( 5864 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5865 ) 5866 5867 def _parse_predict(self) -> exp.Predict: 5868 self._match_text_seq("MODEL") 5869 this = self._parse_table() 5870 5871 self._match(TokenType.COMMA) 5872 self._match_text_seq("TABLE") 5873 5874 return self.expression( 5875 exp.Predict, 5876 this=this, 5877 expression=self._parse_table(), 5878 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5879 ) 5880 5881 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5882 args = self._parse_csv(self._parse_table) 5883 return exp.JoinHint(this=func_name.upper(), expressions=args) 5884 5885 def _parse_substring(self) -> exp.Substring: 5886 # Postgres supports the form: substring(string [from int] [for int]) 5887 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5888 5889 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5890 5891 if self._match(TokenType.FROM): 5892 args.append(self._parse_bitwise()) 5893 if self._match(TokenType.FOR): 5894 if len(args) == 1: 5895 args.append(exp.Literal.number(1)) 5896 args.append(self._parse_bitwise()) 5897 5898 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5899 5900 def _parse_trim(self) -> exp.Trim: 5901 # https://www.w3resource.com/sql/character-functions/trim.php 5902 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5903 5904 position = None 5905 collation = None 5906 expression = None 5907 5908 if self._match_texts(self.TRIM_TYPES): 5909 position = self._prev.text.upper() 5910 5911 this = self._parse_bitwise() 5912 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5913 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5914 expression = self._parse_bitwise() 5915 5916 if invert_order: 5917 this, expression = expression, this 5918 5919 if self._match(TokenType.COLLATE): 5920 collation = self._parse_bitwise() 5921 5922 return self.expression( 5923 exp.Trim, this=this, position=position, expression=expression, collation=collation 5924 ) 5925 5926 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5927 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5928 5929 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5930 return self._parse_window(self._parse_id_var(), alias=True) 5931 5932 def _parse_respect_or_ignore_nulls( 5933 self, this: t.Optional[exp.Expression] 5934 ) -> t.Optional[exp.Expression]: 5935 if self._match_text_seq("IGNORE", "NULLS"): 
5936 return self.expression(exp.IgnoreNulls, this=this) 5937 if self._match_text_seq("RESPECT", "NULLS"): 5938 return self.expression(exp.RespectNulls, this=this) 5939 return this 5940 5941 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5942 if self._match(TokenType.HAVING): 5943 self._match_texts(("MAX", "MIN")) 5944 max = self._prev.text.upper() != "MIN" 5945 return self.expression( 5946 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5947 ) 5948 5949 return this 5950 5951 def _parse_window( 5952 self, this: t.Optional[exp.Expression], alias: bool = False 5953 ) -> t.Optional[exp.Expression]: 5954 func = this 5955 comments = func.comments if isinstance(func, exp.Expression) else None 5956 5957 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5958 self._match(TokenType.WHERE) 5959 this = self.expression( 5960 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5961 ) 5962 self._match_r_paren() 5963 5964 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5965 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5966 if self._match_text_seq("WITHIN", "GROUP"): 5967 order = self._parse_wrapped(self._parse_order) 5968 this = self.expression(exp.WithinGroup, this=this, expression=order) 5969 5970 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5971 # Some dialects choose to implement and some do not. 5972 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5973 5974 # There is some code above in _parse_lambda that handles 5975 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5976 5977 # The below changes handle 5978 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5979 5980 # Oracle allows both formats 5981 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5982 # and Snowflake chose to do the same for familiarity 5983 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5984 if isinstance(this, exp.AggFunc): 5985 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5986 5987 if ignore_respect and ignore_respect is not this: 5988 ignore_respect.replace(ignore_respect.this) 5989 this = self.expression(ignore_respect.__class__, this=this) 5990 5991 this = self._parse_respect_or_ignore_nulls(this) 5992 5993 # bigquery select from window x AS (partition by ...) 
5994 if alias: 5995 over = None 5996 self._match(TokenType.ALIAS) 5997 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5998 return this 5999 else: 6000 over = self._prev.text.upper() 6001 6002 if comments and isinstance(func, exp.Expression): 6003 func.pop_comments() 6004 6005 if not self._match(TokenType.L_PAREN): 6006 return self.expression( 6007 exp.Window, 6008 comments=comments, 6009 this=this, 6010 alias=self._parse_id_var(False), 6011 over=over, 6012 ) 6013 6014 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6015 6016 first = self._match(TokenType.FIRST) 6017 if self._match_text_seq("LAST"): 6018 first = False 6019 6020 partition, order = self._parse_partition_and_order() 6021 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6022 6023 if kind: 6024 self._match(TokenType.BETWEEN) 6025 start = self._parse_window_spec() 6026 self._match(TokenType.AND) 6027 end = self._parse_window_spec() 6028 6029 spec = self.expression( 6030 exp.WindowSpec, 6031 kind=kind, 6032 start=start["value"], 6033 start_side=start["side"], 6034 end=end["value"], 6035 end_side=end["side"], 6036 ) 6037 else: 6038 spec = None 6039 6040 self._match_r_paren() 6041 6042 window = self.expression( 6043 exp.Window, 6044 comments=comments, 6045 this=this, 6046 partition_by=partition, 6047 order=order, 6048 spec=spec, 6049 alias=window_alias, 6050 over=over, 6051 first=first, 6052 ) 6053 6054 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6055 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6056 return self._parse_window(window, alias=alias) 6057 6058 return window 6059 6060 def _parse_partition_and_order( 6061 self, 6062 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6063 return self._parse_partition_by(), self._parse_order() 6064 6065 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6066 self._match(TokenType.BETWEEN) 6067 6068 return { 6069 "value": ( 6070 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6071 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6072 or self._parse_bitwise() 6073 ), 6074 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6075 } 6076 6077 def _parse_alias( 6078 self, this: t.Optional[exp.Expression], explicit: bool = False 6079 ) -> t.Optional[exp.Expression]: 6080 any_token = self._match(TokenType.ALIAS) 6081 comments = self._prev_comments or [] 6082 6083 if explicit and not any_token: 6084 return this 6085 6086 if self._match(TokenType.L_PAREN): 6087 aliases = self.expression( 6088 exp.Aliases, 6089 comments=comments, 6090 this=this, 6091 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6092 ) 6093 self._match_r_paren(aliases) 6094 return aliases 6095 6096 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6097 self.STRING_ALIASES and self._parse_string_as_identifier() 6098 ) 6099 6100 if alias: 6101 comments.extend(alias.pop_comments()) 6102 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6103 column = this.this 6104 6105 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6106 if not this.comments and column and column.comments: 6107 this.comments = column.pop_comments() 6108 6109 return this 6110 6111 def _parse_id_var( 6112 self, 6113 any_token: bool = True, 6114 tokens: t.Optional[t.Collection[TokenType]] = None, 6115 ) -> t.Optional[exp.Expression]: 6116 expression = self._parse_identifier() 6117 if 
not expression and ( 6118 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6119 ): 6120 quoted = self._prev.token_type == TokenType.STRING 6121 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6122 6123 return expression 6124 6125 def _parse_string(self) -> t.Optional[exp.Expression]: 6126 if self._match_set(self.STRING_PARSERS): 6127 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6128 return self._parse_placeholder() 6129 6130 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6131 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6132 6133 def _parse_number(self) -> t.Optional[exp.Expression]: 6134 if self._match_set(self.NUMERIC_PARSERS): 6135 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6136 return self._parse_placeholder() 6137 6138 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6139 if self._match(TokenType.IDENTIFIER): 6140 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6141 return self._parse_placeholder() 6142 6143 def _parse_var( 6144 self, 6145 any_token: bool = False, 6146 tokens: t.Optional[t.Collection[TokenType]] = None, 6147 upper: bool = False, 6148 ) -> t.Optional[exp.Expression]: 6149 if ( 6150 (any_token and self._advance_any()) 6151 or self._match(TokenType.VAR) 6152 or (self._match_set(tokens) if tokens else False) 6153 ): 6154 return self.expression( 6155 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6156 ) 6157 return self._parse_placeholder() 6158 6159 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6160 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6161 self._advance() 6162 return self._prev 6163 return None 6164 6165 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6166 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6167 6168 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6169 return self._parse_primary() or self._parse_var(any_token=True) 6170 6171 def _parse_null(self) -> t.Optional[exp.Expression]: 6172 if self._match_set(self.NULL_TOKENS): 6173 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6174 return self._parse_placeholder() 6175 6176 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6177 if self._match(TokenType.TRUE): 6178 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6179 if self._match(TokenType.FALSE): 6180 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6181 return self._parse_placeholder() 6182 6183 def _parse_star(self) -> t.Optional[exp.Expression]: 6184 if self._match(TokenType.STAR): 6185 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6186 return self._parse_placeholder() 6187 6188 def _parse_parameter(self) -> exp.Parameter: 6189 this = self._parse_identifier() or self._parse_primary_or_var() 6190 return self.expression(exp.Parameter, this=this) 6191 6192 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6193 if self._match_set(self.PLACEHOLDER_PARSERS): 6194 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6195 if placeholder: 6196 return placeholder 6197 self._advance(-1) 6198 return None 6199 6200 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6201 if not self._match_texts(keywords): 6202 return None 6203 if self._match(TokenType.L_PAREN, 
advance=False): 6204 return self._parse_wrapped_csv(self._parse_expression) 6205 6206 expression = self._parse_expression() 6207 return [expression] if expression else None 6208 6209 def _parse_csv( 6210 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6211 ) -> t.List[exp.Expression]: 6212 parse_result = parse_method() 6213 items = [parse_result] if parse_result is not None else [] 6214 6215 while self._match(sep): 6216 self._add_comments(parse_result) 6217 parse_result = parse_method() 6218 if parse_result is not None: 6219 items.append(parse_result) 6220 6221 return items 6222 6223 def _parse_tokens( 6224 self, parse_method: t.Callable, expressions: t.Dict 6225 ) -> t.Optional[exp.Expression]: 6226 this = parse_method() 6227 6228 while self._match_set(expressions): 6229 this = self.expression( 6230 expressions[self._prev.token_type], 6231 this=this, 6232 comments=self._prev_comments, 6233 expression=parse_method(), 6234 ) 6235 6236 return this 6237 6238 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6239 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6240 6241 def _parse_wrapped_csv( 6242 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6243 ) -> t.List[exp.Expression]: 6244 return self._parse_wrapped( 6245 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6246 ) 6247 6248 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6249 wrapped = self._match(TokenType.L_PAREN) 6250 if not wrapped and not optional: 6251 self.raise_error("Expecting (") 6252 parse_result = parse_method() 6253 if wrapped: 6254 self._match_r_paren() 6255 return parse_result 6256 6257 def _parse_expressions(self) -> t.List[exp.Expression]: 6258 return self._parse_csv(self._parse_expression) 6259 6260 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6261 return self._parse_select() or self._parse_set_operations( 6262 self._parse_expression() if alias else self._parse_assignment() 6263 ) 6264 6265 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6266 return self._parse_query_modifiers( 6267 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6268 ) 6269 6270 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6271 this = None 6272 if self._match_texts(self.TRANSACTION_KIND): 6273 this = self._prev.text 6274 6275 self._match_texts(("TRANSACTION", "WORK")) 6276 6277 modes = [] 6278 while True: 6279 mode = [] 6280 while self._match(TokenType.VAR): 6281 mode.append(self._prev.text) 6282 6283 if mode: 6284 modes.append(" ".join(mode)) 6285 if not self._match(TokenType.COMMA): 6286 break 6287 6288 return self.expression(exp.Transaction, this=this, modes=modes) 6289 6290 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6291 chain = None 6292 savepoint = None 6293 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6294 6295 self._match_texts(("TRANSACTION", "WORK")) 6296 6297 if self._match_text_seq("TO"): 6298 self._match_text_seq("SAVEPOINT") 6299 savepoint = self._parse_id_var() 6300 6301 if self._match(TokenType.AND): 6302 chain = not self._match_text_seq("NO") 6303 self._match_text_seq("CHAIN") 6304 6305 if is_rollback: 6306 return self.expression(exp.Rollback, savepoint=savepoint) 6307 6308 return self.expression(exp.Commit, chain=chain) 6309 6310 def _parse_refresh(self) -> exp.Refresh: 6311 self._match(TokenType.TABLE) 6312 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6313 6314 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6315 if not self._match_text_seq("ADD"): 6316 return None 6317 6318 self._match(TokenType.COLUMN) 6319 exists_column = self._parse_exists(not_=True) 6320 expression = self._parse_field_def() 6321 6322 if expression: 6323 expression.set("exists", exists_column) 6324 6325 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6326 if self._match_texts(("FIRST", "AFTER")): 6327 position = self._prev.text 6328 column_position = self.expression( 6329 exp.ColumnPosition, this=self._parse_column(), position=position 6330 ) 6331 expression.set("position", column_position) 6332 6333 return expression 6334 6335 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6336 drop = self._match(TokenType.DROP) and self._parse_drop() 6337 if drop and not isinstance(drop, exp.Command): 6338 drop.set("kind", drop.args.get("kind", "COLUMN")) 6339 return drop 6340 6341 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6342 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6343 return self.expression( 6344 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6345 ) 6346 6347 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6348 index = self._index - 1 6349 6350 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6351 return self._parse_csv( 6352 lambda: self.expression( 6353 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6354 ) 6355 ) 6356 6357 self._retreat(index) 6358 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6359 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6360 6361 if self._match_text_seq("ADD", "COLUMNS"): 6362 schema = self._parse_schema() 6363 if schema: 6364 return [schema] 6365 return [] 6366 6367 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6368 6369 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6370 if self._match_texts(self.ALTER_ALTER_PARSERS): 6371 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6372 6373 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6374 # keyword after ALTER we default to parsing this statement 6375 self._match(TokenType.COLUMN) 6376 column = self._parse_field(any_token=True) 6377 6378 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6379 return self.expression(exp.AlterColumn, this=column, drop=True) 6380 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6381 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6382 if self._match(TokenType.COMMENT): 6383 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6384 if self._match_text_seq("DROP", "NOT", "NULL"): 6385 return self.expression( 6386 exp.AlterColumn, 6387 this=column, 6388 drop=True, 6389 allow_null=True, 6390 ) 6391 if self._match_text_seq("SET", "NOT", "NULL"): 6392 return self.expression( 6393 exp.AlterColumn, 6394 this=column, 6395 allow_null=False, 6396 ) 6397 self._match_text_seq("SET", "DATA") 6398 self._match_text_seq("TYPE") 6399 return self.expression( 6400 exp.AlterColumn, 6401 this=column, 6402 dtype=self._parse_types(), 6403 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6404 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6405 ) 6406 6407 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6408 if self._match_texts(("ALL", "EVEN", "AUTO")): 6409 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6410 6411 self._match_text_seq("KEY", "DISTKEY") 6412 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6413 6414 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6415 if compound: 6416 self._match_text_seq("SORTKEY") 6417 6418 if self._match(TokenType.L_PAREN, advance=False): 6419 return self.expression( 6420 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6421 ) 6422 6423 self._match_texts(("AUTO", "NONE")) 6424 return self.expression( 6425 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6426 ) 6427 6428 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6429 index = self._index - 1 6430 6431 partition_exists = self._parse_exists() 6432 if self._match(TokenType.PARTITION, advance=False): 6433 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6434 6435 self._retreat(index) 6436 return self._parse_csv(self._parse_drop_column) 6437 6438 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6439 if self._match(TokenType.COLUMN): 6440 exists = self._parse_exists() 6441 old_column = self._parse_column() 6442 to = self._match_text_seq("TO") 6443 new_column = self._parse_column() 6444 6445 if old_column is None or to is None or new_column is None: 6446 return None 6447 6448 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6449 6450 self._match_text_seq("TO") 6451 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6452 6453 def _parse_alter_table_set(self) -> exp.AlterSet: 6454 alter_set = self.expression(exp.AlterSet) 6455 6456 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6457 "TABLE", "PROPERTIES" 6458 ): 6459 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6460 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6461 alter_set.set("expressions", [self._parse_assignment()]) 6462 elif self._match_texts(("LOGGED", "UNLOGGED")): 6463 alter_set.set("option", exp.var(self._prev.text.upper())) 6464 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6465 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6466 elif self._match_text_seq("LOCATION"): 6467 alter_set.set("location", self._parse_field()) 6468 elif self._match_text_seq("ACCESS", "METHOD"): 6469 alter_set.set("access_method", self._parse_field()) 6470 elif self._match_text_seq("TABLESPACE"): 6471 alter_set.set("tablespace", self._parse_field()) 6472 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6473 alter_set.set("file_format", [self._parse_field()]) 6474 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6475 alter_set.set("file_format", self._parse_wrapped_options()) 6476 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6477 alter_set.set("copy_options", self._parse_wrapped_options()) 6478 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6479 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6480 else: 6481 if self._match_text_seq("SERDE"): 6482 alter_set.set("serde", self._parse_field()) 6483 6484 alter_set.set("expressions", [self._parse_properties()]) 6485 6486 return 
alter_set 6487 6488 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6489 start = self._prev 6490 6491 if not self._match(TokenType.TABLE): 6492 return self._parse_as_command(start) 6493 6494 exists = self._parse_exists() 6495 only = self._match_text_seq("ONLY") 6496 this = self._parse_table(schema=True) 6497 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6498 6499 if self._next: 6500 self._advance() 6501 6502 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6503 if parser: 6504 actions = ensure_list(parser(self)) 6505 options = self._parse_csv(self._parse_property) 6506 6507 if not self._curr and actions: 6508 return self.expression( 6509 exp.AlterTable, 6510 this=this, 6511 exists=exists, 6512 actions=actions, 6513 only=only, 6514 options=options, 6515 cluster=cluster, 6516 ) 6517 6518 return self._parse_as_command(start) 6519 6520 def _parse_merge(self) -> exp.Merge: 6521 self._match(TokenType.INTO) 6522 target = self._parse_table() 6523 6524 if target and self._match(TokenType.ALIAS, advance=False): 6525 target.set("alias", self._parse_table_alias()) 6526 6527 self._match(TokenType.USING) 6528 using = self._parse_table() 6529 6530 self._match(TokenType.ON) 6531 on = self._parse_assignment() 6532 6533 return self.expression( 6534 exp.Merge, 6535 this=target, 6536 using=using, 6537 on=on, 6538 expressions=self._parse_when_matched(), 6539 ) 6540 6541 def _parse_when_matched(self) -> t.List[exp.When]: 6542 whens = [] 6543 6544 while self._match(TokenType.WHEN): 6545 matched = not self._match(TokenType.NOT) 6546 self._match_text_seq("MATCHED") 6547 source = ( 6548 False 6549 if self._match_text_seq("BY", "TARGET") 6550 else self._match_text_seq("BY", "SOURCE") 6551 ) 6552 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6553 6554 self._match(TokenType.THEN) 6555 6556 if self._match(TokenType.INSERT): 6557 _this = self._parse_star() 6558 if _this: 6559 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6560 else: 6561 then = self.expression( 6562 exp.Insert, 6563 this=self._parse_value(), 6564 expression=self._match_text_seq("VALUES") and self._parse_value(), 6565 ) 6566 elif self._match(TokenType.UPDATE): 6567 expressions = self._parse_star() 6568 if expressions: 6569 then = self.expression(exp.Update, expressions=expressions) 6570 else: 6571 then = self.expression( 6572 exp.Update, 6573 expressions=self._match(TokenType.SET) 6574 and self._parse_csv(self._parse_equality), 6575 ) 6576 elif self._match(TokenType.DELETE): 6577 then = self.expression(exp.Var, this=self._prev.text) 6578 else: 6579 then = None 6580 6581 whens.append( 6582 self.expression( 6583 exp.When, 6584 matched=matched, 6585 source=source, 6586 condition=condition, 6587 then=then, 6588 ) 6589 ) 6590 return whens 6591 6592 def _parse_show(self) -> t.Optional[exp.Expression]: 6593 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6594 if parser: 6595 return parser(self) 6596 return self._parse_as_command(self._prev) 6597 6598 def _parse_set_item_assignment( 6599 self, kind: t.Optional[str] = None 6600 ) -> t.Optional[exp.Expression]: 6601 index = self._index 6602 6603 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6604 return self._parse_set_transaction(global_=kind == "GLOBAL") 6605 6606 left = self._parse_primary() or self._parse_column() 6607 assignment_delimiter = self._match_texts(("=", "TO")) 6608 6609 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6610 self._retreat(index) 6611 return None 6612 6613 right = self._parse_statement() or self._parse_id_var() 6614 if isinstance(right, (exp.Column, exp.Identifier)): 6615 right = exp.var(right.name) 6616 6617 this = self.expression(exp.EQ, this=left, expression=right) 6618 return self.expression(exp.SetItem, this=this, kind=kind) 6619 6620 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6621 self._match_text_seq("TRANSACTION") 6622 characteristics = self._parse_csv( 6623 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6624 ) 6625 return self.expression( 6626 exp.SetItem, 6627 expressions=characteristics, 6628 kind="TRANSACTION", 6629 **{"global": global_}, # type: ignore 6630 ) 6631 6632 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6633 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6634 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6635 6636 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6637 index = self._index 6638 set_ = self.expression( 6639 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6640 ) 6641 6642 if self._curr: 6643 self._retreat(index) 6644 return self._parse_as_command(self._prev) 6645 6646 return set_ 6647 6648 def _parse_var_from_options( 6649 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6650 ) -> t.Optional[exp.Var]: 6651 start = self._curr 6652 if not start: 6653 return None 6654 6655 option = start.text.upper() 6656 continuations = options.get(option) 6657 6658 index = self._index 6659 self._advance() 6660 for keywords in continuations or []: 6661 if isinstance(keywords, str): 6662 keywords = (keywords,) 6663 6664 if self._match_text_seq(*keywords): 6665 option = f"{option} {' '.join(keywords)}" 6666 break 6667 else: 6668 if continuations or continuations is None: 6669 if raise_unmatched: 6670 self.raise_error(f"Unknown option {option}") 6671 6672 self._retreat(index) 6673 return None 6674 6675 return exp.var(option) 6676 6677 def _parse_as_command(self, start: Token) -> exp.Command: 6678 while self._curr: 6679 self._advance() 6680 text = self._find_sql(start, self._prev) 6681 size = len(start.text) 6682 self._warn_unsupported() 6683 return exp.Command(this=text[:size], expression=text[size:]) 6684 6685 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6686 settings = [] 6687 6688 self._match_l_paren() 6689 kind = self._parse_id_var() 6690 6691 if self._match(TokenType.L_PAREN): 6692 while True: 6693 key = self._parse_id_var() 6694 value = self._parse_primary() 6695 6696 if not key and value is None: 6697 break 6698 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6699 self._match(TokenType.R_PAREN) 6700 6701 self._match_r_paren() 6702 6703 return self.expression( 6704 exp.DictProperty, 6705 this=this, 6706 kind=kind.this if kind else None, 6707 settings=settings, 6708 ) 6709 6710 def _parse_dict_range(self, this: str) -> exp.DictRange: 6711 self._match_l_paren() 6712 has_min = self._match_text_seq("MIN") 6713 if has_min: 6714 min = self._parse_var() or self._parse_primary() 6715 self._match_text_seq("MAX") 6716 max = self._parse_var() or self._parse_primary() 6717 else: 6718 max = self._parse_var() or self._parse_primary() 6719 min = exp.Literal.number(0) 6720 self._match_r_paren() 6721 return self.expression(exp.DictRange, this=this, min=min, max=max) 6722 6723 def _parse_comprehension( 6724 self, this: 
t.Optional[exp.Expression] 6725 ) -> t.Optional[exp.Comprehension]: 6726 index = self._index 6727 expression = self._parse_column() 6728 if not self._match(TokenType.IN): 6729 self._retreat(index - 1) 6730 return None 6731 iterator = self._parse_column() 6732 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6733 return self.expression( 6734 exp.Comprehension, 6735 this=this, 6736 expression=expression, 6737 iterator=iterator, 6738 condition=condition, 6739 ) 6740 6741 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6742 if self._match(TokenType.HEREDOC_STRING): 6743 return self.expression(exp.Heredoc, this=self._prev.text) 6744 6745 if not self._match_text_seq("$"): 6746 return None 6747 6748 tags = ["$"] 6749 tag_text = None 6750 6751 if self._is_connected(): 6752 self._advance() 6753 tags.append(self._prev.text.upper()) 6754 else: 6755 self.raise_error("No closing $ found") 6756 6757 if tags[-1] != "$": 6758 if self._is_connected() and self._match_text_seq("$"): 6759 tag_text = tags[-1] 6760 tags.append("$") 6761 else: 6762 self.raise_error("No closing $ found") 6763 6764 heredoc_start = self._curr 6765 6766 while self._curr: 6767 if self._match_text_seq(*tags, advance=False): 6768 this = self._find_sql(heredoc_start, self._prev) 6769 self._advance(len(tags)) 6770 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6771 6772 self._advance() 6773 6774 self.raise_error(f"No closing {''.join(tags)} found") 6775 return None 6776 6777 def _find_parser( 6778 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6779 ) -> t.Optional[t.Callable]: 6780 if not self._curr: 6781 return None 6782 6783 index = self._index 6784 this = [] 6785 while True: 6786 # The current token might be multiple words 6787 curr = self._curr.text.upper() 6788 key = curr.split(" ") 6789 this.append(curr) 6790 6791 self._advance() 6792 result, trie = in_trie(trie, key) 6793 if result == TrieResult.FAILED: 6794 break 6795 6796 if result == TrieResult.EXISTS: 6797 subparser = parsers[" ".join(this)] 6798 return subparser 6799 6800 self._retreat(index) 6801 return None 6802 6803 def _match(self, token_type, advance=True, expression=None): 6804 if not self._curr: 6805 return None 6806 6807 if self._curr.token_type == token_type: 6808 if advance: 6809 self._advance() 6810 self._add_comments(expression) 6811 return True 6812 6813 return None 6814 6815 def _match_set(self, types, advance=True): 6816 if not self._curr: 6817 return None 6818 6819 if self._curr.token_type in types: 6820 if advance: 6821 self._advance() 6822 return True 6823 6824 return None 6825 6826 def _match_pair(self, token_type_a, token_type_b, advance=True): 6827 if not self._curr or not self._next: 6828 return None 6829 6830 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6831 if advance: 6832 self._advance(2) 6833 return True 6834 6835 return None 6836 6837 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6838 if not self._match(TokenType.L_PAREN, expression=expression): 6839 self.raise_error("Expecting (") 6840 6841 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6842 if not self._match(TokenType.R_PAREN, expression=expression): 6843 self.raise_error("Expecting )") 6844 6845 def _match_texts(self, texts, advance=True): 6846 if self._curr and self._curr.text.upper() in texts: 6847 if advance: 6848 self._advance() 6849 return True 6850 return None 6851 6852 def _match_text_seq(self, *texts, advance=True): 6853 index = 
self._index 6854 for text in texts: 6855 if self._curr and self._curr.text.upper() == text: 6856 self._advance() 6857 else: 6858 self._retreat(index) 6859 return None 6860 6861 if not advance: 6862 self._retreat(index) 6863 6864 return True 6865 6866 def _replace_lambda( 6867 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6868 ) -> t.Optional[exp.Expression]: 6869 if not node: 6870 return node 6871 6872 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6873 6874 for column in node.find_all(exp.Column): 6875 typ = lambda_types.get(column.parts[0].name) 6876 if typ is not None: 6877 dot_or_id = column.to_dot() if column.table else column.this 6878 6879 if typ: 6880 dot_or_id = self.expression( 6881 exp.Cast, 6882 this=dot_or_id, 6883 to=typ, 6884 ) 6885 6886 parent = column.parent 6887 6888 while isinstance(parent, exp.Dot): 6889 if not isinstance(parent.parent, exp.Dot): 6890 parent.replace(dot_or_id) 6891 break 6892 parent = parent.parent 6893 else: 6894 if column is node: 6895 node = dot_or_id 6896 else: 6897 column.replace(dot_or_id) 6898 return node 6899 6900 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6901 start = self._prev 6902 6903 # Not to be confused with TRUNCATE(number, decimals) function call 6904 if self._match(TokenType.L_PAREN): 6905 self._retreat(self._index - 2) 6906 return self._parse_function() 6907 6908 # Clickhouse supports TRUNCATE DATABASE as well 6909 is_database = self._match(TokenType.DATABASE) 6910 6911 self._match(TokenType.TABLE) 6912 6913 exists = self._parse_exists(not_=False) 6914 6915 expressions = self._parse_csv( 6916 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6917 ) 6918 6919 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6920 6921 if self._match_text_seq("RESTART", "IDENTITY"): 6922 identity = "RESTART" 6923 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6924 identity = "CONTINUE" 6925 else: 6926 identity = None 6927 6928 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6929 option = self._prev.text 6930 else: 6931 option = None 6932 6933 partition = self._parse_partition() 6934 6935 # Fallback case 6936 if self._curr: 6937 return self._parse_as_command(start) 6938 6939 return self.expression( 6940 exp.TruncateTable, 6941 expressions=expressions, 6942 is_database=is_database, 6943 exists=exists, 6944 cluster=cluster, 6945 identity=identity, 6946 option=option, 6947 partition=partition, 6948 ) 6949 6950 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6951 this = self._parse_ordered(self._parse_opclass) 6952 6953 if not self._match(TokenType.WITH): 6954 return this 6955 6956 op = self._parse_var(any_token=True) 6957 6958 return self.expression(exp.WithOperator, this=this, op=op) 6959 6960 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6961 self._match(TokenType.EQ) 6962 self._match(TokenType.L_PAREN) 6963 6964 opts: t.List[t.Optional[exp.Expression]] = [] 6965 while self._curr and not self._match(TokenType.R_PAREN): 6966 if self._match_text_seq("FORMAT_NAME", "="): 6967 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6968 # so we parse it separately to use _parse_field() 6969 prop = self.expression( 6970 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6971 ) 6972 opts.append(prop) 6973 else: 6974 opts.append(self._parse_property()) 6975 6976 self._match(TokenType.COMMA) 6977 6978 return opts 6979 6980 def 
_parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6981 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6982 6983 options = [] 6984 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6985 option = self._parse_var(any_token=True) 6986 prev = self._prev.text.upper() 6987 6988 # Different dialects might separate options and values by white space, "=" and "AS" 6989 self._match(TokenType.EQ) 6990 self._match(TokenType.ALIAS) 6991 6992 param = self.expression(exp.CopyParameter, this=option) 6993 6994 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6995 TokenType.L_PAREN, advance=False 6996 ): 6997 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6998 param.set("expressions", self._parse_wrapped_options()) 6999 elif prev == "FILE_FORMAT": 7000 # T-SQL's external file format case 7001 param.set("expression", self._parse_field()) 7002 else: 7003 param.set("expression", self._parse_unquoted_field()) 7004 7005 options.append(param) 7006 self._match(sep) 7007 7008 return options 7009 7010 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7011 expr = self.expression(exp.Credentials) 7012 7013 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7014 expr.set("storage", self._parse_field()) 7015 if self._match_text_seq("CREDENTIALS"): 7016 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7017 creds = ( 7018 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7019 ) 7020 expr.set("credentials", creds) 7021 if self._match_text_seq("ENCRYPTION"): 7022 expr.set("encryption", self._parse_wrapped_options()) 7023 if self._match_text_seq("IAM_ROLE"): 7024 expr.set("iam_role", self._parse_field()) 7025 if self._match_text_seq("REGION"): 7026 expr.set("region", self._parse_field()) 7027 7028 return expr 7029 7030 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7031 return self._parse_field() 7032 7033 def _parse_copy(self) -> exp.Copy | exp.Command: 7034 start = self._prev 7035 7036 self._match(TokenType.INTO) 7037 7038 this = ( 7039 self._parse_select(nested=True, parse_subquery_alias=False) 7040 if self._match(TokenType.L_PAREN, advance=False) 7041 else self._parse_table(schema=True) 7042 ) 7043 7044 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7045 7046 files = self._parse_csv(self._parse_file_location) 7047 credentials = self._parse_credentials() 7048 7049 self._match_text_seq("WITH") 7050 7051 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7052 7053 # Fallback case 7054 if self._curr: 7055 return self._parse_as_command(start) 7056 7057 return self.expression( 7058 exp.Copy, 7059 this=this, 7060 kind=kind, 7061 credentials=credentials, 7062 files=files, 7063 params=params, 7064 )
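
That closes the full module source. Since the `_parse_decode` docstring above describes the DECODE-to-CASE rewrite, here is a minimal sketch of the pipeline that method sits in, driving the tokenizer and Parser directly the way sqlglot.parse_one does internally (the dialect choice and query are illustrative, assuming a recent sqlglot):

import sqlglot
from sqlglot.dialects.dialect import Dialect

dialect = Dialect.get_or_raise("duckdb")
sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other')"

# Tokenize, then hand the token stream to the dialect's Parser; parse()
# returns one expression per statement.
expression = dialect.parser().parse(dialect.tokenize(sql), sql)[0]

# The search/result form of DECODE was parsed into a CASE expression, with
# NULL checked via IS NULL rather than equality, as the docstring explains.
print(expression.sql("duckdb"))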
142class Parser(metaclass=_Parser): 143 """ 144 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 145 146 Args: 147 error_level: The desired error level. 148 Default: ErrorLevel.IMMEDIATE 149 error_message_context: The amount of context to capture from a query string when displaying 150 the error message (in number of characters). 151 Default: 100 152 max_errors: Maximum number of error messages to include in a raised ParseError. 153 This is only relevant if error_level is ErrorLevel.RAISE. 154 Default: 3 155 """ 156 157 FUNCTIONS: t.Dict[str, t.Callable] = { 158 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 159 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 160 "CONCAT": lambda args, dialect: exp.Concat( 161 expressions=args, 162 safe=not dialect.STRICT_STRING_CONCAT, 163 coalesce=dialect.CONCAT_COALESCE, 164 ), 165 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 166 expressions=args, 167 safe=not dialect.STRICT_STRING_CONCAT, 168 coalesce=dialect.CONCAT_COALESCE, 169 ), 170 "DATE_TO_DATE_STR": lambda args: exp.Cast( 171 this=seq_get(args, 0), 172 to=exp.DataType(this=exp.DataType.Type.TEXT), 173 ), 174 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 175 start=seq_get(args, 0), 176 end=seq_get(args, 1), 177 interval=seq_get(args, 2) 178 or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 179 ), 180 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 181 "HEX": build_hex, 182 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 183 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 184 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 185 "LIKE": build_like, 186 "LOG": build_logarithm, 187 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 188 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 189 "LOWER": build_lower, 190 "LPAD": lambda args: build_pad(args), 191 "LEFTPAD": lambda args: build_pad(args), 192 "MOD": build_mod, 193 "RPAD": lambda args: build_pad(args, is_left=False), 194 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 195 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 196 if len(args) != 2 197 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 198 "TIME_TO_TIME_STR": lambda args: exp.Cast( 199 this=seq_get(args, 0), 200 to=exp.DataType(this=exp.DataType.Type.TEXT), 201 ), 202 "TO_HEX": build_hex, 203 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 204 this=exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 start=exp.Literal.number(1), 209 length=exp.Literal.number(10), 210 ), 211 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 212 "UPPER": build_upper, 213 "VAR_MAP": build_var_map, 214 } 215 216 NO_PAREN_FUNCTIONS = { 217 TokenType.CURRENT_DATE: exp.CurrentDate, 218 TokenType.CURRENT_DATETIME: exp.CurrentDate, 219 TokenType.CURRENT_TIME: exp.CurrentTime, 220 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 221 TokenType.CURRENT_USER: exp.CurrentUser, 222 } 223 224 STRUCT_TYPE_TOKENS = { 225 TokenType.NESTED, 226 TokenType.OBJECT, 227 TokenType.STRUCT, 228 } 229 230 NESTED_TYPE_TOKENS = { 231 TokenType.ARRAY, 232 TokenType.LIST, 233 TokenType.LOWCARDINALITY, 234 TokenType.MAP, 235 TokenType.NULLABLE, 236 *STRUCT_TYPE_TOKENS, 237 } 238 239 
ENUM_TYPE_TOKENS = { 240 TokenType.ENUM, 241 TokenType.ENUM8, 242 TokenType.ENUM16, 243 } 244 245 AGGREGATE_TYPE_TOKENS = { 246 TokenType.AGGREGATEFUNCTION, 247 TokenType.SIMPLEAGGREGATEFUNCTION, 248 } 249 250 TYPE_TOKENS = { 251 TokenType.BIT, 252 TokenType.BOOLEAN, 253 TokenType.TINYINT, 254 TokenType.UTINYINT, 255 TokenType.SMALLINT, 256 TokenType.USMALLINT, 257 TokenType.INT, 258 TokenType.UINT, 259 TokenType.BIGINT, 260 TokenType.UBIGINT, 261 TokenType.INT128, 262 TokenType.UINT128, 263 TokenType.INT256, 264 TokenType.UINT256, 265 TokenType.MEDIUMINT, 266 TokenType.UMEDIUMINT, 267 TokenType.FIXEDSTRING, 268 TokenType.FLOAT, 269 TokenType.DOUBLE, 270 TokenType.CHAR, 271 TokenType.NCHAR, 272 TokenType.VARCHAR, 273 TokenType.NVARCHAR, 274 TokenType.BPCHAR, 275 TokenType.TEXT, 276 TokenType.MEDIUMTEXT, 277 TokenType.LONGTEXT, 278 TokenType.MEDIUMBLOB, 279 TokenType.LONGBLOB, 280 TokenType.BINARY, 281 TokenType.VARBINARY, 282 TokenType.JSON, 283 TokenType.JSONB, 284 TokenType.INTERVAL, 285 TokenType.TINYBLOB, 286 TokenType.TINYTEXT, 287 TokenType.TIME, 288 TokenType.TIMETZ, 289 TokenType.TIMESTAMP, 290 TokenType.TIMESTAMP_S, 291 TokenType.TIMESTAMP_MS, 292 TokenType.TIMESTAMP_NS, 293 TokenType.TIMESTAMPTZ, 294 TokenType.TIMESTAMPLTZ, 295 TokenType.TIMESTAMPNTZ, 296 TokenType.DATETIME, 297 TokenType.DATETIME64, 298 TokenType.DATE, 299 TokenType.DATE32, 300 TokenType.INT4RANGE, 301 TokenType.INT4MULTIRANGE, 302 TokenType.INT8RANGE, 303 TokenType.INT8MULTIRANGE, 304 TokenType.NUMRANGE, 305 TokenType.NUMMULTIRANGE, 306 TokenType.TSRANGE, 307 TokenType.TSMULTIRANGE, 308 TokenType.TSTZRANGE, 309 TokenType.TSTZMULTIRANGE, 310 TokenType.DATERANGE, 311 TokenType.DATEMULTIRANGE, 312 TokenType.DECIMAL, 313 TokenType.UDECIMAL, 314 TokenType.BIGDECIMAL, 315 TokenType.UUID, 316 TokenType.GEOGRAPHY, 317 TokenType.GEOMETRY, 318 TokenType.HLLSKETCH, 319 TokenType.HSTORE, 320 TokenType.PSEUDO_TYPE, 321 TokenType.SUPER, 322 TokenType.SERIAL, 323 TokenType.SMALLSERIAL, 324 TokenType.BIGSERIAL, 325 TokenType.XML, 326 TokenType.YEAR, 327 TokenType.UNIQUEIDENTIFIER, 328 TokenType.USERDEFINED, 329 TokenType.MONEY, 330 TokenType.SMALLMONEY, 331 TokenType.ROWVERSION, 332 TokenType.IMAGE, 333 TokenType.VARIANT, 334 TokenType.VECTOR, 335 TokenType.OBJECT, 336 TokenType.OBJECT_IDENTIFIER, 337 TokenType.INET, 338 TokenType.IPADDRESS, 339 TokenType.IPPREFIX, 340 TokenType.IPV4, 341 TokenType.IPV6, 342 TokenType.UNKNOWN, 343 TokenType.NULL, 344 TokenType.NAME, 345 TokenType.TDIGEST, 346 *ENUM_TYPE_TOKENS, 347 *NESTED_TYPE_TOKENS, 348 *AGGREGATE_TYPE_TOKENS, 349 } 350 351 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 352 TokenType.BIGINT: TokenType.UBIGINT, 353 TokenType.INT: TokenType.UINT, 354 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 355 TokenType.SMALLINT: TokenType.USMALLINT, 356 TokenType.TINYINT: TokenType.UTINYINT, 357 TokenType.DECIMAL: TokenType.UDECIMAL, 358 } 359 360 SUBQUERY_PREDICATES = { 361 TokenType.ANY: exp.Any, 362 TokenType.ALL: exp.All, 363 TokenType.EXISTS: exp.Exists, 364 TokenType.SOME: exp.Any, 365 } 366 367 RESERVED_TOKENS = { 368 *Tokenizer.SINGLE_TOKENS.values(), 369 TokenType.SELECT, 370 } - {TokenType.IDENTIFIER} 371 372 DB_CREATABLES = { 373 TokenType.DATABASE, 374 TokenType.DICTIONARY, 375 TokenType.MODEL, 376 TokenType.SCHEMA, 377 TokenType.SEQUENCE, 378 TokenType.STORAGE_INTEGRATION, 379 TokenType.TABLE, 380 TokenType.TAG, 381 TokenType.VIEW, 382 TokenType.WAREHOUSE, 383 TokenType.STREAMLIT, 384 } 385 386 CREATABLES = { 387 TokenType.COLUMN, 388 TokenType.CONSTRAINT, 389 TokenType.FOREIGN_KEY, 390 
TokenType.FUNCTION, 391 TokenType.INDEX, 392 TokenType.PROCEDURE, 393 *DB_CREATABLES, 394 } 395 396 # Tokens that can represent identifiers 397 ID_VAR_TOKENS = { 398 TokenType.ALL, 399 TokenType.VAR, 400 TokenType.ANTI, 401 TokenType.APPLY, 402 TokenType.ASC, 403 TokenType.ASOF, 404 TokenType.AUTO_INCREMENT, 405 TokenType.BEGIN, 406 TokenType.BPCHAR, 407 TokenType.CACHE, 408 TokenType.CASE, 409 TokenType.COLLATE, 410 TokenType.COMMAND, 411 TokenType.COMMENT, 412 TokenType.COMMIT, 413 TokenType.CONSTRAINT, 414 TokenType.COPY, 415 TokenType.CUBE, 416 TokenType.DEFAULT, 417 TokenType.DELETE, 418 TokenType.DESC, 419 TokenType.DESCRIBE, 420 TokenType.DICTIONARY, 421 TokenType.DIV, 422 TokenType.END, 423 TokenType.EXECUTE, 424 TokenType.ESCAPE, 425 TokenType.FALSE, 426 TokenType.FIRST, 427 TokenType.FILTER, 428 TokenType.FINAL, 429 TokenType.FORMAT, 430 TokenType.FULL, 431 TokenType.IDENTIFIER, 432 TokenType.IS, 433 TokenType.ISNULL, 434 TokenType.INTERVAL, 435 TokenType.KEEP, 436 TokenType.KILL, 437 TokenType.LEFT, 438 TokenType.LOAD, 439 TokenType.MERGE, 440 TokenType.NATURAL, 441 TokenType.NEXT, 442 TokenType.OFFSET, 443 TokenType.OPERATOR, 444 TokenType.ORDINALITY, 445 TokenType.OVERLAPS, 446 TokenType.OVERWRITE, 447 TokenType.PARTITION, 448 TokenType.PERCENT, 449 TokenType.PIVOT, 450 TokenType.PRAGMA, 451 TokenType.RANGE, 452 TokenType.RECURSIVE, 453 TokenType.REFERENCES, 454 TokenType.REFRESH, 455 TokenType.RENAME, 456 TokenType.REPLACE, 457 TokenType.RIGHT, 458 TokenType.ROLLUP, 459 TokenType.ROW, 460 TokenType.ROWS, 461 TokenType.SEMI, 462 TokenType.SET, 463 TokenType.SETTINGS, 464 TokenType.SHOW, 465 TokenType.TEMPORARY, 466 TokenType.TOP, 467 TokenType.TRUE, 468 TokenType.TRUNCATE, 469 TokenType.UNIQUE, 470 TokenType.UNNEST, 471 TokenType.UNPIVOT, 472 TokenType.UPDATE, 473 TokenType.USE, 474 TokenType.VOLATILE, 475 TokenType.WINDOW, 476 *CREATABLES, 477 *SUBQUERY_PREDICATES, 478 *TYPE_TOKENS, 479 *NO_PAREN_FUNCTIONS, 480 } 481 482 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 483 484 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 485 TokenType.ANTI, 486 TokenType.APPLY, 487 TokenType.ASOF, 488 TokenType.FULL, 489 TokenType.LEFT, 490 TokenType.LOCK, 491 TokenType.NATURAL, 492 TokenType.OFFSET, 493 TokenType.RIGHT, 494 TokenType.SEMI, 495 TokenType.WINDOW, 496 } 497 498 ALIAS_TOKENS = ID_VAR_TOKENS 499 500 ARRAY_CONSTRUCTORS = { 501 "ARRAY": exp.Array, 502 "LIST": exp.List, 503 } 504 505 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 506 507 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 508 509 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 510 511 FUNC_TOKENS = { 512 TokenType.COLLATE, 513 TokenType.COMMAND, 514 TokenType.CURRENT_DATE, 515 TokenType.CURRENT_DATETIME, 516 TokenType.CURRENT_TIMESTAMP, 517 TokenType.CURRENT_TIME, 518 TokenType.CURRENT_USER, 519 TokenType.FILTER, 520 TokenType.FIRST, 521 TokenType.FORMAT, 522 TokenType.GLOB, 523 TokenType.IDENTIFIER, 524 TokenType.INDEX, 525 TokenType.ISNULL, 526 TokenType.ILIKE, 527 TokenType.INSERT, 528 TokenType.LIKE, 529 TokenType.MERGE, 530 TokenType.OFFSET, 531 TokenType.PRIMARY_KEY, 532 TokenType.RANGE, 533 TokenType.REPLACE, 534 TokenType.RLIKE, 535 TokenType.ROW, 536 TokenType.UNNEST, 537 TokenType.VAR, 538 TokenType.LEFT, 539 TokenType.RIGHT, 540 TokenType.SEQUENCE, 541 TokenType.DATE, 542 TokenType.DATETIME, 543 TokenType.TABLE, 544 TokenType.TIMESTAMP, 545 TokenType.TIMESTAMPTZ, 546 TokenType.TRUNCATE, 547 TokenType.WINDOW, 548 TokenType.XOR, 549 *TYPE_TOKENS, 550 *SUBQUERY_PREDICATES, 551 } 552 553 
CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 554 TokenType.AND: exp.And, 555 } 556 557 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 558 TokenType.COLON_EQ: exp.PropertyEQ, 559 } 560 561 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 562 TokenType.OR: exp.Or, 563 } 564 565 EQUALITY = { 566 TokenType.EQ: exp.EQ, 567 TokenType.NEQ: exp.NEQ, 568 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 569 } 570 571 COMPARISON = { 572 TokenType.GT: exp.GT, 573 TokenType.GTE: exp.GTE, 574 TokenType.LT: exp.LT, 575 TokenType.LTE: exp.LTE, 576 } 577 578 BITWISE = { 579 TokenType.AMP: exp.BitwiseAnd, 580 TokenType.CARET: exp.BitwiseXor, 581 TokenType.PIPE: exp.BitwiseOr, 582 } 583 584 TERM = { 585 TokenType.DASH: exp.Sub, 586 TokenType.PLUS: exp.Add, 587 TokenType.MOD: exp.Mod, 588 TokenType.COLLATE: exp.Collate, 589 } 590 591 FACTOR = { 592 TokenType.DIV: exp.IntDiv, 593 TokenType.LR_ARROW: exp.Distance, 594 TokenType.SLASH: exp.Div, 595 TokenType.STAR: exp.Mul, 596 } 597 598 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 599 600 TIMES = { 601 TokenType.TIME, 602 TokenType.TIMETZ, 603 } 604 605 TIMESTAMPS = { 606 TokenType.TIMESTAMP, 607 TokenType.TIMESTAMPTZ, 608 TokenType.TIMESTAMPLTZ, 609 *TIMES, 610 } 611 612 SET_OPERATIONS = { 613 TokenType.UNION, 614 TokenType.INTERSECT, 615 TokenType.EXCEPT, 616 } 617 618 JOIN_METHODS = { 619 TokenType.ASOF, 620 TokenType.NATURAL, 621 TokenType.POSITIONAL, 622 } 623 624 JOIN_SIDES = { 625 TokenType.LEFT, 626 TokenType.RIGHT, 627 TokenType.FULL, 628 } 629 630 JOIN_KINDS = { 631 TokenType.ANTI, 632 TokenType.CROSS, 633 TokenType.INNER, 634 TokenType.OUTER, 635 TokenType.SEMI, 636 TokenType.STRAIGHT_JOIN, 637 } 638 639 JOIN_HINTS: t.Set[str] = set() 640 641 LAMBDAS = { 642 TokenType.ARROW: lambda self, expressions: self.expression( 643 exp.Lambda, 644 this=self._replace_lambda( 645 self._parse_assignment(), 646 expressions, 647 ), 648 expressions=expressions, 649 ), 650 TokenType.FARROW: lambda self, expressions: self.expression( 651 exp.Kwarg, 652 this=exp.var(expressions[0].name), 653 expression=self._parse_assignment(), 654 ), 655 } 656 657 COLUMN_OPERATORS = { 658 TokenType.DOT: None, 659 TokenType.DCOLON: lambda self, this, to: self.expression( 660 exp.Cast if self.STRICT_CAST else exp.TryCast, 661 this=this, 662 to=to, 663 ), 664 TokenType.ARROW: lambda self, this, path: self.expression( 665 exp.JSONExtract, 666 this=this, 667 expression=self.dialect.to_json_path(path), 668 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 669 ), 670 TokenType.DARROW: lambda self, this, path: self.expression( 671 exp.JSONExtractScalar, 672 this=this, 673 expression=self.dialect.to_json_path(path), 674 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 675 ), 676 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 677 exp.JSONBExtract, 678 this=this, 679 expression=path, 680 ), 681 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 682 exp.JSONBExtractScalar, 683 this=this, 684 expression=path, 685 ), 686 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 687 exp.JSONBContains, 688 this=this, 689 expression=key, 690 ), 691 } 692 693 EXPRESSION_PARSERS = { 694 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 695 exp.Column: lambda self: self._parse_column(), 696 exp.Condition: lambda self: self._parse_assignment(), 697 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 698 exp.Expression: lambda self: self._parse_expression(), 699 
exp.From: lambda self: self._parse_from(joins=True), 700 exp.Group: lambda self: self._parse_group(), 701 exp.Having: lambda self: self._parse_having(), 702 exp.Identifier: lambda self: self._parse_id_var(), 703 exp.Join: lambda self: self._parse_join(), 704 exp.Lambda: lambda self: self._parse_lambda(), 705 exp.Lateral: lambda self: self._parse_lateral(), 706 exp.Limit: lambda self: self._parse_limit(), 707 exp.Offset: lambda self: self._parse_offset(), 708 exp.Order: lambda self: self._parse_order(), 709 exp.Ordered: lambda self: self._parse_ordered(), 710 exp.Properties: lambda self: self._parse_properties(), 711 exp.Qualify: lambda self: self._parse_qualify(), 712 exp.Returning: lambda self: self._parse_returning(), 713 exp.Select: lambda self: self._parse_select(), 714 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 715 exp.Table: lambda self: self._parse_table_parts(), 716 exp.TableAlias: lambda self: self._parse_table_alias(), 717 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 718 exp.Where: lambda self: self._parse_where(), 719 exp.Window: lambda self: self._parse_named_window(), 720 exp.With: lambda self: self._parse_with(), 721 "JOIN_TYPE": lambda self: self._parse_join_parts(), 722 } 723 724 STATEMENT_PARSERS = { 725 TokenType.ALTER: lambda self: self._parse_alter(), 726 TokenType.BEGIN: lambda self: self._parse_transaction(), 727 TokenType.CACHE: lambda self: self._parse_cache(), 728 TokenType.COMMENT: lambda self: self._parse_comment(), 729 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 730 TokenType.COPY: lambda self: self._parse_copy(), 731 TokenType.CREATE: lambda self: self._parse_create(), 732 TokenType.DELETE: lambda self: self._parse_delete(), 733 TokenType.DESC: lambda self: self._parse_describe(), 734 TokenType.DESCRIBE: lambda self: self._parse_describe(), 735 TokenType.DROP: lambda self: self._parse_drop(), 736 TokenType.INSERT: lambda self: self._parse_insert(), 737 TokenType.KILL: lambda self: self._parse_kill(), 738 TokenType.LOAD: lambda self: self._parse_load(), 739 TokenType.MERGE: lambda self: self._parse_merge(), 740 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 741 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 742 TokenType.REFRESH: lambda self: self._parse_refresh(), 743 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 744 TokenType.SET: lambda self: self._parse_set(), 745 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 746 TokenType.UNCACHE: lambda self: self._parse_uncache(), 747 TokenType.UPDATE: lambda self: self._parse_update(), 748 TokenType.USE: lambda self: self.expression( 749 exp.Use, 750 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 751 this=self._parse_table(schema=False), 752 ), 753 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 754 } 755 756 UNARY_PARSERS = { 757 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 758 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 759 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 760 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 761 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 762 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 763 } 764 765 STRING_PARSERS = { 766 TokenType.HEREDOC_STRING: 
lambda self, token: self.expression( 767 exp.RawString, this=token.text 768 ), 769 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 770 exp.National, this=token.text 771 ), 772 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 773 TokenType.STRING: lambda self, token: self.expression( 774 exp.Literal, this=token.text, is_string=True 775 ), 776 TokenType.UNICODE_STRING: lambda self, token: self.expression( 777 exp.UnicodeString, 778 this=token.text, 779 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 780 ), 781 } 782 783 NUMERIC_PARSERS = { 784 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 785 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 786 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 787 TokenType.NUMBER: lambda self, token: self.expression( 788 exp.Literal, this=token.text, is_string=False 789 ), 790 } 791 792 PRIMARY_PARSERS = { 793 **STRING_PARSERS, 794 **NUMERIC_PARSERS, 795 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 796 TokenType.NULL: lambda self, _: self.expression(exp.Null), 797 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 798 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 799 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 800 TokenType.STAR: lambda self, _: self.expression( 801 exp.Star, 802 **{ 803 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 804 "replace": self._parse_star_op("REPLACE"), 805 "rename": self._parse_star_op("RENAME"), 806 }, 807 ), 808 } 809 810 PLACEHOLDER_PARSERS = { 811 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 812 TokenType.PARAMETER: lambda self: self._parse_parameter(), 813 TokenType.COLON: lambda self: ( 814 self.expression(exp.Placeholder, this=self._prev.text) 815 if self._match_set(self.ID_VAR_TOKENS) 816 else None 817 ), 818 } 819 820 RANGE_PARSERS = { 821 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 822 TokenType.GLOB: binary_range_parser(exp.Glob), 823 TokenType.ILIKE: binary_range_parser(exp.ILike), 824 TokenType.IN: lambda self, this: self._parse_in(this), 825 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 826 TokenType.IS: lambda self, this: self._parse_is(this), 827 TokenType.LIKE: binary_range_parser(exp.Like), 828 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 829 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 830 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 831 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 832 } 833 834 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 835 "ALLOWED_VALUES": lambda self: self.expression( 836 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 837 ), 838 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 839 "AUTO": lambda self: self._parse_auto_property(), 840 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 841 "BACKUP": lambda self: self.expression( 842 exp.BackupProperty, this=self._parse_var(any_token=True) 843 ), 844 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 845 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 846 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 847 "CHECKSUM": lambda self: 
self._parse_checksum(), 848 "CLUSTER BY": lambda self: self._parse_cluster(), 849 "CLUSTERED": lambda self: self._parse_clustered_by(), 850 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 851 exp.CollateProperty, **kwargs 852 ), 853 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 854 "CONTAINS": lambda self: self._parse_contains_property(), 855 "COPY": lambda self: self._parse_copy_property(), 856 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 857 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 858 "DEFINER": lambda self: self._parse_definer(), 859 "DETERMINISTIC": lambda self: self.expression( 860 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 861 ), 862 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 863 "DISTKEY": lambda self: self._parse_distkey(), 864 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 865 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 866 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 867 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 868 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 869 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 870 "FREESPACE": lambda self: self._parse_freespace(), 871 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 872 "HEAP": lambda self: self.expression(exp.HeapProperty), 873 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 874 "IMMUTABLE": lambda self: self.expression( 875 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 876 ), 877 "INHERITS": lambda self: self.expression( 878 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 879 ), 880 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 881 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 882 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 883 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 884 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 885 "LIKE": lambda self: self._parse_create_like(), 886 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 887 "LOCK": lambda self: self._parse_locking(), 888 "LOCKING": lambda self: self._parse_locking(), 889 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 890 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 891 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 892 "MODIFIES": lambda self: self._parse_modifies_property(), 893 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 894 "NO": lambda self: self._parse_no_property(), 895 "ON": lambda self: self._parse_on_property(), 896 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 897 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 898 "PARTITION": lambda self: self._parse_partitioned_of(), 899 "PARTITION BY": lambda self: self._parse_partitioned_by(), 900 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 901 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 902 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 903 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 904 "READS": lambda self: 
self._parse_reads_property(), 905 "REMOTE": lambda self: self._parse_remote_with_connection(), 906 "RETURNS": lambda self: self._parse_returns(), 907 "STRICT": lambda self: self.expression(exp.StrictProperty), 908 "ROW": lambda self: self._parse_row(), 909 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 910 "SAMPLE": lambda self: self.expression( 911 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 912 ), 913 "SECURE": lambda self: self.expression(exp.SecureProperty), 914 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 915 "SETTINGS": lambda self: self.expression( 916 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 917 ), 918 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 919 "SORTKEY": lambda self: self._parse_sortkey(), 920 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 921 "STABLE": lambda self: self.expression( 922 exp.StabilityProperty, this=exp.Literal.string("STABLE") 923 ), 924 "STORED": lambda self: self._parse_stored(), 925 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 926 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 927 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 928 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 929 "TO": lambda self: self._parse_to_table(), 930 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 931 "TRANSFORM": lambda self: self.expression( 932 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 933 ), 934 "TTL": lambda self: self._parse_ttl(), 935 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 936 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 937 "VOLATILE": lambda self: self._parse_volatile_property(), 938 "WITH": lambda self: self._parse_with_property(), 939 } 940 941 CONSTRAINT_PARSERS = { 942 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 943 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 944 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 945 "CHARACTER SET": lambda self: self.expression( 946 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 947 ), 948 "CHECK": lambda self: self.expression( 949 exp.CheckColumnConstraint, 950 this=self._parse_wrapped(self._parse_assignment), 951 enforced=self._match_text_seq("ENFORCED"), 952 ), 953 "COLLATE": lambda self: self.expression( 954 exp.CollateColumnConstraint, 955 this=self._parse_identifier() or self._parse_column(), 956 ), 957 "COMMENT": lambda self: self.expression( 958 exp.CommentColumnConstraint, this=self._parse_string() 959 ), 960 "COMPRESS": lambda self: self._parse_compress(), 961 "CLUSTERED": lambda self: self.expression( 962 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 963 ), 964 "NONCLUSTERED": lambda self: self.expression( 965 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 966 ), 967 "DEFAULT": lambda self: self.expression( 968 exp.DefaultColumnConstraint, this=self._parse_bitwise() 969 ), 970 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 971 "EPHEMERAL": lambda self: self.expression( 972 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 973 ), 974 "EXCLUDE": lambda self: self.expression( 975 exp.ExcludeColumnConstraint, 
this=self._parse_index_params() 976 ), 977 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 978 "FORMAT": lambda self: self.expression( 979 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 980 ), 981 "GENERATED": lambda self: self._parse_generated_as_identity(), 982 "IDENTITY": lambda self: self._parse_auto_increment(), 983 "INLINE": lambda self: self._parse_inline(), 984 "LIKE": lambda self: self._parse_create_like(), 985 "NOT": lambda self: self._parse_not_constraint(), 986 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 987 "ON": lambda self: ( 988 self._match(TokenType.UPDATE) 989 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 990 ) 991 or self.expression(exp.OnProperty, this=self._parse_id_var()), 992 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 993 "PERIOD": lambda self: self._parse_period_for_system_time(), 994 "PRIMARY KEY": lambda self: self._parse_primary_key(), 995 "REFERENCES": lambda self: self._parse_references(match=False), 996 "TITLE": lambda self: self.expression( 997 exp.TitleColumnConstraint, this=self._parse_var_or_string() 998 ), 999 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1000 "UNIQUE": lambda self: self._parse_unique(), 1001 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1002 "WITH": lambda self: self.expression( 1003 exp.Properties, expressions=self._parse_wrapped_properties() 1004 ), 1005 } 1006 1007 ALTER_PARSERS = { 1008 "ADD": lambda self: self._parse_alter_table_add(), 1009 "ALTER": lambda self: self._parse_alter_table_alter(), 1010 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1011 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1012 "DROP": lambda self: self._parse_alter_table_drop(), 1013 "RENAME": lambda self: self._parse_alter_table_rename(), 1014 "SET": lambda self: self._parse_alter_table_set(), 1015 } 1016 1017 ALTER_ALTER_PARSERS = { 1018 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1019 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1020 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1021 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1022 } 1023 1024 SCHEMA_UNNAMED_CONSTRAINTS = { 1025 "CHECK", 1026 "EXCLUDE", 1027 "FOREIGN KEY", 1028 "LIKE", 1029 "PERIOD", 1030 "PRIMARY KEY", 1031 "UNIQUE", 1032 } 1033 1034 NO_PAREN_FUNCTION_PARSERS = { 1035 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1036 "CASE": lambda self: self._parse_case(), 1037 "CONNECT_BY_ROOT": lambda self: self.expression( 1038 exp.ConnectByRoot, this=self._parse_column() 1039 ), 1040 "IF": lambda self: self._parse_if(), 1041 "NEXT": lambda self: self._parse_next_value_for(), 1042 } 1043 1044 INVALID_FUNC_NAME_TOKENS = { 1045 TokenType.IDENTIFIER, 1046 TokenType.STRING, 1047 } 1048 1049 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1050 1051 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1052 1053 FUNCTION_PARSERS = { 1054 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1055 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1056 "DECODE": lambda self: self._parse_decode(), 1057 "EXTRACT": lambda self: self._parse_extract(), 1058 "GAP_FILL": lambda self: self._parse_gap_fill(), 1059 "JSON_OBJECT": lambda self: self._parse_json_object(), 1060 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1061 
"JSON_TABLE": lambda self: self._parse_json_table(), 1062 "MATCH": lambda self: self._parse_match_against(), 1063 "OPENJSON": lambda self: self._parse_open_json(), 1064 "POSITION": lambda self: self._parse_position(), 1065 "PREDICT": lambda self: self._parse_predict(), 1066 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1067 "STRING_AGG": lambda self: self._parse_string_agg(), 1068 "SUBSTRING": lambda self: self._parse_substring(), 1069 "TRIM": lambda self: self._parse_trim(), 1070 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1071 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1072 } 1073 1074 QUERY_MODIFIER_PARSERS = { 1075 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1076 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1077 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1078 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1079 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1080 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1081 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1082 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1083 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1084 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1085 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1086 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1087 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1088 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1089 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1090 TokenType.CLUSTER_BY: lambda self: ( 1091 "cluster", 1092 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1093 ), 1094 TokenType.DISTRIBUTE_BY: lambda self: ( 1095 "distribute", 1096 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1097 ), 1098 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1099 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1100 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1101 } 1102 1103 SET_PARSERS = { 1104 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1105 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1106 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1107 "TRANSACTION": lambda self: self._parse_set_transaction(), 1108 } 1109 1110 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1111 1112 TYPE_LITERAL_PARSERS = { 1113 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1114 } 1115 1116 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1117 1118 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1119 1120 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1121 1122 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1123 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1124 "ISOLATION": ( 1125 ("LEVEL", "REPEATABLE", "READ"), 1126 ("LEVEL", "READ", "COMMITTED"), 1127 ("LEVEL", "READ", "UNCOMITTED"), 1128 ("LEVEL", "SERIALIZABLE"), 1129 ), 1130 "READ": ("WRITE", "ONLY"), 1131 } 1132 1133 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1134 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), 
tuple() 1135 ) 1136 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1137 1138 CREATE_SEQUENCE: OPTIONS_TYPE = { 1139 "SCALE": ("EXTEND", "NOEXTEND"), 1140 "SHARD": ("EXTEND", "NOEXTEND"), 1141 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1142 **dict.fromkeys( 1143 ( 1144 "SESSION", 1145 "GLOBAL", 1146 "KEEP", 1147 "NOKEEP", 1148 "ORDER", 1149 "NOORDER", 1150 "NOCACHE", 1151 "CYCLE", 1152 "NOCYCLE", 1153 "NOMINVALUE", 1154 "NOMAXVALUE", 1155 "NOSCALE", 1156 "NOSHARD", 1157 ), 1158 tuple(), 1159 ), 1160 } 1161 1162 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1163 1164 USABLES: OPTIONS_TYPE = dict.fromkeys( 1165 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1166 ) 1167 1168 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1169 1170 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1171 "TYPE": ("EVOLUTION",), 1172 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1173 } 1174 1175 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1176 "NOT": ("ENFORCED",), 1177 "MATCH": ( 1178 "FULL", 1179 "PARTIAL", 1180 "SIMPLE", 1181 ), 1182 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1183 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1184 } 1185 1186 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1187 1188 CLONE_KEYWORDS = {"CLONE", "COPY"} 1189 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1190 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1191 1192 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1193 1194 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1195 1196 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1197 1198 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1199 1200 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1201 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1202 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1203 1204 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1205 1206 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1207 1208 ADD_CONSTRAINT_TOKENS = { 1209 TokenType.CONSTRAINT, 1210 TokenType.FOREIGN_KEY, 1211 TokenType.INDEX, 1212 TokenType.KEY, 1213 TokenType.PRIMARY_KEY, 1214 TokenType.UNIQUE, 1215 } 1216 1217 DISTINCT_TOKENS = {TokenType.DISTINCT} 1218 1219 NULL_TOKENS = {TokenType.NULL} 1220 1221 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1222 1223 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1224 1225 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1226 1227 STRICT_CAST = True 1228 1229 PREFIXED_PIVOT_COLUMNS = False 1230 IDENTIFY_PIVOT_STRINGS = False 1231 1232 LOG_DEFAULTS_TO_LN = False 1233 1234 # Whether ADD is present for each column added by ALTER TABLE 1235 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1236 1237 # Whether the table sample clause expects CSV syntax 1238 TABLESAMPLE_CSV = False 1239 1240 # The default method used for table sampling 1241 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1242 1243 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1244 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1245 1246 # Whether the TRIM function expects the characters to trim as its first argument 1247 TRIM_PATTERN_FIRST = False 1248 1249 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1250 STRING_ALIASES = False 1251 1252 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1253 MODIFIERS_ATTACHED_TO_SET_OP = True 1254 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1255 1256 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1257 NO_PAREN_IF_COMMANDS = True 1258 1259 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1260 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1261 1262 # Whether the `:` operator is used to extract a value from a VARIANT column 1263 COLON_IS_VARIANT_EXTRACT = False 1264 1265 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1266 # If this is True and '(' is not found, the keyword will be treated as an identifier 1267 VALUES_FOLLOWED_BY_PAREN = True 1268 1269 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1270 SUPPORTS_IMPLICIT_UNNEST = False 1271 1272 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1273 INTERVAL_SPANS = True 1274 1275 # Whether a PARTITION clause can follow a table reference 1276 SUPPORTS_PARTITION_SELECTION = False 1277 1278 __slots__ = ( 1279 "error_level", 1280 "error_message_context", 1281 "max_errors", 1282 "dialect", 1283 "sql", 1284 "errors", 1285 "_tokens", 1286 "_index", 1287 "_curr", 1288 "_next", 1289 "_prev", 1290 "_prev_comments", 1291 ) 1292 1293 # Autofilled 1294 SHOW_TRIE: t.Dict = {} 1295 SET_TRIE: t.Dict = {} 1296 1297 def __init__( 1298 self, 1299 error_level: t.Optional[ErrorLevel] = None, 1300 error_message_context: int = 100, 1301 max_errors: int = 3, 1302 dialect: DialectType = None, 1303 ): 1304 from sqlglot.dialects import Dialect 1305 1306 self.error_level = error_level or ErrorLevel.IMMEDIATE 1307 self.error_message_context = error_message_context 1308 self.max_errors = max_errors 1309 self.dialect = Dialect.get_or_raise(dialect) 1310 self.reset() 1311 1312 def reset(self): 1313 self.sql = "" 1314 self.errors = [] 1315 self._tokens = [] 1316 self._index = 0 1317 self._curr = None 1318 self._next = None 1319 self._prev = None 1320 self._prev_comments = None 1321 1322 def parse( 1323 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1324 ) -> t.List[t.Optional[exp.Expression]]: 1325 """ 1326 Parses a list of tokens and returns a list of syntax trees, one tree 1327 per parsed SQL statement. 1328 1329 Args: 1330 raw_tokens: The list of tokens. 1331 sql: The original SQL string, used to produce helpful debug messages. 1332 1333 Returns: 1334 The list of the produced syntax trees. 1335 """ 1336 return self._parse( 1337 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1338 ) 1339 1340 def parse_into( 1341 self, 1342 expression_types: exp.IntoType, 1343 raw_tokens: t.List[Token], 1344 sql: t.Optional[str] = None, 1345 ) -> t.List[t.Optional[exp.Expression]]: 1346 """ 1347 Parses a list of tokens into a given Expression type. If a collection of Expression 1348 types is given instead, this method will try to parse the token list into each one 1349 of them, stopping at the first for which the parsing succeeds. 1350 1351 Args: 1352 expression_types: The expression type(s) to try and parse the token list into. 
1353 raw_tokens: The list of tokens. 1354 sql: The original SQL string, used to produce helpful debug messages. 1355 1356 Returns: 1357 The target Expression. 1358 """ 1359 errors = [] 1360 for expression_type in ensure_list(expression_types): 1361 parser = self.EXPRESSION_PARSERS.get(expression_type) 1362 if not parser: 1363 raise TypeError(f"No parser registered for {expression_type}") 1364 1365 try: 1366 return self._parse(parser, raw_tokens, sql) 1367 except ParseError as e: 1368 e.errors[0]["into_expression"] = expression_type 1369 errors.append(e) 1370 1371 raise ParseError( 1372 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1373 errors=merge_errors(errors), 1374 ) from errors[-1] 1375 1376 def _parse( 1377 self, 1378 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1379 raw_tokens: t.List[Token], 1380 sql: t.Optional[str] = None, 1381 ) -> t.List[t.Optional[exp.Expression]]: 1382 self.reset() 1383 self.sql = sql or "" 1384 1385 total = len(raw_tokens) 1386 chunks: t.List[t.List[Token]] = [[]] 1387 1388 for i, token in enumerate(raw_tokens): 1389 if token.token_type == TokenType.SEMICOLON: 1390 if token.comments: 1391 chunks.append([token]) 1392 1393 if i < total - 1: 1394 chunks.append([]) 1395 else: 1396 chunks[-1].append(token) 1397 1398 expressions = [] 1399 1400 for tokens in chunks: 1401 self._index = -1 1402 self._tokens = tokens 1403 self._advance() 1404 1405 expressions.append(parse_method(self)) 1406 1407 if self._index < len(self._tokens): 1408 self.raise_error("Invalid expression / Unexpected token") 1409 1410 self.check_errors() 1411 1412 return expressions 1413 1414 def check_errors(self) -> None: 1415 """Logs or raises any found errors, depending on the chosen error level setting.""" 1416 if self.error_level == ErrorLevel.WARN: 1417 for error in self.errors: 1418 logger.error(str(error)) 1419 elif self.error_level == ErrorLevel.RAISE and self.errors: 1420 raise ParseError( 1421 concat_messages(self.errors, self.max_errors), 1422 errors=merge_errors(self.errors), 1423 ) 1424 1425 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1426 """ 1427 Appends an error in the list of recorded errors or raises it, depending on the chosen 1428 error level setting. 1429 """ 1430 token = token or self._curr or self._prev or Token.string("") 1431 start = token.start 1432 end = token.end + 1 1433 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1434 highlight = self.sql[start:end] 1435 end_context = self.sql[end : end + self.error_message_context] 1436 1437 error = ParseError.new( 1438 f"{message}. Line {token.line}, Col: {token.col}.\n" 1439 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1440 description=message, 1441 line=token.line, 1442 col=token.col, 1443 start_context=start_context, 1444 highlight=highlight, 1445 end_context=end_context, 1446 ) 1447 1448 if self.error_level == ErrorLevel.IMMEDIATE: 1449 raise error 1450 1451 self.errors.append(error) 1452 1453 def expression( 1454 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1455 ) -> E: 1456 """ 1457 Creates a new, validated Expression. 1458 1459 Args: 1460 exp_class: The expression class to instantiate. 1461 comments: An optional list of comments to attach to the expression. 1462 kwargs: The arguments to set for the expression along with their respective values. 1463 1464 Returns: 1465 The target expression. 
1466 """ 1467 instance = exp_class(**kwargs) 1468 instance.add_comments(comments) if comments else self._add_comments(instance) 1469 return self.validate_expression(instance) 1470 1471 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1472 if expression and self._prev_comments: 1473 expression.add_comments(self._prev_comments) 1474 self._prev_comments = None 1475 1476 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1477 """ 1478 Validates an Expression, making sure that all its mandatory arguments are set. 1479 1480 Args: 1481 expression: The expression to validate. 1482 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1483 1484 Returns: 1485 The validated expression. 1486 """ 1487 if self.error_level != ErrorLevel.IGNORE: 1488 for error_message in expression.error_messages(args): 1489 self.raise_error(error_message) 1490 1491 return expression 1492 1493 def _find_sql(self, start: Token, end: Token) -> str: 1494 return self.sql[start.start : end.end + 1] 1495 1496 def _is_connected(self) -> bool: 1497 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1498 1499 def _advance(self, times: int = 1) -> None: 1500 self._index += times 1501 self._curr = seq_get(self._tokens, self._index) 1502 self._next = seq_get(self._tokens, self._index + 1) 1503 1504 if self._index > 0: 1505 self._prev = self._tokens[self._index - 1] 1506 self._prev_comments = self._prev.comments 1507 else: 1508 self._prev = None 1509 self._prev_comments = None 1510 1511 def _retreat(self, index: int) -> None: 1512 if index != self._index: 1513 self._advance(index - self._index) 1514 1515 def _warn_unsupported(self) -> None: 1516 if len(self._tokens) <= 1: 1517 return 1518 1519 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1520 # interested in emitting a warning for the one being currently processed. 1521 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1522 1523 logger.warning( 1524 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1525 ) 1526 1527 def _parse_command(self) -> exp.Command: 1528 self._warn_unsupported() 1529 return self.expression( 1530 exp.Command, 1531 comments=self._prev_comments, 1532 this=self._prev.text.upper(), 1533 expression=self._parse_string(), 1534 ) 1535 1536 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1537 """ 1538 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1539 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1540 solve this by setting & resetting the parser state accordingly 1541 """ 1542 index = self._index 1543 error_level = self.error_level 1544 1545 self.error_level = ErrorLevel.IMMEDIATE 1546 try: 1547 this = parse_method() 1548 except ParseError: 1549 this = None 1550 finally: 1551 if not this or retreat: 1552 self._retreat(index) 1553 self.error_level = error_level 1554 1555 return this 1556 1557 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1558 start = self._prev 1559 exists = self._parse_exists() if allow_exists else None 1560 1561 self._match(TokenType.ON) 1562 1563 materialized = self._match_text_seq("MATERIALIZED") 1564 kind = self._match_set(self.CREATABLES) and self._prev 1565 if not kind: 1566 return self._parse_as_command(start) 1567 1568 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1569 this = self._parse_user_defined_function(kind=kind.token_type) 1570 elif kind.token_type == TokenType.TABLE: 1571 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1572 elif kind.token_type == TokenType.COLUMN: 1573 this = self._parse_column() 1574 else: 1575 this = self._parse_id_var() 1576 1577 self._match(TokenType.IS) 1578 1579 return self.expression( 1580 exp.Comment, 1581 this=this, 1582 kind=kind.text, 1583 expression=self._parse_string(), 1584 exists=exists, 1585 materialized=materialized, 1586 ) 1587 1588 def _parse_to_table( 1589 self, 1590 ) -> exp.ToTableProperty: 1591 table = self._parse_table_parts(schema=True) 1592 return self.expression(exp.ToTableProperty, this=table) 1593 1594 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1595 def _parse_ttl(self) -> exp.Expression: 1596 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1597 this = self._parse_bitwise() 1598 1599 if self._match_text_seq("DELETE"): 1600 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1601 if self._match_text_seq("RECOMPRESS"): 1602 return self.expression( 1603 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1604 ) 1605 if self._match_text_seq("TO", "DISK"): 1606 return self.expression( 1607 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1608 ) 1609 if self._match_text_seq("TO", "VOLUME"): 1610 return self.expression( 1611 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1612 ) 1613 1614 return this 1615 1616 expressions = self._parse_csv(_parse_ttl_action) 1617 where = self._parse_where() 1618 group = self._parse_group() 1619 1620 aggregates = None 1621 if group and self._match(TokenType.SET): 1622 aggregates = self._parse_csv(self._parse_set_item) 1623 1624 return self.expression( 1625 exp.MergeTreeTTL, 1626 expressions=expressions, 1627 where=where, 1628 group=group, 1629 aggregates=aggregates, 1630 ) 1631 1632 def _parse_statement(self) -> t.Optional[exp.Expression]: 1633 if self._curr is None: 1634 return None 1635 1636 if self._match_set(self.STATEMENT_PARSERS): 1637 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1638 1639 if self._match_set(self.dialect.tokenizer.COMMANDS): 1640 return self._parse_command() 1641 1642 expression = self._parse_expression() 1643 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1644 return self._parse_query_modifiers(expression) 1645 1646 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1647 start = 
self._prev 1648 temporary = self._match(TokenType.TEMPORARY) 1649 materialized = self._match_text_seq("MATERIALIZED") 1650 1651 kind = self._match_set(self.CREATABLES) and self._prev.text 1652 if not kind: 1653 return self._parse_as_command(start) 1654 1655 if_exists = exists or self._parse_exists() 1656 table = self._parse_table_parts( 1657 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1658 ) 1659 1660 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1661 1662 if self._match(TokenType.L_PAREN, advance=False): 1663 expressions = self._parse_wrapped_csv(self._parse_types) 1664 else: 1665 expressions = None 1666 1667 return self.expression( 1668 exp.Drop, 1669 comments=start.comments, 1670 exists=if_exists, 1671 this=table, 1672 expressions=expressions, 1673 kind=kind.upper(), 1674 temporary=temporary, 1675 materialized=materialized, 1676 cascade=self._match_text_seq("CASCADE"), 1677 constraints=self._match_text_seq("CONSTRAINTS"), 1678 purge=self._match_text_seq("PURGE"), 1679 cluster=cluster, 1680 ) 1681 1682 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1683 return ( 1684 self._match_text_seq("IF") 1685 and (not not_ or self._match(TokenType.NOT)) 1686 and self._match(TokenType.EXISTS) 1687 ) 1688 1689 def _parse_create(self) -> exp.Create | exp.Command: 1690 # Note: this can't be None because we've matched a statement parser 1691 start = self._prev 1692 comments = self._prev_comments 1693 1694 replace = ( 1695 start.token_type == TokenType.REPLACE 1696 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1697 or self._match_pair(TokenType.OR, TokenType.ALTER) 1698 ) 1699 1700 unique = self._match(TokenType.UNIQUE) 1701 1702 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1703 clustered = True 1704 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1705 "COLUMNSTORE" 1706 ): 1707 clustered = False 1708 else: 1709 clustered = None 1710 1711 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1712 self._advance() 1713 1714 properties = None 1715 create_token = self._match_set(self.CREATABLES) and self._prev 1716 1717 if not create_token: 1718 # exp.Properties.Location.POST_CREATE 1719 properties = self._parse_properties() 1720 create_token = self._match_set(self.CREATABLES) and self._prev 1721 1722 if not properties or not create_token: 1723 return self._parse_as_command(start) 1724 1725 concurrently = self._match_text_seq("CONCURRENTLY") 1726 exists = self._parse_exists(not_=True) 1727 this = None 1728 expression: t.Optional[exp.Expression] = None 1729 indexes = None 1730 no_schema_binding = None 1731 begin = None 1732 end = None 1733 clone = None 1734 1735 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1736 nonlocal properties 1737 if properties and temp_props: 1738 properties.expressions.extend(temp_props.expressions) 1739 elif temp_props: 1740 properties = temp_props 1741 1742 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1743 this = self._parse_user_defined_function(kind=create_token.token_type) 1744 1745 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1746 extend_props(self._parse_properties()) 1747 1748 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1749 extend_props(self._parse_properties()) 1750 1751 if not expression: 1752 if self._match(TokenType.COMMAND): 1753 expression = self._parse_as_command(self._prev) 1754 else: 1755 begin = 
self._match(TokenType.BEGIN) 1756 return_ = self._match_text_seq("RETURN") 1757 1758 if self._match(TokenType.STRING, advance=False): 1759 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1760 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1761 expression = self._parse_string() 1762 extend_props(self._parse_properties()) 1763 else: 1764 expression = self._parse_statement() 1765 1766 end = self._match_text_seq("END") 1767 1768 if return_: 1769 expression = self.expression(exp.Return, this=expression) 1770 elif create_token.token_type == TokenType.INDEX: 1771 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1772 if not self._match(TokenType.ON): 1773 index = self._parse_id_var() 1774 anonymous = False 1775 else: 1776 index = None 1777 anonymous = True 1778 1779 this = self._parse_index(index=index, anonymous=anonymous) 1780 elif create_token.token_type in self.DB_CREATABLES: 1781 table_parts = self._parse_table_parts( 1782 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1783 ) 1784 1785 # exp.Properties.Location.POST_NAME 1786 self._match(TokenType.COMMA) 1787 extend_props(self._parse_properties(before=True)) 1788 1789 this = self._parse_schema(this=table_parts) 1790 1791 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1792 extend_props(self._parse_properties()) 1793 1794 self._match(TokenType.ALIAS) 1795 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1796 # exp.Properties.Location.POST_ALIAS 1797 extend_props(self._parse_properties()) 1798 1799 if create_token.token_type == TokenType.SEQUENCE: 1800 expression = self._parse_types() 1801 extend_props(self._parse_properties()) 1802 else: 1803 expression = self._parse_ddl_select() 1804 1805 if create_token.token_type == TokenType.TABLE: 1806 # exp.Properties.Location.POST_EXPRESSION 1807 extend_props(self._parse_properties()) 1808 1809 indexes = [] 1810 while True: 1811 index = self._parse_index() 1812 1813 # exp.Properties.Location.POST_INDEX 1814 extend_props(self._parse_properties()) 1815 if not index: 1816 break 1817 else: 1818 self._match(TokenType.COMMA) 1819 indexes.append(index) 1820 elif create_token.token_type == TokenType.VIEW: 1821 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1822 no_schema_binding = True 1823 1824 shallow = self._match_text_seq("SHALLOW") 1825 1826 if self._match_texts(self.CLONE_KEYWORDS): 1827 copy = self._prev.text.lower() == "copy" 1828 clone = self.expression( 1829 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1830 ) 1831 1832 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1833 return self._parse_as_command(start) 1834 1835 return self.expression( 1836 exp.Create, 1837 comments=comments, 1838 this=this, 1839 kind=create_token.text.upper(), 1840 replace=replace, 1841 unique=unique, 1842 expression=expression, 1843 exists=exists, 1844 properties=properties, 1845 indexes=indexes, 1846 no_schema_binding=no_schema_binding, 1847 begin=begin, 1848 end=end, 1849 clone=clone, 1850 concurrently=concurrently, 1851 clustered=clustered, 1852 ) 1853 1854 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1855 seq = exp.SequenceProperties() 1856 1857 options = [] 1858 index = self._index 1859 1860 while self._curr: 1861 self._match(TokenType.COMMA) 1862 if self._match_text_seq("INCREMENT"): 1863 self._match_text_seq("BY") 1864
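# Editor's note (illustrative sketch): _parse_create above dispatches on the
# matched CREATABLE kind and merges properties found at several locations via
# extend_props. A hedged example, default dialect assumed:
#
#     import sqlglot
#     from sqlglot import exp
#
#     create = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t AS SELECT 1 AS x")
#     assert create.args["kind"] == "TABLE" and create.args.get("exists")
#     assert isinstance(create.expression, exp.Select)  # the DDL SELECT body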
self._match_text_seq("=") 1865 seq.set("increment", self._parse_term()) 1866 elif self._match_text_seq("MINVALUE"): 1867 seq.set("minvalue", self._parse_term()) 1868 elif self._match_text_seq("MAXVALUE"): 1869 seq.set("maxvalue", self._parse_term()) 1870 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1871 self._match_text_seq("=") 1872 seq.set("start", self._parse_term()) 1873 elif self._match_text_seq("CACHE"): 1874 # T-SQL allows empty CACHE which is initialized dynamically 1875 seq.set("cache", self._parse_number() or True) 1876 elif self._match_text_seq("OWNED", "BY"): 1877 # "OWNED BY NONE" is the default 1878 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1879 else: 1880 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1881 if opt: 1882 options.append(opt) 1883 else: 1884 break 1885 1886 seq.set("options", options if options else None) 1887 return None if self._index == index else seq 1888 1889 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1890 # only used for teradata currently 1891 self._match(TokenType.COMMA) 1892 1893 kwargs = { 1894 "no": self._match_text_seq("NO"), 1895 "dual": self._match_text_seq("DUAL"), 1896 "before": self._match_text_seq("BEFORE"), 1897 "default": self._match_text_seq("DEFAULT"), 1898 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1899 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1900 "after": self._match_text_seq("AFTER"), 1901 "minimum": self._match_texts(("MIN", "MINIMUM")), 1902 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1903 } 1904 1905 if self._match_texts(self.PROPERTY_PARSERS): 1906 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1907 try: 1908 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1909 except TypeError: 1910 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1911 1912 return None 1913 1914 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1915 return self._parse_wrapped_csv(self._parse_property) 1916 1917 def _parse_property(self) -> t.Optional[exp.Expression]: 1918 if self._match_texts(self.PROPERTY_PARSERS): 1919 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1920 1921 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1922 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1923 1924 if self._match_text_seq("COMPOUND", "SORTKEY"): 1925 return self._parse_sortkey(compound=True) 1926 1927 if self._match_text_seq("SQL", "SECURITY"): 1928 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1929 1930 index = self._index 1931 key = self._parse_column() 1932 1933 if not self._match(TokenType.EQ): 1934 self._retreat(index) 1935 return self._parse_sequence_properties() 1936 1937 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1938 if isinstance(key, exp.Column): 1939 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1940 1941 value = self._parse_bitwise() or self._parse_var(any_token=True) 1942 1943 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1944 if isinstance(value, exp.Column): 1945 value = exp.var(value.name) 1946 1947 return self.expression(exp.Property, this=key, value=value) 1948 1949 def _parse_stored(self) -> exp.FileFormatProperty: 1950 self._match(TokenType.ALIAS) 1951 1952 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") 
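# Editor's note (illustrative sketch): _parse_sequence_properties above folds
# INCREMENT BY / START WITH / CACHE / OWNED BY into one exp.SequenceProperties
# node. The example SQL is assumed to be accepted by the default dialect:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
#     props = ddl.find(exp.SequenceProperties)
#     assert props is not None and props.args.get("increment") is not None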
else None 1953 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1954 1955 return self.expression( 1956 exp.FileFormatProperty, 1957 this=( 1958 self.expression( 1959 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1960 ) 1961 if input_format or output_format 1962 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1963 ), 1964 ) 1965 1966 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1967 field = self._parse_field() 1968 if isinstance(field, exp.Identifier) and not field.quoted: 1969 field = exp.var(field) 1970 1971 return field 1972 1973 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1974 self._match(TokenType.EQ) 1975 self._match(TokenType.ALIAS) 1976 1977 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1978 1979 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1980 properties = [] 1981 while True: 1982 if before: 1983 prop = self._parse_property_before() 1984 else: 1985 prop = self._parse_property() 1986 if not prop: 1987 break 1988 for p in ensure_list(prop): 1989 properties.append(p) 1990 1991 if properties: 1992 return self.expression(exp.Properties, expressions=properties) 1993 1994 return None 1995 1996 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1997 return self.expression( 1998 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1999 ) 2000 2001 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2002 if self._index >= 2: 2003 pre_volatile_token = self._tokens[self._index - 2] 2004 else: 2005 pre_volatile_token = None 2006 2007 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2008 return exp.VolatileProperty() 2009 2010 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2011 2012 def _parse_retention_period(self) -> exp.Var: 2013 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2014 number = self._parse_number() 2015 number_str = f"{number} " if number else "" 2016 unit = self._parse_var(any_token=True) 2017 return exp.var(f"{number_str}{unit}") 2018 2019 def _parse_system_versioning_property( 2020 self, with_: bool = False 2021 ) -> exp.WithSystemVersioningProperty: 2022 self._match(TokenType.EQ) 2023 prop = self.expression( 2024 exp.WithSystemVersioningProperty, 2025 **{ # type: ignore 2026 "on": True, 2027 "with": with_, 2028 }, 2029 ) 2030 2031 if self._match_text_seq("OFF"): 2032 prop.set("on", False) 2033 return prop 2034 2035 self._match(TokenType.ON) 2036 if self._match(TokenType.L_PAREN): 2037 while self._curr and not self._match(TokenType.R_PAREN): 2038 if self._match_text_seq("HISTORY_TABLE", "="): 2039 prop.set("this", self._parse_table_parts()) 2040 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2041 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2042 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2043 prop.set("retention_period", self._parse_retention_period()) 2044 2045 self._match(TokenType.COMMA) 2046 2047 return prop 2048 2049 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2050 self._match(TokenType.EQ) 2051 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2052 prop = self.expression(exp.DataDeletionProperty, on=on) 2053 2054 if self._match(TokenType.L_PAREN): 2055 
while self._curr and not self._match(TokenType.R_PAREN): 2056 if self._match_text_seq("FILTER_COLUMN", "="): 2057 prop.set("filter_column", self._parse_column()) 2058 elif self._match_text_seq("RETENTION_PERIOD", "="): 2059 prop.set("retention_period", self._parse_retention_period()) 2060 2061 self._match(TokenType.COMMA) 2062 2063 return prop 2064 2065 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2066 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2067 prop = self._parse_system_versioning_property(with_=True) 2068 self._match_r_paren() 2069 return prop 2070 2071 if self._match(TokenType.L_PAREN, advance=False): 2072 return self._parse_wrapped_properties() 2073 2074 if self._match_text_seq("JOURNAL"): 2075 return self._parse_withjournaltable() 2076 2077 if self._match_texts(self.VIEW_ATTRIBUTES): 2078 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2079 2080 if self._match_text_seq("DATA"): 2081 return self._parse_withdata(no=False) 2082 elif self._match_text_seq("NO", "DATA"): 2083 return self._parse_withdata(no=True) 2084 2085 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2086 return self._parse_serde_properties(with_=True) 2087 2088 if self._match(TokenType.SCHEMA): 2089 return self.expression( 2090 exp.WithSchemaBindingProperty, 2091 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2092 ) 2093 2094 if not self._next: 2095 return None 2096 2097 return self._parse_withisolatedloading() 2098 2099 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2100 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2101 self._match(TokenType.EQ) 2102 2103 user = self._parse_id_var() 2104 self._match(TokenType.PARAMETER) 2105 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2106 2107 if not user or not host: 2108 return None 2109 2110 return exp.DefinerProperty(this=f"{user}@{host}") 2111 2112 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2113 self._match(TokenType.TABLE) 2114 self._match(TokenType.EQ) 2115 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2116 2117 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2118 return self.expression(exp.LogProperty, no=no) 2119 2120 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2121 return self.expression(exp.JournalProperty, **kwargs) 2122 2123 def _parse_checksum(self) -> exp.ChecksumProperty: 2124 self._match(TokenType.EQ) 2125 2126 on = None 2127 if self._match(TokenType.ON): 2128 on = True 2129 elif self._match_text_seq("OFF"): 2130 on = False 2131 2132 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2133 2134 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2135 return self.expression( 2136 exp.Cluster, 2137 expressions=( 2138 self._parse_wrapped_csv(self._parse_ordered) 2139 if wrapped 2140 else self._parse_csv(self._parse_ordered) 2141 ), 2142 ) 2143 2144 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2145 self._match_text_seq("BY") 2146 2147 self._match_l_paren() 2148 expressions = self._parse_csv(self._parse_column) 2149 self._match_r_paren() 2150 2151 if self._match_text_seq("SORTED", "BY"): 2152 self._match_l_paren() 2153 sorted_by = self._parse_csv(self._parse_ordered) 2154 self._match_r_paren() 2155 else: 2156 sorted_by = None 2157 2158 self._match(TokenType.INTO) 2159 buckets = self._parse_number() 2160 self._match_text_seq("BUCKETS") 2161 2162 
return self.expression( 2163 exp.ClusteredByProperty, 2164 expressions=expressions, 2165 sorted_by=sorted_by, 2166 buckets=buckets, 2167 ) 2168 2169 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2170 if not self._match_text_seq("GRANTS"): 2171 self._retreat(self._index - 1) 2172 return None 2173 2174 return self.expression(exp.CopyGrantsProperty) 2175 2176 def _parse_freespace(self) -> exp.FreespaceProperty: 2177 self._match(TokenType.EQ) 2178 return self.expression( 2179 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2180 ) 2181 2182 def _parse_mergeblockratio( 2183 self, no: bool = False, default: bool = False 2184 ) -> exp.MergeBlockRatioProperty: 2185 if self._match(TokenType.EQ): 2186 return self.expression( 2187 exp.MergeBlockRatioProperty, 2188 this=self._parse_number(), 2189 percent=self._match(TokenType.PERCENT), 2190 ) 2191 2192 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2193 2194 def _parse_datablocksize( 2195 self, 2196 default: t.Optional[bool] = None, 2197 minimum: t.Optional[bool] = None, 2198 maximum: t.Optional[bool] = None, 2199 ) -> exp.DataBlocksizeProperty: 2200 self._match(TokenType.EQ) 2201 size = self._parse_number() 2202 2203 units = None 2204 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2205 units = self._prev.text 2206 2207 return self.expression( 2208 exp.DataBlocksizeProperty, 2209 size=size, 2210 units=units, 2211 default=default, 2212 minimum=minimum, 2213 maximum=maximum, 2214 ) 2215 2216 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2217 self._match(TokenType.EQ) 2218 always = self._match_text_seq("ALWAYS") 2219 manual = self._match_text_seq("MANUAL") 2220 never = self._match_text_seq("NEVER") 2221 default = self._match_text_seq("DEFAULT") 2222 2223 autotemp = None 2224 if self._match_text_seq("AUTOTEMP"): 2225 autotemp = self._parse_schema() 2226 2227 return self.expression( 2228 exp.BlockCompressionProperty, 2229 always=always, 2230 manual=manual, 2231 never=never, 2232 default=default, 2233 autotemp=autotemp, 2234 ) 2235 2236 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2237 index = self._index 2238 no = self._match_text_seq("NO") 2239 concurrent = self._match_text_seq("CONCURRENT") 2240 2241 if not self._match_text_seq("ISOLATED", "LOADING"): 2242 self._retreat(index) 2243 return None 2244 2245 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2246 return self.expression( 2247 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2248 ) 2249 2250 def _parse_locking(self) -> exp.LockingProperty: 2251 if self._match(TokenType.TABLE): 2252 kind = "TABLE" 2253 elif self._match(TokenType.VIEW): 2254 kind = "VIEW" 2255 elif self._match(TokenType.ROW): 2256 kind = "ROW" 2257 elif self._match_text_seq("DATABASE"): 2258 kind = "DATABASE" 2259 else: 2260 kind = None 2261 2262 if kind in ("DATABASE", "TABLE", "VIEW"): 2263 this = self._parse_table_parts() 2264 else: 2265 this = None 2266 2267 if self._match(TokenType.FOR): 2268 for_or_in = "FOR" 2269 elif self._match(TokenType.IN): 2270 for_or_in = "IN" 2271 else: 2272 for_or_in = None 2273 2274 if self._match_text_seq("ACCESS"): 2275 lock_type = "ACCESS" 2276 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2277 lock_type = "EXCLUSIVE" 2278 elif self._match_text_seq("SHARE"): 2279 lock_type = "SHARE" 2280 elif self._match_text_seq("READ"): 2281 lock_type = "READ" 2282 elif 
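# Editor's note (illustrative sketch): _parse_clustered_by above handles the
# Hive-style CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS clause;
# assuming the hive dialect accepts this form:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one(
#         "CREATE TABLE t (x INT) CLUSTERED BY (x) INTO 4 BUCKETS", read="hive"
#     )
#     prop = ddl.find(exp.ClusteredByProperty)
#     assert prop is not None and prop.args.get("buckets") is not None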
self._match_text_seq("WRITE"): 2283 lock_type = "WRITE" 2284 elif self._match_text_seq("CHECKSUM"): 2285 lock_type = "CHECKSUM" 2286 else: 2287 lock_type = None 2288 2289 override = self._match_text_seq("OVERRIDE") 2290 2291 return self.expression( 2292 exp.LockingProperty, 2293 this=this, 2294 kind=kind, 2295 for_or_in=for_or_in, 2296 lock_type=lock_type, 2297 override=override, 2298 ) 2299 2300 def _parse_partition_by(self) -> t.List[exp.Expression]: 2301 if self._match(TokenType.PARTITION_BY): 2302 return self._parse_csv(self._parse_assignment) 2303 return [] 2304 2305 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2306 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2307 if self._match_text_seq("MINVALUE"): 2308 return exp.var("MINVALUE") 2309 if self._match_text_seq("MAXVALUE"): 2310 return exp.var("MAXVALUE") 2311 return self._parse_bitwise() 2312 2313 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2314 expression = None 2315 from_expressions = None 2316 to_expressions = None 2317 2318 if self._match(TokenType.IN): 2319 this = self._parse_wrapped_csv(self._parse_bitwise) 2320 elif self._match(TokenType.FROM): 2321 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2322 self._match_text_seq("TO") 2323 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2324 elif self._match_text_seq("WITH", "(", "MODULUS"): 2325 this = self._parse_number() 2326 self._match_text_seq(",", "REMAINDER") 2327 expression = self._parse_number() 2328 self._match_r_paren() 2329 else: 2330 self.raise_error("Failed to parse partition bound spec.") 2331 2332 return self.expression( 2333 exp.PartitionBoundSpec, 2334 this=this, 2335 expression=expression, 2336 from_expressions=from_expressions, 2337 to_expressions=to_expressions, 2338 ) 2339 2340 # https://www.postgresql.org/docs/current/sql-createtable.html 2341 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2342 if not self._match_text_seq("OF"): 2343 self._retreat(self._index - 1) 2344 return None 2345 2346 this = self._parse_table(schema=True) 2347 2348 if self._match(TokenType.DEFAULT): 2349 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2350 elif self._match_text_seq("FOR", "VALUES"): 2351 expression = self._parse_partition_bound_spec() 2352 else: 2353 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2354 2355 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2356 2357 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2358 self._match(TokenType.EQ) 2359 return self.expression( 2360 exp.PartitionedByProperty, 2361 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2362 ) 2363 2364 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2365 if self._match_text_seq("AND", "STATISTICS"): 2366 statistics = True 2367 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2368 statistics = False 2369 else: 2370 statistics = None 2371 2372 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2373 2374 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2375 if self._match_text_seq("SQL"): 2376 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2377 return None 2378 2379 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2380 if self._match_text_seq("SQL", "DATA"): 2381 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2382 
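# Editor's note (illustrative sketch): _parse_partitioned_of and
# _parse_partition_bound_spec above cover Postgres partition-child DDL;
# assuming postgres input:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one(
#         "CREATE TABLE m PARTITION OF p FOR VALUES FROM (1) TO (10)",
#         read="postgres",
#     )
#     assert ddl.find(exp.PartitionedOfProperty) is not None
#     assert ddl.find(exp.PartitionBoundSpec) is not None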
return None 2383 2384 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2385 if self._match_text_seq("PRIMARY", "INDEX"): 2386 return exp.NoPrimaryIndexProperty() 2387 if self._match_text_seq("SQL"): 2388 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2389 return None 2390 2391 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2392 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2393 return exp.OnCommitProperty() 2394 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2395 return exp.OnCommitProperty(delete=True) 2396 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2397 2398 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2399 if self._match_text_seq("SQL", "DATA"): 2400 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2401 return None 2402 2403 def _parse_distkey(self) -> exp.DistKeyProperty: 2404 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2405 2406 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2407 table = self._parse_table(schema=True) 2408 2409 options = [] 2410 while self._match_texts(("INCLUDING", "EXCLUDING")): 2411 this = self._prev.text.upper() 2412 2413 id_var = self._parse_id_var() 2414 if not id_var: 2415 return None 2416 2417 options.append( 2418 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2419 ) 2420 2421 return self.expression(exp.LikeProperty, this=table, expressions=options) 2422 2423 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2424 return self.expression( 2425 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2426 ) 2427 2428 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2429 self._match(TokenType.EQ) 2430 return self.expression( 2431 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2432 ) 2433 2434 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2435 self._match_text_seq("WITH", "CONNECTION") 2436 return self.expression( 2437 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2438 ) 2439 2440 def _parse_returns(self) -> exp.ReturnsProperty: 2441 value: t.Optional[exp.Expression] 2442 null = None 2443 is_table = self._match(TokenType.TABLE) 2444 2445 if is_table: 2446 if self._match(TokenType.LT): 2447 value = self.expression( 2448 exp.Schema, 2449 this="TABLE", 2450 expressions=self._parse_csv(self._parse_struct_types), 2451 ) 2452 if not self._match(TokenType.GT): 2453 self.raise_error("Expecting >") 2454 else: 2455 value = self._parse_schema(exp.var("TABLE")) 2456 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2457 null = True 2458 value = None 2459 else: 2460 value = self._parse_types() 2461 2462 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2463 2464 def _parse_describe(self) -> exp.Describe: 2465 kind = self._match_set(self.CREATABLES) and self._prev.text 2466 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2467 if self._match(TokenType.DOT): 2468 style = None 2469 self._retreat(self._index - 2) 2470 this = self._parse_table(schema=True) 2471 properties = self._parse_properties() 2472 expressions = properties.expressions if properties else None 2473 return self.expression( 2474 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2475 ) 2476 2477 def 
_parse_insert(self) -> exp.Insert: 2478 comments = ensure_list(self._prev_comments) 2479 hint = self._parse_hint() 2480 overwrite = self._match(TokenType.OVERWRITE) 2481 ignore = self._match(TokenType.IGNORE) 2482 local = self._match_text_seq("LOCAL") 2483 alternative = None 2484 is_function = None 2485 2486 if self._match_text_seq("DIRECTORY"): 2487 this: t.Optional[exp.Expression] = self.expression( 2488 exp.Directory, 2489 this=self._parse_var_or_string(), 2490 local=local, 2491 row_format=self._parse_row_format(match_row=True), 2492 ) 2493 else: 2494 if self._match(TokenType.OR): 2495 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2496 2497 self._match(TokenType.INTO) 2498 comments += ensure_list(self._prev_comments) 2499 self._match(TokenType.TABLE) 2500 is_function = self._match(TokenType.FUNCTION) 2501 2502 this = ( 2503 self._parse_table(schema=True, parse_partition=True) 2504 if not is_function 2505 else self._parse_function() 2506 ) 2507 2508 returning = self._parse_returning() 2509 2510 return self.expression( 2511 exp.Insert, 2512 comments=comments, 2513 hint=hint, 2514 is_function=is_function, 2515 this=this, 2516 stored=self._match_text_seq("STORED") and self._parse_stored(), 2517 by_name=self._match_text_seq("BY", "NAME"), 2518 exists=self._parse_exists(), 2519 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2520 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2521 conflict=self._parse_on_conflict(), 2522 returning=returning or self._parse_returning(), 2523 overwrite=overwrite, 2524 alternative=alternative, 2525 ignore=ignore, 2526 ) 2527 2528 def _parse_kill(self) -> exp.Kill: 2529 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2530 2531 return self.expression( 2532 exp.Kill, 2533 this=self._parse_primary(), 2534 kind=kind, 2535 ) 2536 2537 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2538 conflict = self._match_text_seq("ON", "CONFLICT") 2539 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2540 2541 if not conflict and not duplicate: 2542 return None 2543 2544 conflict_keys = None 2545 constraint = None 2546 2547 if conflict: 2548 if self._match_text_seq("ON", "CONSTRAINT"): 2549 constraint = self._parse_id_var() 2550 elif self._match(TokenType.L_PAREN): 2551 conflict_keys = self._parse_csv(self._parse_id_var) 2552 self._match_r_paren() 2553 2554 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2555 if self._prev.token_type == TokenType.UPDATE: 2556 self._match(TokenType.SET) 2557 expressions = self._parse_csv(self._parse_equality) 2558 else: 2559 expressions = None 2560 2561 return self.expression( 2562 exp.OnConflict, 2563 duplicate=duplicate, 2564 expressions=expressions, 2565 action=action, 2566 conflict_keys=conflict_keys, 2567 constraint=constraint, 2568 ) 2569 2570 def _parse_returning(self) -> t.Optional[exp.Returning]: 2571 if not self._match(TokenType.RETURNING): 2572 return None 2573 return self.expression( 2574 exp.Returning, 2575 expressions=self._parse_csv(self._parse_expression), 2576 into=self._match(TokenType.INTO) and self._parse_table_part(), 2577 ) 2578 2579 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2580 if not self._match(TokenType.FORMAT): 2581 return None 2582 return self._parse_row_format() 2583 2584 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2585 index = self._index 2586 
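# Editor's note (illustrative sketch): _parse_insert and _parse_on_conflict
# above attach conflict handling to the exp.Insert node; assuming postgres:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ins = sqlglot.parse_one(
#         "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING",
#         read="postgres",
#     )
#     assert isinstance(ins.args["conflict"], exp.OnConflict)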
with_ = with_ or self._match_text_seq("WITH") 2587 2588 if not self._match(TokenType.SERDE_PROPERTIES): 2589 self._retreat(index) 2590 return None 2591 return self.expression( 2592 exp.SerdeProperties, 2593 **{ # type: ignore 2594 "expressions": self._parse_wrapped_properties(), 2595 "with": with_, 2596 }, 2597 ) 2598 2599 def _parse_row_format( 2600 self, match_row: bool = False 2601 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2602 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2603 return None 2604 2605 if self._match_text_seq("SERDE"): 2606 this = self._parse_string() 2607 2608 serde_properties = self._parse_serde_properties() 2609 2610 return self.expression( 2611 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2612 ) 2613 2614 self._match_text_seq("DELIMITED") 2615 2616 kwargs = {} 2617 2618 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2619 kwargs["fields"] = self._parse_string() 2620 if self._match_text_seq("ESCAPED", "BY"): 2621 kwargs["escaped"] = self._parse_string() 2622 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2623 kwargs["collection_items"] = self._parse_string() 2624 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2625 kwargs["map_keys"] = self._parse_string() 2626 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2627 kwargs["lines"] = self._parse_string() 2628 if self._match_text_seq("NULL", "DEFINED", "AS"): 2629 kwargs["null"] = self._parse_string() 2630 2631 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2632 2633 def _parse_load(self) -> exp.LoadData | exp.Command: 2634 if self._match_text_seq("DATA"): 2635 local = self._match_text_seq("LOCAL") 2636 self._match_text_seq("INPATH") 2637 inpath = self._parse_string() 2638 overwrite = self._match(TokenType.OVERWRITE) 2639 self._match_pair(TokenType.INTO, TokenType.TABLE) 2640 2641 return self.expression( 2642 exp.LoadData, 2643 this=self._parse_table(schema=True), 2644 local=local, 2645 overwrite=overwrite, 2646 inpath=inpath, 2647 partition=self._parse_partition(), 2648 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2649 serde=self._match_text_seq("SERDE") and self._parse_string(), 2650 ) 2651 return self._parse_as_command(self._prev) 2652 2653 def _parse_delete(self) -> exp.Delete: 2654 # This handles MySQL's "Multiple-Table Syntax" 2655 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2656 tables = None 2657 comments = self._prev_comments 2658 if not self._match(TokenType.FROM, advance=False): 2659 tables = self._parse_csv(self._parse_table) or None 2660 2661 returning = self._parse_returning() 2662 2663 return self.expression( 2664 exp.Delete, 2665 comments=comments, 2666 tables=tables, 2667 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2668 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2669 where=self._parse_where(), 2670 returning=returning or self._parse_returning(), 2671 limit=self._parse_limit(), 2672 ) 2673 2674 def _parse_update(self) -> exp.Update: 2675 comments = self._prev_comments 2676 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2677 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2678 returning = self._parse_returning() 2679 return self.expression( 2680 exp.Update, 2681 comments=comments, 2682 **{ # type: ignore 2683 "this": this, 2684 "expressions": expressions, 2685 "from": 
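# Editor's note (illustrative sketch): _parse_delete above probes RETURNING
# both before and after the main clauses, so either position is tolerated;
# assuming postgres input:
#
#     import sqlglot
#     from sqlglot import exp
#
#     delete = sqlglot.parse_one(
#         "DELETE FROM t WHERE x = 1 RETURNING id", read="postgres"
#     )
#     assert isinstance(delete, exp.Delete) and delete.args.get("returning")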
self._parse_from(joins=True), 2686 "where": self._parse_where(), 2687 "returning": returning or self._parse_returning(), 2688 "order": self._parse_order(), 2689 "limit": self._parse_limit(), 2690 }, 2691 ) 2692 2693 def _parse_uncache(self) -> exp.Uncache: 2694 if not self._match(TokenType.TABLE): 2695 self.raise_error("Expecting TABLE after UNCACHE") 2696 2697 return self.expression( 2698 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2699 ) 2700 2701 def _parse_cache(self) -> exp.Cache: 2702 lazy = self._match_text_seq("LAZY") 2703 self._match(TokenType.TABLE) 2704 table = self._parse_table(schema=True) 2705 2706 options = [] 2707 if self._match_text_seq("OPTIONS"): 2708 self._match_l_paren() 2709 k = self._parse_string() 2710 self._match(TokenType.EQ) 2711 v = self._parse_string() 2712 options = [k, v] 2713 self._match_r_paren() 2714 2715 self._match(TokenType.ALIAS) 2716 return self.expression( 2717 exp.Cache, 2718 this=table, 2719 lazy=lazy, 2720 options=options, 2721 expression=self._parse_select(nested=True), 2722 ) 2723 2724 def _parse_partition(self) -> t.Optional[exp.Partition]: 2725 if not self._match(TokenType.PARTITION): 2726 return None 2727 2728 return self.expression( 2729 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2730 ) 2731 2732 def _parse_value(self) -> t.Optional[exp.Tuple]: 2733 if self._match(TokenType.L_PAREN): 2734 expressions = self._parse_csv(self._parse_expression) 2735 self._match_r_paren() 2736 return self.expression(exp.Tuple, expressions=expressions) 2737 2738 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2739 expression = self._parse_expression() 2740 if expression: 2741 return self.expression(exp.Tuple, expressions=[expression]) 2742 return None 2743 2744 def _parse_projections(self) -> t.List[exp.Expression]: 2745 return self._parse_expressions() 2746 2747 def _parse_select( 2748 self, 2749 nested: bool = False, 2750 table: bool = False, 2751 parse_subquery_alias: bool = True, 2752 parse_set_operation: bool = True, 2753 ) -> t.Optional[exp.Expression]: 2754 cte = self._parse_with() 2755 2756 if cte: 2757 this = self._parse_statement() 2758 2759 if not this: 2760 self.raise_error("Failed to parse any statement following CTE") 2761 return cte 2762 2763 if "with" in this.arg_types: 2764 this.set("with", cte) 2765 else: 2766 self.raise_error(f"{this.key} does not support CTE") 2767 this = cte 2768 2769 return this 2770 2771 # duckdb supports leading with FROM x 2772 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2773 2774 if self._match(TokenType.SELECT): 2775 comments = self._prev_comments 2776 2777 hint = self._parse_hint() 2778 2779 if self._next and not self._next.token_type == TokenType.DOT: 2780 all_ = self._match(TokenType.ALL) 2781 distinct = self._match_set(self.DISTINCT_TOKENS) 2782 else: 2783 all_, distinct = None, None 2784 2785 kind = ( 2786 self._match(TokenType.ALIAS) 2787 and self._match_texts(("STRUCT", "VALUE")) 2788 and self._prev.text.upper() 2789 ) 2790 2791 if distinct: 2792 distinct = self.expression( 2793 exp.Distinct, 2794 on=self._parse_value() if self._match(TokenType.ON) else None, 2795 ) 2796 2797 if all_ and distinct: 2798 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2799 2800 limit = self._parse_limit(top=True) 2801 projections = self._parse_projections() 2802 2803 this = self.expression( 2804 exp.Select, 2805 kind=kind, 2806 hint=hint, 2807 distinct=distinct, 2808 
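# Editor's note (illustrative sketch): the CTE branch above parses WITH first,
# then requires a statement to follow and attaches the exp.With node to it:
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
#     assert isinstance(q, exp.Select)
#     assert isinstance(q.args["with"], exp.With)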
expressions=projections, 2809 limit=limit, 2810 ) 2811 this.comments = comments 2812 2813 into = self._parse_into() 2814 if into: 2815 this.set("into", into) 2816 2817 if not from_: 2818 from_ = self._parse_from() 2819 2820 if from_: 2821 this.set("from", from_) 2822 2823 this = self._parse_query_modifiers(this) 2824 elif (table or nested) and self._match(TokenType.L_PAREN): 2825 if self._match(TokenType.PIVOT): 2826 this = self._parse_simplified_pivot() 2827 elif self._match(TokenType.FROM): 2828 this = exp.select("*").from_( 2829 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2830 ) 2831 else: 2832 this = ( 2833 self._parse_table() 2834 if table 2835 else self._parse_select(nested=True, parse_set_operation=False) 2836 ) 2837 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2838 2839 self._match_r_paren() 2840 2841 # We return early here so that the UNION isn't attached to the subquery by the 2842 # following call to _parse_set_operations, but instead becomes the parent node 2843 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2844 elif self._match(TokenType.VALUES, advance=False): 2845 this = self._parse_derived_table_values() 2846 elif from_: 2847 this = exp.select("*").from_(from_.this, copy=False) 2848 elif self._match(TokenType.SUMMARIZE): 2849 table = self._match(TokenType.TABLE) 2850 this = self._parse_select() or self._parse_string() or self._parse_table() 2851 return self.expression(exp.Summarize, this=this, table=table) 2852 elif self._match(TokenType.DESCRIBE): 2853 this = self._parse_describe() 2854 else: 2855 this = None 2856 2857 return self._parse_set_operations(this) if parse_set_operation else this 2858 2859 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2860 if not skip_with_token and not self._match(TokenType.WITH): 2861 return None 2862 2863 comments = self._prev_comments 2864 recursive = self._match(TokenType.RECURSIVE) 2865 2866 expressions = [] 2867 while True: 2868 expressions.append(self._parse_cte()) 2869 2870 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2871 break 2872 else: 2873 self._match(TokenType.WITH) 2874 2875 return self.expression( 2876 exp.With, comments=comments, expressions=expressions, recursive=recursive 2877 ) 2878 2879 def _parse_cte(self) -> exp.CTE: 2880 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2881 if not alias or not alias.this: 2882 self.raise_error("Expected CTE to have alias") 2883 2884 self._match(TokenType.ALIAS) 2885 comments = self._prev_comments 2886 2887 if self._match_text_seq("NOT", "MATERIALIZED"): 2888 materialized = False 2889 elif self._match_text_seq("MATERIALIZED"): 2890 materialized = True 2891 else: 2892 materialized = None 2893 2894 return self.expression( 2895 exp.CTE, 2896 this=self._parse_wrapped(self._parse_statement), 2897 alias=alias, 2898 materialized=materialized, 2899 comments=comments, 2900 ) 2901 2902 def _parse_table_alias( 2903 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2904 ) -> t.Optional[exp.TableAlias]: 2905 any_token = self._match(TokenType.ALIAS) 2906 alias = ( 2907 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2908 or self._parse_string_as_identifier() 2909 ) 2910 2911 index = self._index 2912 if self._match(TokenType.L_PAREN): 2913 columns = self._parse_csv(self._parse_function_parameter) 2914 self._match_r_paren() if columns else self._retreat(index) 2915 else: 2916 columns = None 2917 2918 if not alias and not columns: 
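# Editor's note (illustrative sketch): two behaviors from the code above: a
# leading FROM (duckdb style) expands to SELECT *, and a CTE may carry a
# [NOT] MATERIALIZED marker; duckdb and postgres inputs assumed:
#
#     import sqlglot
#
#     assert sqlglot.parse_one("FROM tbl", read="duckdb").sql() == "SELECT * FROM tbl"
#
#     q = sqlglot.parse_one(
#         "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c", read="postgres"
#     )
#     assert q.args["with"].expressions[0].args.get("materialized") is True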
2919 return None 2920 2921 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2922 2923 # We bubble up comments from the Identifier to the TableAlias 2924 if isinstance(alias, exp.Identifier): 2925 table_alias.add_comments(alias.pop_comments()) 2926 2927 return table_alias 2928 2929 def _parse_subquery( 2930 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2931 ) -> t.Optional[exp.Subquery]: 2932 if not this: 2933 return None 2934 2935 return self.expression( 2936 exp.Subquery, 2937 this=this, 2938 pivots=self._parse_pivots(), 2939 alias=self._parse_table_alias() if parse_alias else None, 2940 ) 2941 2942 def _implicit_unnests_to_explicit(self, this: E) -> E: 2943 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2944 2945 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2946 for i, join in enumerate(this.args.get("joins") or []): 2947 table = join.this 2948 normalized_table = table.copy() 2949 normalized_table.meta["maybe_column"] = True 2950 normalized_table = _norm(normalized_table, dialect=self.dialect) 2951 2952 if isinstance(table, exp.Table) and not join.args.get("on"): 2953 if normalized_table.parts[0].name in refs: 2954 table_as_column = table.to_column() 2955 unnest = exp.Unnest(expressions=[table_as_column]) 2956 2957 # Table.to_column creates a parent Alias node that we want to convert to 2958 # a TableAlias and attach to the Unnest, so it matches the parser's output 2959 if isinstance(table.args.get("alias"), exp.TableAlias): 2960 table_as_column.replace(table_as_column.this) 2961 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2962 2963 table.replace(unnest) 2964 2965 refs.add(normalized_table.alias_or_name) 2966 2967 return this 2968 2969 def _parse_query_modifiers( 2970 self, this: t.Optional[exp.Expression] 2971 ) -> t.Optional[exp.Expression]: 2972 if isinstance(this, (exp.Query, exp.Table)): 2973 for join in self._parse_joins(): 2974 this.append("joins", join) 2975 for lateral in iter(self._parse_lateral, None): 2976 this.append("laterals", lateral) 2977 2978 while True: 2979 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2980 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2981 key, expression = parser(self) 2982 2983 if expression: 2984 this.set(key, expression) 2985 if key == "limit": 2986 offset = expression.args.pop("offset", None) 2987 2988 if offset: 2989 offset = exp.Offset(expression=offset) 2990 this.set("offset", offset) 2991 2992 limit_by_expressions = expression.expressions 2993 expression.set("expressions", None) 2994 offset.set("expressions", limit_by_expressions) 2995 continue 2996 break 2997 2998 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2999 this = self._implicit_unnests_to_explicit(this) 3000 3001 return this 3002 3003 def _parse_hint(self) -> t.Optional[exp.Hint]: 3004 if self._match(TokenType.HINT): 3005 hints = [] 3006 for hint in iter( 3007 lambda: self._parse_csv( 3008 lambda: self._parse_function() or self._parse_var(upper=True) 3009 ), 3010 [], 3011 ): 3012 hints.extend(hint) 3013 3014 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3015 self.raise_error("Expected */ after HINT") 3016 3017 return self.expression(exp.Hint, expressions=hints) 3018 3019 return None 3020 3021 def _parse_into(self) -> t.Optional[exp.Into]: 3022 if not self._match(TokenType.INTO): 3023 return None 3024 3025 temp = self._match(TokenType.TEMPORARY) 3026 unlogged = 
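# Editor's note (illustrative sketch): _parse_query_modifiers above loops over
# QUERY_MODIFIER_PARSERS, so trailing clauses land as args on the query node:
#
#     import sqlglot
#
#     q = sqlglot.parse_one("SELECT x FROM t WHERE x > 1 ORDER BY x LIMIT 5")
#     assert q.args.get("where") is not None
#     assert q.args.get("order") is not None and q.args.get("limit") is not None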
self._match_text_seq("UNLOGGED") 3027 self._match(TokenType.TABLE) 3028 3029 return self.expression( 3030 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3031 ) 3032 3033 def _parse_from( 3034 self, joins: bool = False, skip_from_token: bool = False 3035 ) -> t.Optional[exp.From]: 3036 if not skip_from_token and not self._match(TokenType.FROM): 3037 return None 3038 3039 return self.expression( 3040 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3041 ) 3042 3043 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3044 return self.expression( 3045 exp.MatchRecognizeMeasure, 3046 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3047 this=self._parse_expression(), 3048 ) 3049 3050 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3051 if not self._match(TokenType.MATCH_RECOGNIZE): 3052 return None 3053 3054 self._match_l_paren() 3055 3056 partition = self._parse_partition_by() 3057 order = self._parse_order() 3058 3059 measures = ( 3060 self._parse_csv(self._parse_match_recognize_measure) 3061 if self._match_text_seq("MEASURES") 3062 else None 3063 ) 3064 3065 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3066 rows = exp.var("ONE ROW PER MATCH") 3067 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3068 text = "ALL ROWS PER MATCH" 3069 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3070 text += " SHOW EMPTY MATCHES" 3071 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3072 text += " OMIT EMPTY MATCHES" 3073 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3074 text += " WITH UNMATCHED ROWS" 3075 rows = exp.var(text) 3076 else: 3077 rows = None 3078 3079 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3080 text = "AFTER MATCH SKIP" 3081 if self._match_text_seq("PAST", "LAST", "ROW"): 3082 text += " PAST LAST ROW" 3083 elif self._match_text_seq("TO", "NEXT", "ROW"): 3084 text += " TO NEXT ROW" 3085 elif self._match_text_seq("TO", "FIRST"): 3086 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3087 elif self._match_text_seq("TO", "LAST"): 3088 text += f" TO LAST {self._advance_any().text}" # type: ignore 3089 after = exp.var(text) 3090 else: 3091 after = None 3092 3093 if self._match_text_seq("PATTERN"): 3094 self._match_l_paren() 3095 3096 if not self._curr: 3097 self.raise_error("Expecting )", self._curr) 3098 3099 paren = 1 3100 start = self._curr 3101 3102 while self._curr and paren > 0: 3103 if self._curr.token_type == TokenType.L_PAREN: 3104 paren += 1 3105 if self._curr.token_type == TokenType.R_PAREN: 3106 paren -= 1 3107 3108 end = self._prev 3109 self._advance() 3110 3111 if paren > 0: 3112 self.raise_error("Expecting )", self._curr) 3113 3114 pattern = exp.var(self._find_sql(start, end)) 3115 else: 3116 pattern = None 3117 3118 define = ( 3119 self._parse_csv(self._parse_name_as_expression) 3120 if self._match_text_seq("DEFINE") 3121 else None 3122 ) 3123 3124 self._match_r_paren() 3125 3126 return self.expression( 3127 exp.MatchRecognize, 3128 partition_by=partition, 3129 order=order, 3130 measures=measures, 3131 rows=rows, 3132 after=after, 3133 pattern=pattern, 3134 define=define, 3135 alias=self._parse_table_alias(), 3136 ) 3137 3138 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3139 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3140 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3141 cross_apply = False 3142 3143 if cross_apply is not None: 
3144 this = self._parse_select(table=True) 3145 view = None 3146 outer = None 3147 elif self._match(TokenType.LATERAL): 3148 this = self._parse_select(table=True) 3149 view = self._match(TokenType.VIEW) 3150 outer = self._match(TokenType.OUTER) 3151 else: 3152 return None 3153 3154 if not this: 3155 this = ( 3156 self._parse_unnest() 3157 or self._parse_function() 3158 or self._parse_id_var(any_token=False) 3159 ) 3160 3161 while self._match(TokenType.DOT): 3162 this = exp.Dot( 3163 this=this, 3164 expression=self._parse_function() or self._parse_id_var(any_token=False), 3165 ) 3166 3167 if view: 3168 table = self._parse_id_var(any_token=False) 3169 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3170 table_alias: t.Optional[exp.TableAlias] = self.expression( 3171 exp.TableAlias, this=table, columns=columns 3172 ) 3173 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3174 # We move the alias from the lateral's child node to the lateral itself 3175 table_alias = this.args["alias"].pop() 3176 else: 3177 table_alias = self._parse_table_alias() 3178 3179 return self.expression( 3180 exp.Lateral, 3181 this=this, 3182 view=view, 3183 outer=outer, 3184 alias=table_alias, 3185 cross_apply=cross_apply, 3186 ) 3187 3188 def _parse_join_parts( 3189 self, 3190 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3191 return ( 3192 self._match_set(self.JOIN_METHODS) and self._prev, 3193 self._match_set(self.JOIN_SIDES) and self._prev, 3194 self._match_set(self.JOIN_KINDS) and self._prev, 3195 ) 3196 3197 def _parse_join( 3198 self, skip_join_token: bool = False, parse_bracket: bool = False 3199 ) -> t.Optional[exp.Join]: 3200 if self._match(TokenType.COMMA): 3201 return self.expression(exp.Join, this=self._parse_table()) 3202 3203 index = self._index 3204 method, side, kind = self._parse_join_parts() 3205 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3206 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3207 3208 if not skip_join_token and not join: 3209 self._retreat(index) 3210 kind = None 3211 method = None 3212 side = None 3213 3214 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3215 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3216 3217 if not skip_join_token and not join and not outer_apply and not cross_apply: 3218 return None 3219 3220 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3221 3222 if method: 3223 kwargs["method"] = method.text 3224 if side: 3225 kwargs["side"] = side.text 3226 if kind: 3227 kwargs["kind"] = kind.text 3228 if hint: 3229 kwargs["hint"] = hint 3230 3231 if self._match(TokenType.MATCH_CONDITION): 3232 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3233 3234 if self._match(TokenType.ON): 3235 kwargs["on"] = self._parse_assignment() 3236 elif self._match(TokenType.USING): 3237 kwargs["using"] = self._parse_wrapped_id_vars() 3238 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3239 kind and kind.token_type == TokenType.CROSS 3240 ): 3241 index = self._index 3242 joins: t.Optional[list] = list(self._parse_joins()) 3243 3244 if joins and self._match(TokenType.ON): 3245 kwargs["on"] = self._parse_assignment() 3246 elif joins and self._match(TokenType.USING): 3247 kwargs["using"] = self._parse_wrapped_id_vars() 3248 else: 3249 joins = None 3250 self._retreat(index) 3251 3252 kwargs["this"].set("joins", joins if 
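# Editor's note (illustrative sketch): _parse_join_parts above splits the
# optional method/side/kind tokens, which surface on exp.Join:
#
#     import sqlglot
#
#     q = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
#     join = q.args["joins"][0]
#     assert join.side == "LEFT" and join.args.get("on") is not None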
joins else None) 3253 3254 comments = [c for token in (method, side, kind) if token for c in token.comments] 3255 return self.expression(exp.Join, comments=comments, **kwargs) 3256 3257 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3258 this = self._parse_assignment() 3259 3260 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3261 return this 3262 3263 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3264 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3265 3266 return this 3267 3268 def _parse_index_params(self) -> exp.IndexParameters: 3269 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3270 3271 if self._match(TokenType.L_PAREN, advance=False): 3272 columns = self._parse_wrapped_csv(self._parse_with_operator) 3273 else: 3274 columns = None 3275 3276 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3277 partition_by = self._parse_partition_by() 3278 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3279 tablespace = ( 3280 self._parse_var(any_token=True) 3281 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3282 else None 3283 ) 3284 where = self._parse_where() 3285 3286 on = self._parse_field() if self._match(TokenType.ON) else None 3287 3288 return self.expression( 3289 exp.IndexParameters, 3290 using=using, 3291 columns=columns, 3292 include=include, 3293 partition_by=partition_by, 3294 where=where, 3295 with_storage=with_storage, 3296 tablespace=tablespace, 3297 on=on, 3298 ) 3299 3300 def _parse_index( 3301 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3302 ) -> t.Optional[exp.Index]: 3303 if index or anonymous: 3304 unique = None 3305 primary = None 3306 amp = None 3307 3308 self._match(TokenType.ON) 3309 self._match(TokenType.TABLE) # hive 3310 table = self._parse_table_parts(schema=True) 3311 else: 3312 unique = self._match(TokenType.UNIQUE) 3313 primary = self._match_text_seq("PRIMARY") 3314 amp = self._match_text_seq("AMP") 3315 3316 if not self._match(TokenType.INDEX): 3317 return None 3318 3319 index = self._parse_id_var() 3320 table = None 3321 3322 params = self._parse_index_params() 3323 3324 return self.expression( 3325 exp.Index, 3326 this=index, 3327 table=table, 3328 unique=unique, 3329 primary=primary, 3330 amp=amp, 3331 params=params, 3332 ) 3333 3334 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3335 hints: t.List[exp.Expression] = [] 3336 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3337 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3338 hints.append( 3339 self.expression( 3340 exp.WithTableHint, 3341 expressions=self._parse_csv( 3342 lambda: self._parse_function() or self._parse_var(any_token=True) 3343 ), 3344 ) 3345 ) 3346 self._match_r_paren() 3347 else: 3348 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3349 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3350 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3351 3352 self._match_set((TokenType.INDEX, TokenType.KEY)) 3353 if self._match(TokenType.FOR): 3354 hint.set("target", self._advance_any() and self._prev.text.upper()) 3355 3356 hint.set("expressions", self._parse_wrapped_id_vars()) 3357 hints.append(hint) 3358 3359 return hints or None 3360 3361 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3362 return ( 3363 (not schema and 
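# Editor's note (illustrative sketch): _parse_table_hints above covers both
# T-SQL WITH (...) table hints and MySQL index hints; assuming tsql input:
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
#     assert q.find(exp.WithTableHint) is not None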
self._parse_function(optional_parens=False)) 3364 or self._parse_id_var(any_token=False) 3365 or self._parse_string_as_identifier() 3366 or self._parse_placeholder() 3367 ) 3368 3369 def _parse_table_parts( 3370 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3371 ) -> exp.Table: 3372 catalog = None 3373 db = None 3374 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3375 3376 while self._match(TokenType.DOT): 3377 if catalog: 3378 # This allows nesting the table in arbitrarily many dot expressions if needed 3379 table = self.expression( 3380 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3381 ) 3382 else: 3383 catalog = db 3384 db = table 3385 # "" used for tsql FROM a..b case 3386 table = self._parse_table_part(schema=schema) or "" 3387 3388 if ( 3389 wildcard 3390 and self._is_connected() 3391 and (isinstance(table, exp.Identifier) or not table) 3392 and self._match(TokenType.STAR) 3393 ): 3394 if isinstance(table, exp.Identifier): 3395 table.args["this"] += "*" 3396 else: 3397 table = exp.Identifier(this="*") 3398 3399 # We bubble up comments from the Identifier to the Table 3400 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3401 3402 if is_db_reference: 3403 catalog = db 3404 db = table 3405 table = None 3406 3407 if not table and not is_db_reference: 3408 self.raise_error(f"Expected table name but got {self._curr}") 3409 if not db and is_db_reference: 3410 self.raise_error(f"Expected database name but got {self._curr}") 3411 3412 table = self.expression( 3413 exp.Table, 3414 comments=comments, 3415 this=table, 3416 db=db, 3417 catalog=catalog, 3418 ) 3419 3420 changes = self._parse_changes() 3421 if changes: 3422 table.set("changes", changes) 3423 3424 at_before = self._parse_historical_data() 3425 if at_before: 3426 table.set("when", at_before) 3427 3428 pivots = self._parse_pivots() 3429 if pivots: 3430 table.set("pivots", pivots) 3431 3432 return table 3433 3434 def _parse_table( 3435 self, 3436 schema: bool = False, 3437 joins: bool = False, 3438 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3439 parse_bracket: bool = False, 3440 is_db_reference: bool = False, 3441 parse_partition: bool = False, 3442 ) -> t.Optional[exp.Expression]: 3443 lateral = self._parse_lateral() 3444 if lateral: 3445 return lateral 3446 3447 unnest = self._parse_unnest() 3448 if unnest: 3449 return unnest 3450 3451 values = self._parse_derived_table_values() 3452 if values: 3453 return values 3454 3455 subquery = self._parse_select(table=True) 3456 if subquery: 3457 if not subquery.args.get("pivots"): 3458 subquery.set("pivots", self._parse_pivots()) 3459 return subquery 3460 3461 bracket = parse_bracket and self._parse_bracket(None) 3462 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3463 3464 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3465 self._parse_table 3466 ) 3467 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3468 3469 only = self._match(TokenType.ONLY) 3470 3471 this = t.cast( 3472 exp.Expression, 3473 bracket 3474 or rows_from 3475 or self._parse_bracket( 3476 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3477 ), 3478 ) 3479 3480 if only: 3481 this.set("only", only) 3482 3483 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3484 self._match_text_seq("*") 3485 3486 parse_partition = parse_partition or 
self.SUPPORTS_PARTITION_SELECTION 3487 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3488 this.set("partition", self._parse_partition()) 3489 3490 if schema: 3491 return self._parse_schema(this=this) 3492 3493 version = self._parse_version() 3494 3495 if version: 3496 this.set("version", version) 3497 3498 if self.dialect.ALIAS_POST_TABLESAMPLE: 3499 table_sample = self._parse_table_sample() 3500 3501 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3502 if alias: 3503 this.set("alias", alias) 3504 3505 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3506 return self.expression( 3507 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3508 ) 3509 3510 this.set("hints", self._parse_table_hints()) 3511 3512 if not this.args.get("pivots"): 3513 this.set("pivots", self._parse_pivots()) 3514 3515 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3516 table_sample = self._parse_table_sample() 3517 3518 if table_sample: 3519 table_sample.set("this", this) 3520 this = table_sample 3521 3522 if joins: 3523 for join in self._parse_joins(): 3524 this.append("joins", join) 3525 3526 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3527 this.set("ordinality", True) 3528 this.set("alias", self._parse_table_alias()) 3529 3530 return this 3531 3532 def _parse_version(self) -> t.Optional[exp.Version]: 3533 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3534 this = "TIMESTAMP" 3535 elif self._match(TokenType.VERSION_SNAPSHOT): 3536 this = "VERSION" 3537 else: 3538 return None 3539 3540 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3541 kind = self._prev.text.upper() 3542 start = self._parse_bitwise() 3543 self._match_texts(("TO", "AND")) 3544 end = self._parse_bitwise() 3545 expression: t.Optional[exp.Expression] = self.expression( 3546 exp.Tuple, expressions=[start, end] 3547 ) 3548 elif self._match_text_seq("CONTAINED", "IN"): 3549 kind = "CONTAINED IN" 3550 expression = self.expression( 3551 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3552 ) 3553 elif self._match(TokenType.ALL): 3554 kind = "ALL" 3555 expression = None 3556 else: 3557 self._match_text_seq("AS", "OF") 3558 kind = "AS OF" 3559 expression = self._parse_type() 3560 3561 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3562 3563 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3564 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3565 index = self._index 3566 historical_data = None 3567 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3568 this = self._prev.text.upper() 3569 kind = ( 3570 self._match(TokenType.L_PAREN) 3571 and self._match_texts(self.HISTORICAL_DATA_KIND) 3572 and self._prev.text.upper() 3573 ) 3574 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3575 3576 if expression: 3577 self._match_r_paren() 3578 historical_data = self.expression( 3579 exp.HistoricalData, this=this, kind=kind, expression=expression 3580 ) 3581 else: 3582 self._retreat(index) 3583 3584 return historical_data 3585 3586 def _parse_changes(self) -> t.Optional[exp.Changes]: 3587 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3588 return None 3589 3590 information = self._parse_var(any_token=True) 3591 self._match_r_paren() 3592 3593 return self.expression( 3594 exp.Changes, 3595 information=information, 3596 at_before=self._parse_historical_data(), 3597 end=self._parse_historical_data(), 3598 ) 3599 3600 def 
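# Editor's note (illustrative sketch): _parse_historical_data above backs
# Snowflake-style AT/BEFORE time travel, stored under the table's "when" arg;
# assuming snowflake input:
#
#     import sqlglot
#     from sqlglot import exp
#
#     q = sqlglot.parse_one(
#         "SELECT * FROM t AT (TIMESTAMP => '2024-01-01')", read="snowflake"
#     )
#     table = q.find(exp.Table)
#     assert isinstance(table.args.get("when"), exp.HistoricalData)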
3601 if not self._match(TokenType.UNNEST): 3602 return None 3603 3604 expressions = self._parse_wrapped_csv(self._parse_equality) 3605 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3606 3607 alias = self._parse_table_alias() if with_alias else None 3608 3609 if alias: 3610 if self.dialect.UNNEST_COLUMN_ONLY: 3611 if alias.args.get("columns"): 3612 self.raise_error("Unexpected extra column alias in unnest.") 3613 3614 alias.set("columns", [alias.this]) 3615 alias.set("this", None) 3616 3617 columns = alias.args.get("columns") or [] 3618 if offset and len(expressions) < len(columns): 3619 offset = columns.pop() 3620 3621 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3622 self._match(TokenType.ALIAS) 3623 offset = self._parse_id_var( 3624 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3625 ) or exp.to_identifier("offset") 3626 3627 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3628 3629 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3630 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3631 if not is_derived and not self._match_text_seq("VALUES"): 3632 return None 3633 3634 expressions = self._parse_csv(self._parse_value) 3635 alias = self._parse_table_alias() 3636 3637 if is_derived: 3638 self._match_r_paren() 3639 3640 return self.expression( 3641 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3642 ) 3643 3644 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3645 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3646 as_modifier and self._match_text_seq("USING", "SAMPLE") 3647 ): 3648 return None 3649 3650 bucket_numerator = None 3651 bucket_denominator = None 3652 bucket_field = None 3653 percent = None 3654 size = None 3655 seed = None 3656 3657 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3658 matched_l_paren = self._match(TokenType.L_PAREN) 3659 3660 if self.TABLESAMPLE_CSV: 3661 num = None 3662 expressions = self._parse_csv(self._parse_primary) 3663 else: 3664 expressions = None 3665 num = ( 3666 self._parse_factor() 3667 if self._match(TokenType.NUMBER, advance=False) 3668 else self._parse_primary() or self._parse_placeholder() 3669 ) 3670 3671 if self._match_text_seq("BUCKET"): 3672 bucket_numerator = self._parse_number() 3673 self._match_text_seq("OUT", "OF") 3674 bucket_denominator = self._parse_number() 3675 self._match(TokenType.ON) 3676 bucket_field = self._parse_field() 3677 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3678 percent = num 3679 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3680 size = num 3681 else: 3682 percent = num 3683 3684 if matched_l_paren: 3685 self._match_r_paren() 3686 3687 if self._match(TokenType.L_PAREN): 3688 method = self._parse_var(upper=True) 3689 seed = self._match(TokenType.COMMA) and self._parse_number() 3690 self._match_r_paren() 3691 elif self._match_texts(("SEED", "REPEATABLE")): 3692 seed = self._parse_wrapped(self._parse_number) 3693 3694 if not method and self.DEFAULT_SAMPLING_METHOD: 3695 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3696 3697 return self.expression( 3698 exp.TableSample, 3699 expressions=expressions, 3700 method=method, 3701 bucket_numerator=bucket_numerator, 3702 bucket_denominator=bucket_denominator, 3703 bucket_field=bucket_field, 3704 percent=percent, 3705
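size=size, 3706 seed=seed, 3707 )
# Editorial sketch (annotation, not in the original source): the branches above fold the
# dialect-specific sampling forms into a single exp.TableSample, e.g. Postgres'
# "TABLESAMPLE SYSTEM (10)" fills method and percent/size, while Hive's
# "TABLESAMPLE (BUCKET 3 OUT OF 16 ON rand())" fills the bucket_* args; whether a bare
# number means a percentage or a row count follows dialect.TABLESAMPLE_SIZE_IS_PERCENT.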
3708 3709 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3710 return list(iter(self._parse_pivot, None)) or None 3711 3712 def _parse_joins(self) -> t.Iterator[exp.Join]: 3713 return iter(self._parse_join, None) 3714 3715 # https://duckdb.org/docs/sql/statements/pivot 3716 def _parse_simplified_pivot(self) -> exp.Pivot: 3717 def _parse_on() -> t.Optional[exp.Expression]: 3718 this = self._parse_bitwise() 3719 return self._parse_in(this) if self._match(TokenType.IN) else this 3720 3721 this = self._parse_table() 3722 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3723 using = self._match(TokenType.USING) and self._parse_csv( 3724 lambda: self._parse_alias(self._parse_function()) 3725 ) 3726 group = self._parse_group() 3727 return self.expression( 3728 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3729 ) 3730 3731 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3732 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3733 this = self._parse_select_or_expression() 3734 3735 self._match(TokenType.ALIAS) 3736 alias = self._parse_field() 3737 if alias: 3738 return self.expression(exp.PivotAlias, this=this, alias=alias) 3739 3740 return this 3741 3742 value = self._parse_column() 3743 3744 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3745 self.raise_error("Expecting IN (") 3746 3747 if self._match(TokenType.ANY): 3748 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3749 else: 3750 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3751 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3752 3753 self._match_r_paren() 3754 return expr 3755 3756 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3757 index = self._index 3758 include_nulls = None 3759 3760 if self._match(TokenType.PIVOT): 3761 unpivot = False 3762 elif self._match(TokenType.UNPIVOT): 3763 unpivot = True 3764 3765 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3766 if self._match_text_seq("INCLUDE", "NULLS"): 3767 include_nulls = True 3768 elif self._match_text_seq("EXCLUDE", "NULLS"): 3769 include_nulls = False 3770 else: 3771 return None 3772 3773 expressions = [] 3774 3775 if not self._match(TokenType.L_PAREN): 3776 self._retreat(index) 3777 return None 3778 3779 if unpivot: 3780 expressions = self._parse_csv(self._parse_column) 3781 else: 3782 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3783 3784 if not expressions: 3785 self.raise_error("Failed to parse PIVOT's aggregation list") 3786 3787 if not self._match(TokenType.FOR): 3788 self.raise_error("Expecting FOR") 3789 3790 field = self._parse_pivot_in() 3791 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3792 self._parse_bitwise 3793 ) 3794 3795 self._match_r_paren() 3796 3797 pivot = self.expression( 3798 exp.Pivot, 3799 expressions=expressions, 3800 field=field, 3801 unpivot=unpivot, 3802 include_nulls=include_nulls, 3803 default_on_null=default_on_null, 3804 ) 3805 3806 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3807 pivot.set("alias", self._parse_table_alias()) 3808 3809 if not unpivot: 3810 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3811 3812 columns: t.List[exp.Expression] = []
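# Editorial sketch (annotation, not in the original source): the loop below derives the
# output column names of a PIVOT by combining every IN-value with every aggregation name,
# alias-first when PREFIXED_PIVOT_COLUMNS is set (e.g. total_a) and value-first otherwise
# (e.g. a_total); the value is rendered via fld.sql() when IDENTIFY_PIVOT_STRINGS is set.
3813 for fld in pivot.args["field"].expressions: 3814 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS 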
else fld.alias_or_name 3815 for name in names: 3816 if self.PREFIXED_PIVOT_COLUMNS: 3817 name = f"{name}_{field_name}" if name else field_name 3818 else: 3819 name = f"{field_name}_{name}" if name else field_name 3820 3821 columns.append(exp.to_identifier(name)) 3822 3823 pivot.set("columns", columns) 3824 3825 return pivot 3826 3827 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3828 return [agg.alias for agg in aggregations] 3829 3830 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3831 if not skip_where_token and not self._match(TokenType.PREWHERE): 3832 return None 3833 3834 return self.expression( 3835 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3836 ) 3837 3838 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3839 if not skip_where_token and not self._match(TokenType.WHERE): 3840 return None 3841 3842 return self.expression( 3843 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3844 ) 3845 3846 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3847 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3848 return None 3849 3850 elements: t.Dict[str, t.Any] = defaultdict(list) 3851 3852 if self._match(TokenType.ALL): 3853 elements["all"] = True 3854 elif self._match(TokenType.DISTINCT): 3855 elements["all"] = False 3856 3857 while True: 3858 expressions = self._parse_csv( 3859 lambda: None 3860 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3861 else self._parse_assignment() 3862 ) 3863 if expressions: 3864 elements["expressions"].extend(expressions) 3865 3866 grouping_sets = self._parse_grouping_sets() 3867 if grouping_sets: 3868 elements["grouping_sets"].extend(grouping_sets) 3869 3870 rollup = None 3871 cube = None 3872 totals = None 3873 3874 index = self._index 3875 with_ = self._match(TokenType.WITH) 3876 if self._match(TokenType.ROLLUP): 3877 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3878 elements["rollup"].extend(ensure_list(rollup)) 3879 3880 if self._match(TokenType.CUBE): 3881 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3882 elements["cube"].extend(ensure_list(cube)) 3883 3884 if self._match_text_seq("TOTALS"): 3885 totals = True 3886 elements["totals"] = True # type: ignore 3887 3888 if not (grouping_sets or rollup or cube or totals): 3889 if with_: 3890 self._retreat(index) 3891 break 3892 3893 return self.expression(exp.Group, **elements) # type: ignore 3894 3895 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3896 if not self._match(TokenType.GROUPING_SETS): 3897 return None 3898 3899 return self._parse_wrapped_csv(self._parse_grouping_set) 3900 3901 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3902 if self._match(TokenType.L_PAREN): 3903 grouping_set = self._parse_csv(self._parse_column) 3904 self._match_r_paren() 3905 return self.expression(exp.Tuple, expressions=grouping_set) 3906 3907 return self._parse_column() 3908 3909 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3910 if not skip_having_token and not self._match(TokenType.HAVING): 3911 return None 3912 return self.expression(exp.Having, this=self._parse_assignment()) 3913 3914 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3915 if not self._match(TokenType.QUALIFY): 3916 return None 3917 return self.expression(exp.Qualify, this=self._parse_assignment()) 3918 3919 def 
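_parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
# Editorial sketch (annotation, not in the original source; assumes the public sqlglot.parse_one API):
# parses Oracle-style hierarchical queries, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT id FROM t START WITH pid IS NULL CONNECT BY PRIOR id = pid",
#   ...     read="oracle",
#   ... )
# Note that PRIOR is registered as a no-paren function parser only while the CONNECT BY
# condition is being parsed, and is popped again immediately afterwards.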
3920 if skip_start_token: 3921 start = None 3922 elif self._match(TokenType.START_WITH): 3923 start = self._parse_assignment() 3924 else: 3925 return None 3926 3927 self._match(TokenType.CONNECT_BY) 3928 nocycle = self._match_text_seq("NOCYCLE") 3929 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3930 exp.Prior, this=self._parse_bitwise() 3931 ) 3932 connect = self._parse_assignment() 3933 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3934 3935 if not start and self._match(TokenType.START_WITH): 3936 start = self._parse_assignment() 3937 3938 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3939 3940 def _parse_name_as_expression(self) -> exp.Alias: 3941 return self.expression( 3942 exp.Alias, 3943 alias=self._parse_id_var(any_token=True), 3944 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3945 ) 3946 3947 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3948 if self._match_text_seq("INTERPOLATE"): 3949 return self._parse_wrapped_csv(self._parse_name_as_expression) 3950 return None 3951 3952 def _parse_order( 3953 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3954 ) -> t.Optional[exp.Expression]: 3955 siblings = None 3956 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3957 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3958 return this 3959 3960 siblings = True 3961 3962 return self.expression( 3963 exp.Order, 3964 this=this, 3965 expressions=self._parse_csv(self._parse_ordered), 3966 interpolate=self._parse_interpolate(), 3967 siblings=siblings, 3968 ) 3969 3970 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3971 if not self._match(token): 3972 return None 3973 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3974 3975 def _parse_ordered( 3976 self, parse_method: t.Optional[t.Callable] = None 3977 ) -> t.Optional[exp.Ordered]: 3978 this = parse_method() if parse_method else self._parse_assignment() 3979 if not this: 3980 return None 3981 3982 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 3983 this = exp.var("ALL") 3984 3985 asc = self._match(TokenType.ASC) 3986 desc = self._match(TokenType.DESC) or (asc and False) 3987 3988 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3989 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3990 3991 nulls_first = is_nulls_first or False 3992 explicitly_null_ordered = is_nulls_first or is_nulls_last 3993 3994 if ( 3995 not explicitly_null_ordered 3996 and ( 3997 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3998 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3999 ) 4000 and self.dialect.NULL_ORDERING != "nulls_are_last" 4001 ): 4002 nulls_first = True 4003 4004 if self._match_text_seq("WITH", "FILL"): 4005 with_fill = self.expression( 4006 exp.WithFill, 4007 **{ # type: ignore 4008 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4009 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4010 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4011 }, 4012 ) 4013 else: 4014 with_fill = None 4015 4016 return self.expression( 4017 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4018 ) 4019
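# Editorial sketch (annotation, not in the original source): _parse_limit unifies
# LIMIT <n> (including the MySQL-style "LIMIT <offset>, <n>" comma form), T-SQL's TOP <n>,
# and the ANSI FETCH {FIRST | NEXT} <n> {ROW | ROWS} {ONLY | WITH TIES} clause, producing
# exp.Limit or exp.Fetch nodes respectively.
4020 def _parse_limit( 4021 self, 4022 this: t.Optional[exp.Expression] = None, 4023 top: bool = False, 4024 skip_limit_token: bool = False, 4025 ) -> 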
t.Optional[exp.Expression]: 4026 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4027 comments = self._prev_comments 4028 if top: 4029 limit_paren = self._match(TokenType.L_PAREN) 4030 expression = self._parse_term() if limit_paren else self._parse_number() 4031 4032 if limit_paren: 4033 self._match_r_paren() 4034 else: 4035 expression = self._parse_term() 4036 4037 if self._match(TokenType.COMMA): 4038 offset = expression 4039 expression = self._parse_term() 4040 else: 4041 offset = None 4042 4043 limit_exp = self.expression( 4044 exp.Limit, 4045 this=this, 4046 expression=expression, 4047 offset=offset, 4048 comments=comments, 4049 expressions=self._parse_limit_by(), 4050 ) 4051 4052 return limit_exp 4053 4054 if self._match(TokenType.FETCH): 4055 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4056 direction = self._prev.text.upper() if direction else "FIRST" 4057 4058 count = self._parse_field(tokens=self.FETCH_TOKENS) 4059 percent = self._match(TokenType.PERCENT) 4060 4061 self._match_set((TokenType.ROW, TokenType.ROWS)) 4062 4063 only = self._match_text_seq("ONLY") 4064 with_ties = self._match_text_seq("WITH", "TIES") 4065 4066 if only and with_ties: 4067 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4068 4069 return self.expression( 4070 exp.Fetch, 4071 direction=direction, 4072 count=count, 4073 percent=percent, 4074 with_ties=with_ties, 4075 ) 4076 4077 return this 4078 4079 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4080 if not self._match(TokenType.OFFSET): 4081 return this 4082 4083 count = self._parse_term() 4084 self._match_set((TokenType.ROW, TokenType.ROWS)) 4085 4086 return self.expression( 4087 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4088 ) 4089 4090 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4091 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4092 4093 def _parse_locks(self) -> t.List[exp.Lock]: 4094 locks = [] 4095 while True: 4096 if self._match_text_seq("FOR", "UPDATE"): 4097 update = True 4098 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4099 "LOCK", "IN", "SHARE", "MODE" 4100 ): 4101 update = False 4102 else: 4103 break 4104 4105 expressions = None 4106 if self._match_text_seq("OF"): 4107 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4108 4109 wait: t.Optional[bool | exp.Expression] = None 4110 if self._match_text_seq("NOWAIT"): 4111 wait = True 4112 elif self._match_text_seq("WAIT"): 4113 wait = self._parse_primary() 4114 elif self._match_text_seq("SKIP", "LOCKED"): 4115 wait = False 4116 4117 locks.append( 4118 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4119 ) 4120 4121 return locks 4122 4123 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4124 while this and self._match_set(self.SET_OPERATIONS): 4125 token_type = self._prev.token_type 4126 4127 if token_type == TokenType.UNION: 4128 operation: t.Type[exp.SetOperation] = exp.Union 4129 elif token_type == TokenType.EXCEPT: 4130 operation = exp.Except 4131 else: 4132 operation = exp.Intersect 4133 4134 comments = self._prev.comments 4135 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4136 by_name = self._match_text_seq("BY", "NAME") 4137 expression = self._parse_select(nested=True, parse_set_operation=False) 4138 4139 this = self.expression( 4140 
operation, 4141 comments=comments, 4142 this=this, 4143 distinct=distinct, 4144 by_name=by_name, 4145 expression=expression, 4146 ) 4147 4148 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4149 expression = this.expression 4150 4151 if expression: 4152 for arg in self.SET_OP_MODIFIERS: 4153 expr = expression.args.get(arg) 4154 if expr: 4155 this.set(arg, expr.pop()) 4156 4157 return this 4158 4159 def _parse_expression(self) -> t.Optional[exp.Expression]: 4160 return self._parse_alias(self._parse_assignment()) 4161 4162 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4163 this = self._parse_disjunction() 4164 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4165 # This allows us to parse <non-identifier token> := <expr> 4166 this = exp.column( 4167 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4168 ) 4169 4170 while self._match_set(self.ASSIGNMENT): 4171 this = self.expression( 4172 self.ASSIGNMENT[self._prev.token_type], 4173 this=this, 4174 comments=self._prev_comments, 4175 expression=self._parse_assignment(), 4176 ) 4177 4178 return this 4179 4180 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4181 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4182 4183 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4184 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4185 4186 def _parse_equality(self) -> t.Optional[exp.Expression]: 4187 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4188 4189 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4190 return self._parse_tokens(self._parse_range, self.COMPARISON) 4191 4192 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4193 this = this or self._parse_bitwise() 4194 negate = self._match(TokenType.NOT) 4195 4196 if self._match_set(self.RANGE_PARSERS): 4197 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4198 if not expression: 4199 return this 4200 4201 this = expression 4202 elif self._match(TokenType.ISNULL): 4203 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4204 4205 # Postgres supports ISNULL and NOTNULL for conditions. 
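4206 # https://blog.andreiavram.ro/postgresql-null-composite-type/
# Editorial sketch (annotation, not in the original source): both postfix operators are
# normalized here, i.e. "x ISNULL" parses as x IS NULL and "x NOTNULL" as NOT (x IS NULL),
# so any target dialect can regenerate them in its own syntax.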
4207 if self._match(TokenType.NOTNULL): 4208 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4209 this = self.expression(exp.Not, this=this) 4210 4211 if negate: 4212 this = self.expression(exp.Not, this=this) 4213 4214 if self._match(TokenType.IS): 4215 this = self._parse_is(this) 4216 4217 return this 4218 4219 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4220 index = self._index - 1 4221 negate = self._match(TokenType.NOT) 4222 4223 if self._match_text_seq("DISTINCT", "FROM"): 4224 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4225 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4226 4227 expression = self._parse_null() or self._parse_boolean() 4228 if not expression: 4229 self._retreat(index) 4230 return None 4231 4232 this = self.expression(exp.Is, this=this, expression=expression) 4233 return self.expression(exp.Not, this=this) if negate else this 4234 4235 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4236 unnest = self._parse_unnest(with_alias=False) 4237 if unnest: 4238 this = self.expression(exp.In, this=this, unnest=unnest) 4239 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4240 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4241 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4242 4243 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4244 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4245 else: 4246 this = self.expression(exp.In, this=this, expressions=expressions) 4247 4248 if matched_l_paren: 4249 self._match_r_paren(this) 4250 elif not self._match(TokenType.R_BRACKET, expression=this): 4251 self.raise_error("Expecting ]") 4252 else: 4253 this = self.expression(exp.In, this=this, field=self._parse_field()) 4254 4255 return this 4256 4257 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4258 low = self._parse_bitwise() 4259 self._match(TokenType.AND) 4260 high = self._parse_bitwise() 4261 return self.expression(exp.Between, this=this, low=low, high=high) 4262 4263 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4264 if not self._match(TokenType.ESCAPE): 4265 return this 4266 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4267 4268 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4269 index = self._index 4270 4271 if not self._match(TokenType.INTERVAL) and match_interval: 4272 return None 4273 4274 if self._match(TokenType.STRING, advance=False): 4275 this = self._parse_primary() 4276 else: 4277 this = self._parse_term() 4278 4279 if not this or ( 4280 isinstance(this, exp.Column) 4281 and not this.table 4282 and not this.this.quoted 4283 and this.name.upper() == "IS" 4284 ): 4285 self._retreat(index) 4286 return None 4287 4288 unit = self._parse_function() or ( 4289 not self._match(TokenType.ALIAS, advance=False) 4290 and self._parse_var(any_token=True, upper=True) 4291 ) 4292 4293 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4294 # each INTERVAL expression into this canonical form so it's easy to transpile 4295 if this and this.is_number: 4296 this = exp.Literal.string(this.to_py()) 4297 elif this and this.is_string: 4298 parts = exp.INTERVAL_STRING_RE.findall(this.name)
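# Editorial sketch (annotation, not in the original source; assumes the public
# sqlglot.parse_one API): a one-part interval string is split into value and unit below so
# it transpiles cleanly, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
#   "SELECT INTERVAL '5' DAY"
4299 if 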
len(parts) == 1: 4300 if unit: 4301 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4302 self._retreat(self._index - 1) 4303 4304 this = exp.Literal.string(parts[0][0]) 4305 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4306 4307 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4308 unit = self.expression( 4309 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4310 ) 4311 4312 interval = self.expression(exp.Interval, this=this, unit=unit) 4313 4314 index = self._index 4315 self._match(TokenType.PLUS) 4316 4317 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4318 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4319 return self.expression( 4320 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4321 ) 4322 4323 self._retreat(index) 4324 return interval 4325 4326 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4327 this = self._parse_term() 4328 4329 while True: 4330 if self._match_set(self.BITWISE): 4331 this = self.expression( 4332 self.BITWISE[self._prev.token_type], 4333 this=this, 4334 expression=self._parse_term(), 4335 ) 4336 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4337 this = self.expression( 4338 exp.DPipe, 4339 this=this, 4340 expression=self._parse_term(), 4341 safe=not self.dialect.STRICT_STRING_CONCAT, 4342 ) 4343 elif self._match(TokenType.DQMARK): 4344 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4345 elif self._match_pair(TokenType.LT, TokenType.LT): 4346 this = self.expression( 4347 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4348 ) 4349 elif self._match_pair(TokenType.GT, TokenType.GT): 4350 this = self.expression( 4351 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4352 ) 4353 else: 4354 break 4355 4356 return this 4357 4358 def _parse_term(self) -> t.Optional[exp.Expression]: 4359 return self._parse_tokens(self._parse_factor, self.TERM) 4360 4361 def _parse_factor(self) -> t.Optional[exp.Expression]: 4362 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4363 this = parse_method() 4364 4365 while self._match_set(self.FACTOR): 4366 klass = self.FACTOR[self._prev.token_type] 4367 comments = self._prev_comments 4368 expression = parse_method() 4369 4370 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4371 self._retreat(self._index - 1) 4372 return this 4373 4374 this = self.expression(klass, this=this, comments=comments, expression=expression) 4375 4376 if isinstance(this, exp.Div): 4377 this.args["typed"] = self.dialect.TYPED_DIVISION 4378 this.args["safe"] = self.dialect.SAFE_DIVISION 4379 4380 return this 4381 4382 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4383 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4384 4385 def _parse_unary(self) -> t.Optional[exp.Expression]: 4386 if self._match_set(self.UNARY_PARSERS): 4387 return self.UNARY_PARSERS[self._prev.token_type](self) 4388 return self._parse_at_time_zone(self._parse_type()) 4389 4390 def _parse_type( 4391 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4392 ) -> t.Optional[exp.Expression]: 4393 interval = parse_interval and self._parse_interval() 4394 if interval: 4395 return interval 4396 4397 index = self._index 4398 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4399 4400 # parse_types() returns a Cast if we 
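parsed BQ's inline constructor <type>(<values>) e.g. 4401 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
# Editorial sketch (annotation, not in the original source; assumes the public
# sqlglot.parse_one API), e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
# yields a Cast node wrapping STRUCT(1), so trailing column ops such as struct field
# access keep working against the canonical Cast form.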
4402 if isinstance(data_type, exp.Cast): 4403 # This constructor can contain ops directly after it, for instance struct unnesting: 4404 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4405 return self._parse_column_ops(data_type) 4406 4407 if data_type: 4408 index2 = self._index 4409 this = self._parse_primary() 4410 4411 if isinstance(this, exp.Literal): 4412 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4413 if parser: 4414 return parser(self, this, data_type) 4415 4416 return self.expression(exp.Cast, this=this, to=data_type) 4417 4418 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4419 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4420 # 4421 # If the index difference here is greater than 1, that means the parser itself must have 4422 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4423 # 4424 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4425 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4426 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4427 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4428 # 4429 # In these cases, we don't really want to return the converted type, but instead retreat 4430 # and try to parse a Column or Identifier in the section below. 4431 if data_type.expressions and index2 - index > 1: 4432 self._retreat(index2) 4433 return self._parse_column_ops(data_type) 4434 4435 self._retreat(index) 4436 4437 if fallback_to_identifier: 4438 return self._parse_id_var() 4439 4440 this = self._parse_column() 4441 return this and self._parse_column_ops(this) 4442 4443 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4444 this = self._parse_type() 4445 if not this: 4446 return None 4447 4448 if isinstance(this, exp.Column) and not this.table: 4449 this = exp.var(this.name.upper()) 4450 4451 return self.expression( 4452 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4453 ) 4454 4455 def _parse_types( 4456 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4457 ) -> t.Optional[exp.Expression]: 4458 index = self._index 4459 4460 this: t.Optional[exp.Expression] = None 4461 prefix = self._match_text_seq("SYSUDTLIB", ".") 4462 4463 if not self._match_set(self.TYPE_TOKENS): 4464 identifier = allow_identifiers and self._parse_id_var( 4465 any_token=False, tokens=(TokenType.VAR,) 4466 ) 4467 if isinstance(identifier, exp.Identifier): 4468 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4469 4470 if len(tokens) != 1: 4471 self.raise_error("Unexpected identifier", self._prev) 4472 4473 if tokens[0].token_type in self.TYPE_TOKENS: 4474 self._prev = tokens[0] 4475 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4476 type_name = identifier.name 4477 4478 while self._match(TokenType.DOT): 4479 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4480 4481 this = exp.DataType.build(type_name, udt=True) 4482 else: 4483 self._retreat(self._index - 1) 4484 return None 4485 else: 4486 return None 4487 4488 type_token = self._prev.token_type 4489
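# Editorial sketch (annotation, not in the original source): at this point some type
# token was matched. The identifier branch above re-tokenizes a VAR identifier to see
# whether it names a single known type token, and otherwise treats dotted names (e.g. a
# hypothetical my_schema.my_type) as user-defined types built with udt=True, provided the
# dialect sets SUPPORTS_USER_DEFINED_TYPES.
4490 if type_token == TokenType.PSEUDO_TYPE: 4491 return self.expression(exp.PseudoType, 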
this=self._prev.text.upper()) 4492 4493 if type_token == TokenType.OBJECT_IDENTIFIER: 4494 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4495 4496 # https://materialize.com/docs/sql/types/map/ 4497 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4498 key_type = self._parse_types( 4499 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4500 ) 4501 if not self._match(TokenType.FARROW): 4502 self._retreat(index) 4503 return None 4504 4505 value_type = self._parse_types( 4506 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4507 ) 4508 if not self._match(TokenType.R_BRACKET): 4509 self._retreat(index) 4510 return None 4511 4512 return exp.DataType( 4513 this=exp.DataType.Type.MAP, 4514 expressions=[key_type, value_type], 4515 nested=True, 4516 prefix=prefix, 4517 ) 4518 4519 nested = type_token in self.NESTED_TYPE_TOKENS 4520 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4521 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4522 expressions = None 4523 maybe_func = False 4524 4525 if self._match(TokenType.L_PAREN): 4526 if is_struct: 4527 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4528 elif nested: 4529 expressions = self._parse_csv( 4530 lambda: self._parse_types( 4531 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4532 ) 4533 ) 4534 elif type_token in self.ENUM_TYPE_TOKENS: 4535 expressions = self._parse_csv(self._parse_equality) 4536 elif is_aggregate: 4537 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4538 any_token=False, tokens=(TokenType.VAR,) 4539 ) 4540 if not func_or_ident or not self._match(TokenType.COMMA): 4541 return None 4542 expressions = self._parse_csv( 4543 lambda: self._parse_types( 4544 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4545 ) 4546 ) 4547 expressions.insert(0, func_or_ident) 4548 else: 4549 expressions = self._parse_csv(self._parse_type_size) 4550 4551 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4552 if type_token == TokenType.VECTOR and len(expressions) == 2: 4553 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4554 4555 if not expressions or not self._match(TokenType.R_PAREN): 4556 self._retreat(index) 4557 return None 4558 4559 maybe_func = True 4560 4561 values: t.Optional[t.List[exp.Expression]] = None 4562 4563 if nested and self._match(TokenType.LT): 4564 if is_struct: 4565 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4566 else: 4567 expressions = self._parse_csv( 4568 lambda: self._parse_types( 4569 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4570 ) 4571 ) 4572 4573 if not self._match(TokenType.GT): 4574 self.raise_error("Expecting >") 4575 4576 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4577 values = self._parse_csv(self._parse_assignment) 4578 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4579 4580 if type_token in self.TIMESTAMPS: 4581 if self._match_text_seq("WITH", "TIME", "ZONE"): 4582 maybe_func = False 4583 tz_type = ( 4584 exp.DataType.Type.TIMETZ 4585 if type_token in self.TIMES 4586 else exp.DataType.Type.TIMESTAMPTZ 4587 ) 4588 this = exp.DataType(this=tz_type, expressions=expressions) 4589 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4590 maybe_func = False 4591 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4592 
elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4593 maybe_func = False 4594 elif type_token == TokenType.INTERVAL: 4595 unit = self._parse_var(upper=True) 4596 if unit: 4597 if self._match_text_seq("TO"): 4598 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4599 4600 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4601 else: 4602 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4603 4604 if maybe_func and check_func: 4605 index2 = self._index 4606 peek = self._parse_string() 4607 4608 if not peek: 4609 self._retreat(index) 4610 return None 4611 4612 self._retreat(index2) 4613 4614 if not this: 4615 if self._match_text_seq("UNSIGNED"): 4616 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4617 if not unsigned_type_token: 4618 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4619 4620 type_token = unsigned_type_token or type_token 4621 4622 this = exp.DataType( 4623 this=exp.DataType.Type[type_token.value], 4624 expressions=expressions, 4625 nested=nested, 4626 prefix=prefix, 4627 ) 4628 4629 # Empty arrays/structs are allowed 4630 if values is not None: 4631 cls = exp.Struct if is_struct else exp.Array 4632 this = exp.cast(cls(expressions=values), this, copy=False) 4633 4634 elif expressions: 4635 this.set("expressions", expressions) 4636 4637 # https://materialize.com/docs/sql/types/list/#type-name 4638 while self._match(TokenType.LIST): 4639 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4640 4641 index = self._index 4642 4643 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4644 matched_array = self._match(TokenType.ARRAY) 4645 4646 while self._curr: 4647 datatype_token = self._prev.token_type 4648 matched_l_bracket = self._match(TokenType.L_BRACKET) 4649 if not matched_l_bracket and not matched_array: 4650 break 4651 4652 matched_array = False 4653 values = self._parse_csv(self._parse_assignment) or None 4654 if ( 4655 values 4656 and not schema 4657 and ( 4658 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4659 ) 4660 ): 4661 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4662 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4663 self._retreat(index) 4664 break 4665 4666 this = exp.DataType( 4667 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4668 ) 4669 self._match(TokenType.R_BRACKET) 4670 4671 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4672 converter = self.TYPE_CONVERTERS.get(this.this) 4673 if converter: 4674 this = converter(t.cast(exp.DataType, this)) 4675 4676 return this 4677 4678 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4679 index = self._index 4680 4681 if ( 4682 self._curr 4683 and self._next 4684 and self._curr.token_type in self.TYPE_TOKENS 4685 and self._next.token_type in self.TYPE_TOKENS 4686 ): 4687 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4688 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4689 this = self._parse_id_var() 4690 else: 4691 this = ( 4692 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4693 or self._parse_id_var() 4694 ) 4695 4696 self._match(TokenType.COLON) 4697 4698 if ( 4699 type_required 4700 and not isinstance(this, exp.DataType) 4701 and not self._match_set(self.TYPE_TOKENS, advance=False) 4702 ): 4703 self._retreat(index) 4704 return self._parse_types() 4705 4706 return self._parse_column_def(this) 4707 4708 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4709 if not self._match_text_seq("AT", "TIME", "ZONE"): 4710 return this 4711 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4712 4713 def _parse_column(self) -> t.Optional[exp.Expression]: 4714 this = self._parse_column_reference() 4715 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4716 4717 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4718 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4719 4720 return column 4721 4722 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4723 this = self._parse_field() 4724 if ( 4725 not this 4726 and self._match(TokenType.VALUES, advance=False) 4727 and self.VALUES_FOLLOWED_BY_PAREN 4728 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4729 ): 4730 this = self._parse_id_var() 4731 4732 if isinstance(this, exp.Identifier): 4733 # We bubble up comments from the Identifier to the Column 4734 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4735 4736 return this 4737 4738 def _parse_colon_as_variant_extract( 4739 self, this: t.Optional[exp.Expression] 4740 ) -> t.Optional[exp.Expression]: 4741 casts = [] 4742 json_path = [] 4743 4744 while self._match(TokenType.COLON): 4745 start_index = self._index 4746 4747 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4748 path = self._parse_column_ops( 4749 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4750 ) 4751 4752 # The cast :: operator has a lower precedence than the extraction operator :, so 4753 # we rearrange the AST appropriately to avoid casting the JSON path 4754 while isinstance(path, exp.Cast): 4755 casts.append(path.to) 4756 path = path.this 4757 4758 if casts: 4759 dcolon_offset = next( 4760 i 4761 for i, t in enumerate(self._tokens[start_index:]) 4762 if t.token_type == TokenType.DCOLON 4763 ) 4764 end_token = self._tokens[start_index + dcolon_offset - 1] 4765 else: 4766 end_token = self._prev 4767 4768 if path: 4769 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4770 4771 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4772 # Databricks transforms it back to the colon/dot notation 4773 if json_path: 4774 this = self.expression( 4775 exp.JSONExtract, 4776 this=this, 4777 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4778 variant_extract=True, 4779 ) 4780 4781 while casts: 4782 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4783 4784 return this 4785 4786 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4787 return self._parse_types() 4788 4789 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4790 this = self._parse_bracket(this) 4791 4792 while 
self._match_set(self.COLUMN_OPERATORS): 4793 op_token = self._prev.token_type 4794 op = self.COLUMN_OPERATORS.get(op_token) 4795 4796 if op_token == TokenType.DCOLON: 4797 field = self._parse_dcolon() 4798 if not field: 4799 self.raise_error("Expected type") 4800 elif op and self._curr: 4801 field = self._parse_column_reference() 4802 else: 4803 field = self._parse_field(any_token=True, anonymous_func=True) 4804 4805 if isinstance(field, exp.Func) and this: 4806 # bigquery allows function calls like x.y.count(...) 4807 # SAFE.SUBSTR(...) 4808 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4809 this = exp.replace_tree( 4810 this, 4811 lambda n: ( 4812 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4813 if n.table 4814 else n.this 4815 ) 4816 if isinstance(n, exp.Column) 4817 else n, 4818 ) 4819 4820 if op: 4821 this = op(self, this, field) 4822 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4823 this = self.expression( 4824 exp.Column, 4825 this=field, 4826 table=this.this, 4827 db=this.args.get("table"), 4828 catalog=this.args.get("db"), 4829 ) 4830 else: 4831 this = self.expression(exp.Dot, this=this, expression=field) 4832 4833 this = self._parse_bracket(this) 4834 4835 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4836 4837 def _parse_primary(self) -> t.Optional[exp.Expression]: 4838 if self._match_set(self.PRIMARY_PARSERS): 4839 token_type = self._prev.token_type 4840 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4841 4842 if token_type == TokenType.STRING: 4843 expressions = [primary] 4844 while self._match(TokenType.STRING): 4845 expressions.append(exp.Literal.string(self._prev.text)) 4846 4847 if len(expressions) > 1: 4848 return self.expression(exp.Concat, expressions=expressions) 4849 4850 return primary 4851 4852 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4853 return exp.Literal.number(f"0.{self._prev.text}") 4854 4855 if self._match(TokenType.L_PAREN): 4856 comments = self._prev_comments 4857 query = self._parse_select() 4858 4859 if query: 4860 expressions = [query] 4861 else: 4862 expressions = self._parse_expressions() 4863 4864 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4865 4866 if not this and self._match(TokenType.R_PAREN, advance=False): 4867 this = self.expression(exp.Tuple) 4868 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4869 this = self._parse_subquery(this=this, parse_alias=False) 4870 elif isinstance(this, exp.Subquery): 4871 this = self._parse_subquery( 4872 this=self._parse_set_operations(this), parse_alias=False 4873 ) 4874 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4875 this = self.expression(exp.Tuple, expressions=expressions) 4876 else: 4877 this = self.expression(exp.Paren, this=this) 4878 4879 if this: 4880 this.add_comments(comments) 4881 4882 self._match_r_paren(expression=this) 4883 return this 4884 4885 return None 4886 4887 def _parse_field( 4888 self, 4889 any_token: bool = False, 4890 tokens: t.Optional[t.Collection[TokenType]] = None, 4891 anonymous_func: bool = False, 4892 ) -> t.Optional[exp.Expression]: 4893 if anonymous_func: 4894 field = ( 4895 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4896 or self._parse_primary() 4897 ) 4898 else: 4899 field = self._parse_primary() or self._parse_function( 4900 anonymous=anonymous_func, any_token=any_token 4901 ) 4902 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4903 4904 def _parse_function( 4905 self, 4906 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4907 anonymous: bool = False, 4908 optional_parens: bool = True, 4909 any_token: bool = False, 4910 ) -> t.Optional[exp.Expression]: 4911 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4912 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4913 fn_syntax = False 4914 if ( 4915 self._match(TokenType.L_BRACE, advance=False) 4916 and self._next 4917 and self._next.text.upper() == "FN" 4918 ): 4919 self._advance(2) 4920 fn_syntax = True 4921 4922 func = self._parse_function_call( 4923 functions=functions, 4924 anonymous=anonymous, 4925 optional_parens=optional_parens, 4926 any_token=any_token, 4927 ) 4928 4929 if fn_syntax: 4930 self._match(TokenType.R_BRACE) 4931 4932 return func 4933 4934 def _parse_function_call( 4935 self, 4936 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4937 anonymous: bool = False, 4938 optional_parens: bool = True, 4939 any_token: bool = False, 4940 ) -> t.Optional[exp.Expression]: 4941 if not self._curr: 4942 return None 4943 4944 comments = self._curr.comments 4945 token_type = self._curr.token_type 4946 this = self._curr.text 4947 upper = this.upper() 4948 4949 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4950 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4951 self._advance() 4952 return self._parse_window(parser(self)) 4953 4954 if not self._next or self._next.token_type != TokenType.L_PAREN: 4955 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4956 self._advance() 4957 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4958 4959 return None 4960 4961 if any_token: 4962 if token_type in self.RESERVED_TOKENS: 4963 return None 4964 elif token_type not in self.FUNC_TOKENS: 4965 return None 4966 4967 self._advance(2) 4968 4969 parser = self.FUNCTION_PARSERS.get(upper) 4970 if parser and not anonymous: 4971 this = parser(self) 4972 else: 4973 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4974 4975 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4976 this = self.expression(subquery_predicate, this=self._parse_select()) 4977 self._match_r_paren() 4978 return this 4979 4980 if functions is None: 4981 functions = self.FUNCTIONS 4982 4983 function = functions.get(upper) 4984 4985 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4986 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4987 4988 if alias: 4989 args = self._kv_to_prop_eq(args) 4990 4991 if function and not anonymous: 4992 if "dialect" in function.__code__.co_varnames: 4993 func = function(args, dialect=self.dialect) 4994 else: 4995 func = function(args) 4996 4997 func = self.validate_expression(func, args) 4998 if not self.dialect.NORMALIZE_FUNCTIONS: 4999 func.meta["name"] = this 5000 5001 this = func 5002 else: 5003 if token_type == TokenType.IDENTIFIER: 5004 this = exp.Identifier(this=this, quoted=True) 5005 this = self.expression(exp.Anonymous, this=this, expressions=args) 5006 5007 if isinstance(this, exp.Expression): 5008 this.add_comments(comments) 5009 5010 self._match_r_paren(this) 5011 return self._parse_window(this) 5012 5013 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5014 transformed = [] 5015 5016 for e in expressions: 5017 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5018 if isinstance(e, 
exp.Alias): 5019 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5020 5021 if not isinstance(e, exp.PropertyEQ): 5022 e = self.expression( 5023 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5024 ) 5025 5026 if isinstance(e.this, exp.Column): 5027 e.this.replace(e.this.this) 5028 5029 transformed.append(e) 5030 5031 return transformed 5032 5033 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5034 return self._parse_column_def(self._parse_id_var()) 5035 5036 def _parse_user_defined_function( 5037 self, kind: t.Optional[TokenType] = None 5038 ) -> t.Optional[exp.Expression]: 5039 this = self._parse_id_var() 5040 5041 while self._match(TokenType.DOT): 5042 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5043 5044 if not self._match(TokenType.L_PAREN): 5045 return this 5046 5047 expressions = self._parse_csv(self._parse_function_parameter) 5048 self._match_r_paren() 5049 return self.expression( 5050 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5051 ) 5052 5053 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5054 literal = self._parse_primary() 5055 if literal: 5056 return self.expression(exp.Introducer, this=token.text, expression=literal) 5057 5058 return self.expression(exp.Identifier, this=token.text) 5059 5060 def _parse_session_parameter(self) -> exp.SessionParameter: 5061 kind = None 5062 this = self._parse_id_var() or self._parse_primary() 5063 5064 if this and self._match(TokenType.DOT): 5065 kind = this.name 5066 this = self._parse_var() or self._parse_primary() 5067 5068 return self.expression(exp.SessionParameter, this=this, kind=kind) 5069 5070 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5071 return self._parse_id_var() 5072 5073 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5074 index = self._index 5075 5076 if self._match(TokenType.L_PAREN): 5077 expressions = t.cast( 5078 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5079 ) 5080 5081 if not self._match(TokenType.R_PAREN): 5082 self._retreat(index) 5083 else: 5084 expressions = [self._parse_lambda_arg()] 5085 5086 if self._match_set(self.LAMBDAS): 5087 return self.LAMBDAS[self._prev.token_type](self, expressions) 5088 5089 self._retreat(index) 5090 5091 this: t.Optional[exp.Expression] 5092 5093 if self._match(TokenType.DISTINCT): 5094 this = self.expression( 5095 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5096 ) 5097 else: 5098 this = self._parse_select_or_expression(alias=alias) 5099 5100 return self._parse_limit( 5101 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5102 ) 5103 5104 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5105 index = self._index 5106 if not self._match(TokenType.L_PAREN): 5107 return this 5108 5109 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5110 # expr can be of both types 5111 if self._match_set(self.SELECT_START_TOKENS): 5112 self._retreat(index) 5113 return this 5114 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5115 self._match_r_paren() 5116 return self.expression(exp.Schema, this=this, expressions=args) 5117 5118 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5119 return self._parse_column_def(self._parse_field(any_token=True)) 5120 5121 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5122 # column defs are not really columns, they're identifiers 5123 if isinstance(this, exp.Column): 5124 this = this.this 5125 5126 kind = self._parse_types(schema=True) 5127 5128 if self._match_text_seq("FOR", "ORDINALITY"): 5129 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5130 5131 constraints: t.List[exp.Expression] = [] 5132 5133 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5134 ("ALIAS", "MATERIALIZED") 5135 ): 5136 persisted = self._prev.text.upper() == "MATERIALIZED" 5137 constraints.append( 5138 self.expression( 5139 exp.ComputedColumnConstraint, 5140 this=self._parse_assignment(), 5141 persisted=persisted or self._match_text_seq("PERSISTED"), 5142 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5143 ) 5144 ) 5145 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5146 self._match(TokenType.ALIAS) 5147 constraints.append( 5148 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5149 ) 5150 5151 while True: 5152 constraint = self._parse_column_constraint() 5153 if not constraint: 5154 break 5155 constraints.append(constraint) 5156 5157 if not kind and not constraints: 5158 return this 5159 5160 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5161 5162 def _parse_auto_increment( 5163 self, 5164 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5165 start = None 5166 increment = None 5167 5168 if self._match(TokenType.L_PAREN, advance=False): 5169 args = self._parse_wrapped_csv(self._parse_bitwise) 5170 start = seq_get(args, 0) 5171 increment = seq_get(args, 1) 5172 elif self._match_text_seq("START"): 5173 start = self._parse_bitwise() 5174 self._match_text_seq("INCREMENT") 5175 increment = self._parse_bitwise() 5176 5177 if start and increment: 5178 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5179 5180 return exp.AutoIncrementColumnConstraint() 5181 5182 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5183 if not self._match_text_seq("REFRESH"): 5184 self._retreat(self._index - 1) 5185 return None 5186 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5187 5188 def _parse_compress(self) -> exp.CompressColumnConstraint: 5189 if self._match(TokenType.L_PAREN, advance=False): 5190 return self.expression( 5191 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5192 ) 5193 5194 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5195 5196 def _parse_generated_as_identity( 5197 self, 5198 ) -> ( 5199 exp.GeneratedAsIdentityColumnConstraint 5200 | exp.ComputedColumnConstraint 5201 | exp.GeneratedAsRowColumnConstraint 5202 ): 5203 if self._match_text_seq("BY", "DEFAULT"): 5204 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5205 this = self.expression( 5206 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5207 ) 5208 else: 5209 self._match_text_seq("ALWAYS") 5210 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5211 5212 self._match(TokenType.ALIAS) 5213 5214 if self._match_text_seq("ROW"): 5215 start = self._match_text_seq("START") 5216 if not start: 5217 self._match(TokenType.END) 5218 hidden = self._match_text_seq("HIDDEN") 5219 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5220 5221 identity = self._match_text_seq("IDENTITY") 5222 5223 if self._match(TokenType.L_PAREN): 5224 if self._match(TokenType.START_WITH): 5225 this.set("start", self._parse_bitwise()) 5226 if self._match_text_seq("INCREMENT", "BY"): 5227 this.set("increment", self._parse_bitwise()) 5228 if self._match_text_seq("MINVALUE"): 5229 this.set("minvalue", self._parse_bitwise()) 5230 if self._match_text_seq("MAXVALUE"): 5231 this.set("maxvalue", self._parse_bitwise()) 5232 5233 if self._match_text_seq("CYCLE"): 5234 this.set("cycle", True) 5235 elif self._match_text_seq("NO", "CYCLE"): 5236 this.set("cycle", False) 5237 5238 if not identity: 5239 this.set("expression", self._parse_range()) 5240 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5241 args = self._parse_csv(self._parse_bitwise) 5242 this.set("start", seq_get(args, 0)) 5243 this.set("increment", seq_get(args, 1)) 5244 5245 self._match_r_paren() 5246 5247 return this 5248 5249 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5250 self._match_text_seq("LENGTH") 5251 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5252 5253 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5254 if self._match_text_seq("NULL"): 5255 return self.expression(exp.NotNullColumnConstraint) 5256 if self._match_text_seq("CASESPECIFIC"): 5257 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5258 if self._match_text_seq("FOR", "REPLICATION"): 5259 return self.expression(exp.NotForReplicationColumnConstraint) 5260 return None 5261 5262 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5263 if self._match(TokenType.CONSTRAINT): 5264 this = self._parse_id_var() 5265 else: 5266 this = None 5267 5268 if self._match_texts(self.CONSTRAINT_PARSERS): 5269 return self.expression( 5270 exp.ColumnConstraint, 5271 this=this, 5272 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5273 ) 5274 5275 return this 5276 5277 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5278 if not self._match(TokenType.CONSTRAINT): 5279 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5280 5281 return self.expression( 5282 exp.Constraint, 5283 this=self._parse_id_var(), 5284 expressions=self._parse_unnamed_constraints(), 5285 ) 5286 5287 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5288 constraints = [] 5289 while True: 5290 constraint = self._parse_unnamed_constraint() or self._parse_function() 5291 if not constraint: 5292 break 5293 constraints.append(constraint) 5294 5295 return constraints 5296 5297 def _parse_unnamed_constraint( 5298 self, constraints: t.Optional[t.Collection[str]] = None 5299 ) -> t.Optional[exp.Expression]: 5300 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5301 constraints or self.CONSTRAINT_PARSERS 5302 ): 5303 return None 5304 5305 constraint = self._prev.text.upper() 5306 if constraint not in self.CONSTRAINT_PARSERS: 5307 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5308 5309 return self.CONSTRAINT_PARSERS[constraint](self) 5310 5311 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5312 return self._parse_id_var(any_token=False) 5313 5314 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5315 self._match_text_seq("KEY") 5316 return self.expression( 5317 exp.UniqueColumnConstraint, 5318 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5319 this=self._parse_schema(self._parse_unique_key()), 5320 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5321 on_conflict=self._parse_on_conflict(), 5322 ) 5323 5324 def _parse_key_constraint_options(self) -> t.List[str]: 5325 options = [] 5326 while True: 5327 if not self._curr: 5328 break 5329 5330 if self._match(TokenType.ON): 5331 action = None 5332 on = self._advance_any() and self._prev.text 5333 5334 if self._match_text_seq("NO", "ACTION"): 5335 action = "NO ACTION" 5336 elif self._match_text_seq("CASCADE"): 5337 action = "CASCADE" 5338 elif self._match_text_seq("RESTRICT"): 5339 action = "RESTRICT" 5340 elif self._match_pair(TokenType.SET, TokenType.NULL): 5341 action = "SET NULL" 5342 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5343 action = "SET DEFAULT" 5344 else: 5345 self.raise_error("Invalid key constraint") 5346 5347 options.append(f"ON {on} {action}") 5348 else: 5349 var = self._parse_var_from_options( 5350 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5351 ) 5352 if not var: 5353 break 5354 options.append(var.name) 5355 5356 return options 5357 5358 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5359 if match and not self._match(TokenType.REFERENCES): 5360 return None 5361 5362 expressions = None 5363 this = self._parse_table(schema=True) 5364 options = self._parse_key_constraint_options() 5365 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5366 5367 def _parse_foreign_key(self) -> exp.ForeignKey: 5368 expressions = self._parse_wrapped_id_vars() 5369 reference = self._parse_references() 5370 options = {} 5371 5372 while self._match(TokenType.ON): 5373 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5374 self.raise_error("Expected DELETE or UPDATE") 5375 5376 kind = self._prev.text.lower() 5377 5378 if self._match_text_seq("NO", "ACTION"): 5379 action = "NO ACTION" 5380 elif self._match(TokenType.SET): 5381 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5382 action = "SET " + self._prev.text.upper() 5383 else: 5384 self._advance() 5385 action = self._prev.text.upper() 5386 5387 options[kind] = action 5388 5389 return self.expression( 5390 exp.ForeignKey, 5391 expressions=expressions, 5392 reference=reference, 5393 **options, # type: ignore 5394 ) 5395 5396 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5397 return self._parse_field() 5398 5399 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5400 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5401 self._retreat(self._index - 1) 5402 return None 5403 5404 id_vars = self._parse_wrapped_id_vars() 5405 return self.expression( 5406 exp.PeriodForSystemTimeConstraint, 5407 this=seq_get(id_vars, 0), 5408 expression=seq_get(id_vars, 1), 5409 ) 5410 5411 def _parse_primary_key( 5412 self, wrapped_optional: bool = False, in_props: bool = False 5413 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5414 desc = ( 5415 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5416 and self._prev.token_type == TokenType.DESC 5417 ) 5418 5419 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5420 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5421 5422 expressions = self._parse_wrapped_csv( 5423 self._parse_primary_key_part, optional=wrapped_optional 5424 ) 5425 options = self._parse_key_constraint_options() 5426 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5427 5428 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5429 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5430 5431 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5432 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5433 return this 5434 5435 bracket_kind = self._prev.token_type 5436 expressions = self._parse_csv( 5437 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5438 ) 5439 5440 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5441 self.raise_error("Expected ]") 5442 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5443 self.raise_error("Expected }") 5444 5445 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5446 if bracket_kind == TokenType.L_BRACE: 5447 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5448 elif not this: 5449 this = build_array_constructor( 5450 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5451 ) 5452 else: 5453 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5454 if constructor_type: 5455 return build_array_constructor( 5456 constructor_type, 5457 args=expressions, 5458 bracket_kind=bracket_kind, 5459 dialect=self.dialect, 5460 ) 5461 5462 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5463 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5464 5465 self._add_comments(this) 5466 return self._parse_bracket(this) 5467 5468 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5469 if self._match(TokenType.COLON): 5470 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5471 return this 5472 5473 def _parse_case(self) -> t.Optional[exp.Expression]: 5474 ifs = [] 5475 default = None 5476 5477 comments = self._prev_comments 5478 expression = self._parse_assignment() 5479 5480 while self._match(TokenType.WHEN): 5481 this = self._parse_assignment() 5482 self._match(TokenType.THEN) 5483 then = self._parse_assignment() 5484 ifs.append(self.expression(exp.If, this=this, true=then)) 5485 5486 if self._match(TokenType.ELSE): 5487 default = self._parse_assignment() 5488 5489 if not self._match(TokenType.END): 5490 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5491 default = exp.column("interval") 5492 else: 5493 self.raise_error("Expected END after CASE", self._prev) 5494 5495 return self.expression( 5496 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5497 ) 5498 5499 def _parse_if(self) -> t.Optional[exp.Expression]: 5500 if self._match(TokenType.L_PAREN): 5501 args = self._parse_csv(self._parse_assignment) 5502 this = self.validate_expression(exp.If.from_arg_list(args), args) 5503 self._match_r_paren() 5504 else: 5505 index = self._index - 1 5506 5507 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5508 return self._parse_as_command(self._prev) 5509 5510 condition = self._parse_assignment() 5511 5512 if not condition: 5513 self._retreat(index) 5514 return None 5515 5516 self._match(TokenType.THEN) 5517 true = self._parse_assignment() 5518 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5519 self._match(TokenType.END) 5520 this = self.expression(exp.If, this=condition, true=true, false=false) 5521 5522 return this 5523 5524 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5525 if not self._match_text_seq("VALUE", "FOR"): 5526 self._retreat(self._index - 1) 5527 return None 5528 5529 return self.expression( 5530 exp.NextValueFor, 5531 this=self._parse_column(), 5532 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5533 ) 5534 5535 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5536 this = self._parse_function() or self._parse_var_or_string(upper=True) 5537 5538 if self._match(TokenType.FROM): 5539 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5540 5541 if not self._match(TokenType.COMMA): 5542 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5543 5544 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5545 5546 def _parse_gap_fill(self) -> exp.GapFill: 5547 self._match(TokenType.TABLE) 5548 this = self._parse_table() 5549 5550 self._match(TokenType.COMMA) 5551 args = [this, *self._parse_csv(self._parse_lambda)] 5552 5553 gap_fill = exp.GapFill.from_arg_list(args) 5554 return self.validate_expression(gap_fill, args) 5555 5556 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5557 this = self._parse_assignment() 5558 5559 if not self._match(TokenType.ALIAS): 5560 if self._match(TokenType.COMMA): 5561 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5562 5563 self.raise_error("Expected AS after CAST") 5564 5565 fmt = None 5566 to = self._parse_types() 5567 5568 if self._match(TokenType.FORMAT): 5569 fmt_string = self._parse_string() 5570 fmt = self._parse_at_time_zone(fmt_string) 5571 5572 if not to: 5573 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5574 if to.this in exp.DataType.TEMPORAL_TYPES: 5575 this = self.expression( 5576 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5577 this=this, 5578 format=exp.Literal.string( 5579 format_time( 5580 fmt_string.this if fmt_string else "", 5581 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5582 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5583 ) 5584 ), 5585 safe=safe, 5586 ) 5587 5588 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5589 this.set("zone", fmt.args["zone"]) 5590 return this 5591 elif not to: 5592 self.raise_error("Expected TYPE after CAST") 5593 elif isinstance(to, exp.Identifier): 5594 to = exp.DataType.build(to.name, udt=True) 5595 elif to.this == exp.DataType.Type.CHAR: 5596 if self._match(TokenType.CHARACTER_SET): 5597 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5598 5599 return self.expression( 5600 exp.Cast if strict else exp.TryCast, 5601 this=this, 5602 to=to, 5603 format=fmt, 5604 safe=safe, 5605 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5606 ) 5607 5608 def _parse_string_agg(self) -> exp.Expression: 5609 if self._match(TokenType.DISTINCT): 5610 args: t.List[t.Optional[exp.Expression]] = [ 5611 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5612 ] 5613 if self._match(TokenType.COMMA): 5614 args.extend(self._parse_csv(self._parse_assignment)) 5615 else: 5616 args = self._parse_csv(self._parse_assignment) # type: ignore 5617 5618 index = self._index 5619 if not self._match(TokenType.R_PAREN) and args: 5620 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5621 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5622 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5623 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5624 5625 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5626 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5627 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5628 if not self._match_text_seq("WITHIN", "GROUP"): 5629 self._retreat(index) 5630 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5631 5632 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5633 order = self._parse_order(this=seq_get(args, 0)) 5634 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5635 5636 def _parse_convert( 5637 self, strict: bool, safe: t.Optional[bool] = None 5638 ) -> t.Optional[exp.Expression]: 5639 this = self._parse_bitwise() 5640 5641 if self._match(TokenType.USING): 5642 to: t.Optional[exp.Expression] = self.expression( 5643 exp.CharacterSet, this=self._parse_var() 5644 ) 5645 elif self._match(TokenType.COMMA): 5646 to = self._parse_types() 5647 else: 5648 to = None 5649 5650 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5651 5652 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5653 """ 5654 There are generally two variants of the DECODE function: 5655 5656 - DECODE(bin, charset) 5657 - DECODE(expression, search, result [, search, result] ... [, default]) 5658 5659 The second variant will always be parsed into a CASE expression. Note that NULL 5660 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5661 instead of relying on pattern matching. 
5662 """ 5663 args = self._parse_csv(self._parse_assignment) 5664 5665 if len(args) < 3: 5666 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5667 5668 expression, *expressions = args 5669 if not expression: 5670 return None 5671 5672 ifs = [] 5673 for search, result in zip(expressions[::2], expressions[1::2]): 5674 if not search or not result: 5675 return None 5676 5677 if isinstance(search, exp.Literal): 5678 ifs.append( 5679 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5680 ) 5681 elif isinstance(search, exp.Null): 5682 ifs.append( 5683 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5684 ) 5685 else: 5686 cond = exp.or_( 5687 exp.EQ(this=expression.copy(), expression=search), 5688 exp.and_( 5689 exp.Is(this=expression.copy(), expression=exp.Null()), 5690 exp.Is(this=search.copy(), expression=exp.Null()), 5691 copy=False, 5692 ), 5693 copy=False, 5694 ) 5695 ifs.append(exp.If(this=cond, true=result)) 5696 5697 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5698 5699 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5700 self._match_text_seq("KEY") 5701 key = self._parse_column() 5702 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5703 self._match_text_seq("VALUE") 5704 value = self._parse_bitwise() 5705 5706 if not key and not value: 5707 return None 5708 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5709 5710 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5711 if not this or not self._match_text_seq("FORMAT", "JSON"): 5712 return this 5713 5714 return self.expression(exp.FormatJson, this=this) 5715 5716 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5717 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5718 for value in values: 5719 if self._match_text_seq(value, "ON", on): 5720 return f"{value} ON {on}" 5721 5722 return None 5723 5724 @t.overload 5725 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5726 5727 @t.overload 5728 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5729 5730 def _parse_json_object(self, agg=False): 5731 star = self._parse_star() 5732 expressions = ( 5733 [star] 5734 if star 5735 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5736 ) 5737 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5738 5739 unique_keys = None 5740 if self._match_text_seq("WITH", "UNIQUE"): 5741 unique_keys = True 5742 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5743 unique_keys = False 5744 5745 self._match_text_seq("KEYS") 5746 5747 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5748 self._parse_type() 5749 ) 5750 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5751 5752 return self.expression( 5753 exp.JSONObjectAgg if agg else exp.JSONObject, 5754 expressions=expressions, 5755 null_handling=null_handling, 5756 unique_keys=unique_keys, 5757 return_type=return_type, 5758 encoding=encoding, 5759 ) 5760 5761 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5762 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5763 if not self._match_text_seq("NESTED"): 5764 this = self._parse_id_var() 5765 kind = self._parse_types(allow_identifiers=False) 5766 nested = None 5767 else: 5768 this = None 5769 kind = None 5770 nested = True 5771 5772 path = self._match_text_seq("PATH") and self._parse_string() 5773 nested_schema = nested and self._parse_json_schema() 5774 5775 return self.expression( 5776 exp.JSONColumnDef, 5777 this=this, 5778 kind=kind, 5779 path=path, 5780 nested_schema=nested_schema, 5781 ) 5782 5783 def _parse_json_schema(self) -> exp.JSONSchema: 5784 self._match_text_seq("COLUMNS") 5785 return self.expression( 5786 exp.JSONSchema, 5787 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5788 ) 5789 5790 def _parse_json_table(self) -> exp.JSONTable: 5791 this = self._parse_format_json(self._parse_bitwise()) 5792 path = self._match(TokenType.COMMA) and self._parse_string() 5793 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5794 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5795 schema = self._parse_json_schema() 5796 5797 return exp.JSONTable( 5798 this=this, 5799 schema=schema, 5800 path=path, 5801 error_handling=error_handling, 5802 empty_handling=empty_handling, 5803 ) 5804 5805 def _parse_match_against(self) -> exp.MatchAgainst: 5806 expressions = self._parse_csv(self._parse_column) 5807 5808 self._match_text_seq(")", "AGAINST", "(") 5809 5810 this = self._parse_string() 5811 5812 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5813 modifier = "IN NATURAL LANGUAGE MODE" 5814 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5815 modifier = f"{modifier} WITH QUERY EXPANSION" 5816 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5817 modifier = "IN BOOLEAN MODE" 5818 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5819 modifier = "WITH QUERY EXPANSION" 5820 else: 5821 modifier = None 5822 5823 return self.expression( 5824 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5825 ) 5826 5827 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5828 def _parse_open_json(self) -> exp.OpenJSON: 5829 this = self._parse_bitwise() 5830 path = self._match(TokenType.COMMA) and self._parse_string() 5831 5832 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5833 this = self._parse_field(any_token=True) 5834 kind = self._parse_types() 5835 path = 
self._parse_string() 5836 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5837 5838 return self.expression( 5839 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5840 ) 5841 5842 expressions = None 5843 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5844 self._match_l_paren() 5845 expressions = self._parse_csv(_parse_open_json_column_def) 5846 5847 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5848 5849 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5850 args = self._parse_csv(self._parse_bitwise) 5851 5852 if self._match(TokenType.IN): 5853 return self.expression( 5854 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5855 ) 5856 5857 if haystack_first: 5858 haystack = seq_get(args, 0) 5859 needle = seq_get(args, 1) 5860 else: 5861 needle = seq_get(args, 0) 5862 haystack = seq_get(args, 1) 5863 5864 return self.expression( 5865 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5866 ) 5867 5868 def _parse_predict(self) -> exp.Predict: 5869 self._match_text_seq("MODEL") 5870 this = self._parse_table() 5871 5872 self._match(TokenType.COMMA) 5873 self._match_text_seq("TABLE") 5874 5875 return self.expression( 5876 exp.Predict, 5877 this=this, 5878 expression=self._parse_table(), 5879 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5880 ) 5881 5882 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5883 args = self._parse_csv(self._parse_table) 5884 return exp.JoinHint(this=func_name.upper(), expressions=args) 5885 5886 def _parse_substring(self) -> exp.Substring: 5887 # Postgres supports the form: substring(string [from int] [for int]) 5888 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5889 5890 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5891 5892 if self._match(TokenType.FROM): 5893 args.append(self._parse_bitwise()) 5894 if self._match(TokenType.FOR): 5895 if len(args) == 1: 5896 args.append(exp.Literal.number(1)) 5897 args.append(self._parse_bitwise()) 5898 5899 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5900 5901 def _parse_trim(self) -> exp.Trim: 5902 # https://www.w3resource.com/sql/character-functions/trim.php 5903 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5904 5905 position = None 5906 collation = None 5907 expression = None 5908 5909 if self._match_texts(self.TRIM_TYPES): 5910 position = self._prev.text.upper() 5911 5912 this = self._parse_bitwise() 5913 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5914 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5915 expression = self._parse_bitwise() 5916 5917 if invert_order: 5918 this, expression = expression, this 5919 5920 if self._match(TokenType.COLLATE): 5921 collation = self._parse_bitwise() 5922 5923 return self.expression( 5924 exp.Trim, this=this, position=position, expression=expression, collation=collation 5925 ) 5926 5927 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5928 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5929 5930 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5931 return self._parse_window(self._parse_id_var(), alias=True) 5932 5933 def _parse_respect_or_ignore_nulls( 5934 self, this: t.Optional[exp.Expression] 5935 ) -> t.Optional[exp.Expression]: 5936 if self._match_text_seq("IGNORE", "NULLS"): 
5937 return self.expression(exp.IgnoreNulls, this=this) 5938 if self._match_text_seq("RESPECT", "NULLS"): 5939 return self.expression(exp.RespectNulls, this=this) 5940 return this 5941 5942 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5943 if self._match(TokenType.HAVING): 5944 self._match_texts(("MAX", "MIN")) 5945 max = self._prev.text.upper() != "MIN" 5946 return self.expression( 5947 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5948 ) 5949 5950 return this 5951 5952 def _parse_window( 5953 self, this: t.Optional[exp.Expression], alias: bool = False 5954 ) -> t.Optional[exp.Expression]: 5955 func = this 5956 comments = func.comments if isinstance(func, exp.Expression) else None 5957 5958 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5959 self._match(TokenType.WHERE) 5960 this = self.expression( 5961 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5962 ) 5963 self._match_r_paren() 5964 5965 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5966 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5967 if self._match_text_seq("WITHIN", "GROUP"): 5968 order = self._parse_wrapped(self._parse_order) 5969 this = self.expression(exp.WithinGroup, this=this, expression=order) 5970 5971 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5972 # Some dialects choose to implement and some do not. 5973 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5974 5975 # There is some code above in _parse_lambda that handles 5976 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5977 5978 # The below changes handle 5979 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5980 5981 # Oracle allows both formats 5982 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5983 # and Snowflake chose to do the same for familiarity 5984 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5985 if isinstance(this, exp.AggFunc): 5986 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5987 5988 if ignore_respect and ignore_respect is not this: 5989 ignore_respect.replace(ignore_respect.this) 5990 this = self.expression(ignore_respect.__class__, this=this) 5991 5992 this = self._parse_respect_or_ignore_nulls(this) 5993 5994 # bigquery select from window x AS (partition by ...) 
5995 if alias: 5996 over = None 5997 self._match(TokenType.ALIAS) 5998 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5999 return this 6000 else: 6001 over = self._prev.text.upper() 6002 6003 if comments and isinstance(func, exp.Expression): 6004 func.pop_comments() 6005 6006 if not self._match(TokenType.L_PAREN): 6007 return self.expression( 6008 exp.Window, 6009 comments=comments, 6010 this=this, 6011 alias=self._parse_id_var(False), 6012 over=over, 6013 ) 6014 6015 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6016 6017 first = self._match(TokenType.FIRST) 6018 if self._match_text_seq("LAST"): 6019 first = False 6020 6021 partition, order = self._parse_partition_and_order() 6022 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6023 6024 if kind: 6025 self._match(TokenType.BETWEEN) 6026 start = self._parse_window_spec() 6027 self._match(TokenType.AND) 6028 end = self._parse_window_spec() 6029 6030 spec = self.expression( 6031 exp.WindowSpec, 6032 kind=kind, 6033 start=start["value"], 6034 start_side=start["side"], 6035 end=end["value"], 6036 end_side=end["side"], 6037 ) 6038 else: 6039 spec = None 6040 6041 self._match_r_paren() 6042 6043 window = self.expression( 6044 exp.Window, 6045 comments=comments, 6046 this=this, 6047 partition_by=partition, 6048 order=order, 6049 spec=spec, 6050 alias=window_alias, 6051 over=over, 6052 first=first, 6053 ) 6054 6055 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6056 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6057 return self._parse_window(window, alias=alias) 6058 6059 return window 6060 6061 def _parse_partition_and_order( 6062 self, 6063 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6064 return self._parse_partition_by(), self._parse_order() 6065 6066 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6067 self._match(TokenType.BETWEEN) 6068 6069 return { 6070 "value": ( 6071 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6072 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6073 or self._parse_bitwise() 6074 ), 6075 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6076 } 6077 6078 def _parse_alias( 6079 self, this: t.Optional[exp.Expression], explicit: bool = False 6080 ) -> t.Optional[exp.Expression]: 6081 any_token = self._match(TokenType.ALIAS) 6082 comments = self._prev_comments or [] 6083 6084 if explicit and not any_token: 6085 return this 6086 6087 if self._match(TokenType.L_PAREN): 6088 aliases = self.expression( 6089 exp.Aliases, 6090 comments=comments, 6091 this=this, 6092 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6093 ) 6094 self._match_r_paren(aliases) 6095 return aliases 6096 6097 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6098 self.STRING_ALIASES and self._parse_string_as_identifier() 6099 ) 6100 6101 if alias: 6102 comments.extend(alias.pop_comments()) 6103 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6104 column = this.this 6105 6106 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6107 if not this.comments and column and column.comments: 6108 this.comments = column.pop_comments() 6109 6110 return this 6111 6112 def _parse_id_var( 6113 self, 6114 any_token: bool = True, 6115 tokens: t.Optional[t.Collection[TokenType]] = None, 6116 ) -> t.Optional[exp.Expression]: 6117 expression = self._parse_identifier() 6118 if 
not expression and ( 6119 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6120 ): 6121 quoted = self._prev.token_type == TokenType.STRING 6122 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6123 6124 return expression 6125 6126 def _parse_string(self) -> t.Optional[exp.Expression]: 6127 if self._match_set(self.STRING_PARSERS): 6128 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6129 return self._parse_placeholder() 6130 6131 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6132 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6133 6134 def _parse_number(self) -> t.Optional[exp.Expression]: 6135 if self._match_set(self.NUMERIC_PARSERS): 6136 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6137 return self._parse_placeholder() 6138 6139 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6140 if self._match(TokenType.IDENTIFIER): 6141 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6142 return self._parse_placeholder() 6143 6144 def _parse_var( 6145 self, 6146 any_token: bool = False, 6147 tokens: t.Optional[t.Collection[TokenType]] = None, 6148 upper: bool = False, 6149 ) -> t.Optional[exp.Expression]: 6150 if ( 6151 (any_token and self._advance_any()) 6152 or self._match(TokenType.VAR) 6153 or (self._match_set(tokens) if tokens else False) 6154 ): 6155 return self.expression( 6156 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6157 ) 6158 return self._parse_placeholder() 6159 6160 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6161 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6162 self._advance() 6163 return self._prev 6164 return None 6165 6166 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6167 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6168 6169 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6170 return self._parse_primary() or self._parse_var(any_token=True) 6171 6172 def _parse_null(self) -> t.Optional[exp.Expression]: 6173 if self._match_set(self.NULL_TOKENS): 6174 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6175 return self._parse_placeholder() 6176 6177 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6178 if self._match(TokenType.TRUE): 6179 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6180 if self._match(TokenType.FALSE): 6181 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6182 return self._parse_placeholder() 6183 6184 def _parse_star(self) -> t.Optional[exp.Expression]: 6185 if self._match(TokenType.STAR): 6186 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6187 return self._parse_placeholder() 6188 6189 def _parse_parameter(self) -> exp.Parameter: 6190 this = self._parse_identifier() or self._parse_primary_or_var() 6191 return self.expression(exp.Parameter, this=this) 6192 6193 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6194 if self._match_set(self.PLACEHOLDER_PARSERS): 6195 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6196 if placeholder: 6197 return placeholder 6198 self._advance(-1) 6199 return None 6200 6201 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6202 if not self._match_texts(keywords): 6203 return None 6204 if self._match(TokenType.L_PAREN, 
advance=False): 6205 return self._parse_wrapped_csv(self._parse_expression) 6206 6207 expression = self._parse_expression() 6208 return [expression] if expression else None 6209 6210 def _parse_csv( 6211 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6212 ) -> t.List[exp.Expression]: 6213 parse_result = parse_method() 6214 items = [parse_result] if parse_result is not None else [] 6215 6216 while self._match(sep): 6217 self._add_comments(parse_result) 6218 parse_result = parse_method() 6219 if parse_result is not None: 6220 items.append(parse_result) 6221 6222 return items 6223 6224 def _parse_tokens( 6225 self, parse_method: t.Callable, expressions: t.Dict 6226 ) -> t.Optional[exp.Expression]: 6227 this = parse_method() 6228 6229 while self._match_set(expressions): 6230 this = self.expression( 6231 expressions[self._prev.token_type], 6232 this=this, 6233 comments=self._prev_comments, 6234 expression=parse_method(), 6235 ) 6236 6237 return this 6238 6239 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6240 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6241 6242 def _parse_wrapped_csv( 6243 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6244 ) -> t.List[exp.Expression]: 6245 return self._parse_wrapped( 6246 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6247 ) 6248 6249 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6250 wrapped = self._match(TokenType.L_PAREN) 6251 if not wrapped and not optional: 6252 self.raise_error("Expecting (") 6253 parse_result = parse_method() 6254 if wrapped: 6255 self._match_r_paren() 6256 return parse_result 6257 6258 def _parse_expressions(self) -> t.List[exp.Expression]: 6259 return self._parse_csv(self._parse_expression) 6260 6261 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6262 return self._parse_select() or self._parse_set_operations( 6263 self._parse_expression() if alias else self._parse_assignment() 6264 ) 6265 6266 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6267 return self._parse_query_modifiers( 6268 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6269 ) 6270 6271 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6272 this = None 6273 if self._match_texts(self.TRANSACTION_KIND): 6274 this = self._prev.text 6275 6276 self._match_texts(("TRANSACTION", "WORK")) 6277 6278 modes = [] 6279 while True: 6280 mode = [] 6281 while self._match(TokenType.VAR): 6282 mode.append(self._prev.text) 6283 6284 if mode: 6285 modes.append(" ".join(mode)) 6286 if not self._match(TokenType.COMMA): 6287 break 6288 6289 return self.expression(exp.Transaction, this=this, modes=modes) 6290 6291 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6292 chain = None 6293 savepoint = None 6294 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6295 6296 self._match_texts(("TRANSACTION", "WORK")) 6297 6298 if self._match_text_seq("TO"): 6299 self._match_text_seq("SAVEPOINT") 6300 savepoint = self._parse_id_var() 6301 6302 if self._match(TokenType.AND): 6303 chain = not self._match_text_seq("NO") 6304 self._match_text_seq("CHAIN") 6305 6306 if is_rollback: 6307 return self.expression(exp.Rollback, savepoint=savepoint) 6308 6309 return self.expression(exp.Commit, chain=chain) 6310 6311 def _parse_refresh(self) -> exp.Refresh: 6312 self._match(TokenType.TABLE) 6313 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6314 6315 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6316 if not self._match_text_seq("ADD"): 6317 return None 6318 6319 self._match(TokenType.COLUMN) 6320 exists_column = self._parse_exists(not_=True) 6321 expression = self._parse_field_def() 6322 6323 if expression: 6324 expression.set("exists", exists_column) 6325 6326 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6327 if self._match_texts(("FIRST", "AFTER")): 6328 position = self._prev.text 6329 column_position = self.expression( 6330 exp.ColumnPosition, this=self._parse_column(), position=position 6331 ) 6332 expression.set("position", column_position) 6333 6334 return expression 6335 6336 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6337 drop = self._match(TokenType.DROP) and self._parse_drop() 6338 if drop and not isinstance(drop, exp.Command): 6339 drop.set("kind", drop.args.get("kind", "COLUMN")) 6340 return drop 6341 6342 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6343 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6344 return self.expression( 6345 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6346 ) 6347 6348 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6349 index = self._index - 1 6350 6351 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6352 return self._parse_csv( 6353 lambda: self.expression( 6354 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6355 ) 6356 ) 6357 6358 self._retreat(index) 6359 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6360 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6361 6362 if self._match_text_seq("ADD", "COLUMNS"): 6363 schema = self._parse_schema() 6364 if schema: 6365 return [schema] 6366 return [] 6367 6368 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6369 6370 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6371 if self._match_texts(self.ALTER_ALTER_PARSERS): 6372 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6373 6374 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6375 # keyword after ALTER we default to parsing this statement 6376 self._match(TokenType.COLUMN) 6377 column = self._parse_field(any_token=True) 6378 6379 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6380 return self.expression(exp.AlterColumn, this=column, drop=True) 6381 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6382 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6383 if self._match(TokenType.COMMENT): 6384 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6385 if self._match_text_seq("DROP", "NOT", "NULL"): 6386 return self.expression( 6387 exp.AlterColumn, 6388 this=column, 6389 drop=True, 6390 allow_null=True, 6391 ) 6392 if self._match_text_seq("SET", "NOT", "NULL"): 6393 return self.expression( 6394 exp.AlterColumn, 6395 this=column, 6396 allow_null=False, 6397 ) 6398 self._match_text_seq("SET", "DATA") 6399 self._match_text_seq("TYPE") 6400 return self.expression( 6401 exp.AlterColumn, 6402 this=column, 6403 dtype=self._parse_types(), 6404 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6405 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6406 ) 6407 6408 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6409 if self._match_texts(("ALL", "EVEN", "AUTO")): 6410 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6411 6412 self._match_text_seq("KEY", "DISTKEY") 6413 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6414 6415 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6416 if compound: 6417 self._match_text_seq("SORTKEY") 6418 6419 if self._match(TokenType.L_PAREN, advance=False): 6420 return self.expression( 6421 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6422 ) 6423 6424 self._match_texts(("AUTO", "NONE")) 6425 return self.expression( 6426 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6427 ) 6428 6429 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6430 index = self._index - 1 6431 6432 partition_exists = self._parse_exists() 6433 if self._match(TokenType.PARTITION, advance=False): 6434 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6435 6436 self._retreat(index) 6437 return self._parse_csv(self._parse_drop_column) 6438 6439 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6440 if self._match(TokenType.COLUMN): 6441 exists = self._parse_exists() 6442 old_column = self._parse_column() 6443 to = self._match_text_seq("TO") 6444 new_column = self._parse_column() 6445 6446 if old_column is None or to is None or new_column is None: 6447 return None 6448 6449 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6450 6451 self._match_text_seq("TO") 6452 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6453 6454 def _parse_alter_table_set(self) -> exp.AlterSet: 6455 alter_set = self.expression(exp.AlterSet) 6456 6457 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6458 "TABLE", "PROPERTIES" 6459 ): 6460 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6461 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6462 alter_set.set("expressions", [self._parse_assignment()]) 6463 elif self._match_texts(("LOGGED", "UNLOGGED")): 6464 alter_set.set("option", exp.var(self._prev.text.upper())) 6465 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6466 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6467 elif self._match_text_seq("LOCATION"): 6468 alter_set.set("location", self._parse_field()) 6469 elif self._match_text_seq("ACCESS", "METHOD"): 6470 alter_set.set("access_method", self._parse_field()) 6471 elif self._match_text_seq("TABLESPACE"): 6472 alter_set.set("tablespace", self._parse_field()) 6473 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6474 alter_set.set("file_format", [self._parse_field()]) 6475 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6476 alter_set.set("file_format", self._parse_wrapped_options()) 6477 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6478 alter_set.set("copy_options", self._parse_wrapped_options()) 6479 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6480 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6481 else: 6482 if self._match_text_seq("SERDE"): 6483 alter_set.set("serde", self._parse_field()) 6484 6485 alter_set.set("expressions", [self._parse_properties()]) 6486 6487 return 
alter_set 6488 6489 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6490 start = self._prev 6491 6492 if not self._match(TokenType.TABLE): 6493 return self._parse_as_command(start) 6494 6495 exists = self._parse_exists() 6496 only = self._match_text_seq("ONLY") 6497 this = self._parse_table(schema=True) 6498 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6499 6500 if self._next: 6501 self._advance() 6502 6503 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6504 if parser: 6505 actions = ensure_list(parser(self)) 6506 options = self._parse_csv(self._parse_property) 6507 6508 if not self._curr and actions: 6509 return self.expression( 6510 exp.AlterTable, 6511 this=this, 6512 exists=exists, 6513 actions=actions, 6514 only=only, 6515 options=options, 6516 cluster=cluster, 6517 ) 6518 6519 return self._parse_as_command(start) 6520 6521 def _parse_merge(self) -> exp.Merge: 6522 self._match(TokenType.INTO) 6523 target = self._parse_table() 6524 6525 if target and self._match(TokenType.ALIAS, advance=False): 6526 target.set("alias", self._parse_table_alias()) 6527 6528 self._match(TokenType.USING) 6529 using = self._parse_table() 6530 6531 self._match(TokenType.ON) 6532 on = self._parse_assignment() 6533 6534 return self.expression( 6535 exp.Merge, 6536 this=target, 6537 using=using, 6538 on=on, 6539 expressions=self._parse_when_matched(), 6540 ) 6541 6542 def _parse_when_matched(self) -> t.List[exp.When]: 6543 whens = [] 6544 6545 while self._match(TokenType.WHEN): 6546 matched = not self._match(TokenType.NOT) 6547 self._match_text_seq("MATCHED") 6548 source = ( 6549 False 6550 if self._match_text_seq("BY", "TARGET") 6551 else self._match_text_seq("BY", "SOURCE") 6552 ) 6553 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6554 6555 self._match(TokenType.THEN) 6556 6557 if self._match(TokenType.INSERT): 6558 _this = self._parse_star() 6559 if _this: 6560 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6561 else: 6562 then = self.expression( 6563 exp.Insert, 6564 this=self._parse_value(), 6565 expression=self._match_text_seq("VALUES") and self._parse_value(), 6566 ) 6567 elif self._match(TokenType.UPDATE): 6568 expressions = self._parse_star() 6569 if expressions: 6570 then = self.expression(exp.Update, expressions=expressions) 6571 else: 6572 then = self.expression( 6573 exp.Update, 6574 expressions=self._match(TokenType.SET) 6575 and self._parse_csv(self._parse_equality), 6576 ) 6577 elif self._match(TokenType.DELETE): 6578 then = self.expression(exp.Var, this=self._prev.text) 6579 else: 6580 then = None 6581 6582 whens.append( 6583 self.expression( 6584 exp.When, 6585 matched=matched, 6586 source=source, 6587 condition=condition, 6588 then=then, 6589 ) 6590 ) 6591 return whens 6592 6593 def _parse_show(self) -> t.Optional[exp.Expression]: 6594 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6595 if parser: 6596 return parser(self) 6597 return self._parse_as_command(self._prev) 6598 6599 def _parse_set_item_assignment( 6600 self, kind: t.Optional[str] = None 6601 ) -> t.Optional[exp.Expression]: 6602 index = self._index 6603 6604 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6605 return self._parse_set_transaction(global_=kind == "GLOBAL") 6606 6607 left = self._parse_primary() or self._parse_column() 6608 assignment_delimiter = self._match_texts(("=", "TO")) 6609 6610 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6611 self._retreat(index) 6612 return None 6613 6614 right = self._parse_statement() or self._parse_id_var() 6615 if isinstance(right, (exp.Column, exp.Identifier)): 6616 right = exp.var(right.name) 6617 6618 this = self.expression(exp.EQ, this=left, expression=right) 6619 return self.expression(exp.SetItem, this=this, kind=kind) 6620 6621 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6622 self._match_text_seq("TRANSACTION") 6623 characteristics = self._parse_csv( 6624 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6625 ) 6626 return self.expression( 6627 exp.SetItem, 6628 expressions=characteristics, 6629 kind="TRANSACTION", 6630 **{"global": global_}, # type: ignore 6631 ) 6632 6633 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6634 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6635 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6636 6637 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6638 index = self._index 6639 set_ = self.expression( 6640 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6641 ) 6642 6643 if self._curr: 6644 self._retreat(index) 6645 return self._parse_as_command(self._prev) 6646 6647 return set_ 6648 6649 def _parse_var_from_options( 6650 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6651 ) -> t.Optional[exp.Var]: 6652 start = self._curr 6653 if not start: 6654 return None 6655 6656 option = start.text.upper() 6657 continuations = options.get(option) 6658 6659 index = self._index 6660 self._advance() 6661 for keywords in continuations or []: 6662 if isinstance(keywords, str): 6663 keywords = (keywords,) 6664 6665 if self._match_text_seq(*keywords): 6666 option = f"{option} {' '.join(keywords)}" 6667 break 6668 else: 6669 if continuations or continuations is None: 6670 if raise_unmatched: 6671 self.raise_error(f"Unknown option {option}") 6672 6673 self._retreat(index) 6674 return None 6675 6676 return exp.var(option) 6677 6678 def _parse_as_command(self, start: Token) -> exp.Command: 6679 while self._curr: 6680 self._advance() 6681 text = self._find_sql(start, self._prev) 6682 size = len(start.text) 6683 self._warn_unsupported() 6684 return exp.Command(this=text[:size], expression=text[size:]) 6685 6686 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6687 settings = [] 6688 6689 self._match_l_paren() 6690 kind = self._parse_id_var() 6691 6692 if self._match(TokenType.L_PAREN): 6693 while True: 6694 key = self._parse_id_var() 6695 value = self._parse_primary() 6696 6697 if not key and value is None: 6698 break 6699 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6700 self._match(TokenType.R_PAREN) 6701 6702 self._match_r_paren() 6703 6704 return self.expression( 6705 exp.DictProperty, 6706 this=this, 6707 kind=kind.this if kind else None, 6708 settings=settings, 6709 ) 6710 6711 def _parse_dict_range(self, this: str) -> exp.DictRange: 6712 self._match_l_paren() 6713 has_min = self._match_text_seq("MIN") 6714 if has_min: 6715 min = self._parse_var() or self._parse_primary() 6716 self._match_text_seq("MAX") 6717 max = self._parse_var() or self._parse_primary() 6718 else: 6719 max = self._parse_var() or self._parse_primary() 6720 min = exp.Literal.number(0) 6721 self._match_r_paren() 6722 return self.expression(exp.DictRange, this=this, min=min, max=max) 6723 6724 def _parse_comprehension( 6725 self, this: 
t.Optional[exp.Expression] 6726 ) -> t.Optional[exp.Comprehension]: 6727 index = self._index 6728 expression = self._parse_column() 6729 if not self._match(TokenType.IN): 6730 self._retreat(index - 1) 6731 return None 6732 iterator = self._parse_column() 6733 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6734 return self.expression( 6735 exp.Comprehension, 6736 this=this, 6737 expression=expression, 6738 iterator=iterator, 6739 condition=condition, 6740 ) 6741 6742 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6743 if self._match(TokenType.HEREDOC_STRING): 6744 return self.expression(exp.Heredoc, this=self._prev.text) 6745 6746 if not self._match_text_seq("$"): 6747 return None 6748 6749 tags = ["$"] 6750 tag_text = None 6751 6752 if self._is_connected(): 6753 self._advance() 6754 tags.append(self._prev.text.upper()) 6755 else: 6756 self.raise_error("No closing $ found") 6757 6758 if tags[-1] != "$": 6759 if self._is_connected() and self._match_text_seq("$"): 6760 tag_text = tags[-1] 6761 tags.append("$") 6762 else: 6763 self.raise_error("No closing $ found") 6764 6765 heredoc_start = self._curr 6766 6767 while self._curr: 6768 if self._match_text_seq(*tags, advance=False): 6769 this = self._find_sql(heredoc_start, self._prev) 6770 self._advance(len(tags)) 6771 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6772 6773 self._advance() 6774 6775 self.raise_error(f"No closing {''.join(tags)} found") 6776 return None 6777 6778 def _find_parser( 6779 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6780 ) -> t.Optional[t.Callable]: 6781 if not self._curr: 6782 return None 6783 6784 index = self._index 6785 this = [] 6786 while True: 6787 # The current token might be multiple words 6788 curr = self._curr.text.upper() 6789 key = curr.split(" ") 6790 this.append(curr) 6791 6792 self._advance() 6793 result, trie = in_trie(trie, key) 6794 if result == TrieResult.FAILED: 6795 break 6796 6797 if result == TrieResult.EXISTS: 6798 subparser = parsers[" ".join(this)] 6799 return subparser 6800 6801 self._retreat(index) 6802 return None 6803 6804 def _match(self, token_type, advance=True, expression=None): 6805 if not self._curr: 6806 return None 6807 6808 if self._curr.token_type == token_type: 6809 if advance: 6810 self._advance() 6811 self._add_comments(expression) 6812 return True 6813 6814 return None 6815 6816 def _match_set(self, types, advance=True): 6817 if not self._curr: 6818 return None 6819 6820 if self._curr.token_type in types: 6821 if advance: 6822 self._advance() 6823 return True 6824 6825 return None 6826 6827 def _match_pair(self, token_type_a, token_type_b, advance=True): 6828 if not self._curr or not self._next: 6829 return None 6830 6831 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6832 if advance: 6833 self._advance(2) 6834 return True 6835 6836 return None 6837 6838 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6839 if not self._match(TokenType.L_PAREN, expression=expression): 6840 self.raise_error("Expecting (") 6841 6842 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6843 if not self._match(TokenType.R_PAREN, expression=expression): 6844 self.raise_error("Expecting )") 6845 6846 def _match_texts(self, texts, advance=True): 6847 if self._curr and self._curr.text.upper() in texts: 6848 if advance: 6849 self._advance() 6850 return True 6851 return None 6852 6853 def _match_text_seq(self, *texts, advance=True): 6854 index = 
self._index 6855 for text in texts: 6856 if self._curr and self._curr.text.upper() == text: 6857 self._advance() 6858 else: 6859 self._retreat(index) 6860 return None 6861 6862 if not advance: 6863 self._retreat(index) 6864 6865 return True 6866 6867 def _replace_lambda( 6868 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6869 ) -> t.Optional[exp.Expression]: 6870 if not node: 6871 return node 6872 6873 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6874 6875 for column in node.find_all(exp.Column): 6876 typ = lambda_types.get(column.parts[0].name) 6877 if typ is not None: 6878 dot_or_id = column.to_dot() if column.table else column.this 6879 6880 if typ: 6881 dot_or_id = self.expression( 6882 exp.Cast, 6883 this=dot_or_id, 6884 to=typ, 6885 ) 6886 6887 parent = column.parent 6888 6889 while isinstance(parent, exp.Dot): 6890 if not isinstance(parent.parent, exp.Dot): 6891 parent.replace(dot_or_id) 6892 break 6893 parent = parent.parent 6894 else: 6895 if column is node: 6896 node = dot_or_id 6897 else: 6898 column.replace(dot_or_id) 6899 return node 6900 6901 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6902 start = self._prev 6903 6904 # Not to be confused with TRUNCATE(number, decimals) function call 6905 if self._match(TokenType.L_PAREN): 6906 self._retreat(self._index - 2) 6907 return self._parse_function() 6908 6909 # Clickhouse supports TRUNCATE DATABASE as well 6910 is_database = self._match(TokenType.DATABASE) 6911 6912 self._match(TokenType.TABLE) 6913 6914 exists = self._parse_exists(not_=False) 6915 6916 expressions = self._parse_csv( 6917 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6918 ) 6919 6920 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6921 6922 if self._match_text_seq("RESTART", "IDENTITY"): 6923 identity = "RESTART" 6924 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6925 identity = "CONTINUE" 6926 else: 6927 identity = None 6928 6929 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6930 option = self._prev.text 6931 else: 6932 option = None 6933 6934 partition = self._parse_partition() 6935 6936 # Fallback case 6937 if self._curr: 6938 return self._parse_as_command(start) 6939 6940 return self.expression( 6941 exp.TruncateTable, 6942 expressions=expressions, 6943 is_database=is_database, 6944 exists=exists, 6945 cluster=cluster, 6946 identity=identity, 6947 option=option, 6948 partition=partition, 6949 ) 6950 6951 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6952 this = self._parse_ordered(self._parse_opclass) 6953 6954 if not self._match(TokenType.WITH): 6955 return this 6956 6957 op = self._parse_var(any_token=True) 6958 6959 return self.expression(exp.WithOperator, this=this, op=op) 6960 6961 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6962 self._match(TokenType.EQ) 6963 self._match(TokenType.L_PAREN) 6964 6965 opts: t.List[t.Optional[exp.Expression]] = [] 6966 while self._curr and not self._match(TokenType.R_PAREN): 6967 if self._match_text_seq("FORMAT_NAME", "="): 6968 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6969 # so we parse it separately to use _parse_field() 6970 prop = self.expression( 6971 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6972 ) 6973 opts.append(prop) 6974 else: 6975 opts.append(self._parse_property()) 6976 6977 self._match(TokenType.COMMA) 6978 6979 return opts 6980 6981 def 
_parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6982 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6983 6984 options = [] 6985 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6986 option = self._parse_var(any_token=True) 6987 prev = self._prev.text.upper() 6988 6989 # Different dialects might separate options and values by white space, "=" and "AS" 6990 self._match(TokenType.EQ) 6991 self._match(TokenType.ALIAS) 6992 6993 param = self.expression(exp.CopyParameter, this=option) 6994 6995 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6996 TokenType.L_PAREN, advance=False 6997 ): 6998 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6999 param.set("expressions", self._parse_wrapped_options()) 7000 elif prev == "FILE_FORMAT": 7001 # T-SQL's external file format case 7002 param.set("expression", self._parse_field()) 7003 else: 7004 param.set("expression", self._parse_unquoted_field()) 7005 7006 options.append(param) 7007 self._match(sep) 7008 7009 return options 7010 7011 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7012 expr = self.expression(exp.Credentials) 7013 7014 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7015 expr.set("storage", self._parse_field()) 7016 if self._match_text_seq("CREDENTIALS"): 7017 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7018 creds = ( 7019 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7020 ) 7021 expr.set("credentials", creds) 7022 if self._match_text_seq("ENCRYPTION"): 7023 expr.set("encryption", self._parse_wrapped_options()) 7024 if self._match_text_seq("IAM_ROLE"): 7025 expr.set("iam_role", self._parse_field()) 7026 if self._match_text_seq("REGION"): 7027 expr.set("region", self._parse_field()) 7028 7029 return expr 7030 7031 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7032 return self._parse_field() 7033 7034 def _parse_copy(self) -> exp.Copy | exp.Command: 7035 start = self._prev 7036 7037 self._match(TokenType.INTO) 7038 7039 this = ( 7040 self._parse_select(nested=True, parse_subquery_alias=False) 7041 if self._match(TokenType.L_PAREN, advance=False) 7042 else self._parse_table(schema=True) 7043 ) 7044 7045 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7046 7047 files = self._parse_csv(self._parse_file_location) 7048 credentials = self._parse_credentials() 7049 7050 self._match_text_seq("WITH") 7051 7052 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7053 7054 # Fallback case 7055 if self._curr: 7056 return self._parse_as_command(start) 7057 7058 return self.expression( 7059 exp.Copy, 7060 this=this, 7061 kind=kind, 7062 credentials=credentials, 7063 files=files, 7064 params=params, 7065 )
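The function-specific hooks in the listing above, such as _parse_decode, _parse_cast and _parse_window, are easiest to observe through sqlglot's public API. A minimal sketch, assuming the default dialect registers DECODE as described in _parse_decode's docstring; the exact generated SQL is illustrative:

import sqlglot
from sqlglot import exp

# The search/result variant of DECODE is rewritten into a CASE tree at parse
# time (NULL searches become IS NULL checks), so it transpiles to dialects
# that lack DECODE.
tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t")
assert tree.find(exp.Case) is not None
print(tree.sql(dialect="duckdb"))

# OVER (...) clauses are parsed by _parse_window into exp.Window nodes.
win = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
assert win.find(exp.Window) is not None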
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
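A Parser is rarely constructed by hand, since sqlglot.parse and sqlglot.parse_one do this wiring internally, but building one directly is straightforward. A sketch using only the documented arguments:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(
    error_level=ErrorLevel.RAISE,  # accumulate errors, raise them together
    error_message_context=50,      # characters of SQL shown around each error
    max_errors=5,
    dialect="duckdb",              # resolved through Dialect.get_or_raise
)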
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
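Because parse expects tokens rather than raw SQL, it pairs with the Tokenizer. A minimal sketch of the flow that sqlglot.parse roughly wraps (a real dialect would use its own tokenizer and parser classes):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
print(len(trees))  # 2: one syntax tree per semicolon-separated statement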
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
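The public wrapper parse_one(sql, into=...) routes through parse_into and is the easiest way to exercise it. A sketch, assuming exp.Select has a registered parser in EXPRESSION_PARSERS (as it does in current sqlglot):

import sqlglot
from sqlglot import exp
from sqlglot.errors import ParseError

# Succeeds: the tokens form a valid SELECT
select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)

# Fails: the merged ParseError records the attempted type on each error
try:
    sqlglot.parse_one("CREATE TABLE t (a INT)", into=exp.Select)
except ParseError as e:
    print(e.errors[0].get("into_expression"))  # expected: exp.Select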
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
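check_errors runs at the end of each parse, so its effect is easiest to see through the public API. A sketch of the RAISE behavior, where errors accumulate and are thrown together:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t", error_level=ErrorLevel.RAISE)
except ParseError as e:
    # Up to max_errors messages are concatenated; all of them are kept on e.errors
    print(len(e.errors), e.errors[0]["description"])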
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
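Dialect parsers call raise_error when a rule hits invalid syntax; going through it rather than raising directly keeps the WARN and IGNORE error levels working. A hypothetical sketch (MyParser and the FROBNICATE rule are illustrative names, not part of sqlglot):

import typing as t

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import TokenType

class MyParser(Parser):  # illustrative subclass
    def _parse_frobnicate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            # Raises immediately under IMMEDIATE; otherwise records and continues
            self.raise_error("Expecting ( after FROBNICATE")
        this = self._parse_bitwise()
        self._match_r_paren()
        return self.expression(exp.Anonymous, this="FROBNICATE", expressions=[this])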
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
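Compared to constructing the Expression class directly, expression() also attaches any comments buffered during parsing and validates mandatory arguments. A minimal sketch on a standalone Parser (in practice it is called from within _parse_* rules, where the comment attachment matters):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(
    exp.Cast, this=exp.column("x"), to=exp.DataType.build("INT")
)  # validated: both mandatory Cast arguments are set
print(node.sql())  # CAST(x AS INT)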
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
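Validation reports a message for each missing mandatory argument; for example, exp.Cast declares "to" as required. A sketch, assuming the default IMMEDIATE error level so the error is raised on the spot:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
try:
    parser.validate_expression(exp.Cast(this=exp.column("x")))  # no "to" set
except ParseError as e:
    print(e)  # expected: Required keyword: 'to' missing for ...Cast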