sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
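
# Illustrative sketch (not part of the module): the builders above turn a SQL
# function's raw argument list into an AST node. build_like, for instance,
# swaps the arguments, since LIKE(pattern, subject) is equivalent to
# `subject LIKE pattern`:
#
#   >>> from sqlglot import exp
#   >>> from sqlglot.parser import build_like
#   >>> build_like([exp.Literal.string("a%"), exp.column("name")]).sql()
#   "name LIKE 'a%'"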


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
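
    # Illustrative sketch (not part of the module): FUNCTIONS is consulted when
    # a known function name is parsed, so MOD(...) goes through build_mod above:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
    #   'SELECT (a + 1) % 7'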

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
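
    # Illustrative sketch (not part of the module): because keywords like
    # FILTER are in ID_VAR_TOKENS (and hence ALIAS_TOKENS), they can still be
    # used as plain identifiers or aliases:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 AS filter").sql()
    #   'SELECT 1 AS filter'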

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
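
    # Illustrative sketch (not part of the module): the tables above drive the
    # precedence-climbing expression parser (FACTOR binds tighter than TERM, so
    # 1 + 2 * 3 parses as Add(..., Mul(...))), while COLUMN_OPERATORS handles
    # postfix operators such as the double-colon cast:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a::INT").sql()
    #   'SELECT CAST(a AS INT)'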

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
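
    # Illustrative sketch (not part of the module): _parse_statement dispatches
    # on the first token through STATEMENT_PARSERS, e.g. CREATE is routed to
    # _parse_create below:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TABLE t (c INT)").sql()
    #   'CREATE TABLE t (c INT)'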

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
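
    # Illustrative sketch (not part of the module): RANGE_PARSERS attach
    # operators that follow an already-parsed operand, such as BETWEEN and IN:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("x BETWEEN 1 AND 10").sql()
    #   'x BETWEEN 1 AND 10'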

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
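
    # Illustrative sketch (not part of the module): PROPERTY_PARSERS keys are
    # matched as keywords while parsing CREATE options, e.g. TEMPORARY maps to
    # exp.TemporaryProperty:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TEMPORARY TABLE t (c INT)").sql()
    #   'CREATE TEMPORARY TABLE t (c INT)'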

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
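
    # Illustrative sketch (not part of the module): column constraints in a
    # schema definition are parsed through CONSTRAINT_PARSERS:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY)").sql()
    #   'CREATE TABLE t (id INT PRIMARY KEY)'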

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
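
    # Illustrative sketch (not part of the module): FUNCTION_PARSERS covers
    # functions whose argument syntax is non-standard, e.g. CAST's `AS`:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT CAST(x AS TEXT)").sql()
    #   'SELECT CAST(x AS TEXT)'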

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
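
    # Illustrative sketch (not part of the module): QUERY_MODIFIER_PARSERS
    # attaches trailing clauses (WHERE, ORDER BY, LIMIT, ...) to a parsed query:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a FROM t WHERE a > 1 LIMIT 5").sql()
    #   'SELECT a FROM t WHERE a > 1 LIMIT 5'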

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True
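
    # Hypothetical illustration (class name invented): dialect parsers tune the
    # knobs above by subclassing, e.g. a dialect whose TRIM takes the pattern
    # first and which allows string aliases would just flip the flags:
    #
    #   class MyDialectParser(Parser):
    #       TRIM_PATTERN_FIRST = True
    #       STRING_ALIASES = True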

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
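
    # Illustrative sketch (not part of the module), using the public Dialect
    # API to feed parse() with tokens from the matching tokenizer:
    #
    #   >>> from sqlglot.dialects import Dialect
    #   >>> dialect = Dialect.get_or_raise("duckdb")
    #   >>> dialect.parser().parse(dialect.tokenize("SELECT 1"))[0].sql()
    #   'SELECT 1'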

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
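
    # Illustrative sketch (not part of the module): with the default
    # ErrorLevel.IMMEDIATE, raise_error surfaces problems as a ParseError that
    # carries the structured fields built above:
    #
    #   import sqlglot
    #
    #   try:
    #       sqlglot.parse_one("SELECT 1 +")
    #   except sqlglot.errors.ParseError as e:
    #       print(e.errors[0]["description"])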

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
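
    # Illustrative sketch (not part of the module): _try_parse is the parser's
    # backtracking primitive; a sub-parser runs with errors forced to raise,
    # and the token cursor is rewound if it fails:
    #
    #   this = self._try_parse(self._parse_types)  # None if no type parses here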

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
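
    # Illustrative sketch (not part of the module): COMMENT statements are
    # routed to _parse_comment via STATEMENT_PARSERS:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("COMMENT ON TABLE t IS 'raw data'").sql()
    #   "COMMENT ON TABLE t IS 'raw data'"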

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
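
    # Illustrative sketch (not part of the module): _parse_drop handles the
    # optional qualifiers around DROP:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("DROP TABLE IF EXISTS db.t").sql()
    #   'DROP TABLE IF EXISTS db.t'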
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1692 expression = self._parse_string() 1693 extend_props(self._parse_properties()) 1694 else: 1695 expression = self._parse_statement() 1696 1697 end = self._match_text_seq("END") 1698 1699 if return_: 1700 expression = self.expression(exp.Return, this=expression) 1701 elif create_token.token_type == TokenType.INDEX: 1702 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1703 if not self._match(TokenType.ON): 1704 index = self._parse_id_var() 1705 anonymous = False 1706 else: 1707 index = None 1708 anonymous = True 1709 1710 this = self._parse_index(index=index, anonymous=anonymous) 1711 elif create_token.token_type in self.DB_CREATABLES: 1712 table_parts = self._parse_table_parts( 1713 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1714 ) 1715 1716 # exp.Properties.Location.POST_NAME 1717 self._match(TokenType.COMMA) 1718 extend_props(self._parse_properties(before=True)) 1719 1720 this = self._parse_schema(this=table_parts) 1721 1722 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1723 extend_props(self._parse_properties()) 1724 1725 self._match(TokenType.ALIAS) 1726 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1727 # exp.Properties.Location.POST_ALIAS 1728 extend_props(self._parse_properties()) 1729 1730 if create_token.token_type == TokenType.SEQUENCE: 1731 expression = self._parse_types() 1732 extend_props(self._parse_properties()) 1733 else: 1734 expression = self._parse_ddl_select() 1735 1736 if create_token.token_type == TokenType.TABLE: 1737 # exp.Properties.Location.POST_EXPRESSION 1738 extend_props(self._parse_properties()) 1739 1740 indexes = [] 1741 while True: 1742 index = self._parse_index() 1743 1744 # exp.Properties.Location.POST_INDEX 1745 extend_props(self._parse_properties()) 1746 1747 if not index: 1748 break 1749 else: 1750 self._match(TokenType.COMMA) 1751 indexes.append(index) 1752 elif create_token.token_type == TokenType.VIEW: 1753 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1754 no_schema_binding = True 1755 1756 shallow = self._match_text_seq("SHALLOW") 1757 1758 if self._match_texts(self.CLONE_KEYWORDS): 1759 copy = self._prev.text.lower() == "copy" 1760 clone = self.expression( 1761 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1762 ) 1763 1764 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1765 return self._parse_as_command(start) 1766 1767 return self.expression( 1768 exp.Create, 1769 comments=comments, 1770 this=this, 1771 kind=create_token.text.upper(), 1772 replace=replace, 1773 unique=unique, 1774 expression=expression, 1775 exists=exists, 1776 properties=properties, 1777 indexes=indexes, 1778 no_schema_binding=no_schema_binding, 1779 begin=begin, 1780 end=end, 1781 clone=clone, 1782 ) 1783 1784 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1785 seq = exp.SequenceProperties() 1786 1787 options = [] 1788 index = self._index 1789 1790 while self._curr: 1791 self._match(TokenType.COMMA) 1792 if self._match_text_seq("INCREMENT"): 1793 self._match_text_seq("BY") 1794 self._match_text_seq("=") 1795 seq.set("increment", self._parse_term()) 1796 elif self._match_text_seq("MINVALUE"): 1797 seq.set("minvalue", self._parse_term()) 1798 elif self._match_text_seq("MAXVALUE"): 1799 seq.set("maxvalue", self._parse_term()) 1800 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1801 self._match_text_seq("=") 1802 seq.set("start", self._parse_term()) 1803 elif self._match_text_seq("CACHE"): 1804 # T-SQL allows empty CACHE which is initialized dynamically 1805 seq.set("cache", self._parse_number() or True) 1806 elif self._match_text_seq("OWNED", "BY"): 1807 # "OWNED BY NONE" is the default 1808 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1809 else: 1810 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1811 if opt: 1812 options.append(opt) 1813 else: 1814 break 1815 1816 seq.set("options", options if options else None) 1817 return None if self._index == index else seq 1818 1819 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1820 # only used for teradata currently 1821 self._match(TokenType.COMMA) 1822 1823 kwargs = { 1824 "no": self._match_text_seq("NO"), 1825 "dual": self._match_text_seq("DUAL"), 1826 "before": self._match_text_seq("BEFORE"), 1827 "default": self._match_text_seq("DEFAULT"), 1828 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1829 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1830 "after": self._match_text_seq("AFTER"), 1831 "minimum": self._match_texts(("MIN", "MINIMUM")), 1832 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1833 } 1834 1835 if self._match_texts(self.PROPERTY_PARSERS): 1836 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1837 try: 1838 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1839 except TypeError: 1840 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1841 1842 return None 1843 1844 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1845 return self._parse_wrapped_csv(self._parse_property) 1846 1847 def _parse_property(self) -> t.Optional[exp.Expression]: 1848 if self._match_texts(self.PROPERTY_PARSERS): 1849 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1850 1851 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1852 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1853 1854 if self._match_text_seq("COMPOUND", "SORTKEY"): 1855 return self._parse_sortkey(compound=True) 1856 1857 if self._match_text_seq("SQL", "SECURITY"): 1858 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1859 1860 index = self._index 1861 key = self._parse_column() 1862 1863 if not self._match(TokenType.EQ): 1864 self._retreat(index) 1865 return self._parse_sequence_properties() 1866 1867 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1868 if isinstance(key, exp.Column): 1869 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1870 1871 value = self._parse_bitwise() or self._parse_var(any_token=True) 1872 1873 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1874 if isinstance(value, exp.Column): 1875 value = exp.var(value.name) 1876 1877 return self.expression(exp.Property, this=key, value=value) 1878 1879 def _parse_stored(self) -> exp.FileFormatProperty: 1880 self._match(TokenType.ALIAS) 1881 1882 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1883 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1884 1885 return self.expression( 1886 exp.FileFormatProperty, 1887 this=( 1888 self.expression( 1889 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1890 ) 1891 if 
input_format or output_format 1892 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1893 ), 1894 ) 1895 1896 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1897 field = self._parse_field() 1898 if isinstance(field, exp.Identifier) and not field.quoted: 1899 field = exp.var(field) 1900 1901 return field 1902 1903 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1904 self._match(TokenType.EQ) 1905 self._match(TokenType.ALIAS) 1906 1907 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1908 1909 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1910 properties = [] 1911 while True: 1912 if before: 1913 prop = self._parse_property_before() 1914 else: 1915 prop = self._parse_property() 1916 if not prop: 1917 break 1918 for p in ensure_list(prop): 1919 properties.append(p) 1920 1921 if properties: 1922 return self.expression(exp.Properties, expressions=properties) 1923 1924 return None 1925 1926 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1927 return self.expression( 1928 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1929 ) 1930 1931 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1932 if self._index >= 2: 1933 pre_volatile_token = self._tokens[self._index - 2] 1934 else: 1935 pre_volatile_token = None 1936 1937 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1938 return exp.VolatileProperty() 1939 1940 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1941 1942 def _parse_retention_period(self) -> exp.Var: 1943 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1944 number = self._parse_number() 1945 number_str = f"{number} " if number else "" 1946 unit = self._parse_var(any_token=True) 1947 return exp.var(f"{number_str}{unit}") 1948 1949 def _parse_system_versioning_property( 1950 self, with_: bool = False 1951 ) -> exp.WithSystemVersioningProperty: 1952 self._match(TokenType.EQ) 1953 prop = self.expression( 1954 exp.WithSystemVersioningProperty, 1955 **{ # type: ignore 1956 "on": True, 1957 "with": with_, 1958 }, 1959 ) 1960 1961 if self._match_text_seq("OFF"): 1962 prop.set("on", False) 1963 return prop 1964 1965 self._match(TokenType.ON) 1966 if self._match(TokenType.L_PAREN): 1967 while self._curr and not self._match(TokenType.R_PAREN): 1968 if self._match_text_seq("HISTORY_TABLE", "="): 1969 prop.set("this", self._parse_table_parts()) 1970 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1971 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1972 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1973 prop.set("retention_period", self._parse_retention_period()) 1974 1975 self._match(TokenType.COMMA) 1976 1977 return prop 1978 1979 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1980 self._match(TokenType.EQ) 1981 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1982 prop = self.expression(exp.DataDeletionProperty, on=on) 1983 1984 if self._match(TokenType.L_PAREN): 1985 while self._curr and not self._match(TokenType.R_PAREN): 1986 if self._match_text_seq("FILTER_COLUMN", "="): 1987 prop.set("filter_column", self._parse_column()) 1988 elif self._match_text_seq("RETENTION_PERIOD", "="): 1989 prop.set("retention_period", self._parse_retention_period()) 1990 1991 
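# A minimal sketch of the option list this loop consumes, with key names taken
# from the branches above (the surrounding DDL shape is an assumption, not
# something this parser prescribes):
#
#   DATA_DELETION = ON (FILTER_COLUMN = created_at, RETENTION_PERIOD = 30 DAYS)
#
# Each pass handles one KEY = value pair and then eats the trailing comma below.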
self._match(TokenType.COMMA) 1992 1993 return prop 1994 1995 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1996 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1997 prop = self._parse_system_versioning_property(with_=True) 1998 self._match_r_paren() 1999 return prop 2000 2001 if self._match(TokenType.L_PAREN, advance=False): 2002 return self._parse_wrapped_properties() 2003 2004 if self._match_text_seq("JOURNAL"): 2005 return self._parse_withjournaltable() 2006 2007 if self._match_texts(self.VIEW_ATTRIBUTES): 2008 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2009 2010 if self._match_text_seq("DATA"): 2011 return self._parse_withdata(no=False) 2012 elif self._match_text_seq("NO", "DATA"): 2013 return self._parse_withdata(no=True) 2014 2015 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2016 return self._parse_serde_properties(with_=True) 2017 2018 if not self._next: 2019 return None 2020 2021 return self._parse_withisolatedloading() 2022 2023 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2024 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2025 self._match(TokenType.EQ) 2026 2027 user = self._parse_id_var() 2028 self._match(TokenType.PARAMETER) 2029 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2030 2031 if not user or not host: 2032 return None 2033 2034 return exp.DefinerProperty(this=f"{user}@{host}") 2035 2036 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2037 self._match(TokenType.TABLE) 2038 self._match(TokenType.EQ) 2039 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2040 2041 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2042 return self.expression(exp.LogProperty, no=no) 2043 2044 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2045 return self.expression(exp.JournalProperty, **kwargs) 2046 2047 def _parse_checksum(self) -> exp.ChecksumProperty: 2048 self._match(TokenType.EQ) 2049 2050 on = None 2051 if self._match(TokenType.ON): 2052 on = True 2053 elif self._match_text_seq("OFF"): 2054 on = False 2055 2056 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2057 2058 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2059 return self.expression( 2060 exp.Cluster, 2061 expressions=( 2062 self._parse_wrapped_csv(self._parse_ordered) 2063 if wrapped 2064 else self._parse_csv(self._parse_ordered) 2065 ), 2066 ) 2067 2068 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2069 self._match_text_seq("BY") 2070 2071 self._match_l_paren() 2072 expressions = self._parse_csv(self._parse_column) 2073 self._match_r_paren() 2074 2075 if self._match_text_seq("SORTED", "BY"): 2076 self._match_l_paren() 2077 sorted_by = self._parse_csv(self._parse_ordered) 2078 self._match_r_paren() 2079 else: 2080 sorted_by = None 2081 2082 self._match(TokenType.INTO) 2083 buckets = self._parse_number() 2084 self._match_text_seq("BUCKETS") 2085 2086 return self.expression( 2087 exp.ClusteredByProperty, 2088 expressions=expressions, 2089 sorted_by=sorted_by, 2090 buckets=buckets, 2091 ) 2092 2093 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2094 if not self._match_text_seq("GRANTS"): 2095 self._retreat(self._index - 1) 2096 return None 2097 2098 return self.expression(exp.CopyGrantsProperty) 2099 2100 def _parse_freespace(self) -> exp.FreespaceProperty: 2101 self._match(TokenType.EQ) 2102 return 
self.expression( 2103 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2104 ) 2105 2106 def _parse_mergeblockratio( 2107 self, no: bool = False, default: bool = False 2108 ) -> exp.MergeBlockRatioProperty: 2109 if self._match(TokenType.EQ): 2110 return self.expression( 2111 exp.MergeBlockRatioProperty, 2112 this=self._parse_number(), 2113 percent=self._match(TokenType.PERCENT), 2114 ) 2115 2116 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2117 2118 def _parse_datablocksize( 2119 self, 2120 default: t.Optional[bool] = None, 2121 minimum: t.Optional[bool] = None, 2122 maximum: t.Optional[bool] = None, 2123 ) -> exp.DataBlocksizeProperty: 2124 self._match(TokenType.EQ) 2125 size = self._parse_number() 2126 2127 units = None 2128 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2129 units = self._prev.text 2130 2131 return self.expression( 2132 exp.DataBlocksizeProperty, 2133 size=size, 2134 units=units, 2135 default=default, 2136 minimum=minimum, 2137 maximum=maximum, 2138 ) 2139 2140 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2141 self._match(TokenType.EQ) 2142 always = self._match_text_seq("ALWAYS") 2143 manual = self._match_text_seq("MANUAL") 2144 never = self._match_text_seq("NEVER") 2145 default = self._match_text_seq("DEFAULT") 2146 2147 autotemp = None 2148 if self._match_text_seq("AUTOTEMP"): 2149 autotemp = self._parse_schema() 2150 2151 return self.expression( 2152 exp.BlockCompressionProperty, 2153 always=always, 2154 manual=manual, 2155 never=never, 2156 default=default, 2157 autotemp=autotemp, 2158 ) 2159 2160 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2161 index = self._index 2162 no = self._match_text_seq("NO") 2163 concurrent = self._match_text_seq("CONCURRENT") 2164 2165 if not self._match_text_seq("ISOLATED", "LOADING"): 2166 self._retreat(index) 2167 return None 2168 2169 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2170 return self.expression( 2171 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2172 ) 2173 2174 def _parse_locking(self) -> exp.LockingProperty: 2175 if self._match(TokenType.TABLE): 2176 kind = "TABLE" 2177 elif self._match(TokenType.VIEW): 2178 kind = "VIEW" 2179 elif self._match(TokenType.ROW): 2180 kind = "ROW" 2181 elif self._match_text_seq("DATABASE"): 2182 kind = "DATABASE" 2183 else: 2184 kind = None 2185 2186 if kind in ("DATABASE", "TABLE", "VIEW"): 2187 this = self._parse_table_parts() 2188 else: 2189 this = None 2190 2191 if self._match(TokenType.FOR): 2192 for_or_in = "FOR" 2193 elif self._match(TokenType.IN): 2194 for_or_in = "IN" 2195 else: 2196 for_or_in = None 2197 2198 if self._match_text_seq("ACCESS"): 2199 lock_type = "ACCESS" 2200 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2201 lock_type = "EXCLUSIVE" 2202 elif self._match_text_seq("SHARE"): 2203 lock_type = "SHARE" 2204 elif self._match_text_seq("READ"): 2205 lock_type = "READ" 2206 elif self._match_text_seq("WRITE"): 2207 lock_type = "WRITE" 2208 elif self._match_text_seq("CHECKSUM"): 2209 lock_type = "CHECKSUM" 2210 else: 2211 lock_type = None 2212 2213 override = self._match_text_seq("OVERRIDE") 2214 2215 return self.expression( 2216 exp.LockingProperty, 2217 this=this, 2218 kind=kind, 2219 for_or_in=for_or_in, 2220 lock_type=lock_type, 2221 override=override, 2222 ) 2223 2224 def _parse_partition_by(self) -> t.List[exp.Expression]: 2225 if 
self._match(TokenType.PARTITION_BY): 2226 return self._parse_csv(self._parse_assignment) 2227 return [] 2228 2229 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2230 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2231 if self._match_text_seq("MINVALUE"): 2232 return exp.var("MINVALUE") 2233 if self._match_text_seq("MAXVALUE"): 2234 return exp.var("MAXVALUE") 2235 return self._parse_bitwise() 2236 2237 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2238 expression = None 2239 from_expressions = None 2240 to_expressions = None 2241 2242 if self._match(TokenType.IN): 2243 this = self._parse_wrapped_csv(self._parse_bitwise) 2244 elif self._match(TokenType.FROM): 2245 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2246 self._match_text_seq("TO") 2247 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2248 elif self._match_text_seq("WITH", "(", "MODULUS"): 2249 this = self._parse_number() 2250 self._match_text_seq(",", "REMAINDER") 2251 expression = self._parse_number() 2252 self._match_r_paren() 2253 else: 2254 self.raise_error("Failed to parse partition bound spec.") 2255 2256 return self.expression( 2257 exp.PartitionBoundSpec, 2258 this=this, 2259 expression=expression, 2260 from_expressions=from_expressions, 2261 to_expressions=to_expressions, 2262 ) 2263 2264 # https://www.postgresql.org/docs/current/sql-createtable.html 2265 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2266 if not self._match_text_seq("OF"): 2267 self._retreat(self._index - 1) 2268 return None 2269 2270 this = self._parse_table(schema=True) 2271 2272 if self._match(TokenType.DEFAULT): 2273 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2274 elif self._match_text_seq("FOR", "VALUES"): 2275 expression = self._parse_partition_bound_spec() 2276 else: 2277 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2278 2279 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2280 2281 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2282 self._match(TokenType.EQ) 2283 return self.expression( 2284 exp.PartitionedByProperty, 2285 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2286 ) 2287 2288 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2289 if self._match_text_seq("AND", "STATISTICS"): 2290 statistics = True 2291 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2292 statistics = False 2293 else: 2294 statistics = None 2295 2296 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2297 2298 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2299 if self._match_text_seq("SQL"): 2300 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2301 return None 2302 2303 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2304 if self._match_text_seq("SQL", "DATA"): 2305 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2306 return None 2307 2308 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2309 if self._match_text_seq("PRIMARY", "INDEX"): 2310 return exp.NoPrimaryIndexProperty() 2311 if self._match_text_seq("SQL"): 2312 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2313 return None 2314 2315 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2316 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2317 return exp.OnCommitProperty() 2318 
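# For instance, ANSI/Postgres temp-table DDL hits the branch above, the DELETE
# ROWS variant hits the next one, and anything else (e.g. ClickHouse's ON CLUSTER,
# which _parse_drop routes here) falls through to the generic OnProperty.
# A sketch, assuming the stock postgres dialect:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "CREATE TEMPORARY TABLE t (c INT) ON COMMIT PRESERVE ROWS",
#   ...     read="postgres",
#   ... )  # the resulting Create's properties include exp.OnCommitProperty()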
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2319 return exp.OnCommitProperty(delete=True) 2320 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2321 2322 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2323 if self._match_text_seq("SQL", "DATA"): 2324 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2325 return None 2326 2327 def _parse_distkey(self) -> exp.DistKeyProperty: 2328 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2329 2330 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2331 table = self._parse_table(schema=True) 2332 2333 options = [] 2334 while self._match_texts(("INCLUDING", "EXCLUDING")): 2335 this = self._prev.text.upper() 2336 2337 id_var = self._parse_id_var() 2338 if not id_var: 2339 return None 2340 2341 options.append( 2342 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2343 ) 2344 2345 return self.expression(exp.LikeProperty, this=table, expressions=options) 2346 2347 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2348 return self.expression( 2349 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2350 ) 2351 2352 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2353 self._match(TokenType.EQ) 2354 return self.expression( 2355 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2356 ) 2357 2358 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2359 self._match_text_seq("WITH", "CONNECTION") 2360 return self.expression( 2361 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2362 ) 2363 2364 def _parse_returns(self) -> exp.ReturnsProperty: 2365 value: t.Optional[exp.Expression] 2366 null = None 2367 is_table = self._match(TokenType.TABLE) 2368 2369 if is_table: 2370 if self._match(TokenType.LT): 2371 value = self.expression( 2372 exp.Schema, 2373 this="TABLE", 2374 expressions=self._parse_csv(self._parse_struct_types), 2375 ) 2376 if not self._match(TokenType.GT): 2377 self.raise_error("Expecting >") 2378 else: 2379 value = self._parse_schema(exp.var("TABLE")) 2380 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2381 null = True 2382 value = None 2383 else: 2384 value = self._parse_types() 2385 2386 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2387 2388 def _parse_describe(self) -> exp.Describe: 2389 kind = self._match_set(self.CREATABLES) and self._prev.text 2390 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2391 if self._match(TokenType.DOT): 2392 style = None 2393 self._retreat(self._index - 2) 2394 this = self._parse_table(schema=True) 2395 properties = self._parse_properties() 2396 expressions = properties.expressions if properties else None 2397 return self.expression( 2398 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2399 ) 2400 2401 def _parse_insert(self) -> exp.Insert: 2402 comments = ensure_list(self._prev_comments) 2403 hint = self._parse_hint() 2404 overwrite = self._match(TokenType.OVERWRITE) 2405 ignore = self._match(TokenType.IGNORE) 2406 local = self._match_text_seq("LOCAL") 2407 alternative = None 2408 is_function = None 2409 2410 if self._match_text_seq("DIRECTORY"): 2411 this: t.Optional[exp.Expression] = self.expression( 2412 exp.Directory, 2413 this=self._parse_var_or_string(), 2414 
local=local, 2415 row_format=self._parse_row_format(match_row=True), 2416 ) 2417 else: 2418 if self._match(TokenType.OR): 2419 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2420 2421 self._match(TokenType.INTO) 2422 comments += ensure_list(self._prev_comments) 2423 self._match(TokenType.TABLE) 2424 is_function = self._match(TokenType.FUNCTION) 2425 2426 this = ( 2427 self._parse_table(schema=True, parse_partition=True) 2428 if not is_function 2429 else self._parse_function() 2430 ) 2431 2432 returning = self._parse_returning() 2433 2434 return self.expression( 2435 exp.Insert, 2436 comments=comments, 2437 hint=hint, 2438 is_function=is_function, 2439 this=this, 2440 stored=self._match_text_seq("STORED") and self._parse_stored(), 2441 by_name=self._match_text_seq("BY", "NAME"), 2442 exists=self._parse_exists(), 2443 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2444 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2445 conflict=self._parse_on_conflict(), 2446 returning=returning or self._parse_returning(), 2447 overwrite=overwrite, 2448 alternative=alternative, 2449 ignore=ignore, 2450 ) 2451 2452 def _parse_kill(self) -> exp.Kill: 2453 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2454 2455 return self.expression( 2456 exp.Kill, 2457 this=self._parse_primary(), 2458 kind=kind, 2459 ) 2460 2461 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2462 conflict = self._match_text_seq("ON", "CONFLICT") 2463 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2464 2465 if not conflict and not duplicate: 2466 return None 2467 2468 conflict_keys = None 2469 constraint = None 2470 2471 if conflict: 2472 if self._match_text_seq("ON", "CONSTRAINT"): 2473 constraint = self._parse_id_var() 2474 elif self._match(TokenType.L_PAREN): 2475 conflict_keys = self._parse_csv(self._parse_id_var) 2476 self._match_r_paren() 2477 2478 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2479 if self._prev.token_type == TokenType.UPDATE: 2480 self._match(TokenType.SET) 2481 expressions = self._parse_csv(self._parse_equality) 2482 else: 2483 expressions = None 2484 2485 return self.expression( 2486 exp.OnConflict, 2487 duplicate=duplicate, 2488 expressions=expressions, 2489 action=action, 2490 conflict_keys=conflict_keys, 2491 constraint=constraint, 2492 ) 2493 2494 def _parse_returning(self) -> t.Optional[exp.Returning]: 2495 if not self._match(TokenType.RETURNING): 2496 return None 2497 return self.expression( 2498 exp.Returning, 2499 expressions=self._parse_csv(self._parse_expression), 2500 into=self._match(TokenType.INTO) and self._parse_table_part(), 2501 ) 2502 2503 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2504 if not self._match(TokenType.FORMAT): 2505 return None 2506 return self._parse_row_format() 2507 2508 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2509 index = self._index 2510 with_ = with_ or self._match_text_seq("WITH") 2511 2512 if not self._match(TokenType.SERDE_PROPERTIES): 2513 self._retreat(index) 2514 return None 2515 return self.expression( 2516 exp.SerdeProperties, 2517 **{ # type: ignore 2518 "expressions": self._parse_wrapped_properties(), 2519 "with": with_, 2520 }, 2521 ) 2522 2523 def _parse_row_format( 2524 self, match_row: bool = False 2525 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2526 
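# Accepts Hive-style row-format clauses in both documented shapes; a sketch,
# assuming Hive DDL (the SerDe class and property names are only examples):
#
#   ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
#       WITH SERDEPROPERTIES ('separatorChar' = ',')
#
#   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'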
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2527 return None 2528 2529 if self._match_text_seq("SERDE"): 2530 this = self._parse_string() 2531 2532 serde_properties = self._parse_serde_properties() 2533 2534 return self.expression( 2535 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2536 ) 2537 2538 self._match_text_seq("DELIMITED") 2539 2540 kwargs = {} 2541 2542 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2543 kwargs["fields"] = self._parse_string() 2544 if self._match_text_seq("ESCAPED", "BY"): 2545 kwargs["escaped"] = self._parse_string() 2546 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2547 kwargs["collection_items"] = self._parse_string() 2548 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2549 kwargs["map_keys"] = self._parse_string() 2550 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2551 kwargs["lines"] = self._parse_string() 2552 if self._match_text_seq("NULL", "DEFINED", "AS"): 2553 kwargs["null"] = self._parse_string() 2554 2555 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2556 2557 def _parse_load(self) -> exp.LoadData | exp.Command: 2558 if self._match_text_seq("DATA"): 2559 local = self._match_text_seq("LOCAL") 2560 self._match_text_seq("INPATH") 2561 inpath = self._parse_string() 2562 overwrite = self._match(TokenType.OVERWRITE) 2563 self._match_pair(TokenType.INTO, TokenType.TABLE) 2564 2565 return self.expression( 2566 exp.LoadData, 2567 this=self._parse_table(schema=True), 2568 local=local, 2569 overwrite=overwrite, 2570 inpath=inpath, 2571 partition=self._parse_partition(), 2572 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2573 serde=self._match_text_seq("SERDE") and self._parse_string(), 2574 ) 2575 return self._parse_as_command(self._prev) 2576 2577 def _parse_delete(self) -> exp.Delete: 2578 # This handles MySQL's "Multiple-Table Syntax" 2579 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2580 tables = None 2581 comments = self._prev_comments 2582 if not self._match(TokenType.FROM, advance=False): 2583 tables = self._parse_csv(self._parse_table) or None 2584 2585 returning = self._parse_returning() 2586 2587 return self.expression( 2588 exp.Delete, 2589 comments=comments, 2590 tables=tables, 2591 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2592 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2593 where=self._parse_where(), 2594 returning=returning or self._parse_returning(), 2595 limit=self._parse_limit(), 2596 ) 2597 2598 def _parse_update(self) -> exp.Update: 2599 comments = self._prev_comments 2600 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2601 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2602 returning = self._parse_returning() 2603 return self.expression( 2604 exp.Update, 2605 comments=comments, 2606 **{ # type: ignore 2607 "this": this, 2608 "expressions": expressions, 2609 "from": self._parse_from(joins=True), 2610 "where": self._parse_where(), 2611 "returning": returning or self._parse_returning(), 2612 "order": self._parse_order(), 2613 "limit": self._parse_limit(), 2614 }, 2615 ) 2616 2617 def _parse_uncache(self) -> exp.Uncache: 2618 if not self._match(TokenType.TABLE): 2619 self.raise_error("Expecting TABLE after UNCACHE") 2620 2621 return self.expression( 2622 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2623 ) 2624 2625 def 
_parse_cache(self) -> exp.Cache: 2626 lazy = self._match_text_seq("LAZY") 2627 self._match(TokenType.TABLE) 2628 table = self._parse_table(schema=True) 2629 2630 options = [] 2631 if self._match_text_seq("OPTIONS"): 2632 self._match_l_paren() 2633 k = self._parse_string() 2634 self._match(TokenType.EQ) 2635 v = self._parse_string() 2636 options = [k, v] 2637 self._match_r_paren() 2638 2639 self._match(TokenType.ALIAS) 2640 return self.expression( 2641 exp.Cache, 2642 this=table, 2643 lazy=lazy, 2644 options=options, 2645 expression=self._parse_select(nested=True), 2646 ) 2647 2648 def _parse_partition(self) -> t.Optional[exp.Partition]: 2649 if not self._match(TokenType.PARTITION): 2650 return None 2651 2652 return self.expression( 2653 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2654 ) 2655 2656 def _parse_value(self) -> t.Optional[exp.Tuple]: 2657 if self._match(TokenType.L_PAREN): 2658 expressions = self._parse_csv(self._parse_expression) 2659 self._match_r_paren() 2660 return self.expression(exp.Tuple, expressions=expressions) 2661 2662 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2663 expression = self._parse_expression() 2664 if expression: 2665 return self.expression(exp.Tuple, expressions=[expression]) 2666 return None 2667 2668 def _parse_projections(self) -> t.List[exp.Expression]: 2669 return self._parse_expressions() 2670 2671 def _parse_select( 2672 self, 2673 nested: bool = False, 2674 table: bool = False, 2675 parse_subquery_alias: bool = True, 2676 parse_set_operation: bool = True, 2677 ) -> t.Optional[exp.Expression]: 2678 cte = self._parse_with() 2679 2680 if cte: 2681 this = self._parse_statement() 2682 2683 if not this: 2684 self.raise_error("Failed to parse any statement following CTE") 2685 return cte 2686 2687 if "with" in this.arg_types: 2688 this.set("with", cte) 2689 else: 2690 self.raise_error(f"{this.key} does not support CTE") 2691 this = cte 2692 2693 return this 2694 2695 # duckdb supports leading with FROM x 2696 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2697 2698 if self._match(TokenType.SELECT): 2699 comments = self._prev_comments 2700 2701 hint = self._parse_hint() 2702 all_ = self._match(TokenType.ALL) 2703 distinct = self._match_set(self.DISTINCT_TOKENS) 2704 2705 kind = ( 2706 self._match(TokenType.ALIAS) 2707 and self._match_texts(("STRUCT", "VALUE")) 2708 and self._prev.text.upper() 2709 ) 2710 2711 if distinct: 2712 distinct = self.expression( 2713 exp.Distinct, 2714 on=self._parse_value() if self._match(TokenType.ON) else None, 2715 ) 2716 2717 if all_ and distinct: 2718 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2719 2720 limit = self._parse_limit(top=True) 2721 projections = self._parse_projections() 2722 2723 this = self.expression( 2724 exp.Select, 2725 kind=kind, 2726 hint=hint, 2727 distinct=distinct, 2728 expressions=projections, 2729 limit=limit, 2730 ) 2731 this.comments = comments 2732 2733 into = self._parse_into() 2734 if into: 2735 this.set("into", into) 2736 2737 if not from_: 2738 from_ = self._parse_from() 2739 2740 if from_: 2741 this.set("from", from_) 2742 2743 this = self._parse_query_modifiers(this) 2744 elif (table or nested) and self._match(TokenType.L_PAREN): 2745 if self._match(TokenType.PIVOT): 2746 this = self._parse_simplified_pivot() 2747 elif self._match(TokenType.FROM): 2748 this = exp.select("*").from_( 2749 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2750 ) 2751 else: 2752 
this = ( 2753 self._parse_table() 2754 if table 2755 else self._parse_select(nested=True, parse_set_operation=False) 2756 ) 2757 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2758 2759 self._match_r_paren() 2760 2761 # We return early here so that the UNION isn't attached to the subquery by the 2762 # following call to _parse_set_operations, but instead becomes the parent node 2763 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2764 elif self._match(TokenType.VALUES, advance=False): 2765 this = self._parse_derived_table_values() 2766 elif from_: 2767 this = exp.select("*").from_(from_.this, copy=False) 2768 else: 2769 this = None 2770 2771 if parse_set_operation: 2772 return self._parse_set_operations(this) 2773 return this 2774 2775 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2776 if not skip_with_token and not self._match(TokenType.WITH): 2777 return None 2778 2779 comments = self._prev_comments 2780 recursive = self._match(TokenType.RECURSIVE) 2781 2782 expressions = [] 2783 while True: 2784 expressions.append(self._parse_cte()) 2785 2786 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2787 break 2788 else: 2789 self._match(TokenType.WITH) 2790 2791 return self.expression( 2792 exp.With, comments=comments, expressions=expressions, recursive=recursive 2793 ) 2794 2795 def _parse_cte(self) -> exp.CTE: 2796 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2797 if not alias or not alias.this: 2798 self.raise_error("Expected CTE to have alias") 2799 2800 self._match(TokenType.ALIAS) 2801 2802 if self._match_text_seq("NOT", "MATERIALIZED"): 2803 materialized = False 2804 elif self._match_text_seq("MATERIALIZED"): 2805 materialized = True 2806 else: 2807 materialized = None 2808 2809 return self.expression( 2810 exp.CTE, 2811 this=self._parse_wrapped(self._parse_statement), 2812 alias=alias, 2813 materialized=materialized, 2814 ) 2815 2816 def _parse_table_alias( 2817 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2818 ) -> t.Optional[exp.TableAlias]: 2819 any_token = self._match(TokenType.ALIAS) 2820 alias = ( 2821 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2822 or self._parse_string_as_identifier() 2823 ) 2824 2825 index = self._index 2826 if self._match(TokenType.L_PAREN): 2827 columns = self._parse_csv(self._parse_function_parameter) 2828 self._match_r_paren() if columns else self._retreat(index) 2829 else: 2830 columns = None 2831 2832 if not alias and not columns: 2833 return None 2834 2835 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2836 2837 # We bubble up comments from the Identifier to the TableAlias 2838 if isinstance(alias, exp.Identifier): 2839 table_alias.add_comments(alias.pop_comments()) 2840 2841 return table_alias 2842 2843 def _parse_subquery( 2844 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2845 ) -> t.Optional[exp.Subquery]: 2846 if not this: 2847 return None 2848 2849 return self.expression( 2850 exp.Subquery, 2851 this=this, 2852 pivots=self._parse_pivots(), 2853 alias=self._parse_table_alias() if parse_alias else None, 2854 ) 2855 2856 def _implicit_unnests_to_explicit(self, this: E) -> E: 2857 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2858 2859 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2860 for i, join in enumerate(this.args.get("joins") or []): 2861 table = join.this 2862 
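# e.g. BigQuery's implicit-unnest correlation: in SELECT * FROM t, t.events the
# join target t.events is really a column reference, so the loop rewrites it
# into SELECT * FROM t, UNNEST(t.events). The rewrite only fires when the first
# name part matches a previously seen table reference (the refs set above).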
normalized_table = table.copy() 2863 normalized_table.meta["maybe_column"] = True 2864 normalized_table = _norm(normalized_table, dialect=self.dialect) 2865 2866 if isinstance(table, exp.Table) and not join.args.get("on"): 2867 if normalized_table.parts[0].name in refs: 2868 table_as_column = table.to_column() 2869 unnest = exp.Unnest(expressions=[table_as_column]) 2870 2871 # Table.to_column creates a parent Alias node that we want to convert to 2872 # a TableAlias and attach to the Unnest, so it matches the parser's output 2873 if isinstance(table.args.get("alias"), exp.TableAlias): 2874 table_as_column.replace(table_as_column.this) 2875 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2876 2877 table.replace(unnest) 2878 2879 refs.add(normalized_table.alias_or_name) 2880 2881 return this 2882 2883 def _parse_query_modifiers( 2884 self, this: t.Optional[exp.Expression] 2885 ) -> t.Optional[exp.Expression]: 2886 if isinstance(this, (exp.Query, exp.Table)): 2887 for join in self._parse_joins(): 2888 this.append("joins", join) 2889 for lateral in iter(self._parse_lateral, None): 2890 this.append("laterals", lateral) 2891 2892 while True: 2893 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2894 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2895 key, expression = parser(self) 2896 2897 if expression: 2898 this.set(key, expression) 2899 if key == "limit": 2900 offset = expression.args.pop("offset", None) 2901 2902 if offset: 2903 offset = exp.Offset(expression=offset) 2904 this.set("offset", offset) 2905 2906 limit_by_expressions = expression.expressions 2907 expression.set("expressions", None) 2908 offset.set("expressions", limit_by_expressions) 2909 continue 2910 break 2911 2912 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2913 this = self._implicit_unnests_to_explicit(this) 2914 2915 return this 2916 2917 def _parse_hint(self) -> t.Optional[exp.Hint]: 2918 if self._match(TokenType.HINT): 2919 hints = [] 2920 for hint in iter( 2921 lambda: self._parse_csv( 2922 lambda: self._parse_function() or self._parse_var(upper=True) 2923 ), 2924 [], 2925 ): 2926 hints.extend(hint) 2927 2928 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2929 self.raise_error("Expected */ after HINT") 2930 2931 return self.expression(exp.Hint, expressions=hints) 2932 2933 return None 2934 2935 def _parse_into(self) -> t.Optional[exp.Into]: 2936 if not self._match(TokenType.INTO): 2937 return None 2938 2939 temp = self._match(TokenType.TEMPORARY) 2940 unlogged = self._match_text_seq("UNLOGGED") 2941 self._match(TokenType.TABLE) 2942 2943 return self.expression( 2944 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2945 ) 2946 2947 def _parse_from( 2948 self, joins: bool = False, skip_from_token: bool = False 2949 ) -> t.Optional[exp.From]: 2950 if not skip_from_token and not self._match(TokenType.FROM): 2951 return None 2952 2953 return self.expression( 2954 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2955 ) 2956 2957 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2958 return self.expression( 2959 exp.MatchRecognizeMeasure, 2960 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2961 this=self._parse_expression(), 2962 ) 2963 2964 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2965 if not self._match(TokenType.MATCH_RECOGNIZE): 2966 return None 2967 2968 self._match_l_paren() 2969 2970 partition = 
self._parse_partition_by() 2971 order = self._parse_order() 2972 2973 measures = ( 2974 self._parse_csv(self._parse_match_recognize_measure) 2975 if self._match_text_seq("MEASURES") 2976 else None 2977 ) 2978 2979 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2980 rows = exp.var("ONE ROW PER MATCH") 2981 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2982 text = "ALL ROWS PER MATCH" 2983 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2984 text += " SHOW EMPTY MATCHES" 2985 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2986 text += " OMIT EMPTY MATCHES" 2987 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2988 text += " WITH UNMATCHED ROWS" 2989 rows = exp.var(text) 2990 else: 2991 rows = None 2992 2993 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2994 text = "AFTER MATCH SKIP" 2995 if self._match_text_seq("PAST", "LAST", "ROW"): 2996 text += " PAST LAST ROW" 2997 elif self._match_text_seq("TO", "NEXT", "ROW"): 2998 text += " TO NEXT ROW" 2999 elif self._match_text_seq("TO", "FIRST"): 3000 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3001 elif self._match_text_seq("TO", "LAST"): 3002 text += f" TO LAST {self._advance_any().text}" # type: ignore 3003 after = exp.var(text) 3004 else: 3005 after = None 3006 3007 if self._match_text_seq("PATTERN"): 3008 self._match_l_paren() 3009 3010 if not self._curr: 3011 self.raise_error("Expecting )", self._curr) 3012 3013 paren = 1 3014 start = self._curr 3015 3016 while self._curr and paren > 0: 3017 if self._curr.token_type == TokenType.L_PAREN: 3018 paren += 1 3019 if self._curr.token_type == TokenType.R_PAREN: 3020 paren -= 1 3021 3022 end = self._prev 3023 self._advance() 3024 3025 if paren > 0: 3026 self.raise_error("Expecting )", self._curr) 3027 3028 pattern = exp.var(self._find_sql(start, end)) 3029 else: 3030 pattern = None 3031 3032 define = ( 3033 self._parse_csv(self._parse_name_as_expression) 3034 if self._match_text_seq("DEFINE") 3035 else None 3036 ) 3037 3038 self._match_r_paren() 3039 3040 return self.expression( 3041 exp.MatchRecognize, 3042 partition_by=partition, 3043 order=order, 3044 measures=measures, 3045 rows=rows, 3046 after=after, 3047 pattern=pattern, 3048 define=define, 3049 alias=self._parse_table_alias(), 3050 ) 3051 3052 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3053 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3054 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3055 cross_apply = False 3056 3057 if cross_apply is not None: 3058 this = self._parse_select(table=True) 3059 view = None 3060 outer = None 3061 elif self._match(TokenType.LATERAL): 3062 this = self._parse_select(table=True) 3063 view = self._match(TokenType.VIEW) 3064 outer = self._match(TokenType.OUTER) 3065 else: 3066 return None 3067 3068 if not this: 3069 this = ( 3070 self._parse_unnest() 3071 or self._parse_function() 3072 or self._parse_id_var(any_token=False) 3073 ) 3074 3075 while self._match(TokenType.DOT): 3076 this = exp.Dot( 3077 this=this, 3078 expression=self._parse_function() or self._parse_id_var(any_token=False), 3079 ) 3080 3081 if view: 3082 table = self._parse_id_var(any_token=False) 3083 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3084 table_alias: t.Optional[exp.TableAlias] = self.expression( 3085 exp.TableAlias, this=table, columns=columns 3086 ) 3087 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3088 # We move the alias from the lateral's child node to 
the lateral itself 3089 table_alias = this.args["alias"].pop() 3090 else: 3091 table_alias = self._parse_table_alias() 3092 3093 return self.expression( 3094 exp.Lateral, 3095 this=this, 3096 view=view, 3097 outer=outer, 3098 alias=table_alias, 3099 cross_apply=cross_apply, 3100 ) 3101 3102 def _parse_join_parts( 3103 self, 3104 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3105 return ( 3106 self._match_set(self.JOIN_METHODS) and self._prev, 3107 self._match_set(self.JOIN_SIDES) and self._prev, 3108 self._match_set(self.JOIN_KINDS) and self._prev, 3109 ) 3110 3111 def _parse_join( 3112 self, skip_join_token: bool = False, parse_bracket: bool = False 3113 ) -> t.Optional[exp.Join]: 3114 if self._match(TokenType.COMMA): 3115 return self.expression(exp.Join, this=self._parse_table()) 3116 3117 index = self._index 3118 method, side, kind = self._parse_join_parts() 3119 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3120 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3121 3122 if not skip_join_token and not join: 3123 self._retreat(index) 3124 kind = None 3125 method = None 3126 side = None 3127 3128 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3129 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3130 3131 if not skip_join_token and not join and not outer_apply and not cross_apply: 3132 return None 3133 3134 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3135 3136 if method: 3137 kwargs["method"] = method.text 3138 if side: 3139 kwargs["side"] = side.text 3140 if kind: 3141 kwargs["kind"] = kind.text 3142 if hint: 3143 kwargs["hint"] = hint 3144 3145 if self._match(TokenType.MATCH_CONDITION): 3146 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3147 3148 if self._match(TokenType.ON): 3149 kwargs["on"] = self._parse_assignment() 3150 elif self._match(TokenType.USING): 3151 kwargs["using"] = self._parse_wrapped_id_vars() 3152 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3153 kind and kind.token_type == TokenType.CROSS 3154 ): 3155 index = self._index 3156 joins: t.Optional[list] = list(self._parse_joins()) 3157 3158 if joins and self._match(TokenType.ON): 3159 kwargs["on"] = self._parse_assignment() 3160 elif joins and self._match(TokenType.USING): 3161 kwargs["using"] = self._parse_wrapped_id_vars() 3162 else: 3163 joins = None 3164 self._retreat(index) 3165 3166 kwargs["this"].set("joins", joins if joins else None) 3167 3168 comments = [c for token in (method, side, kind) if token for c in token.comments] 3169 return self.expression(exp.Join, comments=comments, **kwargs) 3170 3171 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3172 this = self._parse_assignment() 3173 3174 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3175 return this 3176 3177 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3178 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3179 3180 return this 3181 3182 def _parse_index_params(self) -> exp.IndexParameters: 3183 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3184 3185 if self._match(TokenType.L_PAREN, advance=False): 3186 columns = self._parse_wrapped_csv(self._parse_with_operator) 3187 else: 3188 columns = None 3189 3190 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3191 partition_by = 
self._parse_partition_by() 3192 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3193 tablespace = ( 3194 self._parse_var(any_token=True) 3195 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3196 else None 3197 ) 3198 where = self._parse_where() 3199 3200 on = self._parse_field() if self._match(TokenType.ON) else None 3201 3202 return self.expression( 3203 exp.IndexParameters, 3204 using=using, 3205 columns=columns, 3206 include=include, 3207 partition_by=partition_by, 3208 where=where, 3209 with_storage=with_storage, 3210 tablespace=tablespace, 3211 on=on, 3212 ) 3213 3214 def _parse_index( 3215 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3216 ) -> t.Optional[exp.Index]: 3217 if index or anonymous: 3218 unique = None 3219 primary = None 3220 amp = None 3221 3222 self._match(TokenType.ON) 3223 self._match(TokenType.TABLE) # hive 3224 table = self._parse_table_parts(schema=True) 3225 else: 3226 unique = self._match(TokenType.UNIQUE) 3227 primary = self._match_text_seq("PRIMARY") 3228 amp = self._match_text_seq("AMP") 3229 3230 if not self._match(TokenType.INDEX): 3231 return None 3232 3233 index = self._parse_id_var() 3234 table = None 3235 3236 params = self._parse_index_params() 3237 3238 return self.expression( 3239 exp.Index, 3240 this=index, 3241 table=table, 3242 unique=unique, 3243 primary=primary, 3244 amp=amp, 3245 params=params, 3246 ) 3247 3248 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3249 hints: t.List[exp.Expression] = [] 3250 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3251 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3252 hints.append( 3253 self.expression( 3254 exp.WithTableHint, 3255 expressions=self._parse_csv( 3256 lambda: self._parse_function() or self._parse_var(any_token=True) 3257 ), 3258 ) 3259 ) 3260 self._match_r_paren() 3261 else: 3262 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3263 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3264 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3265 3266 self._match_set((TokenType.INDEX, TokenType.KEY)) 3267 if self._match(TokenType.FOR): 3268 hint.set("target", self._advance_any() and self._prev.text.upper()) 3269 3270 hint.set("expressions", self._parse_wrapped_id_vars()) 3271 hints.append(hint) 3272 3273 return hints or None 3274 3275 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3276 return ( 3277 (not schema and self._parse_function(optional_parens=False)) 3278 or self._parse_id_var(any_token=False) 3279 or self._parse_string_as_identifier() 3280 or self._parse_placeholder() 3281 ) 3282 3283 def _parse_table_parts( 3284 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3285 ) -> exp.Table: 3286 catalog = None 3287 db = None 3288 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3289 3290 while self._match(TokenType.DOT): 3291 if catalog: 3292 # This allows nesting the table in arbitrarily many dot expressions if needed 3293 table = self.expression( 3294 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3295 ) 3296 else: 3297 catalog = db 3298 db = table 3299 # "" used for tsql FROM a..b case 3300 table = self._parse_table_part(schema=schema) or "" 3301 3302 if ( 3303 wildcard 3304 and self._is_connected() 3305 and (isinstance(table, exp.Identifier) or not table) 3306 and self._match(TokenType.STAR) 3307 ): 3308 if 
isinstance(table, exp.Identifier): 3309 table.args["this"] += "*" 3310 else: 3311 table = exp.Identifier(this="*") 3312 3313 # We bubble up comments from the Identifier to the Table 3314 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3315 3316 if is_db_reference: 3317 catalog = db 3318 db = table 3319 table = None 3320 3321 if not table and not is_db_reference: 3322 self.raise_error(f"Expected table name but got {self._curr}") 3323 if not db and is_db_reference: 3324 self.raise_error(f"Expected database name but got {self._curr}") 3325 3326 return self.expression( 3327 exp.Table, 3328 comments=comments, 3329 this=table, 3330 db=db, 3331 catalog=catalog, 3332 pivots=self._parse_pivots(), 3333 ) 3334 3335 def _parse_table( 3336 self, 3337 schema: bool = False, 3338 joins: bool = False, 3339 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3340 parse_bracket: bool = False, 3341 is_db_reference: bool = False, 3342 parse_partition: bool = False, 3343 ) -> t.Optional[exp.Expression]: 3344 lateral = self._parse_lateral() 3345 if lateral: 3346 return lateral 3347 3348 unnest = self._parse_unnest() 3349 if unnest: 3350 return unnest 3351 3352 values = self._parse_derived_table_values() 3353 if values: 3354 return values 3355 3356 subquery = self._parse_select(table=True) 3357 if subquery: 3358 if not subquery.args.get("pivots"): 3359 subquery.set("pivots", self._parse_pivots()) 3360 return subquery 3361 3362 bracket = parse_bracket and self._parse_bracket(None) 3363 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3364 3365 only = self._match(TokenType.ONLY) 3366 3367 this = t.cast( 3368 exp.Expression, 3369 bracket 3370 or self._parse_bracket( 3371 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3372 ), 3373 ) 3374 3375 if only: 3376 this.set("only", only) 3377 3378 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3379 self._match_text_seq("*") 3380 3381 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3382 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3383 this.set("partition", self._parse_partition()) 3384 3385 if schema: 3386 return self._parse_schema(this=this) 3387 3388 version = self._parse_version() 3389 3390 if version: 3391 this.set("version", version) 3392 3393 if self.dialect.ALIAS_POST_TABLESAMPLE: 3394 table_sample = self._parse_table_sample() 3395 3396 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3397 if alias: 3398 this.set("alias", alias) 3399 3400 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3401 return self.expression( 3402 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3403 ) 3404 3405 this.set("hints", self._parse_table_hints()) 3406 3407 if not this.args.get("pivots"): 3408 this.set("pivots", self._parse_pivots()) 3409 3410 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3411 table_sample = self._parse_table_sample() 3412 3413 if table_sample: 3414 table_sample.set("this", this) 3415 this = table_sample 3416 3417 if joins: 3418 for join in self._parse_joins(): 3419 this.append("joins", join) 3420 3421 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3422 this.set("ordinality", True) 3423 this.set("alias", self._parse_table_alias()) 3424 3425 return this 3426 3427 def _parse_version(self) -> t.Optional[exp.Version]: 3428 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3429 this = "TIMESTAMP" 3430 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3431 this = "VERSION" 3432 else: 3433 return None 3434 3435 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3436 kind = self._prev.text.upper() 3437 start = self._parse_bitwise() 3438 self._match_texts(("TO", "AND")) 3439 end = self._parse_bitwise() 3440 expression: t.Optional[exp.Expression] = self.expression( 3441 exp.Tuple, expressions=[start, end] 3442 ) 3443 elif self._match_text_seq("CONTAINED", "IN"): 3444 kind = "CONTAINED IN" 3445 expression = self.expression( 3446 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3447 ) 3448 elif self._match(TokenType.ALL): 3449 kind = "ALL" 3450 expression = None 3451 else: 3452 self._match_text_seq("AS", "OF") 3453 kind = "AS OF" 3454 expression = self._parse_type() 3455 3456 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3457 3458 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3459 if not self._match(TokenType.UNNEST): 3460 return None 3461 3462 expressions = self._parse_wrapped_csv(self._parse_equality) 3463 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3464 3465 alias = self._parse_table_alias() if with_alias else None 3466 3467 if alias: 3468 if self.dialect.UNNEST_COLUMN_ONLY: 3469 if alias.args.get("columns"): 3470 self.raise_error("Unexpected extra column alias in unnest.") 3471 3472 alias.set("columns", [alias.this]) 3473 alias.set("this", None) 3474 3475 columns = alias.args.get("columns") or [] 3476 if offset and len(expressions) < len(columns): 3477 offset = columns.pop() 3478 3479 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3480 self._match(TokenType.ALIAS) 3481 offset = self._parse_id_var( 3482 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3483 ) or exp.to_identifier("offset") 3484 3485 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3486 3487 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3488 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3489 if not is_derived and not self._match_text_seq("VALUES"): 3490 return None 3491 3492 expressions = self._parse_csv(self._parse_value) 3493 alias = self._parse_table_alias() 3494 3495 if is_derived: 3496 self._match_r_paren() 3497 3498 return self.expression( 3499 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3500 ) 3501 3502 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3503 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3504 as_modifier and self._match_text_seq("USING", "SAMPLE") 3505 ): 3506 return None 3507 3508 bucket_numerator = None 3509 bucket_denominator = None 3510 bucket_field = None 3511 percent = None 3512 size = None 3513 seed = None 3514 3515 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3516 matched_l_paren = self._match(TokenType.L_PAREN) 3517 3518 if self.TABLESAMPLE_CSV: 3519 num = None 3520 expressions = self._parse_csv(self._parse_primary) 3521 else: 3522 expressions = None 3523 num = ( 3524 self._parse_factor() 3525 if self._match(TokenType.NUMBER, advance=False) 3526 else self._parse_primary() or self._parse_placeholder() 3527 ) 3528 3529 if self._match_text_seq("BUCKET"): 3530 bucket_numerator = self._parse_number() 3531 self._match_text_seq("OUT", "OF") 3532 bucket_denominator = self._parse_number() 3533 self._match(TokenType.ON) 3534 bucket_field = self._parse_field() 3535 elif
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3536 percent = num 3537 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3538 size = num 3539 else: 3540 percent = num 3541 3542 if matched_l_paren: 3543 self._match_r_paren() 3544 3545 if self._match(TokenType.L_PAREN): 3546 method = self._parse_var(upper=True) 3547 seed = self._match(TokenType.COMMA) and self._parse_number() 3548 self._match_r_paren() 3549 elif self._match_texts(("SEED", "REPEATABLE")): 3550 seed = self._parse_wrapped(self._parse_number) 3551 3552 if not method and self.DEFAULT_SAMPLING_METHOD: 3553 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3554 3555 return self.expression( 3556 exp.TableSample, 3557 expressions=expressions, 3558 method=method, 3559 bucket_numerator=bucket_numerator, 3560 bucket_denominator=bucket_denominator, 3561 bucket_field=bucket_field, 3562 percent=percent, 3563 size=size, 3564 seed=seed, 3565 ) 3566 3567 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3568 return list(iter(self._parse_pivot, None)) or None 3569 3570 def _parse_joins(self) -> t.Iterator[exp.Join]: 3571 return iter(self._parse_join, None) 3572 3573 # https://duckdb.org/docs/sql/statements/pivot 3574 def _parse_simplified_pivot(self) -> exp.Pivot: 3575 def _parse_on() -> t.Optional[exp.Expression]: 3576 this = self._parse_bitwise() 3577 return self._parse_in(this) if self._match(TokenType.IN) else this 3578 3579 this = self._parse_table() 3580 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3581 using = self._match(TokenType.USING) and self._parse_csv( 3582 lambda: self._parse_alias(self._parse_function()) 3583 ) 3584 group = self._parse_group() 3585 return self.expression( 3586 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3587 ) 3588 3589 def _parse_pivot_in(self) -> exp.In: 3590 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3591 this = self._parse_assignment() 3592 3593 self._match(TokenType.ALIAS) 3594 alias = self._parse_field() 3595 if alias: 3596 return self.expression(exp.PivotAlias, this=this, alias=alias) 3597 3598 return this 3599 3600 value = self._parse_column() 3601 3602 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3603 self.raise_error("Expecting IN (") 3604 3605 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3606 3607 self._match_r_paren() 3608 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3609 3610 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3611 index = self._index 3612 include_nulls = None 3613 3614 if self._match(TokenType.PIVOT): 3615 unpivot = False 3616 elif self._match(TokenType.UNPIVOT): 3617 unpivot = True 3618 3619 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3620 if self._match_text_seq("INCLUDE", "NULLS"): 3621 include_nulls = True 3622 elif self._match_text_seq("EXCLUDE", "NULLS"): 3623 include_nulls = False 3624 else: 3625 return None 3626 3627 expressions = [] 3628 3629 if not self._match(TokenType.L_PAREN): 3630 self._retreat(index) 3631 return None 3632 3633 if unpivot: 3634 expressions = self._parse_csv(self._parse_column) 3635 else: 3636 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3637 3638 if not expressions: 3639 self.raise_error("Failed to parse PIVOT's aggregation list") 3640 3641 if not self._match(TokenType.FOR): 3642 self.raise_error("Expecting FOR") 3643 3644 field = self._parse_pivot_in() 3645 3646 self._match_r_paren() 
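# Illustrative sketch, assuming only the public sqlglot API: at this point the
# aggregation list, the FOR column and its IN list have all been consumed, so a
# query like the one below should end up as an exp.Pivot attached to the table.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one(
#         "SELECT * FROM t PIVOT(SUM(x) FOR y IN ('a', 'b'))", read="snowflake"
#     )
#     pivot = ast.find(exp.Pivot)
#     # pivot.args["expressions"] holds the SUM aggregation, and
#     # pivot.args["field"] is the exp.In built by _parse_pivot_in above.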
3647 3648 pivot = self.expression( 3649 exp.Pivot, 3650 expressions=expressions, 3651 field=field, 3652 unpivot=unpivot, 3653 include_nulls=include_nulls, 3654 ) 3655 3656 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3657 pivot.set("alias", self._parse_table_alias()) 3658 3659 if not unpivot: 3660 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3661 3662 columns: t.List[exp.Expression] = [] 3663 for fld in pivot.args["field"].expressions: 3664 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3665 for name in names: 3666 if self.PREFIXED_PIVOT_COLUMNS: 3667 name = f"{name}_{field_name}" if name else field_name 3668 else: 3669 name = f"{field_name}_{name}" if name else field_name 3670 3671 columns.append(exp.to_identifier(name)) 3672 3673 pivot.set("columns", columns) 3674 3675 return pivot 3676 3677 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3678 return [agg.alias for agg in aggregations] 3679 3680 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3681 if not skip_where_token and not self._match(TokenType.PREWHERE): 3682 return None 3683 3684 return self.expression( 3685 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3686 ) 3687 3688 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3689 if not skip_where_token and not self._match(TokenType.WHERE): 3690 return None 3691 3692 return self.expression( 3693 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3694 ) 3695 3696 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3697 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3698 return None 3699 3700 elements: t.Dict[str, t.Any] = defaultdict(list) 3701 3702 if self._match(TokenType.ALL): 3703 elements["all"] = True 3704 elif self._match(TokenType.DISTINCT): 3705 elements["all"] = False 3706 3707 while True: 3708 expressions = self._parse_csv( 3709 lambda: None 3710 if self._match(TokenType.ROLLUP, advance=False) 3711 else self._parse_assignment() 3712 ) 3713 if expressions: 3714 elements["expressions"].extend(expressions) 3715 3716 grouping_sets = self._parse_grouping_sets() 3717 if grouping_sets: 3718 elements["grouping_sets"].extend(grouping_sets) 3719 3720 rollup = None 3721 cube = None 3722 totals = None 3723 3724 index = self._index 3725 with_ = self._match(TokenType.WITH) 3726 if self._match(TokenType.ROLLUP): 3727 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3728 elements["rollup"].extend(ensure_list(rollup)) 3729 3730 if self._match(TokenType.CUBE): 3731 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3732 elements["cube"].extend(ensure_list(cube)) 3733 3734 if self._match_text_seq("TOTALS"): 3735 totals = True 3736 elements["totals"] = True # type: ignore 3737 3738 if not (grouping_sets or rollup or cube or totals): 3739 if with_: 3740 self._retreat(index) 3741 break 3742 3743 return self.expression(exp.Group, **elements) # type: ignore 3744 3745 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3746 if not self._match(TokenType.GROUPING_SETS): 3747 return None 3748 3749 return self._parse_wrapped_csv(self._parse_grouping_set) 3750 3751 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3752 if self._match(TokenType.L_PAREN): 3753 grouping_set = self._parse_csv(self._parse_column) 3754 self._match_r_paren() 3755 return 
self.expression(exp.Tuple, expressions=grouping_set) 3756 3757 return self._parse_column() 3758 3759 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3760 if not skip_having_token and not self._match(TokenType.HAVING): 3761 return None 3762 return self.expression(exp.Having, this=self._parse_assignment()) 3763 3764 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3765 if not self._match(TokenType.QUALIFY): 3766 return None 3767 return self.expression(exp.Qualify, this=self._parse_assignment()) 3768 3769 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3770 if skip_start_token: 3771 start = None 3772 elif self._match(TokenType.START_WITH): 3773 start = self._parse_assignment() 3774 else: 3775 return None 3776 3777 self._match(TokenType.CONNECT_BY) 3778 nocycle = self._match_text_seq("NOCYCLE") 3779 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3780 exp.Prior, this=self._parse_bitwise() 3781 ) 3782 connect = self._parse_assignment() 3783 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3784 3785 if not start and self._match(TokenType.START_WITH): 3786 start = self._parse_assignment() 3787 3788 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3789 3790 def _parse_name_as_expression(self) -> exp.Alias: 3791 return self.expression( 3792 exp.Alias, 3793 alias=self._parse_id_var(any_token=True), 3794 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3795 ) 3796 3797 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3798 if self._match_text_seq("INTERPOLATE"): 3799 return self._parse_wrapped_csv(self._parse_name_as_expression) 3800 return None 3801 3802 def _parse_order( 3803 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3804 ) -> t.Optional[exp.Expression]: 3805 siblings = None 3806 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3807 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3808 return this 3809 3810 siblings = True 3811 3812 return self.expression( 3813 exp.Order, 3814 this=this, 3815 expressions=self._parse_csv(self._parse_ordered), 3816 interpolate=self._parse_interpolate(), 3817 siblings=siblings, 3818 ) 3819 3820 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3821 if not self._match(token): 3822 return None 3823 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3824 3825 def _parse_ordered( 3826 self, parse_method: t.Optional[t.Callable] = None 3827 ) -> t.Optional[exp.Ordered]: 3828 this = parse_method() if parse_method else self._parse_assignment() 3829 if not this: 3830 return None 3831 3832 asc = self._match(TokenType.ASC) 3833 desc = self._match(TokenType.DESC) or (asc and False) 3834 3835 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3836 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3837 3838 nulls_first = is_nulls_first or False 3839 explicitly_null_ordered = is_nulls_first or is_nulls_last 3840 3841 if ( 3842 not explicitly_null_ordered 3843 and ( 3844 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3845 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3846 ) 3847 and self.dialect.NULL_ORDERING != "nulls_are_last" 3848 ): 3849 nulls_first = True 3850 3851 if self._match_text_seq("WITH", "FILL"): 3852 with_fill = self.expression( 3853 exp.WithFill, 3854 **{ # type: ignore 3855 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3856 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3857 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3858 }, 3859 ) 3860 else: 3861 with_fill = None 3862 3863 return self.expression( 3864 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3865 ) 3866 3867 def _parse_limit( 3868 self, 3869 this: t.Optional[exp.Expression] = None, 3870 top: bool = False, 3871 skip_limit_token: bool = False, 3872 ) -> t.Optional[exp.Expression]: 3873 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3874 comments = self._prev_comments 3875 if top: 3876 limit_paren = self._match(TokenType.L_PAREN) 3877 expression = self._parse_term() if limit_paren else self._parse_number() 3878 3879 if limit_paren: 3880 self._match_r_paren() 3881 else: 3882 expression = self._parse_term() 3883 3884 if self._match(TokenType.COMMA): 3885 offset = expression 3886 expression = self._parse_term() 3887 else: 3888 offset = None 3889 3890 limit_exp = self.expression( 3891 exp.Limit, 3892 this=this, 3893 expression=expression, 3894 offset=offset, 3895 comments=comments, 3896 expressions=self._parse_limit_by(), 3897 ) 3898 3899 return limit_exp 3900 3901 if self._match(TokenType.FETCH): 3902 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3903 direction = self._prev.text.upper() if direction else "FIRST" 3904 3905 count = self._parse_field(tokens=self.FETCH_TOKENS) 3906 percent = self._match(TokenType.PERCENT) 3907 3908 self._match_set((TokenType.ROW, TokenType.ROWS)) 3909 3910 only = self._match_text_seq("ONLY") 3911 with_ties = self._match_text_seq("WITH", "TIES") 3912 3913 if only and with_ties: 3914 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3915 3916 return self.expression( 3917 exp.Fetch, 3918 direction=direction, 3919 count=count, 3920 percent=percent, 3921 with_ties=with_ties, 3922 ) 3923 3924 return this 3925 3926 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3927 if not self._match(TokenType.OFFSET): 3928 return this 3929 3930 count = self._parse_term() 3931 self._match_set((TokenType.ROW, TokenType.ROWS)) 3932 3933 return self.expression( 3934 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3935 ) 3936 3937 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3938 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3939 3940 def _parse_locks(self) -> t.List[exp.Lock]: 3941 locks = [] 3942 while True: 3943 if self._match_text_seq("FOR", "UPDATE"): 3944 update = True 3945 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3946 "LOCK", "IN", "SHARE", "MODE" 3947 ): 3948 update = False 3949 else: 3950 break 3951 3952 expressions = None 3953 if self._match_text_seq("OF"): 3954 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3955 3956 wait: t.Optional[bool | exp.Expression] = None 3957 if self._match_text_seq("NOWAIT"): 3958 wait = True 3959 elif self._match_text_seq("WAIT"): 3960 wait = self._parse_primary() 3961 elif self._match_text_seq("SKIP", "LOCKED"): 3962 wait = False 3963 3964 locks.append( 3965 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3966 ) 3967 3968 return locks 3969 3970 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3971 while this and self._match_set(self.SET_OPERATIONS): 3972 token_type = self._prev.token_type 3973 3974 if token_type == TokenType.UNION: 3975 
operation: t.Type[exp.SetOperation] = exp.Union 3976 elif token_type == TokenType.EXCEPT: 3977 operation = exp.Except 3978 else: 3979 operation = exp.Intersect 3980 3981 comments = self._prev.comments 3982 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3983 by_name = self._match_text_seq("BY", "NAME") 3984 expression = self._parse_select(nested=True, parse_set_operation=False) 3985 3986 this = self.expression( 3987 operation, 3988 comments=comments, 3989 this=this, 3990 distinct=distinct, 3991 by_name=by_name, 3992 expression=expression, 3993 ) 3994 3995 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3996 expression = this.expression 3997 3998 if expression: 3999 for arg in self.SET_OP_MODIFIERS: 4000 expr = expression.args.get(arg) 4001 if expr: 4002 this.set(arg, expr.pop()) 4003 4004 return this 4005 4006 def _parse_expression(self) -> t.Optional[exp.Expression]: 4007 return self._parse_alias(self._parse_assignment()) 4008 4009 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4010 this = self._parse_disjunction() 4011 4012 while self._match_set(self.ASSIGNMENT): 4013 this = self.expression( 4014 self.ASSIGNMENT[self._prev.token_type], 4015 this=this, 4016 comments=self._prev_comments, 4017 expression=self._parse_assignment(), 4018 ) 4019 4020 return this 4021 4022 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4023 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4024 4025 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4026 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4027 4028 def _parse_equality(self) -> t.Optional[exp.Expression]: 4029 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4030 4031 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4032 return self._parse_tokens(self._parse_range, self.COMPARISON) 4033 4034 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4035 this = this or self._parse_bitwise() 4036 negate = self._match(TokenType.NOT) 4037 4038 if self._match_set(self.RANGE_PARSERS): 4039 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4040 if not expression: 4041 return this 4042 4043 this = expression 4044 elif self._match(TokenType.ISNULL): 4045 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4046 4047 # Postgres supports ISNULL and NOTNULL for conditions. 
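# For example (a sketch; exact output may vary by dialect and version):
#
#     import sqlglot
#
#     sqlglot.parse_one("SELECT x ISNULL").sql()   # roughly 'SELECT x IS NULL'
#     sqlglot.parse_one("SELECT x NOTNULL").sql()  # roughly 'SELECT NOT x IS NULL'
#
# i.e. both spellings are normalized to the standard IS [NOT] NULL predicates
# built in the branches here.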
4048 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4049 if self._match(TokenType.NOTNULL): 4050 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4051 this = self.expression(exp.Not, this=this) 4052 4053 if negate: 4054 this = self.expression(exp.Not, this=this) 4055 4056 if self._match(TokenType.IS): 4057 this = self._parse_is(this) 4058 4059 return this 4060 4061 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4062 index = self._index - 1 4063 negate = self._match(TokenType.NOT) 4064 4065 if self._match_text_seq("DISTINCT", "FROM"): 4066 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4067 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4068 4069 expression = self._parse_null() or self._parse_boolean() 4070 if not expression: 4071 self._retreat(index) 4072 return None 4073 4074 this = self.expression(exp.Is, this=this, expression=expression) 4075 return self.expression(exp.Not, this=this) if negate else this 4076 4077 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4078 unnest = self._parse_unnest(with_alias=False) 4079 if unnest: 4080 this = self.expression(exp.In, this=this, unnest=unnest) 4081 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4082 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4083 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4084 4085 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4086 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4087 else: 4088 this = self.expression(exp.In, this=this, expressions=expressions) 4089 4090 if matched_l_paren: 4091 self._match_r_paren(this) 4092 elif not self._match(TokenType.R_BRACKET, expression=this): 4093 self.raise_error("Expecting ]") 4094 else: 4095 this = self.expression(exp.In, this=this, field=self._parse_field()) 4096 4097 return this 4098 4099 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4100 low = self._parse_bitwise() 4101 self._match(TokenType.AND) 4102 high = self._parse_bitwise() 4103 return self.expression(exp.Between, this=this, low=low, high=high) 4104 4105 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4106 if not self._match(TokenType.ESCAPE): 4107 return this 4108 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4109 4110 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4111 index = self._index 4112 4113 if not self._match(TokenType.INTERVAL) and match_interval: 4114 return None 4115 4116 if self._match(TokenType.STRING, advance=False): 4117 this = self._parse_primary() 4118 else: 4119 this = self._parse_term() 4120 4121 if not this or ( 4122 isinstance(this, exp.Column) 4123 and not this.table 4124 and not this.this.quoted 4125 and this.name.upper() == "IS" 4126 ): 4127 self._retreat(index) 4128 return None 4129 4130 unit = self._parse_function() or ( 4131 not self._match(TokenType.ALIAS, advance=False) 4132 and self._parse_var(any_token=True, upper=True) 4133 ) 4134 4135 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4136 # each INTERVAL expression into this canonical form so it's easy to transpile 4137 if this and this.is_number: 4138 this = exp.Literal.string(this.to_py()) 4139 elif this and this.is_string: 4140 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4141 if 
len(parts) == 1: 4142 if unit: 4143 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4144 self._retreat(self._index - 1) 4145 4146 this = exp.Literal.string(parts[0][0]) 4147 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4148 4149 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4150 unit = self.expression( 4151 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4152 ) 4153 4154 interval = self.expression(exp.Interval, this=this, unit=unit) 4155 4156 index = self._index 4157 self._match(TokenType.PLUS) 4158 4159 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4160 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4161 return self.expression( 4162 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4163 ) 4164 4165 self._retreat(index) 4166 return interval 4167 4168 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4169 this = self._parse_term() 4170 4171 while True: 4172 if self._match_set(self.BITWISE): 4173 this = self.expression( 4174 self.BITWISE[self._prev.token_type], 4175 this=this, 4176 expression=self._parse_term(), 4177 ) 4178 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4179 this = self.expression( 4180 exp.DPipe, 4181 this=this, 4182 expression=self._parse_term(), 4183 safe=not self.dialect.STRICT_STRING_CONCAT, 4184 ) 4185 elif self._match(TokenType.DQMARK): 4186 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4187 elif self._match_pair(TokenType.LT, TokenType.LT): 4188 this = self.expression( 4189 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4190 ) 4191 elif self._match_pair(TokenType.GT, TokenType.GT): 4192 this = self.expression( 4193 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4194 ) 4195 else: 4196 break 4197 4198 return this 4199 4200 def _parse_term(self) -> t.Optional[exp.Expression]: 4201 return self._parse_tokens(self._parse_factor, self.TERM) 4202 4203 def _parse_factor(self) -> t.Optional[exp.Expression]: 4204 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4205 this = parse_method() 4206 4207 while self._match_set(self.FACTOR): 4208 klass = self.FACTOR[self._prev.token_type] 4209 comments = self._prev_comments 4210 expression = parse_method() 4211 4212 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4213 self._retreat(self._index - 1) 4214 return this 4215 4216 this = self.expression(klass, this=this, comments=comments, expression=expression) 4217 4218 if isinstance(this, exp.Div): 4219 this.args["typed"] = self.dialect.TYPED_DIVISION 4220 this.args["safe"] = self.dialect.SAFE_DIVISION 4221 4222 return this 4223 4224 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4225 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4226 4227 def _parse_unary(self) -> t.Optional[exp.Expression]: 4228 if self._match_set(self.UNARY_PARSERS): 4229 return self.UNARY_PARSERS[self._prev.token_type](self) 4230 return self._parse_at_time_zone(self._parse_type()) 4231 4232 def _parse_type( 4233 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4234 ) -> t.Optional[exp.Expression]: 4235 interval = parse_interval and self._parse_interval() 4236 if interval: 4237 return interval 4238 4239 index = self._index 4240 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4241 4242 if data_type: 4243 index2 = 
self._index 4244 this = self._parse_primary() 4245 4246 if isinstance(this, exp.Literal): 4247 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4248 if parser: 4249 return parser(self, this, data_type) 4250 4251 return self.expression(exp.Cast, this=this, to=data_type) 4252 4253 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4254 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4255 # 4256 # If the index difference here is greater than 1, that means the parser itself must have 4257 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4258 # 4259 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4260 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4261 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4262 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4263 # 4264 # In these cases, we don't really want to return the converted type, but instead retreat 4265 # and try to parse a Column or Identifier in the section below. 4266 if data_type.expressions and index2 - index > 1: 4267 self._retreat(index2) 4268 return self._parse_column_ops(data_type) 4269 4270 self._retreat(index) 4271 4272 if fallback_to_identifier: 4273 return self._parse_id_var() 4274 4275 this = self._parse_column() 4276 return this and self._parse_column_ops(this) 4277 4278 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4279 this = self._parse_type() 4280 if not this: 4281 return None 4282 4283 if isinstance(this, exp.Column) and not this.table: 4284 this = exp.var(this.name.upper()) 4285 4286 return self.expression( 4287 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4288 ) 4289 4290 def _parse_types( 4291 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4292 ) -> t.Optional[exp.Expression]: 4293 index = self._index 4294 4295 this: t.Optional[exp.Expression] = None 4296 prefix = self._match_text_seq("SYSUDTLIB", ".") 4297 4298 if not self._match_set(self.TYPE_TOKENS): 4299 identifier = allow_identifiers and self._parse_id_var( 4300 any_token=False, tokens=(TokenType.VAR,) 4301 ) 4302 if isinstance(identifier, exp.Identifier): 4303 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4304 4305 if len(tokens) != 1: 4306 self.raise_error("Unexpected identifier", self._prev) 4307 4308 if tokens[0].token_type in self.TYPE_TOKENS: 4309 self._prev = tokens[0] 4310 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4311 type_name = identifier.name 4312 4313 while self._match(TokenType.DOT): 4314 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4315 4316 this = exp.DataType.build(type_name, udt=True) 4317 else: 4318 self._retreat(self._index - 1) 4319 return None 4320 else: 4321 return None 4322 4323 type_token = self._prev.token_type 4324 4325 if type_token == TokenType.PSEUDO_TYPE: 4326 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4327 4328 if type_token == TokenType.OBJECT_IDENTIFIER: 4329 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4330 4331 # https://materialize.com/docs/sql/types/map/ 4332 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4333 key_type = self._parse_types( 4334 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4335 ) 4336 if not
self._match(TokenType.FARROW): 4337 self._retreat(index) 4338 return None 4339 4340 value_type = self._parse_types( 4341 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4342 ) 4343 if not self._match(TokenType.R_BRACKET): 4344 self._retreat(index) 4345 return None 4346 4347 return exp.DataType( 4348 this=exp.DataType.Type.MAP, 4349 expressions=[key_type, value_type], 4350 nested=True, 4351 prefix=prefix, 4352 ) 4353 4354 nested = type_token in self.NESTED_TYPE_TOKENS 4355 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4356 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4357 expressions = None 4358 maybe_func = False 4359 4360 if self._match(TokenType.L_PAREN): 4361 if is_struct: 4362 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4363 elif nested: 4364 expressions = self._parse_csv( 4365 lambda: self._parse_types( 4366 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4367 ) 4368 ) 4369 elif type_token in self.ENUM_TYPE_TOKENS: 4370 expressions = self._parse_csv(self._parse_equality) 4371 elif is_aggregate: 4372 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4373 any_token=False, tokens=(TokenType.VAR,) 4374 ) 4375 if not func_or_ident or not self._match(TokenType.COMMA): 4376 return None 4377 expressions = self._parse_csv( 4378 lambda: self._parse_types( 4379 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4380 ) 4381 ) 4382 expressions.insert(0, func_or_ident) 4383 else: 4384 expressions = self._parse_csv(self._parse_type_size) 4385 4386 if not expressions or not self._match(TokenType.R_PAREN): 4387 self._retreat(index) 4388 return None 4389 4390 maybe_func = True 4391 4392 values: t.Optional[t.List[exp.Expression]] = None 4393 4394 if nested and self._match(TokenType.LT): 4395 if is_struct: 4396 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4397 else: 4398 expressions = self._parse_csv( 4399 lambda: self._parse_types( 4400 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4401 ) 4402 ) 4403 4404 if not self._match(TokenType.GT): 4405 self.raise_error("Expecting >") 4406 4407 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4408 values = self._parse_csv(self._parse_assignment) 4409 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4410 4411 if type_token in self.TIMESTAMPS: 4412 if self._match_text_seq("WITH", "TIME", "ZONE"): 4413 maybe_func = False 4414 tz_type = ( 4415 exp.DataType.Type.TIMETZ 4416 if type_token in self.TIMES 4417 else exp.DataType.Type.TIMESTAMPTZ 4418 ) 4419 this = exp.DataType(this=tz_type, expressions=expressions) 4420 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4421 maybe_func = False 4422 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4423 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4424 maybe_func = False 4425 elif type_token == TokenType.INTERVAL: 4426 unit = self._parse_var(upper=True) 4427 if unit: 4428 if self._match_text_seq("TO"): 4429 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4430 4431 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4432 else: 4433 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4434 4435 if maybe_func and check_func: 4436 index2 = self._index 4437 peek = self._parse_string() 4438 4439 if not peek: 4440 self._retreat(index) 4441 return None 4442 4443 
self._retreat(index2) 4444 4445 if not this: 4446 if self._match_text_seq("UNSIGNED"): 4447 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4448 if not unsigned_type_token: 4449 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4450 4451 type_token = unsigned_type_token or type_token 4452 4453 this = exp.DataType( 4454 this=exp.DataType.Type[type_token.value], 4455 expressions=expressions, 4456 nested=nested, 4457 values=values, 4458 prefix=prefix, 4459 ) 4460 elif expressions: 4461 this.set("expressions", expressions) 4462 4463 # https://materialize.com/docs/sql/types/list/#type-name 4464 while self._match(TokenType.LIST): 4465 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4466 4467 index = self._index 4468 4469 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4470 matched_array = self._match(TokenType.ARRAY) 4471 4472 while self._curr: 4473 matched_l_bracket = self._match(TokenType.L_BRACKET) 4474 if not matched_l_bracket and not matched_array: 4475 break 4476 4477 matched_array = False 4478 values = self._parse_csv(self._parse_assignment) or None 4479 if values and not schema: 4480 self._retreat(index) 4481 break 4482 4483 this = exp.DataType( 4484 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4485 ) 4486 self._match(TokenType.R_BRACKET) 4487 4488 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4489 converter = self.TYPE_CONVERTERS.get(this.this) 4490 if converter: 4491 this = converter(t.cast(exp.DataType, this)) 4492 4493 return this 4494 4495 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4496 index = self._index 4497 4498 if ( 4499 self._curr 4500 and self._next 4501 and self._curr.token_type in self.TYPE_TOKENS 4502 and self._next.token_type in self.TYPE_TOKENS 4503 ): 4504 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4505 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4506 this = self._parse_id_var() 4507 else: 4508 this = ( 4509 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4510 or self._parse_id_var() 4511 ) 4512 4513 self._match(TokenType.COLON) 4514 4515 if ( 4516 type_required 4517 and not isinstance(this, exp.DataType) 4518 and not self._match_set(self.TYPE_TOKENS, advance=False) 4519 ): 4520 self._retreat(index) 4521 return self._parse_types() 4522 4523 return self._parse_column_def(this) 4524 4525 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4526 if not self._match_text_seq("AT", "TIME", "ZONE"): 4527 return this 4528 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4529 4530 def _parse_column(self) -> t.Optional[exp.Expression]: 4531 this = self._parse_column_reference() 4532 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4533 4534 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4535 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4536 4537 return column 4538 4539 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4540 this = self._parse_field() 4541 if ( 4542 not this 4543 and self._match(TokenType.VALUES, advance=False) 4544 and self.VALUES_FOLLOWED_BY_PAREN 4545 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4546 ): 4547 this = self._parse_id_var() 4548 4549 if isinstance(this, exp.Identifier): 4550 # We bubble up comments from the Identifier to the Column 4551 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4552 4553 return this 4554 4555 def _parse_colon_as_json_extract( 4556 self, this: t.Optional[exp.Expression] 4557 ) -> t.Optional[exp.Expression]: 4558 casts = [] 4559 json_path = [] 4560 4561 while self._match(TokenType.COLON): 4562 start_index = self._index 4563 4564 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4565 path = self._parse_column_ops( 4566 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4567 ) 4568 4569 # The cast :: operator has a lower precedence than the extraction operator :, so 4570 # we rearrange the AST appropriately to avoid casting the JSON path 4571 while isinstance(path, exp.Cast): 4572 casts.append(path.to) 4573 path = path.this 4574 4575 if casts: 4576 dcolon_offset = next( 4577 i 4578 for i, t in enumerate(self._tokens[start_index:]) 4579 if t.token_type == TokenType.DCOLON 4580 ) 4581 end_token = self._tokens[start_index + dcolon_offset - 1] 4582 else: 4583 end_token = self._prev 4584 4585 if path: 4586 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4587 4588 if json_path: 4589 this = self.expression( 4590 exp.JSONExtract, 4591 this=this, 4592 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4593 ) 4594 4595 while casts: 4596 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4597 4598 return this 4599 4600 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4601 this = self._parse_bracket(this) 4602 4603 while self._match_set(self.COLUMN_OPERATORS): 4604 op_token = self._prev.token_type 4605 op = self.COLUMN_OPERATORS.get(op_token) 4606 4607 if op_token == TokenType.DCOLON: 4608 field = self._parse_types() 4609 if not field: 4610 self.raise_error("Expected type") 4611 elif op and self._curr: 4612 field = self._parse_column_reference() 4613 
else: 4614 field = self._parse_field(any_token=True, anonymous_func=True) 4615 4616 if isinstance(field, exp.Func) and this: 4617 # bigquery allows function calls like x.y.count(...) 4618 # SAFE.SUBSTR(...) 4619 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4620 this = exp.replace_tree( 4621 this, 4622 lambda n: ( 4623 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4624 if n.table 4625 else n.this 4626 ) 4627 if isinstance(n, exp.Column) 4628 else n, 4629 ) 4630 4631 if op: 4632 this = op(self, this, field) 4633 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4634 this = self.expression( 4635 exp.Column, 4636 this=field, 4637 table=this.this, 4638 db=this.args.get("table"), 4639 catalog=this.args.get("db"), 4640 ) 4641 else: 4642 this = self.expression(exp.Dot, this=this, expression=field) 4643 4644 this = self._parse_bracket(this) 4645 4646 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4647 4648 def _parse_primary(self) -> t.Optional[exp.Expression]: 4649 if self._match_set(self.PRIMARY_PARSERS): 4650 token_type = self._prev.token_type 4651 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4652 4653 if token_type == TokenType.STRING: 4654 expressions = [primary] 4655 while self._match(TokenType.STRING): 4656 expressions.append(exp.Literal.string(self._prev.text)) 4657 4658 if len(expressions) > 1: 4659 return self.expression(exp.Concat, expressions=expressions) 4660 4661 return primary 4662 4663 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4664 return exp.Literal.number(f"0.{self._prev.text}") 4665 4666 if self._match(TokenType.L_PAREN): 4667 comments = self._prev_comments 4668 query = self._parse_select() 4669 4670 if query: 4671 expressions = [query] 4672 else: 4673 expressions = self._parse_expressions() 4674 4675 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4676 4677 if not this and self._match(TokenType.R_PAREN, advance=False): 4678 this = self.expression(exp.Tuple) 4679 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4680 this = self._parse_subquery(this=this, parse_alias=False) 4681 elif isinstance(this, exp.Subquery): 4682 this = self._parse_subquery( 4683 this=self._parse_set_operations(this), parse_alias=False 4684 ) 4685 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4686 this = self.expression(exp.Tuple, expressions=expressions) 4687 else: 4688 this = self.expression(exp.Paren, this=this) 4689 4690 if this: 4691 this.add_comments(comments) 4692 4693 self._match_r_paren(expression=this) 4694 return this 4695 4696 return None 4697 4698 def _parse_field( 4699 self, 4700 any_token: bool = False, 4701 tokens: t.Optional[t.Collection[TokenType]] = None, 4702 anonymous_func: bool = False, 4703 ) -> t.Optional[exp.Expression]: 4704 if anonymous_func: 4705 field = ( 4706 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4707 or self._parse_primary() 4708 ) 4709 else: 4710 field = self._parse_primary() or self._parse_function( 4711 anonymous=anonymous_func, any_token=any_token 4712 ) 4713 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4714 4715 def _parse_function( 4716 self, 4717 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4718 anonymous: bool = False, 4719 optional_parens: bool = True, 4720 any_token: bool = False, 4721 ) -> t.Optional[exp.Expression]: 4722 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support 
this) 4723 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4724 fn_syntax = False 4725 if ( 4726 self._match(TokenType.L_BRACE, advance=False) 4727 and self._next 4728 and self._next.text.upper() == "FN" 4729 ): 4730 self._advance(2) 4731 fn_syntax = True 4732 4733 func = self._parse_function_call( 4734 functions=functions, 4735 anonymous=anonymous, 4736 optional_parens=optional_parens, 4737 any_token=any_token, 4738 ) 4739 4740 if fn_syntax: 4741 self._match(TokenType.R_BRACE) 4742 4743 return func 4744 4745 def _parse_function_call( 4746 self, 4747 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4748 anonymous: bool = False, 4749 optional_parens: bool = True, 4750 any_token: bool = False, 4751 ) -> t.Optional[exp.Expression]: 4752 if not self._curr: 4753 return None 4754 4755 comments = self._curr.comments 4756 token_type = self._curr.token_type 4757 this = self._curr.text 4758 upper = this.upper() 4759 4760 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4761 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4762 self._advance() 4763 return self._parse_window(parser(self)) 4764 4765 if not self._next or self._next.token_type != TokenType.L_PAREN: 4766 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4767 self._advance() 4768 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4769 4770 return None 4771 4772 if any_token: 4773 if token_type in self.RESERVED_TOKENS: 4774 return None 4775 elif token_type not in self.FUNC_TOKENS: 4776 return None 4777 4778 self._advance(2) 4779 4780 parser = self.FUNCTION_PARSERS.get(upper) 4781 if parser and not anonymous: 4782 this = parser(self) 4783 else: 4784 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4785 4786 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4787 this = self.expression(subquery_predicate, this=self._parse_select()) 4788 self._match_r_paren() 4789 return this 4790 4791 if functions is None: 4792 functions = self.FUNCTIONS 4793 4794 function = functions.get(upper) 4795 4796 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4797 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4798 4799 if alias: 4800 args = self._kv_to_prop_eq(args) 4801 4802 if function and not anonymous: 4803 if "dialect" in function.__code__.co_varnames: 4804 func = function(args, dialect=self.dialect) 4805 else: 4806 func = function(args) 4807 4808 func = self.validate_expression(func, args) 4809 if not self.dialect.NORMALIZE_FUNCTIONS: 4810 func.meta["name"] = this 4811 4812 this = func 4813 else: 4814 if token_type == TokenType.IDENTIFIER: 4815 this = exp.Identifier(this=this, quoted=True) 4816 this = self.expression(exp.Anonymous, this=this, expressions=args) 4817 4818 if isinstance(this, exp.Expression): 4819 this.add_comments(comments) 4820 4821 self._match_r_paren(this) 4822 return self._parse_window(this) 4823 4824 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4825 transformed = [] 4826 4827 for e in expressions: 4828 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4829 if isinstance(e, exp.Alias): 4830 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4831 4832 if not isinstance(e, exp.PropertyEQ): 4833 e = self.expression( 4834 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4835 ) 4836 4837 if isinstance(e.this, exp.Column): 4838 e.this.replace(e.this.this) 4839 4840 transformed.append(e) 4841 4842 
return transformed 4843 4844 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4845 return self._parse_column_def(self._parse_id_var()) 4846 4847 def _parse_user_defined_function( 4848 self, kind: t.Optional[TokenType] = None 4849 ) -> t.Optional[exp.Expression]: 4850 this = self._parse_id_var() 4851 4852 while self._match(TokenType.DOT): 4853 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4854 4855 if not self._match(TokenType.L_PAREN): 4856 return this 4857 4858 expressions = self._parse_csv(self._parse_function_parameter) 4859 self._match_r_paren() 4860 return self.expression( 4861 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4862 ) 4863 4864 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4865 literal = self._parse_primary() 4866 if literal: 4867 return self.expression(exp.Introducer, this=token.text, expression=literal) 4868 4869 return self.expression(exp.Identifier, this=token.text) 4870 4871 def _parse_session_parameter(self) -> exp.SessionParameter: 4872 kind = None 4873 this = self._parse_id_var() or self._parse_primary() 4874 4875 if this and self._match(TokenType.DOT): 4876 kind = this.name 4877 this = self._parse_var() or self._parse_primary() 4878 4879 return self.expression(exp.SessionParameter, this=this, kind=kind) 4880 4881 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4882 return self._parse_id_var() 4883 4884 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4885 index = self._index 4886 4887 if self._match(TokenType.L_PAREN): 4888 expressions = t.cast( 4889 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4890 ) 4891 4892 if not self._match(TokenType.R_PAREN): 4893 self._retreat(index) 4894 else: 4895 expressions = [self._parse_lambda_arg()] 4896 4897 if self._match_set(self.LAMBDAS): 4898 return self.LAMBDAS[self._prev.token_type](self, expressions) 4899 4900 self._retreat(index) 4901 4902 this: t.Optional[exp.Expression] 4903 4904 if self._match(TokenType.DISTINCT): 4905 this = self.expression( 4906 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4907 ) 4908 else: 4909 this = self._parse_select_or_expression(alias=alias) 4910 4911 return self._parse_limit( 4912 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4913 ) 4914 4915 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4916 index = self._index 4917 if not self._match(TokenType.L_PAREN): 4918 return this 4919 4920 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4921 # expr can be of both types 4922 if self._match_set(self.SELECT_START_TOKENS): 4923 self._retreat(index) 4924 return this 4925 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4926 self._match_r_paren() 4927 return self.expression(exp.Schema, this=this, expressions=args) 4928 4929 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4930 return self._parse_column_def(self._parse_field(any_token=True)) 4931 4932 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4933 # column defs are not really columns, they're identifiers 4934 if isinstance(this, exp.Column): 4935 this = this.this 4936 4937 kind = self._parse_types(schema=True) 4938 4939 if self._match_text_seq("FOR", "ORDINALITY"): 4940 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4941 4942 constraints: t.List[exp.Expression] = [] 4943 4944 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4945 ("ALIAS", "MATERIALIZED") 4946 ): 4947 persisted = self._prev.text.upper() == "MATERIALIZED" 4948 constraints.append( 4949 self.expression( 4950 exp.ComputedColumnConstraint, 4951 this=self._parse_assignment(), 4952 persisted=persisted or self._match_text_seq("PERSISTED"), 4953 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4954 ) 4955 ) 4956 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4957 self._match(TokenType.ALIAS) 4958 constraints.append( 4959 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4960 ) 4961 4962 while True: 4963 constraint = self._parse_column_constraint() 4964 if not constraint: 4965 break 4966 constraints.append(constraint) 4967 4968 if not kind and not constraints: 4969 return this 4970 4971 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4972 4973 def _parse_auto_increment( 4974 self, 4975 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4976 start = None 4977 increment = None 4978 4979 if self._match(TokenType.L_PAREN, advance=False): 4980 args = self._parse_wrapped_csv(self._parse_bitwise) 4981 start = seq_get(args, 0) 4982 increment = seq_get(args, 1) 4983 elif self._match_text_seq("START"): 4984 start = self._parse_bitwise() 4985 self._match_text_seq("INCREMENT") 4986 increment = self._parse_bitwise() 4987 4988 if start and increment: 4989 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4990 4991 return exp.AutoIncrementColumnConstraint() 4992 4993 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4994 if not self._match_text_seq("REFRESH"): 4995 self._retreat(self._index - 1) 4996 return None 4997 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4998 4999 def _parse_compress(self) -> exp.CompressColumnConstraint: 5000 if self._match(TokenType.L_PAREN, advance=False): 5001 return self.expression( 5002 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5003 ) 5004 5005 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5006 5007 def _parse_generated_as_identity( 5008 self, 5009 ) -> ( 5010 exp.GeneratedAsIdentityColumnConstraint 5011 | exp.ComputedColumnConstraint 5012 | exp.GeneratedAsRowColumnConstraint 5013 ): 5014 if self._match_text_seq("BY", "DEFAULT"): 5015 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5016 this = self.expression( 5017 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5018 ) 5019 else: 5020 self._match_text_seq("ALWAYS") 5021 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5022 5023 self._match(TokenType.ALIAS) 5024 5025 if self._match_text_seq("ROW"): 5026 start = self._match_text_seq("START") 5027 if not start: 5028 self._match(TokenType.END) 5029 hidden = self._match_text_seq("HIDDEN") 5030 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5031 5032 identity = self._match_text_seq("IDENTITY") 5033 5034 if self._match(TokenType.L_PAREN): 5035 if self._match(TokenType.START_WITH): 5036 this.set("start", self._parse_bitwise()) 5037 if self._match_text_seq("INCREMENT", "BY"): 5038 this.set("increment", self._parse_bitwise()) 5039 if self._match_text_seq("MINVALUE"): 5040 this.set("minvalue", self._parse_bitwise()) 5041 if self._match_text_seq("MAXVALUE"): 5042 this.set("maxvalue", self._parse_bitwise()) 5043 5044 if self._match_text_seq("CYCLE"): 5045 this.set("cycle", True) 5046 elif self._match_text_seq("NO", "CYCLE"): 5047 this.set("cycle", False) 5048 5049 if not identity: 5050 this.set("expression", self._parse_range()) 5051 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5052 args = self._parse_csv(self._parse_bitwise) 5053 this.set("start", seq_get(args, 0)) 5054 this.set("increment", seq_get(args, 1)) 5055 5056 self._match_r_paren() 5057 5058 return this 5059 5060 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5061 self._match_text_seq("LENGTH") 5062 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5063 5064 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5065 if self._match_text_seq("NULL"): 5066 return self.expression(exp.NotNullColumnConstraint) 5067 if self._match_text_seq("CASESPECIFIC"): 5068 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5069 if self._match_text_seq("FOR", "REPLICATION"): 5070 return self.expression(exp.NotForReplicationColumnConstraint) 5071 return None 5072 5073 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5074 if self._match(TokenType.CONSTRAINT): 5075 this = self._parse_id_var() 5076 else: 5077 this = None 5078 5079 if self._match_texts(self.CONSTRAINT_PARSERS): 5080 return self.expression( 5081 exp.ColumnConstraint, 5082 this=this, 5083 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5084 ) 5085 5086 return this 5087 5088 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5089 if not self._match(TokenType.CONSTRAINT): 5090 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5091 5092 return self.expression( 5093 exp.Constraint, 5094 this=self._parse_id_var(), 5095 expressions=self._parse_unnamed_constraints(), 5096 ) 5097 5098 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5099 constraints = [] 5100 while True: 5101 constraint = self._parse_unnamed_constraint() or self._parse_function() 5102 if not constraint: 5103 break 5104 constraints.append(constraint) 5105 5106 return constraints 5107 5108 def _parse_unnamed_constraint( 5109 self, constraints: t.Optional[t.Collection[str]] = None 5110 ) -> t.Optional[exp.Expression]: 5111 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5112 constraints or self.CONSTRAINT_PARSERS 5113 ): 5114 return None 5115 5116 constraint = self._prev.text.upper() 5117 if constraint not in self.CONSTRAINT_PARSERS: 5118 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5119 5120 return self.CONSTRAINT_PARSERS[constraint](self) 5121 5122 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5123 self._match_text_seq("KEY") 5124 return self.expression( 5125 exp.UniqueColumnConstraint, 5126 this=self._parse_schema(self._parse_id_var(any_token=False)), 5127 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5128 on_conflict=self._parse_on_conflict(), 5129 ) 5130 5131 def _parse_key_constraint_options(self) -> t.List[str]: 5132 options = [] 5133 while True: 5134 if not self._curr: 5135 break 5136 5137 if self._match(TokenType.ON): 5138 action = None 5139 on = self._advance_any() and self._prev.text 5140 5141 if self._match_text_seq("NO", "ACTION"): 5142 action = "NO ACTION" 5143 elif self._match_text_seq("CASCADE"): 5144 action = "CASCADE" 5145 elif self._match_text_seq("RESTRICT"): 5146 action = "RESTRICT" 5147 elif self._match_pair(TokenType.SET, TokenType.NULL): 5148 action = "SET NULL" 5149 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5150 action = "SET DEFAULT" 5151 else: 5152 self.raise_error("Invalid key constraint") 5153 5154 options.append(f"ON {on} {action}") 5155 elif self._match_text_seq("NOT", "ENFORCED"): 5156 options.append("NOT ENFORCED") 5157 elif self._match_text_seq("DEFERRABLE"): 5158 options.append("DEFERRABLE") 5159 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5160 options.append("INITIALLY DEFERRED") 5161 elif self._match_text_seq("NORELY"): 5162 options.append("NORELY") 5163 elif self._match_text_seq("MATCH", "FULL"): 5164 options.append("MATCH FULL") 5165 else: 5166 break 5167 5168 return options 5169 5170 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5171 if match and not self._match(TokenType.REFERENCES): 5172 return None 5173 5174 expressions = None 5175 this = self._parse_table(schema=True) 5176 options = self._parse_key_constraint_options() 5177 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5178 5179 def _parse_foreign_key(self) -> exp.ForeignKey: 5180 expressions = self._parse_wrapped_id_vars() 5181 reference = self._parse_references() 5182 options = {} 5183 5184 while self._match(TokenType.ON): 5185 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5186 self.raise_error("Expected DELETE or UPDATE") 5187 5188 kind = self._prev.text.lower() 5189 5190 if self._match_text_seq("NO", "ACTION"): 5191 action = "NO ACTION" 5192 elif self._match(TokenType.SET): 5193 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5194 action = "SET " + self._prev.text.upper() 5195 else: 5196 self._advance() 5197 action = self._prev.text.upper() 5198 5199 options[kind] = action 5200 5201 return self.expression( 5202 exp.ForeignKey, 5203 expressions=expressions, 5204 reference=reference, 5205 **options, # type: ignore 5206 ) 5207 5208 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5209 return self._parse_field() 5210 5211 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5212 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5213 self._retreat(self._index - 1) 5214 return None 5215 5216 id_vars = self._parse_wrapped_id_vars() 5217 return self.expression( 5218 exp.PeriodForSystemTimeConstraint, 5219 this=seq_get(id_vars, 0), 5220 expression=seq_get(id_vars, 1), 5221 ) 5222 5223 def _parse_primary_key( 5224 self, wrapped_optional: bool = False, in_props: bool = False 5225 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5226 desc = ( 5227 self._match_set((TokenType.ASC, TokenType.DESC)) 5228 and self._prev.token_type == TokenType.DESC 5229 ) 5230 5231 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5232 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5233 5234 expressions = self._parse_wrapped_csv( 5235 self._parse_primary_key_part, optional=wrapped_optional 5236 ) 5237 options = self._parse_key_constraint_options() 5238 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5239 5240 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5241 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5242 5243 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5244 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5245 return this 5246 5247 bracket_kind = self._prev.token_type 5248 expressions = self._parse_csv( 5249 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5250 ) 5251 5252 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5253 self.raise_error("Expected ]") 5254 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5255 self.raise_error("Expected }") 5256 5257 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5258 if bracket_kind == TokenType.L_BRACE: 5259 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5260 elif not this: 5261 this = self.expression(exp.Array, expressions=expressions) 5262 else: 5263 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5264 if constructor_type: 5265 return self.expression(constructor_type, expressions=expressions) 5266 5267 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5268 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5269 5270 self._add_comments(this) 5271 return self._parse_bracket(this) 5272 5273 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5274 if self._match(TokenType.COLON): 5275 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5276 return this 5277 5278 def _parse_case(self) -> t.Optional[exp.Expression]: 5279 ifs = [] 5280 default = None 5281 5282 comments = self._prev_comments 5283 expression = self._parse_assignment() 5284 5285 while self._match(TokenType.WHEN): 5286 this = self._parse_assignment() 5287 self._match(TokenType.THEN) 5288 then = self._parse_assignment() 5289 ifs.append(self.expression(exp.If, this=this, true=then)) 5290 5291 if self._match(TokenType.ELSE): 5292 default = self._parse_assignment() 5293 5294 if not self._match(TokenType.END): 5295 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5296 default = exp.column("interval") 5297 else: 5298 self.raise_error("Expected END after CASE", self._prev) 5299 5300 return self.expression( 5301 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5302 ) 5303 5304 def _parse_if(self) -> t.Optional[exp.Expression]: 5305 if self._match(TokenType.L_PAREN): 5306 args = self._parse_csv(self._parse_assignment) 5307 this = self.validate_expression(exp.If.from_arg_list(args), args) 5308 self._match_r_paren() 5309 else: 5310 index = self._index - 1 5311 5312 if self.NO_PAREN_IF_COMMANDS and index == 0: 5313 return 
self._parse_as_command(self._prev) 5314 5315 condition = self._parse_assignment() 5316 5317 if not condition: 5318 self._retreat(index) 5319 return None 5320 5321 self._match(TokenType.THEN) 5322 true = self._parse_assignment() 5323 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5324 self._match(TokenType.END) 5325 this = self.expression(exp.If, this=condition, true=true, false=false) 5326 5327 return this 5328 5329 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5330 if not self._match_text_seq("VALUE", "FOR"): 5331 self._retreat(self._index - 1) 5332 return None 5333 5334 return self.expression( 5335 exp.NextValueFor, 5336 this=self._parse_column(), 5337 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5338 ) 5339 5340 def _parse_extract(self) -> exp.Extract: 5341 this = self._parse_function() or self._parse_var_or_string(upper=True) 5342 5343 if self._match(TokenType.FROM): 5344 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5345 5346 if not self._match(TokenType.COMMA): 5347 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5348 5349 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5350 5351 def _parse_gap_fill(self) -> exp.GapFill: 5352 self._match(TokenType.TABLE) 5353 this = self._parse_table() 5354 5355 self._match(TokenType.COMMA) 5356 args = [this, *self._parse_csv(self._parse_lambda)] 5357 5358 gap_fill = exp.GapFill.from_arg_list(args) 5359 return self.validate_expression(gap_fill, args) 5360 5361 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5362 this = self._parse_assignment() 5363 5364 if not self._match(TokenType.ALIAS): 5365 if self._match(TokenType.COMMA): 5366 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5367 5368 self.raise_error("Expected AS after CAST") 5369 5370 fmt = None 5371 to = self._parse_types() 5372 5373 if self._match(TokenType.FORMAT): 5374 fmt_string = self._parse_string() 5375 fmt = self._parse_at_time_zone(fmt_string) 5376 5377 if not to: 5378 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5379 if to.this in exp.DataType.TEMPORAL_TYPES: 5380 this = self.expression( 5381 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5382 this=this, 5383 format=exp.Literal.string( 5384 format_time( 5385 fmt_string.this if fmt_string else "", 5386 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5387 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5388 ) 5389 ), 5390 safe=safe, 5391 ) 5392 5393 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5394 this.set("zone", fmt.args["zone"]) 5395 return this 5396 elif not to: 5397 self.raise_error("Expected TYPE after CAST") 5398 elif isinstance(to, exp.Identifier): 5399 to = exp.DataType.build(to.name, udt=True) 5400 elif to.this == exp.DataType.Type.CHAR: 5401 if self._match(TokenType.CHARACTER_SET): 5402 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5403 5404 return self.expression( 5405 exp.Cast if strict else exp.TryCast, 5406 this=this, 5407 to=to, 5408 format=fmt, 5409 safe=safe, 5410 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5411 ) 5412 5413 def _parse_string_agg(self) -> exp.Expression: 5414 if self._match(TokenType.DISTINCT): 5415 args: t.List[t.Optional[exp.Expression]] = [ 5416 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5417 ] 5418 if 
self._match(TokenType.COMMA): 5419 args.extend(self._parse_csv(self._parse_assignment)) 5420 else: 5421 args = self._parse_csv(self._parse_assignment) # type: ignore 5422 5423 index = self._index 5424 if not self._match(TokenType.R_PAREN) and args: 5425 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5426 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5427 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5428 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5429 5430 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5431 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5432 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5433 if not self._match_text_seq("WITHIN", "GROUP"): 5434 self._retreat(index) 5435 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5436 5437 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5438 order = self._parse_order(this=seq_get(args, 0)) 5439 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5440 5441 def _parse_convert( 5442 self, strict: bool, safe: t.Optional[bool] = None 5443 ) -> t.Optional[exp.Expression]: 5444 this = self._parse_bitwise() 5445 5446 if self._match(TokenType.USING): 5447 to: t.Optional[exp.Expression] = self.expression( 5448 exp.CharacterSet, this=self._parse_var() 5449 ) 5450 elif self._match(TokenType.COMMA): 5451 to = self._parse_types() 5452 else: 5453 to = None 5454 5455 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5456 5457 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5458 """ 5459 There are generally two variants of the DECODE function: 5460 5461 - DECODE(bin, charset) 5462 - DECODE(expression, search, result [, search, result] ... [, default]) 5463 5464 The second variant will always be parsed into a CASE expression. Note that NULL 5465 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5466 instead of relying on pattern matching. 
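        For example, DECODE(x, 1, 'one', 'other') is parsed roughly as
        CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and a NULL search
        value is turned into an explicit WHEN x IS NULL branch.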
5467 """ 5468 args = self._parse_csv(self._parse_assignment) 5469 5470 if len(args) < 3: 5471 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5472 5473 expression, *expressions = args 5474 if not expression: 5475 return None 5476 5477 ifs = [] 5478 for search, result in zip(expressions[::2], expressions[1::2]): 5479 if not search or not result: 5480 return None 5481 5482 if isinstance(search, exp.Literal): 5483 ifs.append( 5484 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5485 ) 5486 elif isinstance(search, exp.Null): 5487 ifs.append( 5488 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5489 ) 5490 else: 5491 cond = exp.or_( 5492 exp.EQ(this=expression.copy(), expression=search), 5493 exp.and_( 5494 exp.Is(this=expression.copy(), expression=exp.Null()), 5495 exp.Is(this=search.copy(), expression=exp.Null()), 5496 copy=False, 5497 ), 5498 copy=False, 5499 ) 5500 ifs.append(exp.If(this=cond, true=result)) 5501 5502 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5503 5504 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5505 self._match_text_seq("KEY") 5506 key = self._parse_column() 5507 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5508 self._match_text_seq("VALUE") 5509 value = self._parse_bitwise() 5510 5511 if not key and not value: 5512 return None 5513 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5514 5515 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5516 if not this or not self._match_text_seq("FORMAT", "JSON"): 5517 return this 5518 5519 return self.expression(exp.FormatJson, this=this) 5520 5521 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5522 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5523 for value in values: 5524 if self._match_text_seq(value, "ON", on): 5525 return f"{value} ON {on}" 5526 5527 return None 5528 5529 @t.overload 5530 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5531 5532 @t.overload 5533 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5534 5535 def _parse_json_object(self, agg=False): 5536 star = self._parse_star() 5537 expressions = ( 5538 [star] 5539 if star 5540 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5541 ) 5542 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5543 5544 unique_keys = None 5545 if self._match_text_seq("WITH", "UNIQUE"): 5546 unique_keys = True 5547 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5548 unique_keys = False 5549 5550 self._match_text_seq("KEYS") 5551 5552 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5553 self._parse_type() 5554 ) 5555 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5556 5557 return self.expression( 5558 exp.JSONObjectAgg if agg else exp.JSONObject, 5559 expressions=expressions, 5560 null_handling=null_handling, 5561 unique_keys=unique_keys, 5562 return_type=return_type, 5563 encoding=encoding, 5564 ) 5565 5566 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5567 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5568 if not self._match_text_seq("NESTED"): 5569 this = self._parse_id_var() 5570 kind = self._parse_types(allow_identifiers=False) 5571 nested = None 5572 else: 5573 this = None 5574 kind = None 5575 nested = True 5576 5577 path = self._match_text_seq("PATH") and self._parse_string() 5578 nested_schema = nested and self._parse_json_schema() 5579 5580 return self.expression( 5581 exp.JSONColumnDef, 5582 this=this, 5583 kind=kind, 5584 path=path, 5585 nested_schema=nested_schema, 5586 ) 5587 5588 def _parse_json_schema(self) -> exp.JSONSchema: 5589 self._match_text_seq("COLUMNS") 5590 return self.expression( 5591 exp.JSONSchema, 5592 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5593 ) 5594 5595 def _parse_json_table(self) -> exp.JSONTable: 5596 this = self._parse_format_json(self._parse_bitwise()) 5597 path = self._match(TokenType.COMMA) and self._parse_string() 5598 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5599 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5600 schema = self._parse_json_schema() 5601 5602 return exp.JSONTable( 5603 this=this, 5604 schema=schema, 5605 path=path, 5606 error_handling=error_handling, 5607 empty_handling=empty_handling, 5608 ) 5609 5610 def _parse_match_against(self) -> exp.MatchAgainst: 5611 expressions = self._parse_csv(self._parse_column) 5612 5613 self._match_text_seq(")", "AGAINST", "(") 5614 5615 this = self._parse_string() 5616 5617 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5618 modifier = "IN NATURAL LANGUAGE MODE" 5619 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5620 modifier = f"{modifier} WITH QUERY EXPANSION" 5621 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5622 modifier = "IN BOOLEAN MODE" 5623 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5624 modifier = "WITH QUERY EXPANSION" 5625 else: 5626 modifier = None 5627 5628 return self.expression( 5629 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5630 ) 5631 5632 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5633 def _parse_open_json(self) -> exp.OpenJSON: 5634 this = self._parse_bitwise() 5635 path = self._match(TokenType.COMMA) and self._parse_string() 5636 5637 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5638 this = self._parse_field(any_token=True) 5639 kind = self._parse_types() 5640 path = 
self._parse_string() 5641 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5642 5643 return self.expression( 5644 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5645 ) 5646 5647 expressions = None 5648 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5649 self._match_l_paren() 5650 expressions = self._parse_csv(_parse_open_json_column_def) 5651 5652 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5653 5654 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5655 args = self._parse_csv(self._parse_bitwise) 5656 5657 if self._match(TokenType.IN): 5658 return self.expression( 5659 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5660 ) 5661 5662 if haystack_first: 5663 haystack = seq_get(args, 0) 5664 needle = seq_get(args, 1) 5665 else: 5666 needle = seq_get(args, 0) 5667 haystack = seq_get(args, 1) 5668 5669 return self.expression( 5670 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5671 ) 5672 5673 def _parse_predict(self) -> exp.Predict: 5674 self._match_text_seq("MODEL") 5675 this = self._parse_table() 5676 5677 self._match(TokenType.COMMA) 5678 self._match_text_seq("TABLE") 5679 5680 return self.expression( 5681 exp.Predict, 5682 this=this, 5683 expression=self._parse_table(), 5684 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5685 ) 5686 5687 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5688 args = self._parse_csv(self._parse_table) 5689 return exp.JoinHint(this=func_name.upper(), expressions=args) 5690 5691 def _parse_substring(self) -> exp.Substring: 5692 # Postgres supports the form: substring(string [from int] [for int]) 5693 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5694 5695 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5696 5697 if self._match(TokenType.FROM): 5698 args.append(self._parse_bitwise()) 5699 if self._match(TokenType.FOR): 5700 if len(args) == 1: 5701 args.append(exp.Literal.number(1)) 5702 args.append(self._parse_bitwise()) 5703 5704 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5705 5706 def _parse_trim(self) -> exp.Trim: 5707 # https://www.w3resource.com/sql/character-functions/trim.php 5708 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5709 5710 position = None 5711 collation = None 5712 expression = None 5713 5714 if self._match_texts(self.TRIM_TYPES): 5715 position = self._prev.text.upper() 5716 5717 this = self._parse_bitwise() 5718 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5719 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5720 expression = self._parse_bitwise() 5721 5722 if invert_order: 5723 this, expression = expression, this 5724 5725 if self._match(TokenType.COLLATE): 5726 collation = self._parse_bitwise() 5727 5728 return self.expression( 5729 exp.Trim, this=this, position=position, expression=expression, collation=collation 5730 ) 5731 5732 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5733 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5734 5735 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5736 return self._parse_window(self._parse_id_var(), alias=True) 5737 5738 def _parse_respect_or_ignore_nulls( 5739 self, this: t.Optional[exp.Expression] 5740 ) -> t.Optional[exp.Expression]: 5741 if self._match_text_seq("IGNORE", "NULLS"): 
5742 return self.expression(exp.IgnoreNulls, this=this) 5743 if self._match_text_seq("RESPECT", "NULLS"): 5744 return self.expression(exp.RespectNulls, this=this) 5745 return this 5746 5747 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5748 if self._match(TokenType.HAVING): 5749 self._match_texts(("MAX", "MIN")) 5750 max = self._prev.text.upper() != "MIN" 5751 return self.expression( 5752 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5753 ) 5754 5755 return this 5756 5757 def _parse_window( 5758 self, this: t.Optional[exp.Expression], alias: bool = False 5759 ) -> t.Optional[exp.Expression]: 5760 func = this 5761 comments = func.comments if isinstance(func, exp.Expression) else None 5762 5763 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5764 self._match(TokenType.WHERE) 5765 this = self.expression( 5766 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5767 ) 5768 self._match_r_paren() 5769 5770 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5771 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5772 if self._match_text_seq("WITHIN", "GROUP"): 5773 order = self._parse_wrapped(self._parse_order) 5774 this = self.expression(exp.WithinGroup, this=this, expression=order) 5775 5776 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5777 # Some dialects choose to implement and some do not. 5778 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5779 5780 # There is some code above in _parse_lambda that handles 5781 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5782 5783 # The below changes handle 5784 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5785 5786 # Oracle allows both formats 5787 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5788 # and Snowflake chose to do the same for familiarity 5789 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5790 if isinstance(this, exp.AggFunc): 5791 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5792 5793 if ignore_respect and ignore_respect is not this: 5794 ignore_respect.replace(ignore_respect.this) 5795 this = self.expression(ignore_respect.__class__, this=this) 5796 5797 this = self._parse_respect_or_ignore_nulls(this) 5798 5799 # bigquery select from window x AS (partition by ...) 
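        # e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y), where
        # the named window is parsed with alias=True via _parse_named_window above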
5800 if alias: 5801 over = None 5802 self._match(TokenType.ALIAS) 5803 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5804 return this 5805 else: 5806 over = self._prev.text.upper() 5807 5808 if comments and isinstance(func, exp.Expression): 5809 func.pop_comments() 5810 5811 if not self._match(TokenType.L_PAREN): 5812 return self.expression( 5813 exp.Window, 5814 comments=comments, 5815 this=this, 5816 alias=self._parse_id_var(False), 5817 over=over, 5818 ) 5819 5820 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5821 5822 first = self._match(TokenType.FIRST) 5823 if self._match_text_seq("LAST"): 5824 first = False 5825 5826 partition, order = self._parse_partition_and_order() 5827 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5828 5829 if kind: 5830 self._match(TokenType.BETWEEN) 5831 start = self._parse_window_spec() 5832 self._match(TokenType.AND) 5833 end = self._parse_window_spec() 5834 5835 spec = self.expression( 5836 exp.WindowSpec, 5837 kind=kind, 5838 start=start["value"], 5839 start_side=start["side"], 5840 end=end["value"], 5841 end_side=end["side"], 5842 ) 5843 else: 5844 spec = None 5845 5846 self._match_r_paren() 5847 5848 window = self.expression( 5849 exp.Window, 5850 comments=comments, 5851 this=this, 5852 partition_by=partition, 5853 order=order, 5854 spec=spec, 5855 alias=window_alias, 5856 over=over, 5857 first=first, 5858 ) 5859 5860 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5861 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5862 return self._parse_window(window, alias=alias) 5863 5864 return window 5865 5866 def _parse_partition_and_order( 5867 self, 5868 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5869 return self._parse_partition_by(), self._parse_order() 5870 5871 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5872 self._match(TokenType.BETWEEN) 5873 5874 return { 5875 "value": ( 5876 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5877 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5878 or self._parse_bitwise() 5879 ), 5880 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5881 } 5882 5883 def _parse_alias( 5884 self, this: t.Optional[exp.Expression], explicit: bool = False 5885 ) -> t.Optional[exp.Expression]: 5886 any_token = self._match(TokenType.ALIAS) 5887 comments = self._prev_comments or [] 5888 5889 if explicit and not any_token: 5890 return this 5891 5892 if self._match(TokenType.L_PAREN): 5893 aliases = self.expression( 5894 exp.Aliases, 5895 comments=comments, 5896 this=this, 5897 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5898 ) 5899 self._match_r_paren(aliases) 5900 return aliases 5901 5902 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5903 self.STRING_ALIASES and self._parse_string_as_identifier() 5904 ) 5905 5906 if alias: 5907 comments.extend(alias.pop_comments()) 5908 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5909 column = this.this 5910 5911 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5912 if not this.comments and column and column.comments: 5913 this.comments = column.pop_comments() 5914 5915 return this 5916 5917 def _parse_id_var( 5918 self, 5919 any_token: bool = True, 5920 tokens: t.Optional[t.Collection[TokenType]] = None, 5921 ) -> t.Optional[exp.Expression]: 5922 expression = self._parse_identifier() 5923 if 
not expression and ( 5924 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5925 ): 5926 quoted = self._prev.token_type == TokenType.STRING 5927 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5928 5929 return expression 5930 5931 def _parse_string(self) -> t.Optional[exp.Expression]: 5932 if self._match_set(self.STRING_PARSERS): 5933 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5934 return self._parse_placeholder() 5935 5936 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5937 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5938 5939 def _parse_number(self) -> t.Optional[exp.Expression]: 5940 if self._match_set(self.NUMERIC_PARSERS): 5941 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5942 return self._parse_placeholder() 5943 5944 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5945 if self._match(TokenType.IDENTIFIER): 5946 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5947 return self._parse_placeholder() 5948 5949 def _parse_var( 5950 self, 5951 any_token: bool = False, 5952 tokens: t.Optional[t.Collection[TokenType]] = None, 5953 upper: bool = False, 5954 ) -> t.Optional[exp.Expression]: 5955 if ( 5956 (any_token and self._advance_any()) 5957 or self._match(TokenType.VAR) 5958 or (self._match_set(tokens) if tokens else False) 5959 ): 5960 return self.expression( 5961 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5962 ) 5963 return self._parse_placeholder() 5964 5965 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5966 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5967 self._advance() 5968 return self._prev 5969 return None 5970 5971 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 5972 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 5973 5974 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5975 return self._parse_primary() or self._parse_var(any_token=True) 5976 5977 def _parse_null(self) -> t.Optional[exp.Expression]: 5978 if self._match_set(self.NULL_TOKENS): 5979 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5980 return self._parse_placeholder() 5981 5982 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5983 if self._match(TokenType.TRUE): 5984 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5985 if self._match(TokenType.FALSE): 5986 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5987 return self._parse_placeholder() 5988 5989 def _parse_star(self) -> t.Optional[exp.Expression]: 5990 if self._match(TokenType.STAR): 5991 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5992 return self._parse_placeholder() 5993 5994 def _parse_parameter(self) -> exp.Parameter: 5995 this = self._parse_identifier() or self._parse_primary_or_var() 5996 return self.expression(exp.Parameter, this=this) 5997 5998 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5999 if self._match_set(self.PLACEHOLDER_PARSERS): 6000 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6001 if placeholder: 6002 return placeholder 6003 self._advance(-1) 6004 return None 6005 6006 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6007 if not self._match_texts(keywords): 6008 return None 6009 if self._match(TokenType.L_PAREN, 
advance=False): 6010 return self._parse_wrapped_csv(self._parse_expression) 6011 6012 expression = self._parse_expression() 6013 return [expression] if expression else None 6014 6015 def _parse_csv( 6016 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6017 ) -> t.List[exp.Expression]: 6018 parse_result = parse_method() 6019 items = [parse_result] if parse_result is not None else [] 6020 6021 while self._match(sep): 6022 self._add_comments(parse_result) 6023 parse_result = parse_method() 6024 if parse_result is not None: 6025 items.append(parse_result) 6026 6027 return items 6028 6029 def _parse_tokens( 6030 self, parse_method: t.Callable, expressions: t.Dict 6031 ) -> t.Optional[exp.Expression]: 6032 this = parse_method() 6033 6034 while self._match_set(expressions): 6035 this = self.expression( 6036 expressions[self._prev.token_type], 6037 this=this, 6038 comments=self._prev_comments, 6039 expression=parse_method(), 6040 ) 6041 6042 return this 6043 6044 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6045 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6046 6047 def _parse_wrapped_csv( 6048 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6049 ) -> t.List[exp.Expression]: 6050 return self._parse_wrapped( 6051 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6052 ) 6053 6054 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6055 wrapped = self._match(TokenType.L_PAREN) 6056 if not wrapped and not optional: 6057 self.raise_error("Expecting (") 6058 parse_result = parse_method() 6059 if wrapped: 6060 self._match_r_paren() 6061 return parse_result 6062 6063 def _parse_expressions(self) -> t.List[exp.Expression]: 6064 return self._parse_csv(self._parse_expression) 6065 6066 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6067 return self._parse_select() or self._parse_set_operations( 6068 self._parse_expression() if alias else self._parse_assignment() 6069 ) 6070 6071 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6072 return self._parse_query_modifiers( 6073 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6074 ) 6075 6076 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6077 this = None 6078 if self._match_texts(self.TRANSACTION_KIND): 6079 this = self._prev.text 6080 6081 self._match_texts(("TRANSACTION", "WORK")) 6082 6083 modes = [] 6084 while True: 6085 mode = [] 6086 while self._match(TokenType.VAR): 6087 mode.append(self._prev.text) 6088 6089 if mode: 6090 modes.append(" ".join(mode)) 6091 if not self._match(TokenType.COMMA): 6092 break 6093 6094 return self.expression(exp.Transaction, this=this, modes=modes) 6095 6096 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6097 chain = None 6098 savepoint = None 6099 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6100 6101 self._match_texts(("TRANSACTION", "WORK")) 6102 6103 if self._match_text_seq("TO"): 6104 self._match_text_seq("SAVEPOINT") 6105 savepoint = self._parse_id_var() 6106 6107 if self._match(TokenType.AND): 6108 chain = not self._match_text_seq("NO") 6109 self._match_text_seq("CHAIN") 6110 6111 if is_rollback: 6112 return self.expression(exp.Rollback, savepoint=savepoint) 6113 6114 return self.expression(exp.Commit, chain=chain) 6115 6116 def _parse_refresh(self) -> exp.Refresh: 6117 self._match(TokenType.TABLE) 6118 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6119 6120 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6121 if not self._match_text_seq("ADD"): 6122 return None 6123 6124 self._match(TokenType.COLUMN) 6125 exists_column = self._parse_exists(not_=True) 6126 expression = self._parse_field_def() 6127 6128 if expression: 6129 expression.set("exists", exists_column) 6130 6131 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6132 if self._match_texts(("FIRST", "AFTER")): 6133 position = self._prev.text 6134 column_position = self.expression( 6135 exp.ColumnPosition, this=self._parse_column(), position=position 6136 ) 6137 expression.set("position", column_position) 6138 6139 return expression 6140 6141 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6142 drop = self._match(TokenType.DROP) and self._parse_drop() 6143 if drop and not isinstance(drop, exp.Command): 6144 drop.set("kind", drop.args.get("kind", "COLUMN")) 6145 return drop 6146 6147 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6148 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6149 return self.expression( 6150 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6151 ) 6152 6153 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6154 index = self._index - 1 6155 6156 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6157 return self._parse_csv( 6158 lambda: self.expression( 6159 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6160 ) 6161 ) 6162 6163 self._retreat(index) 6164 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6165 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6166 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6167 6168 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6169 if self._match_texts(self.ALTER_ALTER_PARSERS): 6170 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6171 6172 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6173 # keyword after ALTER we default to parsing this statement 6174 self._match(TokenType.COLUMN) 6175 column = self._parse_field(any_token=True) 6176 6177 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6178 return self.expression(exp.AlterColumn, this=column, drop=True) 6179 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6180 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6181 if self._match(TokenType.COMMENT): 6182 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6183 if self._match_text_seq("DROP", "NOT", "NULL"): 6184 return self.expression( 6185 exp.AlterColumn, 6186 this=column, 6187 drop=True, 6188 allow_null=True, 6189 ) 6190 if self._match_text_seq("SET", "NOT", "NULL"): 6191 return self.expression( 6192 exp.AlterColumn, 6193 this=column, 6194 allow_null=False, 6195 ) 6196 self._match_text_seq("SET", "DATA") 6197 self._match_text_seq("TYPE") 6198 return self.expression( 6199 exp.AlterColumn, 6200 this=column, 6201 dtype=self._parse_types(), 6202 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6203 using=self._match(TokenType.USING) and self._parse_assignment(), 6204 ) 6205 6206 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6207 if self._match_texts(("ALL", "EVEN", "AUTO")): 6208 
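            # Redshift: ALTER TABLE ... ALTER DISTSTYLE { ALL | EVEN | AUTO };
            # the KEY DISTKEY <column> form is handled right below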
return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6209 6210 self._match_text_seq("KEY", "DISTKEY") 6211 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6212 6213 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6214 if compound: 6215 self._match_text_seq("SORTKEY") 6216 6217 if self._match(TokenType.L_PAREN, advance=False): 6218 return self.expression( 6219 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6220 ) 6221 6222 self._match_texts(("AUTO", "NONE")) 6223 return self.expression( 6224 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6225 ) 6226 6227 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6228 index = self._index - 1 6229 6230 partition_exists = self._parse_exists() 6231 if self._match(TokenType.PARTITION, advance=False): 6232 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6233 6234 self._retreat(index) 6235 return self._parse_csv(self._parse_drop_column) 6236 6237 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6238 if self._match(TokenType.COLUMN): 6239 exists = self._parse_exists() 6240 old_column = self._parse_column() 6241 to = self._match_text_seq("TO") 6242 new_column = self._parse_column() 6243 6244 if old_column is None or to is None or new_column is None: 6245 return None 6246 6247 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6248 6249 self._match_text_seq("TO") 6250 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6251 6252 def _parse_alter_table_set(self) -> exp.AlterSet: 6253 alter_set = self.expression(exp.AlterSet) 6254 6255 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6256 "TABLE", "PROPERTIES" 6257 ): 6258 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6259 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6260 alter_set.set("expressions", [self._parse_assignment()]) 6261 elif self._match_texts(("LOGGED", "UNLOGGED")): 6262 alter_set.set("option", exp.var(self._prev.text.upper())) 6263 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6264 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6265 elif self._match_text_seq("LOCATION"): 6266 alter_set.set("location", self._parse_field()) 6267 elif self._match_text_seq("ACCESS", "METHOD"): 6268 alter_set.set("access_method", self._parse_field()) 6269 elif self._match_text_seq("TABLESPACE"): 6270 alter_set.set("tablespace", self._parse_field()) 6271 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6272 alter_set.set("file_format", [self._parse_field()]) 6273 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6274 alter_set.set("file_format", self._parse_wrapped_options()) 6275 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6276 alter_set.set("copy_options", self._parse_wrapped_options()) 6277 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6278 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6279 else: 6280 if self._match_text_seq("SERDE"): 6281 alter_set.set("serde", self._parse_field()) 6282 6283 alter_set.set("expressions", [self._parse_properties()]) 6284 6285 return alter_set 6286 6287 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6288 start = self._prev 6289 6290 if not self._match(TokenType.TABLE): 6291 return 
self._parse_as_command(start) 6292 6293 exists = self._parse_exists() 6294 only = self._match_text_seq("ONLY") 6295 this = self._parse_table(schema=True) 6296 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6297 6298 if self._next: 6299 self._advance() 6300 6301 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6302 if parser: 6303 actions = ensure_list(parser(self)) 6304 options = self._parse_csv(self._parse_property) 6305 6306 if not self._curr and actions: 6307 return self.expression( 6308 exp.AlterTable, 6309 this=this, 6310 exists=exists, 6311 actions=actions, 6312 only=only, 6313 options=options, 6314 cluster=cluster, 6315 ) 6316 6317 return self._parse_as_command(start) 6318 6319 def _parse_merge(self) -> exp.Merge: 6320 self._match(TokenType.INTO) 6321 target = self._parse_table() 6322 6323 if target and self._match(TokenType.ALIAS, advance=False): 6324 target.set("alias", self._parse_table_alias()) 6325 6326 self._match(TokenType.USING) 6327 using = self._parse_table() 6328 6329 self._match(TokenType.ON) 6330 on = self._parse_assignment() 6331 6332 return self.expression( 6333 exp.Merge, 6334 this=target, 6335 using=using, 6336 on=on, 6337 expressions=self._parse_when_matched(), 6338 ) 6339 6340 def _parse_when_matched(self) -> t.List[exp.When]: 6341 whens = [] 6342 6343 while self._match(TokenType.WHEN): 6344 matched = not self._match(TokenType.NOT) 6345 self._match_text_seq("MATCHED") 6346 source = ( 6347 False 6348 if self._match_text_seq("BY", "TARGET") 6349 else self._match_text_seq("BY", "SOURCE") 6350 ) 6351 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6352 6353 self._match(TokenType.THEN) 6354 6355 if self._match(TokenType.INSERT): 6356 _this = self._parse_star() 6357 if _this: 6358 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6359 else: 6360 then = self.expression( 6361 exp.Insert, 6362 this=self._parse_value(), 6363 expression=self._match_text_seq("VALUES") and self._parse_value(), 6364 ) 6365 elif self._match(TokenType.UPDATE): 6366 expressions = self._parse_star() 6367 if expressions: 6368 then = self.expression(exp.Update, expressions=expressions) 6369 else: 6370 then = self.expression( 6371 exp.Update, 6372 expressions=self._match(TokenType.SET) 6373 and self._parse_csv(self._parse_equality), 6374 ) 6375 elif self._match(TokenType.DELETE): 6376 then = self.expression(exp.Var, this=self._prev.text) 6377 else: 6378 then = None 6379 6380 whens.append( 6381 self.expression( 6382 exp.When, 6383 matched=matched, 6384 source=source, 6385 condition=condition, 6386 then=then, 6387 ) 6388 ) 6389 return whens 6390 6391 def _parse_show(self) -> t.Optional[exp.Expression]: 6392 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6393 if parser: 6394 return parser(self) 6395 return self._parse_as_command(self._prev) 6396 6397 def _parse_set_item_assignment( 6398 self, kind: t.Optional[str] = None 6399 ) -> t.Optional[exp.Expression]: 6400 index = self._index 6401 6402 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6403 return self._parse_set_transaction(global_=kind == "GLOBAL") 6404 6405 left = self._parse_primary() or self._parse_column() 6406 assignment_delimiter = self._match_texts(("=", "TO")) 6407 6408 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6409 self._retreat(index) 6410 return None 6411 6412 right = self._parse_statement() or self._parse_id_var() 6413 if isinstance(right, 
(exp.Column, exp.Identifier)): 6414 right = exp.var(right.name) 6415 6416 this = self.expression(exp.EQ, this=left, expression=right) 6417 return self.expression(exp.SetItem, this=this, kind=kind) 6418 6419 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6420 self._match_text_seq("TRANSACTION") 6421 characteristics = self._parse_csv( 6422 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6423 ) 6424 return self.expression( 6425 exp.SetItem, 6426 expressions=characteristics, 6427 kind="TRANSACTION", 6428 **{"global": global_}, # type: ignore 6429 ) 6430 6431 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6432 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6433 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6434 6435 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6436 index = self._index 6437 set_ = self.expression( 6438 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6439 ) 6440 6441 if self._curr: 6442 self._retreat(index) 6443 return self._parse_as_command(self._prev) 6444 6445 return set_ 6446 6447 def _parse_var_from_options( 6448 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6449 ) -> t.Optional[exp.Var]: 6450 start = self._curr 6451 if not start: 6452 return None 6453 6454 option = start.text.upper() 6455 continuations = options.get(option) 6456 6457 index = self._index 6458 self._advance() 6459 for keywords in continuations or []: 6460 if isinstance(keywords, str): 6461 keywords = (keywords,) 6462 6463 if self._match_text_seq(*keywords): 6464 option = f"{option} {' '.join(keywords)}" 6465 break 6466 else: 6467 if continuations or continuations is None: 6468 if raise_unmatched: 6469 self.raise_error(f"Unknown option {option}") 6470 6471 self._retreat(index) 6472 return None 6473 6474 return exp.var(option) 6475 6476 def _parse_as_command(self, start: Token) -> exp.Command: 6477 while self._curr: 6478 self._advance() 6479 text = self._find_sql(start, self._prev) 6480 size = len(start.text) 6481 self._warn_unsupported() 6482 return exp.Command(this=text[:size], expression=text[size:]) 6483 6484 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6485 settings = [] 6486 6487 self._match_l_paren() 6488 kind = self._parse_id_var() 6489 6490 if self._match(TokenType.L_PAREN): 6491 while True: 6492 key = self._parse_id_var() 6493 value = self._parse_primary() 6494 6495 if not key and value is None: 6496 break 6497 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6498 self._match(TokenType.R_PAREN) 6499 6500 self._match_r_paren() 6501 6502 return self.expression( 6503 exp.DictProperty, 6504 this=this, 6505 kind=kind.this if kind else None, 6506 settings=settings, 6507 ) 6508 6509 def _parse_dict_range(self, this: str) -> exp.DictRange: 6510 self._match_l_paren() 6511 has_min = self._match_text_seq("MIN") 6512 if has_min: 6513 min = self._parse_var() or self._parse_primary() 6514 self._match_text_seq("MAX") 6515 max = self._parse_var() or self._parse_primary() 6516 else: 6517 max = self._parse_var() or self._parse_primary() 6518 min = exp.Literal.number(0) 6519 self._match_r_paren() 6520 return self.expression(exp.DictRange, this=this, min=min, max=max) 6521 6522 def _parse_comprehension( 6523 self, this: t.Optional[exp.Expression] 6524 ) -> t.Optional[exp.Comprehension]: 6525 index = self._index 6526 expression = self._parse_column() 6527 if not 
self._match(TokenType.IN): 6528 self._retreat(index - 1) 6529 return None 6530 iterator = self._parse_column() 6531 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6532 return self.expression( 6533 exp.Comprehension, 6534 this=this, 6535 expression=expression, 6536 iterator=iterator, 6537 condition=condition, 6538 ) 6539 6540 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6541 if self._match(TokenType.HEREDOC_STRING): 6542 return self.expression(exp.Heredoc, this=self._prev.text) 6543 6544 if not self._match_text_seq("$"): 6545 return None 6546 6547 tags = ["$"] 6548 tag_text = None 6549 6550 if self._is_connected(): 6551 self._advance() 6552 tags.append(self._prev.text.upper()) 6553 else: 6554 self.raise_error("No closing $ found") 6555 6556 if tags[-1] != "$": 6557 if self._is_connected() and self._match_text_seq("$"): 6558 tag_text = tags[-1] 6559 tags.append("$") 6560 else: 6561 self.raise_error("No closing $ found") 6562 6563 heredoc_start = self._curr 6564 6565 while self._curr: 6566 if self._match_text_seq(*tags, advance=False): 6567 this = self._find_sql(heredoc_start, self._prev) 6568 self._advance(len(tags)) 6569 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6570 6571 self._advance() 6572 6573 self.raise_error(f"No closing {''.join(tags)} found") 6574 return None 6575 6576 def _find_parser( 6577 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6578 ) -> t.Optional[t.Callable]: 6579 if not self._curr: 6580 return None 6581 6582 index = self._index 6583 this = [] 6584 while True: 6585 # The current token might be multiple words 6586 curr = self._curr.text.upper() 6587 key = curr.split(" ") 6588 this.append(curr) 6589 6590 self._advance() 6591 result, trie = in_trie(trie, key) 6592 if result == TrieResult.FAILED: 6593 break 6594 6595 if result == TrieResult.EXISTS: 6596 subparser = parsers[" ".join(this)] 6597 return subparser 6598 6599 self._retreat(index) 6600 return None 6601 6602 def _match(self, token_type, advance=True, expression=None): 6603 if not self._curr: 6604 return None 6605 6606 if self._curr.token_type == token_type: 6607 if advance: 6608 self._advance() 6609 self._add_comments(expression) 6610 return True 6611 6612 return None 6613 6614 def _match_set(self, types, advance=True): 6615 if not self._curr: 6616 return None 6617 6618 if self._curr.token_type in types: 6619 if advance: 6620 self._advance() 6621 return True 6622 6623 return None 6624 6625 def _match_pair(self, token_type_a, token_type_b, advance=True): 6626 if not self._curr or not self._next: 6627 return None 6628 6629 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6630 if advance: 6631 self._advance(2) 6632 return True 6633 6634 return None 6635 6636 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6637 if not self._match(TokenType.L_PAREN, expression=expression): 6638 self.raise_error("Expecting (") 6639 6640 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6641 if not self._match(TokenType.R_PAREN, expression=expression): 6642 self.raise_error("Expecting )") 6643 6644 def _match_texts(self, texts, advance=True): 6645 if self._curr and self._curr.text.upper() in texts: 6646 if advance: 6647 self._advance() 6648 return True 6649 return None 6650 6651 def _match_text_seq(self, *texts, advance=True): 6652 index = self._index 6653 for text in texts: 6654 if self._curr and self._curr.text.upper() == text: 6655 self._advance() 6656 else: 6657 
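                # a keyword in the sequence did not match, so undo any partial advance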
self._retreat(index) 6658 return None 6659 6660 if not advance: 6661 self._retreat(index) 6662 6663 return True 6664 6665 def _replace_lambda( 6666 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6667 ) -> t.Optional[exp.Expression]: 6668 if not node: 6669 return node 6670 6671 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6672 6673 for column in node.find_all(exp.Column): 6674 typ = lambda_types.get(column.parts[0].name) 6675 if typ is not None: 6676 dot_or_id = column.to_dot() if column.table else column.this 6677 6678 if typ: 6679 dot_or_id = self.expression( 6680 exp.Cast, 6681 this=dot_or_id, 6682 to=typ, 6683 ) 6684 6685 parent = column.parent 6686 6687 while isinstance(parent, exp.Dot): 6688 if not isinstance(parent.parent, exp.Dot): 6689 parent.replace(dot_or_id) 6690 break 6691 parent = parent.parent 6692 else: 6693 if column is node: 6694 node = dot_or_id 6695 else: 6696 column.replace(dot_or_id) 6697 return node 6698 6699 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6700 start = self._prev 6701 6702 # Not to be confused with TRUNCATE(number, decimals) function call 6703 if self._match(TokenType.L_PAREN): 6704 self._retreat(self._index - 2) 6705 return self._parse_function() 6706 6707 # Clickhouse supports TRUNCATE DATABASE as well 6708 is_database = self._match(TokenType.DATABASE) 6709 6710 self._match(TokenType.TABLE) 6711 6712 exists = self._parse_exists(not_=False) 6713 6714 expressions = self._parse_csv( 6715 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6716 ) 6717 6718 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6719 6720 if self._match_text_seq("RESTART", "IDENTITY"): 6721 identity = "RESTART" 6722 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6723 identity = "CONTINUE" 6724 else: 6725 identity = None 6726 6727 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6728 option = self._prev.text 6729 else: 6730 option = None 6731 6732 partition = self._parse_partition() 6733 6734 # Fallback case 6735 if self._curr: 6736 return self._parse_as_command(start) 6737 6738 return self.expression( 6739 exp.TruncateTable, 6740 expressions=expressions, 6741 is_database=is_database, 6742 exists=exists, 6743 cluster=cluster, 6744 identity=identity, 6745 option=option, 6746 partition=partition, 6747 ) 6748 6749 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6750 this = self._parse_ordered(self._parse_opclass) 6751 6752 if not self._match(TokenType.WITH): 6753 return this 6754 6755 op = self._parse_var(any_token=True) 6756 6757 return self.expression(exp.WithOperator, this=this, op=op) 6758 6759 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6760 self._match(TokenType.EQ) 6761 self._match(TokenType.L_PAREN) 6762 6763 opts: t.List[t.Optional[exp.Expression]] = [] 6764 while self._curr and not self._match(TokenType.R_PAREN): 6765 if self._match_text_seq("FORMAT_NAME", "="): 6766 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6767 # so we parse it separately to use _parse_field() 6768 prop = self.expression( 6769 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6770 ) 6771 opts.append(prop) 6772 else: 6773 opts.append(self._parse_property()) 6774 6775 self._match(TokenType.COMMA) 6776 6777 return opts 6778 6779 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6780 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6781 6782 options = [] 6783 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6784 option = self._parse_var(any_token=True) 6785 prev = self._prev.text.upper() 6786 6787 # Different dialects might separate options and values by white space, "=" and "AS" 6788 self._match(TokenType.EQ) 6789 self._match(TokenType.ALIAS) 6790 6791 param = self.expression(exp.CopyParameter, this=option) 6792 6793 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6794 TokenType.L_PAREN, advance=False 6795 ): 6796 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6797 param.set("expressions", self._parse_wrapped_options()) 6798 elif prev == "FILE_FORMAT": 6799 # T-SQL's external file format case 6800 param.set("expression", self._parse_field()) 6801 else: 6802 param.set("expression", self._parse_unquoted_field()) 6803 6804 options.append(param) 6805 self._match(sep) 6806 6807 return options 6808 6809 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6810 expr = self.expression(exp.Credentials) 6811 6812 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6813 expr.set("storage", self._parse_field()) 6814 if self._match_text_seq("CREDENTIALS"): 6815 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6816 creds = ( 6817 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6818 ) 6819 expr.set("credentials", creds) 6820 if self._match_text_seq("ENCRYPTION"): 6821 expr.set("encryption", self._parse_wrapped_options()) 6822 if self._match_text_seq("IAM_ROLE"): 6823 expr.set("iam_role", self._parse_field()) 6824 if self._match_text_seq("REGION"): 6825 expr.set("region", self._parse_field()) 6826 6827 return expr 6828 6829 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6830 return self._parse_field() 6831 6832 def _parse_copy(self) -> exp.Copy | exp.Command: 6833 start = self._prev 6834 6835 self._match(TokenType.INTO) 6836 6837 this = ( 6838 self._parse_select(nested=True, parse_subquery_alias=False) 6839 if self._match(TokenType.L_PAREN, advance=False) 6840 else self._parse_table(schema=True) 6841 ) 6842 6843 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6844 6845 files = self._parse_csv(self._parse_file_location) 6846 credentials = self._parse_credentials() 6847 6848 self._match_text_seq("WITH") 6849 6850 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6851 6852 # Fallback case 6853 if self._curr: 6854 return self._parse_as_command(start) 6855 6856 return self.expression( 6857 exp.Copy, 6858 this=this, 6859 kind=kind, 6860 credentials=credentials, 6861 files=files, 6862 params=params, 6863 )
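
As a brief illustration of how these private helpers surface through the public API, the following sketch (assuming only the documented sqlglot.parse_one entry point and the exp node classes) parses a windowed aggregate and a MERGE statement and inspects the nodes built by _parse_window and _parse_when_matched above:

import sqlglot
from sqlglot import exp

# _parse_window wraps the aggregate in an exp.Window whose args carry the
# PARTITION BY and ORDER BY clauses from _parse_partition_and_order.
ast = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
window = ast.find(exp.Window)
print(window.args["partition_by"], window.args["order"])

# _parse_when_matched builds one exp.When node per WHEN branch of a MERGE.
merge = sqlglot.parse_one(
    "MERGE INTO a USING b ON a.id = b.id WHEN MATCHED THEN UPDATE SET a.v = b.v"
)
print([w.args.get("matched") for w in merge.find_all(exp.When)])

Round-tripping either tree with .sql() regenerates the statement, which is a quick way to sanity-check a parse.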
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "HEX": build_hex, 155 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 156 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 157 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 158 "LIKE": build_like, 159 "LOG": build_logarithm, 160 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 161 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 162 "LOWER": build_lower, 163 "MOD": build_mod, 164 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 165 if len(args) != 2 166 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 167 "TIME_TO_TIME_STR": lambda args: exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 "TO_HEX": build_hex, 172 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 173 this=exp.Cast( 174 this=seq_get(args, 0), 175 to=exp.DataType(this=exp.DataType.Type.TEXT), 176 ), 177 start=exp.Literal.number(1), 178 length=exp.Literal.number(10), 179 ), 180 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 181 "UPPER": build_upper, 182 "VAR_MAP": build_var_map, 183 } 184 185 NO_PAREN_FUNCTIONS = { 186 TokenType.CURRENT_DATE: exp.CurrentDate, 187 TokenType.CURRENT_DATETIME: exp.CurrentDate, 188 TokenType.CURRENT_TIME: exp.CurrentTime, 189 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 190 TokenType.CURRENT_USER: exp.CurrentUser, 191 } 192 193 STRUCT_TYPE_TOKENS = { 194 TokenType.NESTED, 195 TokenType.OBJECT, 196 TokenType.STRUCT, 197 } 198 199 NESTED_TYPE_TOKENS = { 200 TokenType.ARRAY, 201 TokenType.LIST, 202 TokenType.LOWCARDINALITY, 203 TokenType.MAP, 204 TokenType.NULLABLE, 205 *STRUCT_TYPE_TOKENS, 206 } 207 208 ENUM_TYPE_TOKENS = { 209 TokenType.ENUM, 210 TokenType.ENUM8, 211 TokenType.ENUM16, 212 } 213 214 AGGREGATE_TYPE_TOKENS = { 215 TokenType.AGGREGATEFUNCTION, 216 TokenType.SIMPLEAGGREGATEFUNCTION, 217 } 218 219 TYPE_TOKENS = { 220 TokenType.BIT, 221 TokenType.BOOLEAN, 222 TokenType.TINYINT, 223 TokenType.UTINYINT, 224 TokenType.SMALLINT, 225 TokenType.USMALLINT, 226 TokenType.INT, 227 TokenType.UINT, 228 TokenType.BIGINT, 229 TokenType.UBIGINT, 230 TokenType.INT128, 231 TokenType.UINT128, 232 
TokenType.INT256, 233 TokenType.UINT256, 234 TokenType.MEDIUMINT, 235 TokenType.UMEDIUMINT, 236 TokenType.FIXEDSTRING, 237 TokenType.FLOAT, 238 TokenType.DOUBLE, 239 TokenType.CHAR, 240 TokenType.NCHAR, 241 TokenType.VARCHAR, 242 TokenType.NVARCHAR, 243 TokenType.BPCHAR, 244 TokenType.TEXT, 245 TokenType.MEDIUMTEXT, 246 TokenType.LONGTEXT, 247 TokenType.MEDIUMBLOB, 248 TokenType.LONGBLOB, 249 TokenType.BINARY, 250 TokenType.VARBINARY, 251 TokenType.JSON, 252 TokenType.JSONB, 253 TokenType.INTERVAL, 254 TokenType.TINYBLOB, 255 TokenType.TINYTEXT, 256 TokenType.TIME, 257 TokenType.TIMETZ, 258 TokenType.TIMESTAMP, 259 TokenType.TIMESTAMP_S, 260 TokenType.TIMESTAMP_MS, 261 TokenType.TIMESTAMP_NS, 262 TokenType.TIMESTAMPTZ, 263 TokenType.TIMESTAMPLTZ, 264 TokenType.TIMESTAMPNTZ, 265 TokenType.DATETIME, 266 TokenType.DATETIME64, 267 TokenType.DATE, 268 TokenType.DATE32, 269 TokenType.INT4RANGE, 270 TokenType.INT4MULTIRANGE, 271 TokenType.INT8RANGE, 272 TokenType.INT8MULTIRANGE, 273 TokenType.NUMRANGE, 274 TokenType.NUMMULTIRANGE, 275 TokenType.TSRANGE, 276 TokenType.TSMULTIRANGE, 277 TokenType.TSTZRANGE, 278 TokenType.TSTZMULTIRANGE, 279 TokenType.DATERANGE, 280 TokenType.DATEMULTIRANGE, 281 TokenType.DECIMAL, 282 TokenType.UDECIMAL, 283 TokenType.BIGDECIMAL, 284 TokenType.UUID, 285 TokenType.GEOGRAPHY, 286 TokenType.GEOMETRY, 287 TokenType.HLLSKETCH, 288 TokenType.HSTORE, 289 TokenType.PSEUDO_TYPE, 290 TokenType.SUPER, 291 TokenType.SERIAL, 292 TokenType.SMALLSERIAL, 293 TokenType.BIGSERIAL, 294 TokenType.XML, 295 TokenType.YEAR, 296 TokenType.UNIQUEIDENTIFIER, 297 TokenType.USERDEFINED, 298 TokenType.MONEY, 299 TokenType.SMALLMONEY, 300 TokenType.ROWVERSION, 301 TokenType.IMAGE, 302 TokenType.VARIANT, 303 TokenType.OBJECT, 304 TokenType.OBJECT_IDENTIFIER, 305 TokenType.INET, 306 TokenType.IPADDRESS, 307 TokenType.IPPREFIX, 308 TokenType.IPV4, 309 TokenType.IPV6, 310 TokenType.UNKNOWN, 311 TokenType.NULL, 312 TokenType.NAME, 313 TokenType.TDIGEST, 314 *ENUM_TYPE_TOKENS, 315 *NESTED_TYPE_TOKENS, 316 *AGGREGATE_TYPE_TOKENS, 317 } 318 319 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 320 TokenType.BIGINT: TokenType.UBIGINT, 321 TokenType.INT: TokenType.UINT, 322 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 323 TokenType.SMALLINT: TokenType.USMALLINT, 324 TokenType.TINYINT: TokenType.UTINYINT, 325 TokenType.DECIMAL: TokenType.UDECIMAL, 326 } 327 328 SUBQUERY_PREDICATES = { 329 TokenType.ANY: exp.Any, 330 TokenType.ALL: exp.All, 331 TokenType.EXISTS: exp.Exists, 332 TokenType.SOME: exp.Any, 333 } 334 335 RESERVED_TOKENS = { 336 *Tokenizer.SINGLE_TOKENS.values(), 337 TokenType.SELECT, 338 } - {TokenType.IDENTIFIER} 339 340 DB_CREATABLES = { 341 TokenType.DATABASE, 342 TokenType.DICTIONARY, 343 TokenType.MODEL, 344 TokenType.SCHEMA, 345 TokenType.SEQUENCE, 346 TokenType.STORAGE_INTEGRATION, 347 TokenType.TABLE, 348 TokenType.TAG, 349 TokenType.VIEW, 350 TokenType.WAREHOUSE, 351 TokenType.STREAMLIT, 352 } 353 354 CREATABLES = { 355 TokenType.COLUMN, 356 TokenType.CONSTRAINT, 357 TokenType.FOREIGN_KEY, 358 TokenType.FUNCTION, 359 TokenType.INDEX, 360 TokenType.PROCEDURE, 361 *DB_CREATABLES, 362 } 363 364 # Tokens that can represent identifiers 365 ID_VAR_TOKENS = { 366 TokenType.VAR, 367 TokenType.ANTI, 368 TokenType.APPLY, 369 TokenType.ASC, 370 TokenType.ASOF, 371 TokenType.AUTO_INCREMENT, 372 TokenType.BEGIN, 373 TokenType.BPCHAR, 374 TokenType.CACHE, 375 TokenType.CASE, 376 TokenType.COLLATE, 377 TokenType.COMMAND, 378 TokenType.COMMENT, 379 TokenType.COMMIT, 380 TokenType.CONSTRAINT, 381 TokenType.COPY, 382 
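# Illustrative note: because these keyword tokens are also accepted as
# identifiers, a query such as SELECT first FROM next parses with FIRST and
# NEXT treated as plain names instead of raising a syntax error.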
TokenType.DEFAULT, 383 TokenType.DELETE, 384 TokenType.DESC, 385 TokenType.DESCRIBE, 386 TokenType.DICTIONARY, 387 TokenType.DIV, 388 TokenType.END, 389 TokenType.EXECUTE, 390 TokenType.ESCAPE, 391 TokenType.FALSE, 392 TokenType.FIRST, 393 TokenType.FILTER, 394 TokenType.FINAL, 395 TokenType.FORMAT, 396 TokenType.FULL, 397 TokenType.IDENTIFIER, 398 TokenType.IS, 399 TokenType.ISNULL, 400 TokenType.INTERVAL, 401 TokenType.KEEP, 402 TokenType.KILL, 403 TokenType.LEFT, 404 TokenType.LOAD, 405 TokenType.MERGE, 406 TokenType.NATURAL, 407 TokenType.NEXT, 408 TokenType.OFFSET, 409 TokenType.OPERATOR, 410 TokenType.ORDINALITY, 411 TokenType.OVERLAPS, 412 TokenType.OVERWRITE, 413 TokenType.PARTITION, 414 TokenType.PERCENT, 415 TokenType.PIVOT, 416 TokenType.PRAGMA, 417 TokenType.RANGE, 418 TokenType.RECURSIVE, 419 TokenType.REFERENCES, 420 TokenType.REFRESH, 421 TokenType.REPLACE, 422 TokenType.RIGHT, 423 TokenType.ROLLUP, 424 TokenType.ROW, 425 TokenType.ROWS, 426 TokenType.SEMI, 427 TokenType.SET, 428 TokenType.SETTINGS, 429 TokenType.SHOW, 430 TokenType.TEMPORARY, 431 TokenType.TOP, 432 TokenType.TRUE, 433 TokenType.TRUNCATE, 434 TokenType.UNIQUE, 435 TokenType.UNNEST, 436 TokenType.UNPIVOT, 437 TokenType.UPDATE, 438 TokenType.USE, 439 TokenType.VOLATILE, 440 TokenType.WINDOW, 441 *CREATABLES, 442 *SUBQUERY_PREDICATES, 443 *TYPE_TOKENS, 444 *NO_PAREN_FUNCTIONS, 445 } 446 447 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 448 449 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 450 TokenType.ANTI, 451 TokenType.APPLY, 452 TokenType.ASOF, 453 TokenType.FULL, 454 TokenType.LEFT, 455 TokenType.LOCK, 456 TokenType.NATURAL, 457 TokenType.OFFSET, 458 TokenType.RIGHT, 459 TokenType.SEMI, 460 TokenType.WINDOW, 461 } 462 463 ALIAS_TOKENS = ID_VAR_TOKENS 464 465 ARRAY_CONSTRUCTORS = { 466 "ARRAY": exp.Array, 467 "LIST": exp.List, 468 } 469 470 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 471 472 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 473 474 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 475 476 FUNC_TOKENS = { 477 TokenType.COLLATE, 478 TokenType.COMMAND, 479 TokenType.CURRENT_DATE, 480 TokenType.CURRENT_DATETIME, 481 TokenType.CURRENT_TIMESTAMP, 482 TokenType.CURRENT_TIME, 483 TokenType.CURRENT_USER, 484 TokenType.FILTER, 485 TokenType.FIRST, 486 TokenType.FORMAT, 487 TokenType.GLOB, 488 TokenType.IDENTIFIER, 489 TokenType.INDEX, 490 TokenType.ISNULL, 491 TokenType.ILIKE, 492 TokenType.INSERT, 493 TokenType.LIKE, 494 TokenType.MERGE, 495 TokenType.OFFSET, 496 TokenType.PRIMARY_KEY, 497 TokenType.RANGE, 498 TokenType.REPLACE, 499 TokenType.RLIKE, 500 TokenType.ROW, 501 TokenType.UNNEST, 502 TokenType.VAR, 503 TokenType.LEFT, 504 TokenType.RIGHT, 505 TokenType.SEQUENCE, 506 TokenType.DATE, 507 TokenType.DATETIME, 508 TokenType.TABLE, 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TRUNCATE, 512 TokenType.WINDOW, 513 TokenType.XOR, 514 *TYPE_TOKENS, 515 *SUBQUERY_PREDICATES, 516 } 517 518 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 519 TokenType.AND: exp.And, 520 } 521 522 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 523 TokenType.COLON_EQ: exp.PropertyEQ, 524 } 525 526 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 527 TokenType.OR: exp.Or, 528 } 529 530 EQUALITY = { 531 TokenType.EQ: exp.EQ, 532 TokenType.NEQ: exp.NEQ, 533 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 534 } 535 536 COMPARISON = { 537 TokenType.GT: exp.GT, 538 TokenType.GTE: exp.GTE, 539 TokenType.LT: exp.LT, 540 TokenType.LTE: exp.LTE, 541 } 542 543 BITWISE = 
{ 544 TokenType.AMP: exp.BitwiseAnd, 545 TokenType.CARET: exp.BitwiseXor, 546 TokenType.PIPE: exp.BitwiseOr, 547 } 548 549 TERM = { 550 TokenType.DASH: exp.Sub, 551 TokenType.PLUS: exp.Add, 552 TokenType.MOD: exp.Mod, 553 TokenType.COLLATE: exp.Collate, 554 } 555 556 FACTOR = { 557 TokenType.DIV: exp.IntDiv, 558 TokenType.LR_ARROW: exp.Distance, 559 TokenType.SLASH: exp.Div, 560 TokenType.STAR: exp.Mul, 561 } 562 563 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 564 565 TIMES = { 566 TokenType.TIME, 567 TokenType.TIMETZ, 568 } 569 570 TIMESTAMPS = { 571 TokenType.TIMESTAMP, 572 TokenType.TIMESTAMPTZ, 573 TokenType.TIMESTAMPLTZ, 574 *TIMES, 575 } 576 577 SET_OPERATIONS = { 578 TokenType.UNION, 579 TokenType.INTERSECT, 580 TokenType.EXCEPT, 581 } 582 583 JOIN_METHODS = { 584 TokenType.ASOF, 585 TokenType.NATURAL, 586 TokenType.POSITIONAL, 587 } 588 589 JOIN_SIDES = { 590 TokenType.LEFT, 591 TokenType.RIGHT, 592 TokenType.FULL, 593 } 594 595 JOIN_KINDS = { 596 TokenType.ANTI, 597 TokenType.CROSS, 598 TokenType.INNER, 599 TokenType.OUTER, 600 TokenType.SEMI, 601 TokenType.STRAIGHT_JOIN, 602 } 603 604 JOIN_HINTS: t.Set[str] = set() 605 606 LAMBDAS = { 607 TokenType.ARROW: lambda self, expressions: self.expression( 608 exp.Lambda, 609 this=self._replace_lambda( 610 self._parse_assignment(), 611 expressions, 612 ), 613 expressions=expressions, 614 ), 615 TokenType.FARROW: lambda self, expressions: self.expression( 616 exp.Kwarg, 617 this=exp.var(expressions[0].name), 618 expression=self._parse_assignment(), 619 ), 620 } 621 622 COLUMN_OPERATORS = { 623 TokenType.DOT: None, 624 TokenType.DCOLON: lambda self, this, to: self.expression( 625 exp.Cast if self.STRICT_CAST else exp.TryCast, 626 this=this, 627 to=to, 628 ), 629 TokenType.ARROW: lambda self, this, path: self.expression( 630 exp.JSONExtract, 631 this=this, 632 expression=self.dialect.to_json_path(path), 633 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 634 ), 635 TokenType.DARROW: lambda self, this, path: self.expression( 636 exp.JSONExtractScalar, 637 this=this, 638 expression=self.dialect.to_json_path(path), 639 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 640 ), 641 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 642 exp.JSONBExtract, 643 this=this, 644 expression=path, 645 ), 646 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 647 exp.JSONBExtractScalar, 648 this=this, 649 expression=path, 650 ), 651 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 652 exp.JSONBContains, 653 this=this, 654 expression=key, 655 ), 656 } 657 658 EXPRESSION_PARSERS = { 659 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 660 exp.Column: lambda self: self._parse_column(), 661 exp.Condition: lambda self: self._parse_assignment(), 662 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 663 exp.Expression: lambda self: self._parse_expression(), 664 exp.From: lambda self: self._parse_from(joins=True), 665 exp.Group: lambda self: self._parse_group(), 666 exp.Having: lambda self: self._parse_having(), 667 exp.Identifier: lambda self: self._parse_id_var(), 668 exp.Join: lambda self: self._parse_join(), 669 exp.Lambda: lambda self: self._parse_lambda(), 670 exp.Lateral: lambda self: self._parse_lateral(), 671 exp.Limit: lambda self: self._parse_limit(), 672 exp.Offset: lambda self: self._parse_offset(), 673 exp.Order: lambda self: self._parse_order(), 674 exp.Ordered: lambda self: self._parse_ordered(), 675 exp.Properties: lambda self: 
self._parse_properties(), 676 exp.Qualify: lambda self: self._parse_qualify(), 677 exp.Returning: lambda self: self._parse_returning(), 678 exp.Select: lambda self: self._parse_select(), 679 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 680 exp.Table: lambda self: self._parse_table_parts(), 681 exp.TableAlias: lambda self: self._parse_table_alias(), 682 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 683 exp.Where: lambda self: self._parse_where(), 684 exp.Window: lambda self: self._parse_named_window(), 685 exp.With: lambda self: self._parse_with(), 686 "JOIN_TYPE": lambda self: self._parse_join_parts(), 687 } 688 689 STATEMENT_PARSERS = { 690 TokenType.ALTER: lambda self: self._parse_alter(), 691 TokenType.BEGIN: lambda self: self._parse_transaction(), 692 TokenType.CACHE: lambda self: self._parse_cache(), 693 TokenType.COMMENT: lambda self: self._parse_comment(), 694 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 695 TokenType.COPY: lambda self: self._parse_copy(), 696 TokenType.CREATE: lambda self: self._parse_create(), 697 TokenType.DELETE: lambda self: self._parse_delete(), 698 TokenType.DESC: lambda self: self._parse_describe(), 699 TokenType.DESCRIBE: lambda self: self._parse_describe(), 700 TokenType.DROP: lambda self: self._parse_drop(), 701 TokenType.INSERT: lambda self: self._parse_insert(), 702 TokenType.KILL: lambda self: self._parse_kill(), 703 TokenType.LOAD: lambda self: self._parse_load(), 704 TokenType.MERGE: lambda self: self._parse_merge(), 705 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 706 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 707 TokenType.REFRESH: lambda self: self._parse_refresh(), 708 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 709 TokenType.SET: lambda self: self._parse_set(), 710 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 711 TokenType.UNCACHE: lambda self: self._parse_uncache(), 712 TokenType.UPDATE: lambda self: self._parse_update(), 713 TokenType.USE: lambda self: self.expression( 714 exp.Use, 715 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 716 this=self._parse_table(schema=False), 717 ), 718 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 719 } 720 721 UNARY_PARSERS = { 722 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 723 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 724 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 725 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 726 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 727 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 728 } 729 730 STRING_PARSERS = { 731 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 732 exp.RawString, this=token.text 733 ), 734 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 735 exp.National, this=token.text 736 ), 737 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 738 TokenType.STRING: lambda self, token: self.expression( 739 exp.Literal, this=token.text, is_string=True 740 ), 741 TokenType.UNICODE_STRING: lambda self, token: self.expression( 742 exp.UnicodeString, 743 this=token.text, 744 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 745 ), 746 } 747 748 
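# Illustrative sketch of how these token-keyed tables surface through the
# public API (reprs abbreviated and version-dependent):
#   >>> import sqlglot
#   >>> sqlglot.parse_one("'hello'")  # routed through STRING_PARSERS
#   Literal(this=hello, is_string=True)
#   >>> sqlglot.parse_one("42")       # routed through NUMERIC_PARSERS below
#   Literal(this=42, is_string=False)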
NUMERIC_PARSERS = { 749 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 750 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 751 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 752 TokenType.NUMBER: lambda self, token: self.expression( 753 exp.Literal, this=token.text, is_string=False 754 ), 755 } 756 757 PRIMARY_PARSERS = { 758 **STRING_PARSERS, 759 **NUMERIC_PARSERS, 760 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 761 TokenType.NULL: lambda self, _: self.expression(exp.Null), 762 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 763 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 764 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 765 TokenType.STAR: lambda self, _: self.expression( 766 exp.Star, 767 **{ 768 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 769 "replace": self._parse_star_op("REPLACE"), 770 "rename": self._parse_star_op("RENAME"), 771 }, 772 ), 773 } 774 775 PLACEHOLDER_PARSERS = { 776 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 777 TokenType.PARAMETER: lambda self: self._parse_parameter(), 778 TokenType.COLON: lambda self: ( 779 self.expression(exp.Placeholder, this=self._prev.text) 780 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 781 else None 782 ), 783 } 784 785 RANGE_PARSERS = { 786 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 787 TokenType.GLOB: binary_range_parser(exp.Glob), 788 TokenType.ILIKE: binary_range_parser(exp.ILike), 789 TokenType.IN: lambda self, this: self._parse_in(this), 790 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 791 TokenType.IS: lambda self, this: self._parse_is(this), 792 TokenType.LIKE: binary_range_parser(exp.Like), 793 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 794 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 795 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 796 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 797 } 798 799 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 800 "ALLOWED_VALUES": lambda self: self.expression( 801 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 802 ), 803 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 804 "AUTO": lambda self: self._parse_auto_property(), 805 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 806 "BACKUP": lambda self: self.expression( 807 exp.BackupProperty, this=self._parse_var(any_token=True) 808 ), 809 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 810 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 811 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 812 "CHECKSUM": lambda self: self._parse_checksum(), 813 "CLUSTER BY": lambda self: self._parse_cluster(), 814 "CLUSTERED": lambda self: self._parse_clustered_by(), 815 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 816 exp.CollateProperty, **kwargs 817 ), 818 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 819 "CONTAINS": lambda self: self._parse_contains_property(), 820 "COPY": lambda self: self._parse_copy_property(), 821 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 822 "DATA_DELETION": lambda self: 
self._parse_data_deletion_property(), 823 "DEFINER": lambda self: self._parse_definer(), 824 "DETERMINISTIC": lambda self: self.expression( 825 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 826 ), 827 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 828 "DISTKEY": lambda self: self._parse_distkey(), 829 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 830 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 831 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 832 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 833 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 834 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 835 "FREESPACE": lambda self: self._parse_freespace(), 836 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 837 "HEAP": lambda self: self.expression(exp.HeapProperty), 838 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 839 "IMMUTABLE": lambda self: self.expression( 840 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 841 ), 842 "INHERITS": lambda self: self.expression( 843 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 844 ), 845 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 846 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 847 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 848 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 849 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 850 "LIKE": lambda self: self._parse_create_like(), 851 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 852 "LOCK": lambda self: self._parse_locking(), 853 "LOCKING": lambda self: self._parse_locking(), 854 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 855 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 856 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 857 "MODIFIES": lambda self: self._parse_modifies_property(), 858 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 859 "NO": lambda self: self._parse_no_property(), 860 "ON": lambda self: self._parse_on_property(), 861 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 862 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 863 "PARTITION": lambda self: self._parse_partitioned_of(), 864 "PARTITION BY": lambda self: self._parse_partitioned_by(), 865 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 866 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 867 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 868 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 869 "READS": lambda self: self._parse_reads_property(), 870 "REMOTE": lambda self: self._parse_remote_with_connection(), 871 "RETURNS": lambda self: self._parse_returns(), 872 "STRICT": lambda self: self.expression(exp.StrictProperty), 873 "ROW": lambda self: self._parse_row(), 874 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 875 "SAMPLE": lambda self: self.expression( 876 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 877 ), 878 "SECURE": lambda self: self.expression(exp.SecureProperty), 879 "SET": lambda self: self.expression(exp.SetProperty, 
multi=False), 880 "SETTINGS": lambda self: self.expression( 881 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 882 ), 883 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 884 "SORTKEY": lambda self: self._parse_sortkey(), 885 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 886 "STABLE": lambda self: self.expression( 887 exp.StabilityProperty, this=exp.Literal.string("STABLE") 888 ), 889 "STORED": lambda self: self._parse_stored(), 890 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 891 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 892 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 893 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 894 "TO": lambda self: self._parse_to_table(), 895 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 896 "TRANSFORM": lambda self: self.expression( 897 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 898 ), 899 "TTL": lambda self: self._parse_ttl(), 900 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 901 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 902 "VOLATILE": lambda self: self._parse_volatile_property(), 903 "WITH": lambda self: self._parse_with_property(), 904 } 905 906 CONSTRAINT_PARSERS = { 907 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 908 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 909 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 910 "CHARACTER SET": lambda self: self.expression( 911 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 912 ), 913 "CHECK": lambda self: self.expression( 914 exp.CheckColumnConstraint, 915 this=self._parse_wrapped(self._parse_assignment), 916 enforced=self._match_text_seq("ENFORCED"), 917 ), 918 "COLLATE": lambda self: self.expression( 919 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 920 ), 921 "COMMENT": lambda self: self.expression( 922 exp.CommentColumnConstraint, this=self._parse_string() 923 ), 924 "COMPRESS": lambda self: self._parse_compress(), 925 "CLUSTERED": lambda self: self.expression( 926 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 927 ), 928 "NONCLUSTERED": lambda self: self.expression( 929 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 930 ), 931 "DEFAULT": lambda self: self.expression( 932 exp.DefaultColumnConstraint, this=self._parse_bitwise() 933 ), 934 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 935 "EPHEMERAL": lambda self: self.expression( 936 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 937 ), 938 "EXCLUDE": lambda self: self.expression( 939 exp.ExcludeColumnConstraint, this=self._parse_index_params() 940 ), 941 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 942 "FORMAT": lambda self: self.expression( 943 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 944 ), 945 "GENERATED": lambda self: self._parse_generated_as_identity(), 946 "IDENTITY": lambda self: self._parse_auto_increment(), 947 "INLINE": lambda self: self._parse_inline(), 948 "LIKE": lambda self: self._parse_create_like(), 949 "NOT": lambda self: self._parse_not_constraint(), 950 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 951 "ON": lambda self: ( 952 self._match(TokenType.UPDATE) 
953 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 954 ) 955 or self.expression(exp.OnProperty, this=self._parse_id_var()), 956 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 957 "PERIOD": lambda self: self._parse_period_for_system_time(), 958 "PRIMARY KEY": lambda self: self._parse_primary_key(), 959 "REFERENCES": lambda self: self._parse_references(match=False), 960 "TITLE": lambda self: self.expression( 961 exp.TitleColumnConstraint, this=self._parse_var_or_string() 962 ), 963 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 964 "UNIQUE": lambda self: self._parse_unique(), 965 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 966 "WITH": lambda self: self.expression( 967 exp.Properties, expressions=self._parse_wrapped_properties() 968 ), 969 } 970 971 ALTER_PARSERS = { 972 "ADD": lambda self: self._parse_alter_table_add(), 973 "ALTER": lambda self: self._parse_alter_table_alter(), 974 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 975 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 976 "DROP": lambda self: self._parse_alter_table_drop(), 977 "RENAME": lambda self: self._parse_alter_table_rename(), 978 "SET": lambda self: self._parse_alter_table_set(), 979 } 980 981 ALTER_ALTER_PARSERS = { 982 "DISTKEY": lambda self: self._parse_alter_diststyle(), 983 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 984 "SORTKEY": lambda self: self._parse_alter_sortkey(), 985 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 986 } 987 988 SCHEMA_UNNAMED_CONSTRAINTS = { 989 "CHECK", 990 "EXCLUDE", 991 "FOREIGN KEY", 992 "LIKE", 993 "PERIOD", 994 "PRIMARY KEY", 995 "UNIQUE", 996 } 997 998 NO_PAREN_FUNCTION_PARSERS = { 999 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1000 "CASE": lambda self: self._parse_case(), 1001 "CONNECT_BY_ROOT": lambda self: self.expression( 1002 exp.ConnectByRoot, this=self._parse_column() 1003 ), 1004 "IF": lambda self: self._parse_if(), 1005 "NEXT": lambda self: self._parse_next_value_for(), 1006 } 1007 1008 INVALID_FUNC_NAME_TOKENS = { 1009 TokenType.IDENTIFIER, 1010 TokenType.STRING, 1011 } 1012 1013 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1014 1015 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1016 1017 FUNCTION_PARSERS = { 1018 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1019 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1020 "DECODE": lambda self: self._parse_decode(), 1021 "EXTRACT": lambda self: self._parse_extract(), 1022 "GAP_FILL": lambda self: self._parse_gap_fill(), 1023 "JSON_OBJECT": lambda self: self._parse_json_object(), 1024 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1025 "JSON_TABLE": lambda self: self._parse_json_table(), 1026 "MATCH": lambda self: self._parse_match_against(), 1027 "OPENJSON": lambda self: self._parse_open_json(), 1028 "POSITION": lambda self: self._parse_position(), 1029 "PREDICT": lambda self: self._parse_predict(), 1030 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1031 "STRING_AGG": lambda self: self._parse_string_agg(), 1032 "SUBSTRING": lambda self: self._parse_substring(), 1033 "TRIM": lambda self: self._parse_trim(), 1034 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1035 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1036 } 1037 1038 QUERY_MODIFIER_PARSERS = { 1039 
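# Illustrative note: each handler below returns an (arg_name, node) pair that
# _parse_query_modifiers sets on the enclosing query, e.g. for
# SELECT x FROM t WHERE x > 0 LIMIT 5 the WHERE entry yields ("where", exp.Where)
# and the LIMIT entry yields ("limit", exp.Limit).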
TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1040 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1041 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1042 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1043 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1044 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1045 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1046 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1047 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1048 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1049 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1050 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1051 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1052 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1053 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1054 TokenType.CLUSTER_BY: lambda self: ( 1055 "cluster", 1056 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1057 ), 1058 TokenType.DISTRIBUTE_BY: lambda self: ( 1059 "distribute", 1060 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1061 ), 1062 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1063 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1064 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1065 } 1066 1067 SET_PARSERS = { 1068 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1069 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1070 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1071 "TRANSACTION": lambda self: self._parse_set_transaction(), 1072 } 1073 1074 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1075 1076 TYPE_LITERAL_PARSERS = { 1077 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1078 } 1079 1080 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1081 1082 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1083 1084 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1085 1086 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1087 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1088 "ISOLATION": ( 1089 ("LEVEL", "REPEATABLE", "READ"), 1090 ("LEVEL", "READ", "COMMITTED"), 1091 ("LEVEL", "READ", "UNCOMMITTED"), 1092 ("LEVEL", "SERIALIZABLE"), 1093 ), 1094 "READ": ("WRITE", "ONLY"), 1095 } 1096 1097 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1098 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1099 ) 1100 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1101 1102 CREATE_SEQUENCE: OPTIONS_TYPE = { 1103 "SCALE": ("EXTEND", "NOEXTEND"), 1104 "SHARD": ("EXTEND", "NOEXTEND"), 1105 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1106 **dict.fromkeys( 1107 ( 1108 "SESSION", 1109 "GLOBAL", 1110 "KEEP", 1111 "NOKEEP", 1112 "ORDER", 1113 "NOORDER", 1114 "NOCACHE", 1115 "CYCLE", 1116 "NOCYCLE", 1117 "NOMINVALUE", 1118 "NOMAXVALUE", 1119 "NOSCALE", 1120 "NOSHARD", 1121 ), 1122 tuple(), 1123 ), 1124 } 1125 1126 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1127 1128 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE",
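# Illustrative note: USABLES feeds the TokenType.USE handler in
# STATEMENT_PARSERS above, so e.g. Snowflake's USE WAREHOUSE my_wh is parsed
# into exp.Use with kind set to the WAREHOUSE variable.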
"SCHEMA"), tuple()) 1129 1130 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1131 1132 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1133 1134 CLONE_KEYWORDS = {"CLONE", "COPY"} 1135 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1136 1137 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1138 1139 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1140 1141 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1142 1143 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1144 1145 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1146 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1147 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1148 1149 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1150 1151 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1152 1153 ADD_CONSTRAINT_TOKENS = { 1154 TokenType.CONSTRAINT, 1155 TokenType.FOREIGN_KEY, 1156 TokenType.INDEX, 1157 TokenType.KEY, 1158 TokenType.PRIMARY_KEY, 1159 TokenType.UNIQUE, 1160 } 1161 1162 DISTINCT_TOKENS = {TokenType.DISTINCT} 1163 1164 NULL_TOKENS = {TokenType.NULL} 1165 1166 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1167 1168 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1169 1170 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1171 1172 STRICT_CAST = True 1173 1174 PREFIXED_PIVOT_COLUMNS = False 1175 IDENTIFY_PIVOT_STRINGS = False 1176 1177 LOG_DEFAULTS_TO_LN = False 1178 1179 # Whether ADD is present for each column added by ALTER TABLE 1180 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1181 1182 # Whether the table sample clause expects CSV syntax 1183 TABLESAMPLE_CSV = False 1184 1185 # The default method used for table sampling 1186 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1187 1188 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1189 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1190 1191 # Whether the TRIM function expects the characters to trim as its first argument 1192 TRIM_PATTERN_FIRST = False 1193 1194 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1195 STRING_ALIASES = False 1196 1197 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1198 MODIFIERS_ATTACHED_TO_SET_OP = True 1199 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1200 1201 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1202 NO_PAREN_IF_COMMANDS = True 1203 1204 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1205 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1206 1207 # Whether the `:` operator is used to extract a value from a JSON document 1208 COLON_IS_JSON_EXTRACT = False 1209 1210 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1211 # If this is True and '(' is not found, the keyword will be treated as an identifier 1212 VALUES_FOLLOWED_BY_PAREN = True 1213 1214 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1215 SUPPORTS_IMPLICIT_UNNEST = False 1216 1217 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1218 INTERVAL_SPANS = True 1219 1220 # Whether a PARTITION clause can follow a table reference 1221 SUPPORTS_PARTITION_SELECTION = False 1222 1223 __slots__ = ( 1224 "error_level", 1225 "error_message_context", 1226 "max_errors", 1227 "dialect", 1228 "sql", 1229 "errors", 1230 "_tokens", 1231 "_index", 1232 "_curr", 1233 "_next", 1234 "_prev", 1235 "_prev_comments", 1236 ) 1237 1238 # Autofilled 1239 SHOW_TRIE: t.Dict = {} 1240 SET_TRIE: t.Dict = {} 1241 1242 def __init__( 1243 self, 1244 error_level: t.Optional[ErrorLevel] = None, 1245 error_message_context: int = 100, 1246 max_errors: int = 3, 1247 dialect: DialectType = None, 1248 ): 1249 from sqlglot.dialects import Dialect 1250 1251 self.error_level = error_level or ErrorLevel.IMMEDIATE 1252 self.error_message_context = error_message_context 1253 self.max_errors = max_errors 1254 self.dialect = Dialect.get_or_raise(dialect) 1255 self.reset() 1256 1257 def reset(self): 1258 self.sql = "" 1259 self.errors = [] 1260 self._tokens = [] 1261 self._index = 0 1262 self._curr = None 1263 self._next = None 1264 self._prev = None 1265 self._prev_comments = None 1266 1267 def parse( 1268 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1269 ) -> t.List[t.Optional[exp.Expression]]: 1270 """ 1271 Parses a list of tokens and returns a list of syntax trees, one tree 1272 per parsed SQL statement. 1273 1274 Args: 1275 raw_tokens: The list of tokens. 1276 sql: The original SQL string, used to produce helpful debug messages. 1277 1278 Returns: 1279 The list of the produced syntax trees. 1280 """ 1281 return self._parse( 1282 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1283 ) 1284 1285 def parse_into( 1286 self, 1287 expression_types: exp.IntoType, 1288 raw_tokens: t.List[Token], 1289 sql: t.Optional[str] = None, 1290 ) -> t.List[t.Optional[exp.Expression]]: 1291 """ 1292 Parses a list of tokens into a given Expression type. If a collection of Expression 1293 types is given instead, this method will try to parse the token list into each one 1294 of them, stopping at the first for which the parsing succeeds. 1295 1296 Args: 1297 expression_types: The expression type(s) to try and parse the token list into. 1298 raw_tokens: The list of tokens. 1299 sql: The original SQL string, used to produce helpful debug messages. 1300 1301 Returns: 1302 The target Expression. 
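Example (an illustrative sketch; imports mirror this module's own, and the repr shown is abbreviated and version-dependent):

    >>> from sqlglot import exp
    >>> from sqlglot.parser import Parser
    >>> from sqlglot.tokens import Tokenizer
    >>> Parser().parse_into(exp.Condition, Tokenizer().tokenize("x > 0"))
    [GT(this=Column(this=Identifier(this=x, quoted=False)), expression=Literal(this=0, is_string=False))]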
1303 """ 1304 errors = [] 1305 for expression_type in ensure_list(expression_types): 1306 parser = self.EXPRESSION_PARSERS.get(expression_type) 1307 if not parser: 1308 raise TypeError(f"No parser registered for {expression_type}") 1309 1310 try: 1311 return self._parse(parser, raw_tokens, sql) 1312 except ParseError as e: 1313 e.errors[0]["into_expression"] = expression_type 1314 errors.append(e) 1315 1316 raise ParseError( 1317 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1318 errors=merge_errors(errors), 1319 ) from errors[-1] 1320 1321 def _parse( 1322 self, 1323 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1324 raw_tokens: t.List[Token], 1325 sql: t.Optional[str] = None, 1326 ) -> t.List[t.Optional[exp.Expression]]: 1327 self.reset() 1328 self.sql = sql or "" 1329 1330 total = len(raw_tokens) 1331 chunks: t.List[t.List[Token]] = [[]] 1332 1333 for i, token in enumerate(raw_tokens): 1334 if token.token_type == TokenType.SEMICOLON: 1335 if token.comments: 1336 chunks.append([token]) 1337 1338 if i < total - 1: 1339 chunks.append([]) 1340 else: 1341 chunks[-1].append(token) 1342 1343 expressions = [] 1344 1345 for tokens in chunks: 1346 self._index = -1 1347 self._tokens = tokens 1348 self._advance() 1349 1350 expressions.append(parse_method(self)) 1351 1352 if self._index < len(self._tokens): 1353 self.raise_error("Invalid expression / Unexpected token") 1354 1355 self.check_errors() 1356 1357 return expressions 1358 1359 def check_errors(self) -> None: 1360 """Logs or raises any found errors, depending on the chosen error level setting.""" 1361 if self.error_level == ErrorLevel.WARN: 1362 for error in self.errors: 1363 logger.error(str(error)) 1364 elif self.error_level == ErrorLevel.RAISE and self.errors: 1365 raise ParseError( 1366 concat_messages(self.errors, self.max_errors), 1367 errors=merge_errors(self.errors), 1368 ) 1369 1370 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1371 """ 1372 Appends an error in the list of recorded errors or raises it, depending on the chosen 1373 error level setting. 1374 """ 1375 token = token or self._curr or self._prev or Token.string("") 1376 start = token.start 1377 end = token.end + 1 1378 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1379 highlight = self.sql[start:end] 1380 end_context = self.sql[end : end + self.error_message_context] 1381 1382 error = ParseError.new( 1383 f"{message}. Line {token.line}, Col: {token.col}.\n" 1384 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1385 description=message, 1386 line=token.line, 1387 col=token.col, 1388 start_context=start_context, 1389 highlight=highlight, 1390 end_context=end_context, 1391 ) 1392 1393 if self.error_level == ErrorLevel.IMMEDIATE: 1394 raise error 1395 1396 self.errors.append(error) 1397 1398 def expression( 1399 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1400 ) -> E: 1401 """ 1402 Creates a new, validated Expression. 1403 1404 Args: 1405 exp_class: The expression class to instantiate. 1406 comments: An optional list of comments to attach to the expression. 1407 kwargs: The arguments to set for the expression along with their respective values. 1408 1409 Returns: 1410 The target expression. 
1411 """ 1412 instance = exp_class(**kwargs) 1413 instance.add_comments(comments) if comments else self._add_comments(instance) 1414 return self.validate_expression(instance) 1415 1416 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1417 if expression and self._prev_comments: 1418 expression.add_comments(self._prev_comments) 1419 self._prev_comments = None 1420 1421 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1422 """ 1423 Validates an Expression, making sure that all its mandatory arguments are set. 1424 1425 Args: 1426 expression: The expression to validate. 1427 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1428 1429 Returns: 1430 The validated expression. 1431 """ 1432 if self.error_level != ErrorLevel.IGNORE: 1433 for error_message in expression.error_messages(args): 1434 self.raise_error(error_message) 1435 1436 return expression 1437 1438 def _find_sql(self, start: Token, end: Token) -> str: 1439 return self.sql[start.start : end.end + 1] 1440 1441 def _is_connected(self) -> bool: 1442 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1443 1444 def _advance(self, times: int = 1) -> None: 1445 self._index += times 1446 self._curr = seq_get(self._tokens, self._index) 1447 self._next = seq_get(self._tokens, self._index + 1) 1448 1449 if self._index > 0: 1450 self._prev = self._tokens[self._index - 1] 1451 self._prev_comments = self._prev.comments 1452 else: 1453 self._prev = None 1454 self._prev_comments = None 1455 1456 def _retreat(self, index: int) -> None: 1457 if index != self._index: 1458 self._advance(index - self._index) 1459 1460 def _warn_unsupported(self) -> None: 1461 if len(self._tokens) <= 1: 1462 return 1463 1464 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1465 # interested in emitting a warning for the one being currently processed. 1466 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1467 1468 logger.warning( 1469 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1470 ) 1471 1472 def _parse_command(self) -> exp.Command: 1473 self._warn_unsupported() 1474 return self.expression( 1475 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1476 ) 1477 1478 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1479 """ 1480 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1481 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1482 the parser state accordingly. 1483 """ 1484 index = self._index 1485 error_level = self.error_level 1486 1487 self.error_level = ErrorLevel.IMMEDIATE 1488 try: 1489 this = parse_method() 1490 except ParseError: 1491 this = None 1492 finally: 1493 if not this or retreat: 1494 self._retreat(index) 1495 self.error_level = error_level 1496 1497 return this 1498 1499 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1500 start = self._prev 1501 exists = self._parse_exists() if allow_exists else None 1502 1503 self._match(TokenType.ON) 1504 1505 materialized = self._match_text_seq("MATERIALIZED") 1506 kind = self._match_set(self.CREATABLES) and self._prev 1507 if not kind: 1508 return self._parse_as_command(start) 1509 1510 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1511 this = self._parse_user_defined_function(kind=kind.token_type) 1512 elif kind.token_type == TokenType.TABLE: 1513 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1514 elif kind.token_type == TokenType.COLUMN: 1515 this = self._parse_column() 1516 else: 1517 this = self._parse_id_var() 1518 1519 self._match(TokenType.IS) 1520 1521 return self.expression( 1522 exp.Comment, 1523 this=this, 1524 kind=kind.text, 1525 expression=self._parse_string(), 1526 exists=exists, 1527 materialized=materialized, 1528 ) 1529 1530 def _parse_to_table( 1531 self, 1532 ) -> exp.ToTableProperty: 1533 table = self._parse_table_parts(schema=True) 1534 return self.expression(exp.ToTableProperty, this=table) 1535 1536 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1537 def _parse_ttl(self) -> exp.Expression: 1538 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1539 this = self._parse_bitwise() 1540 1541 if self._match_text_seq("DELETE"): 1542 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1543 if self._match_text_seq("RECOMPRESS"): 1544 return self.expression( 1545 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1546 ) 1547 if self._match_text_seq("TO", "DISK"): 1548 return self.expression( 1549 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1550 ) 1551 if self._match_text_seq("TO", "VOLUME"): 1552 return self.expression( 1553 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1554 ) 1555 1556 return this 1557 1558 expressions = self._parse_csv(_parse_ttl_action) 1559 where = self._parse_where() 1560 group = self._parse_group() 1561 1562 aggregates = None 1563 if group and self._match(TokenType.SET): 1564 aggregates = self._parse_csv(self._parse_set_item) 1565 1566 return self.expression( 1567 exp.MergeTreeTTL, 1568 expressions=expressions, 1569 where=where, 1570 group=group, 1571 aggregates=aggregates, 1572 ) 1573 1574 def _parse_statement(self) -> t.Optional[exp.Expression]: 1575 if self._curr is None: 1576 return None 1577 1578 if self._match_set(self.STATEMENT_PARSERS): 1579 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1580 1581 if self._match_set(self.dialect.tokenizer.COMMANDS): 1582 return self._parse_command() 1583 1584 expression = self._parse_expression() 1585 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1586 return self._parse_query_modifiers(expression) 1587 1588 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1589 start =
self._prev 1590 temporary = self._match(TokenType.TEMPORARY) 1591 materialized = self._match_text_seq("MATERIALIZED") 1592 1593 kind = self._match_set(self.CREATABLES) and self._prev.text 1594 if not kind: 1595 return self._parse_as_command(start) 1596 1597 if_exists = exists or self._parse_exists() 1598 table = self._parse_table_parts( 1599 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1600 ) 1601 1602 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1603 1604 if self._match(TokenType.L_PAREN, advance=False): 1605 expressions = self._parse_wrapped_csv(self._parse_types) 1606 else: 1607 expressions = None 1608 1609 return self.expression( 1610 exp.Drop, 1611 comments=start.comments, 1612 exists=if_exists, 1613 this=table, 1614 expressions=expressions, 1615 kind=kind.upper(), 1616 temporary=temporary, 1617 materialized=materialized, 1618 cascade=self._match_text_seq("CASCADE"), 1619 constraints=self._match_text_seq("CONSTRAINTS"), 1620 purge=self._match_text_seq("PURGE"), 1621 cluster=cluster, 1622 ) 1623 1624 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1625 return ( 1626 self._match_text_seq("IF") 1627 and (not not_ or self._match(TokenType.NOT)) 1628 and self._match(TokenType.EXISTS) 1629 ) 1630 1631 def _parse_create(self) -> exp.Create | exp.Command: 1632 # Note: this can't be None because we've matched a statement parser 1633 start = self._prev 1634 comments = self._prev_comments 1635 1636 replace = ( 1637 start.token_type == TokenType.REPLACE 1638 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1639 or self._match_pair(TokenType.OR, TokenType.ALTER) 1640 ) 1641 1642 unique = self._match(TokenType.UNIQUE) 1643 1644 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1645 self._advance() 1646 1647 properties = None 1648 create_token = self._match_set(self.CREATABLES) and self._prev 1649 1650 if not create_token: 1651 # exp.Properties.Location.POST_CREATE 1652 properties = self._parse_properties() 1653 create_token = self._match_set(self.CREATABLES) and self._prev 1654 1655 if not properties or not create_token: 1656 return self._parse_as_command(start) 1657 1658 exists = self._parse_exists(not_=True) 1659 this = None 1660 expression: t.Optional[exp.Expression] = None 1661 indexes = None 1662 no_schema_binding = None 1663 begin = None 1664 end = None 1665 clone = None 1666 1667 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1668 nonlocal properties 1669 if properties and temp_props: 1670 properties.expressions.extend(temp_props.expressions) 1671 elif temp_props: 1672 properties = temp_props 1673 1674 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1675 this = self._parse_user_defined_function(kind=create_token.token_type) 1676 1677 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1678 extend_props(self._parse_properties()) 1679 1680 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1681 extend_props(self._parse_properties()) 1682 1683 if not expression: 1684 if self._match(TokenType.COMMAND): 1685 expression = self._parse_as_command(self._prev) 1686 else: 1687 begin = self._match(TokenType.BEGIN) 1688 return_ = self._match_text_seq("RETURN") 1689 1690 if self._match(TokenType.STRING, advance=False): 1691 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1692 #
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1693 expression = self._parse_string() 1694 extend_props(self._parse_properties()) 1695 else: 1696 expression = self._parse_statement() 1697 1698 end = self._match_text_seq("END") 1699 1700 if return_: 1701 expression = self.expression(exp.Return, this=expression) 1702 elif create_token.token_type == TokenType.INDEX: 1703 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1704 if not self._match(TokenType.ON): 1705 index = self._parse_id_var() 1706 anonymous = False 1707 else: 1708 index = None 1709 anonymous = True 1710 1711 this = self._parse_index(index=index, anonymous=anonymous) 1712 elif create_token.token_type in self.DB_CREATABLES: 1713 table_parts = self._parse_table_parts( 1714 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1715 ) 1716 1717 # exp.Properties.Location.POST_NAME 1718 self._match(TokenType.COMMA) 1719 extend_props(self._parse_properties(before=True)) 1720 1721 this = self._parse_schema(this=table_parts) 1722 1723 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1724 extend_props(self._parse_properties()) 1725 1726 self._match(TokenType.ALIAS) 1727 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1728 # exp.Properties.Location.POST_ALIAS 1729 extend_props(self._parse_properties()) 1730 1731 if create_token.token_type == TokenType.SEQUENCE: 1732 expression = self._parse_types() 1733 extend_props(self._parse_properties()) 1734 else: 1735 expression = self._parse_ddl_select() 1736 1737 if create_token.token_type == TokenType.TABLE: 1738 # exp.Properties.Location.POST_EXPRESSION 1739 extend_props(self._parse_properties()) 1740 1741 indexes = [] 1742 while True: 1743 index = self._parse_index() 1744 1745 # exp.Properties.Location.POST_INDEX 1746 extend_props(self._parse_properties()) 1747 1748 if not index: 1749 break 1750 else: 1751 self._match(TokenType.COMMA) 1752 indexes.append(index) 1753 elif create_token.token_type == TokenType.VIEW: 1754 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1755 no_schema_binding = True 1756 1757 shallow = self._match_text_seq("SHALLOW") 1758 1759 if self._match_texts(self.CLONE_KEYWORDS): 1760 copy = self._prev.text.lower() == "copy" 1761 clone = self.expression( 1762 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1763 ) 1764 1765 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1766 return self._parse_as_command(start) 1767 1768 return self.expression( 1769 exp.Create, 1770 comments=comments, 1771 this=this, 1772 kind=create_token.text.upper(), 1773 replace=replace, 1774 unique=unique, 1775 expression=expression, 1776 exists=exists, 1777 properties=properties, 1778 indexes=indexes, 1779 no_schema_binding=no_schema_binding, 1780 begin=begin, 1781 end=end, 1782 clone=clone, 1783 ) 1784 1785 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1786 seq = exp.SequenceProperties() 1787 1788 options = [] 1789 index = self._index 1790 1791 while self._curr: 1792 self._match(TokenType.COMMA) 1793 if self._match_text_seq("INCREMENT"): 1794 self._match_text_seq("BY") 1795 self._match_text_seq("=") 1796 seq.set("increment", self._parse_term()) 1797 elif self._match_text_seq("MINVALUE"): 1798 seq.set("minvalue", self._parse_term()) 1799 elif self._match_text_seq("MAXVALUE"): 1800 seq.set("maxvalue", self._parse_term()) 1801 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1802 self._match_text_seq("=") 1803 seq.set("start", self._parse_term()) 1804 elif self._match_text_seq("CACHE"): 1805 # T-SQL allows empty CACHE which is initialized dynamically 1806 seq.set("cache", self._parse_number() or True) 1807 elif self._match_text_seq("OWNED", "BY"): 1808 # "OWNED BY NONE" is the default 1809 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1810 else: 1811 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1812 if opt: 1813 options.append(opt) 1814 else: 1815 break 1816 1817 seq.set("options", options if options else None) 1818 return None if self._index == index else seq 1819 1820 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1821 # only used for teradata currently 1822 self._match(TokenType.COMMA) 1823 1824 kwargs = { 1825 "no": self._match_text_seq("NO"), 1826 "dual": self._match_text_seq("DUAL"), 1827 "before": self._match_text_seq("BEFORE"), 1828 "default": self._match_text_seq("DEFAULT"), 1829 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1830 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1831 "after": self._match_text_seq("AFTER"), 1832 "minimum": self._match_texts(("MIN", "MINIMUM")), 1833 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1834 } 1835 1836 if self._match_texts(self.PROPERTY_PARSERS): 1837 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1838 try: 1839 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1840 except TypeError: 1841 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1842 1843 return None 1844 1845 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1846 return self._parse_wrapped_csv(self._parse_property) 1847 1848 def _parse_property(self) -> t.Optional[exp.Expression]: 1849 if self._match_texts(self.PROPERTY_PARSERS): 1850 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1851 1852 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1853 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1854 1855 if self._match_text_seq("COMPOUND", "SORTKEY"): 1856 return self._parse_sortkey(compound=True) 1857 1858 if self._match_text_seq("SQL", "SECURITY"): 1859 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1860 1861 index = self._index 1862 key = self._parse_column() 1863 1864 if not self._match(TokenType.EQ): 1865 self._retreat(index) 1866 return self._parse_sequence_properties() 1867 1868 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1869 if isinstance(key, exp.Column): 1870 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1871 1872 value = self._parse_bitwise() or self._parse_var(any_token=True) 1873 1874 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1875 if isinstance(value, exp.Column): 1876 value = exp.var(value.name) 1877 1878 return self.expression(exp.Property, this=key, value=value) 1879 1880 def _parse_stored(self) -> exp.FileFormatProperty: 1881 self._match(TokenType.ALIAS) 1882 1883 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1884 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1885 1886 return self.expression( 1887 exp.FileFormatProperty, 1888 this=( 1889 self.expression( 1890 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1891 ) 1892 if 
input_format or output_format 1893 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1894 ), 1895 ) 1896 1897 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1898 field = self._parse_field() 1899 if isinstance(field, exp.Identifier) and not field.quoted: 1900 field = exp.var(field) 1901 1902 return field 1903 1904 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1905 self._match(TokenType.EQ) 1906 self._match(TokenType.ALIAS) 1907 1908 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1909 1910 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1911 properties = [] 1912 while True: 1913 if before: 1914 prop = self._parse_property_before() 1915 else: 1916 prop = self._parse_property() 1917 if not prop: 1918 break 1919 for p in ensure_list(prop): 1920 properties.append(p) 1921 1922 if properties: 1923 return self.expression(exp.Properties, expressions=properties) 1924 1925 return None 1926 1927 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1928 return self.expression( 1929 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1930 ) 1931 1932 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1933 if self._index >= 2: 1934 pre_volatile_token = self._tokens[self._index - 2] 1935 else: 1936 pre_volatile_token = None 1937 1938 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1939 return exp.VolatileProperty() 1940 1941 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1942 1943 def _parse_retention_period(self) -> exp.Var: 1944 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1945 number = self._parse_number() 1946 number_str = f"{number} " if number else "" 1947 unit = self._parse_var(any_token=True) 1948 return exp.var(f"{number_str}{unit}") 1949 1950 def _parse_system_versioning_property( 1951 self, with_: bool = False 1952 ) -> exp.WithSystemVersioningProperty: 1953 self._match(TokenType.EQ) 1954 prop = self.expression( 1955 exp.WithSystemVersioningProperty, 1956 **{ # type: ignore 1957 "on": True, 1958 "with": with_, 1959 }, 1960 ) 1961 1962 if self._match_text_seq("OFF"): 1963 prop.set("on", False) 1964 return prop 1965 1966 self._match(TokenType.ON) 1967 if self._match(TokenType.L_PAREN): 1968 while self._curr and not self._match(TokenType.R_PAREN): 1969 if self._match_text_seq("HISTORY_TABLE", "="): 1970 prop.set("this", self._parse_table_parts()) 1971 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1972 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1973 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1974 prop.set("retention_period", self._parse_retention_period()) 1975 1976 self._match(TokenType.COMMA) 1977 1978 return prop 1979 1980 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1981 self._match(TokenType.EQ) 1982 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1983 prop = self.expression(exp.DataDeletionProperty, on=on) 1984 1985 if self._match(TokenType.L_PAREN): 1986 while self._curr and not self._match(TokenType.R_PAREN): 1987 if self._match_text_seq("FILTER_COLUMN", "="): 1988 prop.set("filter_column", self._parse_column()) 1989 elif self._match_text_seq("RETENTION_PERIOD", "="): 1990 prop.set("retention_period", self._parse_retention_period()) 1991 1992 
self._match(TokenType.COMMA) 1993 1994 return prop 1995 1996 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1997 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1998 prop = self._parse_system_versioning_property(with_=True) 1999 self._match_r_paren() 2000 return prop 2001 2002 if self._match(TokenType.L_PAREN, advance=False): 2003 return self._parse_wrapped_properties() 2004 2005 if self._match_text_seq("JOURNAL"): 2006 return self._parse_withjournaltable() 2007 2008 if self._match_texts(self.VIEW_ATTRIBUTES): 2009 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2010 2011 if self._match_text_seq("DATA"): 2012 return self._parse_withdata(no=False) 2013 elif self._match_text_seq("NO", "DATA"): 2014 return self._parse_withdata(no=True) 2015 2016 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2017 return self._parse_serde_properties(with_=True) 2018 2019 if not self._next: 2020 return None 2021 2022 return self._parse_withisolatedloading() 2023 2024 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2025 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2026 self._match(TokenType.EQ) 2027 2028 user = self._parse_id_var() 2029 self._match(TokenType.PARAMETER) 2030 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2031 2032 if not user or not host: 2033 return None 2034 2035 return exp.DefinerProperty(this=f"{user}@{host}") 2036 2037 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2038 self._match(TokenType.TABLE) 2039 self._match(TokenType.EQ) 2040 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2041 2042 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2043 return self.expression(exp.LogProperty, no=no) 2044 2045 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2046 return self.expression(exp.JournalProperty, **kwargs) 2047 2048 def _parse_checksum(self) -> exp.ChecksumProperty: 2049 self._match(TokenType.EQ) 2050 2051 on = None 2052 if self._match(TokenType.ON): 2053 on = True 2054 elif self._match_text_seq("OFF"): 2055 on = False 2056 2057 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2058 2059 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2060 return self.expression( 2061 exp.Cluster, 2062 expressions=( 2063 self._parse_wrapped_csv(self._parse_ordered) 2064 if wrapped 2065 else self._parse_csv(self._parse_ordered) 2066 ), 2067 ) 2068 2069 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2070 self._match_text_seq("BY") 2071 2072 self._match_l_paren() 2073 expressions = self._parse_csv(self._parse_column) 2074 self._match_r_paren() 2075 2076 if self._match_text_seq("SORTED", "BY"): 2077 self._match_l_paren() 2078 sorted_by = self._parse_csv(self._parse_ordered) 2079 self._match_r_paren() 2080 else: 2081 sorted_by = None 2082 2083 self._match(TokenType.INTO) 2084 buckets = self._parse_number() 2085 self._match_text_seq("BUCKETS") 2086 2087 return self.expression( 2088 exp.ClusteredByProperty, 2089 expressions=expressions, 2090 sorted_by=sorted_by, 2091 buckets=buckets, 2092 ) 2093 2094 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2095 if not self._match_text_seq("GRANTS"): 2096 self._retreat(self._index - 1) 2097 return None 2098 2099 return self.expression(exp.CopyGrantsProperty) 2100 2101 def _parse_freespace(self) -> exp.FreespaceProperty: 2102 self._match(TokenType.EQ) 2103 return 
self.expression( 2104 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2105 ) 2106 2107 def _parse_mergeblockratio( 2108 self, no: bool = False, default: bool = False 2109 ) -> exp.MergeBlockRatioProperty: 2110 if self._match(TokenType.EQ): 2111 return self.expression( 2112 exp.MergeBlockRatioProperty, 2113 this=self._parse_number(), 2114 percent=self._match(TokenType.PERCENT), 2115 ) 2116 2117 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2118 2119 def _parse_datablocksize( 2120 self, 2121 default: t.Optional[bool] = None, 2122 minimum: t.Optional[bool] = None, 2123 maximum: t.Optional[bool] = None, 2124 ) -> exp.DataBlocksizeProperty: 2125 self._match(TokenType.EQ) 2126 size = self._parse_number() 2127 2128 units = None 2129 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2130 units = self._prev.text 2131 2132 return self.expression( 2133 exp.DataBlocksizeProperty, 2134 size=size, 2135 units=units, 2136 default=default, 2137 minimum=minimum, 2138 maximum=maximum, 2139 ) 2140 2141 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2142 self._match(TokenType.EQ) 2143 always = self._match_text_seq("ALWAYS") 2144 manual = self._match_text_seq("MANUAL") 2145 never = self._match_text_seq("NEVER") 2146 default = self._match_text_seq("DEFAULT") 2147 2148 autotemp = None 2149 if self._match_text_seq("AUTOTEMP"): 2150 autotemp = self._parse_schema() 2151 2152 return self.expression( 2153 exp.BlockCompressionProperty, 2154 always=always, 2155 manual=manual, 2156 never=never, 2157 default=default, 2158 autotemp=autotemp, 2159 ) 2160 2161 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2162 index = self._index 2163 no = self._match_text_seq("NO") 2164 concurrent = self._match_text_seq("CONCURRENT") 2165 2166 if not self._match_text_seq("ISOLATED", "LOADING"): 2167 self._retreat(index) 2168 return None 2169 2170 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2171 return self.expression( 2172 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2173 ) 2174 2175 def _parse_locking(self) -> exp.LockingProperty: 2176 if self._match(TokenType.TABLE): 2177 kind = "TABLE" 2178 elif self._match(TokenType.VIEW): 2179 kind = "VIEW" 2180 elif self._match(TokenType.ROW): 2181 kind = "ROW" 2182 elif self._match_text_seq("DATABASE"): 2183 kind = "DATABASE" 2184 else: 2185 kind = None 2186 2187 if kind in ("DATABASE", "TABLE", "VIEW"): 2188 this = self._parse_table_parts() 2189 else: 2190 this = None 2191 2192 if self._match(TokenType.FOR): 2193 for_or_in = "FOR" 2194 elif self._match(TokenType.IN): 2195 for_or_in = "IN" 2196 else: 2197 for_or_in = None 2198 2199 if self._match_text_seq("ACCESS"): 2200 lock_type = "ACCESS" 2201 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2202 lock_type = "EXCLUSIVE" 2203 elif self._match_text_seq("SHARE"): 2204 lock_type = "SHARE" 2205 elif self._match_text_seq("READ"): 2206 lock_type = "READ" 2207 elif self._match_text_seq("WRITE"): 2208 lock_type = "WRITE" 2209 elif self._match_text_seq("CHECKSUM"): 2210 lock_type = "CHECKSUM" 2211 else: 2212 lock_type = None 2213 2214 override = self._match_text_seq("OVERRIDE") 2215 2216 return self.expression( 2217 exp.LockingProperty, 2218 this=this, 2219 kind=kind, 2220 for_or_in=for_or_in, 2221 lock_type=lock_type, 2222 override=override, 2223 ) 2224 2225 def _parse_partition_by(self) -> t.List[exp.Expression]: 2226 if 
self._match(TokenType.PARTITION_BY): 2227 return self._parse_csv(self._parse_assignment) 2228 return [] 2229 2230 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2231 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2232 if self._match_text_seq("MINVALUE"): 2233 return exp.var("MINVALUE") 2234 if self._match_text_seq("MAXVALUE"): 2235 return exp.var("MAXVALUE") 2236 return self._parse_bitwise() 2237 2238 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2239 expression = None 2240 from_expressions = None 2241 to_expressions = None 2242 2243 if self._match(TokenType.IN): 2244 this = self._parse_wrapped_csv(self._parse_bitwise) 2245 elif self._match(TokenType.FROM): 2246 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2247 self._match_text_seq("TO") 2248 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2249 elif self._match_text_seq("WITH", "(", "MODULUS"): 2250 this = self._parse_number() 2251 self._match_text_seq(",", "REMAINDER") 2252 expression = self._parse_number() 2253 self._match_r_paren() 2254 else: 2255 self.raise_error("Failed to parse partition bound spec.") 2256 2257 return self.expression( 2258 exp.PartitionBoundSpec, 2259 this=this, 2260 expression=expression, 2261 from_expressions=from_expressions, 2262 to_expressions=to_expressions, 2263 ) 2264 2265 # https://www.postgresql.org/docs/current/sql-createtable.html 2266 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2267 if not self._match_text_seq("OF"): 2268 self._retreat(self._index - 1) 2269 return None 2270 2271 this = self._parse_table(schema=True) 2272 2273 if self._match(TokenType.DEFAULT): 2274 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2275 elif self._match_text_seq("FOR", "VALUES"): 2276 expression = self._parse_partition_bound_spec() 2277 else: 2278 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2279 2280 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2281 2282 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2283 self._match(TokenType.EQ) 2284 return self.expression( 2285 exp.PartitionedByProperty, 2286 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2287 ) 2288 2289 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2290 if self._match_text_seq("AND", "STATISTICS"): 2291 statistics = True 2292 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2293 statistics = False 2294 else: 2295 statistics = None 2296 2297 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2298 2299 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2300 if self._match_text_seq("SQL"): 2301 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2302 return None 2303 2304 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2305 if self._match_text_seq("SQL", "DATA"): 2306 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2307 return None 2308 2309 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2310 if self._match_text_seq("PRIMARY", "INDEX"): 2311 return exp.NoPrimaryIndexProperty() 2312 if self._match_text_seq("SQL"): 2313 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2314 return None 2315 2316 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2317 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2318 return exp.OnCommitProperty() 2319 
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2320 return exp.OnCommitProperty(delete=True) 2321 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2322 2323 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2324 if self._match_text_seq("SQL", "DATA"): 2325 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2326 return None 2327 2328 def _parse_distkey(self) -> exp.DistKeyProperty: 2329 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2330 2331 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2332 table = self._parse_table(schema=True) 2333 2334 options = [] 2335 while self._match_texts(("INCLUDING", "EXCLUDING")): 2336 this = self._prev.text.upper() 2337 2338 id_var = self._parse_id_var() 2339 if not id_var: 2340 return None 2341 2342 options.append( 2343 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2344 ) 2345 2346 return self.expression(exp.LikeProperty, this=table, expressions=options) 2347 2348 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2349 return self.expression( 2350 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2351 ) 2352 2353 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2354 self._match(TokenType.EQ) 2355 return self.expression( 2356 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2357 ) 2358 2359 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2360 self._match_text_seq("WITH", "CONNECTION") 2361 return self.expression( 2362 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2363 ) 2364 2365 def _parse_returns(self) -> exp.ReturnsProperty: 2366 value: t.Optional[exp.Expression] 2367 null = None 2368 is_table = self._match(TokenType.TABLE) 2369 2370 if is_table: 2371 if self._match(TokenType.LT): 2372 value = self.expression( 2373 exp.Schema, 2374 this="TABLE", 2375 expressions=self._parse_csv(self._parse_struct_types), 2376 ) 2377 if not self._match(TokenType.GT): 2378 self.raise_error("Expecting >") 2379 else: 2380 value = self._parse_schema(exp.var("TABLE")) 2381 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2382 null = True 2383 value = None 2384 else: 2385 value = self._parse_types() 2386 2387 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2388 2389 def _parse_describe(self) -> exp.Describe: 2390 kind = self._match_set(self.CREATABLES) and self._prev.text 2391 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2392 if self._match(TokenType.DOT): 2393 style = None 2394 self._retreat(self._index - 2) 2395 this = self._parse_table(schema=True) 2396 properties = self._parse_properties() 2397 expressions = properties.expressions if properties else None 2398 return self.expression( 2399 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2400 ) 2401 2402 def _parse_insert(self) -> exp.Insert: 2403 comments = ensure_list(self._prev_comments) 2404 hint = self._parse_hint() 2405 overwrite = self._match(TokenType.OVERWRITE) 2406 ignore = self._match(TokenType.IGNORE) 2407 local = self._match_text_seq("LOCAL") 2408 alternative = None 2409 is_function = None 2410 2411 if self._match_text_seq("DIRECTORY"): 2412 this: t.Optional[exp.Expression] = self.expression( 2413 exp.Directory, 2414 this=self._parse_var_or_string(), 2415 
local=local, 2416 row_format=self._parse_row_format(match_row=True), 2417 ) 2418 else: 2419 if self._match(TokenType.OR): 2420 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2421 2422 self._match(TokenType.INTO) 2423 comments += ensure_list(self._prev_comments) 2424 self._match(TokenType.TABLE) 2425 is_function = self._match(TokenType.FUNCTION) 2426 2427 this = ( 2428 self._parse_table(schema=True, parse_partition=True) 2429 if not is_function 2430 else self._parse_function() 2431 ) 2432 2433 returning = self._parse_returning() 2434 2435 return self.expression( 2436 exp.Insert, 2437 comments=comments, 2438 hint=hint, 2439 is_function=is_function, 2440 this=this, 2441 stored=self._match_text_seq("STORED") and self._parse_stored(), 2442 by_name=self._match_text_seq("BY", "NAME"), 2443 exists=self._parse_exists(), 2444 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2445 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2446 conflict=self._parse_on_conflict(), 2447 returning=returning or self._parse_returning(), 2448 overwrite=overwrite, 2449 alternative=alternative, 2450 ignore=ignore, 2451 ) 2452 2453 def _parse_kill(self) -> exp.Kill: 2454 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2455 2456 return self.expression( 2457 exp.Kill, 2458 this=self._parse_primary(), 2459 kind=kind, 2460 ) 2461 2462 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2463 conflict = self._match_text_seq("ON", "CONFLICT") 2464 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2465 2466 if not conflict and not duplicate: 2467 return None 2468 2469 conflict_keys = None 2470 constraint = None 2471 2472 if conflict: 2473 if self._match_text_seq("ON", "CONSTRAINT"): 2474 constraint = self._parse_id_var() 2475 elif self._match(TokenType.L_PAREN): 2476 conflict_keys = self._parse_csv(self._parse_id_var) 2477 self._match_r_paren() 2478 2479 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2480 if self._prev.token_type == TokenType.UPDATE: 2481 self._match(TokenType.SET) 2482 expressions = self._parse_csv(self._parse_equality) 2483 else: 2484 expressions = None 2485 2486 return self.expression( 2487 exp.OnConflict, 2488 duplicate=duplicate, 2489 expressions=expressions, 2490 action=action, 2491 conflict_keys=conflict_keys, 2492 constraint=constraint, 2493 ) 2494 2495 def _parse_returning(self) -> t.Optional[exp.Returning]: 2496 if not self._match(TokenType.RETURNING): 2497 return None 2498 return self.expression( 2499 exp.Returning, 2500 expressions=self._parse_csv(self._parse_expression), 2501 into=self._match(TokenType.INTO) and self._parse_table_part(), 2502 ) 2503 2504 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2505 if not self._match(TokenType.FORMAT): 2506 return None 2507 return self._parse_row_format() 2508 2509 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2510 index = self._index 2511 with_ = with_ or self._match_text_seq("WITH") 2512 2513 if not self._match(TokenType.SERDE_PROPERTIES): 2514 self._retreat(index) 2515 return None 2516 return self.expression( 2517 exp.SerdeProperties, 2518 **{ # type: ignore 2519 "expressions": self._parse_wrapped_properties(), 2520 "with": with_, 2521 }, 2522 ) 2523 2524 def _parse_row_format( 2525 self, match_row: bool = False 2526 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2527 
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2528 return None 2529 2530 if self._match_text_seq("SERDE"): 2531 this = self._parse_string() 2532 2533 serde_properties = self._parse_serde_properties() 2534 2535 return self.expression( 2536 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2537 ) 2538 2539 self._match_text_seq("DELIMITED") 2540 2541 kwargs = {} 2542 2543 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2544 kwargs["fields"] = self._parse_string() 2545 if self._match_text_seq("ESCAPED", "BY"): 2546 kwargs["escaped"] = self._parse_string() 2547 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2548 kwargs["collection_items"] = self._parse_string() 2549 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2550 kwargs["map_keys"] = self._parse_string() 2551 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2552 kwargs["lines"] = self._parse_string() 2553 if self._match_text_seq("NULL", "DEFINED", "AS"): 2554 kwargs["null"] = self._parse_string() 2555 2556 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2557 2558 def _parse_load(self) -> exp.LoadData | exp.Command: 2559 if self._match_text_seq("DATA"): 2560 local = self._match_text_seq("LOCAL") 2561 self._match_text_seq("INPATH") 2562 inpath = self._parse_string() 2563 overwrite = self._match(TokenType.OVERWRITE) 2564 self._match_pair(TokenType.INTO, TokenType.TABLE) 2565 2566 return self.expression( 2567 exp.LoadData, 2568 this=self._parse_table(schema=True), 2569 local=local, 2570 overwrite=overwrite, 2571 inpath=inpath, 2572 partition=self._parse_partition(), 2573 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2574 serde=self._match_text_seq("SERDE") and self._parse_string(), 2575 ) 2576 return self._parse_as_command(self._prev) 2577 2578 def _parse_delete(self) -> exp.Delete: 2579 # This handles MySQL's "Multiple-Table Syntax" 2580 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2581 tables = None 2582 comments = self._prev_comments 2583 if not self._match(TokenType.FROM, advance=False): 2584 tables = self._parse_csv(self._parse_table) or None 2585 2586 returning = self._parse_returning() 2587 2588 return self.expression( 2589 exp.Delete, 2590 comments=comments, 2591 tables=tables, 2592 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2593 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2594 where=self._parse_where(), 2595 returning=returning or self._parse_returning(), 2596 limit=self._parse_limit(), 2597 ) 2598 2599 def _parse_update(self) -> exp.Update: 2600 comments = self._prev_comments 2601 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2602 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2603 returning = self._parse_returning() 2604 return self.expression( 2605 exp.Update, 2606 comments=comments, 2607 **{ # type: ignore 2608 "this": this, 2609 "expressions": expressions, 2610 "from": self._parse_from(joins=True), 2611 "where": self._parse_where(), 2612 "returning": returning or self._parse_returning(), 2613 "order": self._parse_order(), 2614 "limit": self._parse_limit(), 2615 }, 2616 ) 2617 2618 def _parse_uncache(self) -> exp.Uncache: 2619 if not self._match(TokenType.TABLE): 2620 self.raise_error("Expecting TABLE after UNCACHE") 2621 2622 return self.expression( 2623 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2624 ) 2625 2626 def 
_parse_cache(self) -> exp.Cache: 2627 lazy = self._match_text_seq("LAZY") 2628 self._match(TokenType.TABLE) 2629 table = self._parse_table(schema=True) 2630 2631 options = [] 2632 if self._match_text_seq("OPTIONS"): 2633 self._match_l_paren() 2634 k = self._parse_string() 2635 self._match(TokenType.EQ) 2636 v = self._parse_string() 2637 options = [k, v] 2638 self._match_r_paren() 2639 2640 self._match(TokenType.ALIAS) 2641 return self.expression( 2642 exp.Cache, 2643 this=table, 2644 lazy=lazy, 2645 options=options, 2646 expression=self._parse_select(nested=True), 2647 ) 2648 2649 def _parse_partition(self) -> t.Optional[exp.Partition]: 2650 if not self._match(TokenType.PARTITION): 2651 return None 2652 2653 return self.expression( 2654 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2655 ) 2656 2657 def _parse_value(self) -> t.Optional[exp.Tuple]: 2658 if self._match(TokenType.L_PAREN): 2659 expressions = self._parse_csv(self._parse_expression) 2660 self._match_r_paren() 2661 return self.expression(exp.Tuple, expressions=expressions) 2662 2663 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2664 expression = self._parse_expression() 2665 if expression: 2666 return self.expression(exp.Tuple, expressions=[expression]) 2667 return None 2668 2669 def _parse_projections(self) -> t.List[exp.Expression]: 2670 return self._parse_expressions() 2671 2672 def _parse_select( 2673 self, 2674 nested: bool = False, 2675 table: bool = False, 2676 parse_subquery_alias: bool = True, 2677 parse_set_operation: bool = True, 2678 ) -> t.Optional[exp.Expression]: 2679 cte = self._parse_with() 2680 2681 if cte: 2682 this = self._parse_statement() 2683 2684 if not this: 2685 self.raise_error("Failed to parse any statement following CTE") 2686 return cte 2687 2688 if "with" in this.arg_types: 2689 this.set("with", cte) 2690 else: 2691 self.raise_error(f"{this.key} does not support CTE") 2692 this = cte 2693 2694 return this 2695 2696 # duckdb supports leading with FROM x 2697 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2698 2699 if self._match(TokenType.SELECT): 2700 comments = self._prev_comments 2701 2702 hint = self._parse_hint() 2703 all_ = self._match(TokenType.ALL) 2704 distinct = self._match_set(self.DISTINCT_TOKENS) 2705 2706 kind = ( 2707 self._match(TokenType.ALIAS) 2708 and self._match_texts(("STRUCT", "VALUE")) 2709 and self._prev.text.upper() 2710 ) 2711 2712 if distinct: 2713 distinct = self.expression( 2714 exp.Distinct, 2715 on=self._parse_value() if self._match(TokenType.ON) else None, 2716 ) 2717 2718 if all_ and distinct: 2719 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2720 2721 limit = self._parse_limit(top=True) 2722 projections = self._parse_projections() 2723 2724 this = self.expression( 2725 exp.Select, 2726 kind=kind, 2727 hint=hint, 2728 distinct=distinct, 2729 expressions=projections, 2730 limit=limit, 2731 ) 2732 this.comments = comments 2733 2734 into = self._parse_into() 2735 if into: 2736 this.set("into", into) 2737 2738 if not from_: 2739 from_ = self._parse_from() 2740 2741 if from_: 2742 this.set("from", from_) 2743 2744 this = self._parse_query_modifiers(this) 2745 elif (table or nested) and self._match(TokenType.L_PAREN): 2746 if self._match(TokenType.PIVOT): 2747 this = self._parse_simplified_pivot() 2748 elif self._match(TokenType.FROM): 2749 this = exp.select("*").from_( 2750 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2751 ) 2752 else: 2753 
this = ( 2754 self._parse_table() 2755 if table 2756 else self._parse_select(nested=True, parse_set_operation=False) 2757 ) 2758 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2759 2760 self._match_r_paren() 2761 2762 # We return early here so that the UNION isn't attached to the subquery by the 2763 # following call to _parse_set_operations, but instead becomes the parent node 2764 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2765 elif self._match(TokenType.VALUES, advance=False): 2766 this = self._parse_derived_table_values() 2767 elif from_: 2768 this = exp.select("*").from_(from_.this, copy=False) 2769 else: 2770 this = None 2771 2772 if parse_set_operation: 2773 return self._parse_set_operations(this) 2774 return this 2775 2776 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2777 if not skip_with_token and not self._match(TokenType.WITH): 2778 return None 2779 2780 comments = self._prev_comments 2781 recursive = self._match(TokenType.RECURSIVE) 2782 2783 expressions = [] 2784 while True: 2785 expressions.append(self._parse_cte()) 2786 2787 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2788 break 2789 else: 2790 self._match(TokenType.WITH) 2791 2792 return self.expression( 2793 exp.With, comments=comments, expressions=expressions, recursive=recursive 2794 ) 2795 2796 def _parse_cte(self) -> exp.CTE: 2797 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2798 if not alias or not alias.this: 2799 self.raise_error("Expected CTE to have alias") 2800 2801 self._match(TokenType.ALIAS) 2802 2803 if self._match_text_seq("NOT", "MATERIALIZED"): 2804 materialized = False 2805 elif self._match_text_seq("MATERIALIZED"): 2806 materialized = True 2807 else: 2808 materialized = None 2809 2810 return self.expression( 2811 exp.CTE, 2812 this=self._parse_wrapped(self._parse_statement), 2813 alias=alias, 2814 materialized=materialized, 2815 ) 2816 2817 def _parse_table_alias( 2818 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2819 ) -> t.Optional[exp.TableAlias]: 2820 any_token = self._match(TokenType.ALIAS) 2821 alias = ( 2822 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2823 or self._parse_string_as_identifier() 2824 ) 2825 2826 index = self._index 2827 if self._match(TokenType.L_PAREN): 2828 columns = self._parse_csv(self._parse_function_parameter) 2829 self._match_r_paren() if columns else self._retreat(index) 2830 else: 2831 columns = None 2832 2833 if not alias and not columns: 2834 return None 2835 2836 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2837 2838 # We bubble up comments from the Identifier to the TableAlias 2839 if isinstance(alias, exp.Identifier): 2840 table_alias.add_comments(alias.pop_comments()) 2841 2842 return table_alias 2843 2844 def _parse_subquery( 2845 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2846 ) -> t.Optional[exp.Subquery]: 2847 if not this: 2848 return None 2849 2850 return self.expression( 2851 exp.Subquery, 2852 this=this, 2853 pivots=self._parse_pivots(), 2854 alias=self._parse_table_alias() if parse_alias else None, 2855 ) 2856 2857 def _implicit_unnests_to_explicit(self, this: E) -> E: 2858 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2859 2860 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2861 for i, join in enumerate(this.args.get("joins") or []): 2862 table = join.this 2863 
normalized_table = table.copy() 2864 normalized_table.meta["maybe_column"] = True 2865 normalized_table = _norm(normalized_table, dialect=self.dialect) 2866 2867 if isinstance(table, exp.Table) and not join.args.get("on"): 2868 if normalized_table.parts[0].name in refs: 2869 table_as_column = table.to_column() 2870 unnest = exp.Unnest(expressions=[table_as_column]) 2871 2872 # Table.to_column creates a parent Alias node that we want to convert to 2873 # a TableAlias and attach to the Unnest, so it matches the parser's output 2874 if isinstance(table.args.get("alias"), exp.TableAlias): 2875 table_as_column.replace(table_as_column.this) 2876 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2877 2878 table.replace(unnest) 2879 2880 refs.add(normalized_table.alias_or_name) 2881 2882 return this 2883 2884 def _parse_query_modifiers( 2885 self, this: t.Optional[exp.Expression] 2886 ) -> t.Optional[exp.Expression]: 2887 if isinstance(this, (exp.Query, exp.Table)): 2888 for join in self._parse_joins(): 2889 this.append("joins", join) 2890 for lateral in iter(self._parse_lateral, None): 2891 this.append("laterals", lateral) 2892 2893 while True: 2894 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2895 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2896 key, expression = parser(self) 2897 2898 if expression: 2899 this.set(key, expression) 2900 if key == "limit": 2901 offset = expression.args.pop("offset", None) 2902 2903 if offset: 2904 offset = exp.Offset(expression=offset) 2905 this.set("offset", offset) 2906 2907 limit_by_expressions = expression.expressions 2908 expression.set("expressions", None) 2909 offset.set("expressions", limit_by_expressions) 2910 continue 2911 break 2912 2913 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2914 this = self._implicit_unnests_to_explicit(this) 2915 2916 return this 2917 2918 def _parse_hint(self) -> t.Optional[exp.Hint]: 2919 if self._match(TokenType.HINT): 2920 hints = [] 2921 for hint in iter( 2922 lambda: self._parse_csv( 2923 lambda: self._parse_function() or self._parse_var(upper=True) 2924 ), 2925 [], 2926 ): 2927 hints.extend(hint) 2928 2929 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2930 self.raise_error("Expected */ after HINT") 2931 2932 return self.expression(exp.Hint, expressions=hints) 2933 2934 return None 2935 2936 def _parse_into(self) -> t.Optional[exp.Into]: 2937 if not self._match(TokenType.INTO): 2938 return None 2939 2940 temp = self._match(TokenType.TEMPORARY) 2941 unlogged = self._match_text_seq("UNLOGGED") 2942 self._match(TokenType.TABLE) 2943 2944 return self.expression( 2945 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2946 ) 2947 2948 def _parse_from( 2949 self, joins: bool = False, skip_from_token: bool = False 2950 ) -> t.Optional[exp.From]: 2951 if not skip_from_token and not self._match(TokenType.FROM): 2952 return None 2953 2954 return self.expression( 2955 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2956 ) 2957 2958 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2959 return self.expression( 2960 exp.MatchRecognizeMeasure, 2961 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2962 this=self._parse_expression(), 2963 ) 2964 2965 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2966 if not self._match(TokenType.MATCH_RECOGNIZE): 2967 return None 2968 2969 self._match_l_paren() 2970 2971 partition = 
self._parse_partition_by() 2972 order = self._parse_order() 2973 2974 measures = ( 2975 self._parse_csv(self._parse_match_recognize_measure) 2976 if self._match_text_seq("MEASURES") 2977 else None 2978 ) 2979 2980 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2981 rows = exp.var("ONE ROW PER MATCH") 2982 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2983 text = "ALL ROWS PER MATCH" 2984 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2985 text += " SHOW EMPTY MATCHES" 2986 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2987 text += " OMIT EMPTY MATCHES" 2988 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2989 text += " WITH UNMATCHED ROWS" 2990 rows = exp.var(text) 2991 else: 2992 rows = None 2993 2994 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2995 text = "AFTER MATCH SKIP" 2996 if self._match_text_seq("PAST", "LAST", "ROW"): 2997 text += " PAST LAST ROW" 2998 elif self._match_text_seq("TO", "NEXT", "ROW"): 2999 text += " TO NEXT ROW" 3000 elif self._match_text_seq("TO", "FIRST"): 3001 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3002 elif self._match_text_seq("TO", "LAST"): 3003 text += f" TO LAST {self._advance_any().text}" # type: ignore 3004 after = exp.var(text) 3005 else: 3006 after = None 3007 3008 if self._match_text_seq("PATTERN"): 3009 self._match_l_paren() 3010 3011 if not self._curr: 3012 self.raise_error("Expecting )", self._curr) 3013 3014 paren = 1 3015 start = self._curr 3016 3017 while self._curr and paren > 0: 3018 if self._curr.token_type == TokenType.L_PAREN: 3019 paren += 1 3020 if self._curr.token_type == TokenType.R_PAREN: 3021 paren -= 1 3022 3023 end = self._prev 3024 self._advance() 3025 3026 if paren > 0: 3027 self.raise_error("Expecting )", self._curr) 3028 3029 pattern = exp.var(self._find_sql(start, end)) 3030 else: 3031 pattern = None 3032 3033 define = ( 3034 self._parse_csv(self._parse_name_as_expression) 3035 if self._match_text_seq("DEFINE") 3036 else None 3037 ) 3038 3039 self._match_r_paren() 3040 3041 return self.expression( 3042 exp.MatchRecognize, 3043 partition_by=partition, 3044 order=order, 3045 measures=measures, 3046 rows=rows, 3047 after=after, 3048 pattern=pattern, 3049 define=define, 3050 alias=self._parse_table_alias(), 3051 ) 3052 3053 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3054 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3055 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3056 cross_apply = False 3057 3058 if cross_apply is not None: 3059 this = self._parse_select(table=True) 3060 view = None 3061 outer = None 3062 elif self._match(TokenType.LATERAL): 3063 this = self._parse_select(table=True) 3064 view = self._match(TokenType.VIEW) 3065 outer = self._match(TokenType.OUTER) 3066 else: 3067 return None 3068 3069 if not this: 3070 this = ( 3071 self._parse_unnest() 3072 or self._parse_function() 3073 or self._parse_id_var(any_token=False) 3074 ) 3075 3076 while self._match(TokenType.DOT): 3077 this = exp.Dot( 3078 this=this, 3079 expression=self._parse_function() or self._parse_id_var(any_token=False), 3080 ) 3081 3082 if view: 3083 table = self._parse_id_var(any_token=False) 3084 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3085 table_alias: t.Optional[exp.TableAlias] = self.expression( 3086 exp.TableAlias, this=table, columns=columns 3087 ) 3088 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3089 # We move the alias from the lateral's child node to 
the lateral itself 3090 table_alias = this.args["alias"].pop() 3091 else: 3092 table_alias = self._parse_table_alias() 3093 3094 return self.expression( 3095 exp.Lateral, 3096 this=this, 3097 view=view, 3098 outer=outer, 3099 alias=table_alias, 3100 cross_apply=cross_apply, 3101 ) 3102 3103 def _parse_join_parts( 3104 self, 3105 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3106 return ( 3107 self._match_set(self.JOIN_METHODS) and self._prev, 3108 self._match_set(self.JOIN_SIDES) and self._prev, 3109 self._match_set(self.JOIN_KINDS) and self._prev, 3110 ) 3111 3112 def _parse_join( 3113 self, skip_join_token: bool = False, parse_bracket: bool = False 3114 ) -> t.Optional[exp.Join]: 3115 if self._match(TokenType.COMMA): 3116 return self.expression(exp.Join, this=self._parse_table()) 3117 3118 index = self._index 3119 method, side, kind = self._parse_join_parts() 3120 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3121 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3122 3123 if not skip_join_token and not join: 3124 self._retreat(index) 3125 kind = None 3126 method = None 3127 side = None 3128 3129 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3130 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3131 3132 if not skip_join_token and not join and not outer_apply and not cross_apply: 3133 return None 3134 3135 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3136 3137 if method: 3138 kwargs["method"] = method.text 3139 if side: 3140 kwargs["side"] = side.text 3141 if kind: 3142 kwargs["kind"] = kind.text 3143 if hint: 3144 kwargs["hint"] = hint 3145 3146 if self._match(TokenType.MATCH_CONDITION): 3147 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3148 3149 if self._match(TokenType.ON): 3150 kwargs["on"] = self._parse_assignment() 3151 elif self._match(TokenType.USING): 3152 kwargs["using"] = self._parse_wrapped_id_vars() 3153 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3154 kind and kind.token_type == TokenType.CROSS 3155 ): 3156 index = self._index 3157 joins: t.Optional[list] = list(self._parse_joins()) 3158 3159 if joins and self._match(TokenType.ON): 3160 kwargs["on"] = self._parse_assignment() 3161 elif joins and self._match(TokenType.USING): 3162 kwargs["using"] = self._parse_wrapped_id_vars() 3163 else: 3164 joins = None 3165 self._retreat(index) 3166 3167 kwargs["this"].set("joins", joins if joins else None) 3168 3169 comments = [c for token in (method, side, kind) if token for c in token.comments] 3170 return self.expression(exp.Join, comments=comments, **kwargs) 3171 3172 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3173 this = self._parse_assignment() 3174 3175 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3176 return this 3177 3178 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3179 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3180 3181 return this 3182 3183 def _parse_index_params(self) -> exp.IndexParameters: 3184 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3185 3186 if self._match(TokenType.L_PAREN, advance=False): 3187 columns = self._parse_wrapped_csv(self._parse_with_operator) 3188 else: 3189 columns = None 3190 3191 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3192 partition_by = 
self._parse_partition_by() 3193 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3194 tablespace = ( 3195 self._parse_var(any_token=True) 3196 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3197 else None 3198 ) 3199 where = self._parse_where() 3200 3201 on = self._parse_field() if self._match(TokenType.ON) else None 3202 3203 return self.expression( 3204 exp.IndexParameters, 3205 using=using, 3206 columns=columns, 3207 include=include, 3208 partition_by=partition_by, 3209 where=where, 3210 with_storage=with_storage, 3211 tablespace=tablespace, 3212 on=on, 3213 ) 3214 3215 def _parse_index( 3216 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3217 ) -> t.Optional[exp.Index]: 3218 if index or anonymous: 3219 unique = None 3220 primary = None 3221 amp = None 3222 3223 self._match(TokenType.ON) 3224 self._match(TokenType.TABLE) # hive 3225 table = self._parse_table_parts(schema=True) 3226 else: 3227 unique = self._match(TokenType.UNIQUE) 3228 primary = self._match_text_seq("PRIMARY") 3229 amp = self._match_text_seq("AMP") 3230 3231 if not self._match(TokenType.INDEX): 3232 return None 3233 3234 index = self._parse_id_var() 3235 table = None 3236 3237 params = self._parse_index_params() 3238 3239 return self.expression( 3240 exp.Index, 3241 this=index, 3242 table=table, 3243 unique=unique, 3244 primary=primary, 3245 amp=amp, 3246 params=params, 3247 ) 3248 3249 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3250 hints: t.List[exp.Expression] = [] 3251 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3252 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3253 hints.append( 3254 self.expression( 3255 exp.WithTableHint, 3256 expressions=self._parse_csv( 3257 lambda: self._parse_function() or self._parse_var(any_token=True) 3258 ), 3259 ) 3260 ) 3261 self._match_r_paren() 3262 else: 3263 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3264 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3265 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3266 3267 self._match_set((TokenType.INDEX, TokenType.KEY)) 3268 if self._match(TokenType.FOR): 3269 hint.set("target", self._advance_any() and self._prev.text.upper()) 3270 3271 hint.set("expressions", self._parse_wrapped_id_vars()) 3272 hints.append(hint) 3273 3274 return hints or None 3275 3276 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3277 return ( 3278 (not schema and self._parse_function(optional_parens=False)) 3279 or self._parse_id_var(any_token=False) 3280 or self._parse_string_as_identifier() 3281 or self._parse_placeholder() 3282 ) 3283 3284 def _parse_table_parts( 3285 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3286 ) -> exp.Table: 3287 catalog = None 3288 db = None 3289 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3290 3291 while self._match(TokenType.DOT): 3292 if catalog: 3293 # This allows nesting the table in arbitrarily many dot expressions if needed 3294 table = self.expression( 3295 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3296 ) 3297 else: 3298 catalog = db 3299 db = table 3300 # "" used for tsql FROM a..b case 3301 table = self._parse_table_part(schema=schema) or "" 3302 3303 if ( 3304 wildcard 3305 and self._is_connected() 3306 and (isinstance(table, exp.Identifier) or not table) 3307 and self._match(TokenType.STAR) 3308 ): 3309 if 
isinstance(table, exp.Identifier): 3310 table.args["this"] += "*" 3311 else: 3312 table = exp.Identifier(this="*") 3313 3314 # We bubble up comments from the Identifier to the Table 3315 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3316 3317 if is_db_reference: 3318 catalog = db 3319 db = table 3320 table = None 3321 3322 if not table and not is_db_reference: 3323 self.raise_error(f"Expected table name but got {self._curr}") 3324 if not db and is_db_reference: 3325 self.raise_error(f"Expected database name but got {self._curr}") 3326 3327 return self.expression( 3328 exp.Table, 3329 comments=comments, 3330 this=table, 3331 db=db, 3332 catalog=catalog, 3333 pivots=self._parse_pivots(), 3334 ) 3335 3336 def _parse_table( 3337 self, 3338 schema: bool = False, 3339 joins: bool = False, 3340 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3341 parse_bracket: bool = False, 3342 is_db_reference: bool = False, 3343 parse_partition: bool = False, 3344 ) -> t.Optional[exp.Expression]: 3345 lateral = self._parse_lateral() 3346 if lateral: 3347 return lateral 3348 3349 unnest = self._parse_unnest() 3350 if unnest: 3351 return unnest 3352 3353 values = self._parse_derived_table_values() 3354 if values: 3355 return values 3356 3357 subquery = self._parse_select(table=True) 3358 if subquery: 3359 if not subquery.args.get("pivots"): 3360 subquery.set("pivots", self._parse_pivots()) 3361 return subquery 3362 3363 bracket = parse_bracket and self._parse_bracket(None) 3364 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3365 3366 only = self._match(TokenType.ONLY) 3367 3368 this = t.cast( 3369 exp.Expression, 3370 bracket 3371 or self._parse_bracket( 3372 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3373 ), 3374 ) 3375 3376 if only: 3377 this.set("only", only) 3378 3379 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3380 self._match_text_seq("*") 3381 3382 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3383 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3384 this.set("partition", self._parse_partition()) 3385 3386 if schema: 3387 return self._parse_schema(this=this) 3388 3389 version = self._parse_version() 3390 3391 if version: 3392 this.set("version", version) 3393 3394 if self.dialect.ALIAS_POST_TABLESAMPLE: 3395 table_sample = self._parse_table_sample() 3396 3397 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3398 if alias: 3399 this.set("alias", alias) 3400 3401 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3402 return self.expression( 3403 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3404 ) 3405 3406 this.set("hints", self._parse_table_hints()) 3407 3408 if not this.args.get("pivots"): 3409 this.set("pivots", self._parse_pivots()) 3410 3411 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3412 table_sample = self._parse_table_sample() 3413 3414 if table_sample: 3415 table_sample.set("this", this) 3416 this = table_sample 3417 3418 if joins: 3419 for join in self._parse_joins(): 3420 this.append("joins", join) 3421 3422 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3423 this.set("ordinality", True) 3424 this.set("alias", self._parse_table_alias()) 3425 3426 return this 3427 3428 def _parse_version(self) -> t.Optional[exp.Version]: 3429 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3430 this = "TIMESTAMP" 3431 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3432 this = "VERSION" 3433 else: 3434 return None 3435 3436 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3437 kind = self._prev.text.upper() 3438 start = self._parse_bitwise() 3439 self._match_texts(("TO", "AND")) 3440 end = self._parse_bitwise() 3441 expression: t.Optional[exp.Expression] = self.expression( 3442 exp.Tuple, expressions=[start, end] 3443 ) 3444 elif self._match_text_seq("CONTAINED", "IN"): 3445 kind = "CONTAINED IN" 3446 expression = self.expression( 3447 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3448 ) 3449 elif self._match(TokenType.ALL): 3450 kind = "ALL" 3451 expression = None 3452 else: 3453 self._match_text_seq("AS", "OF") 3454 kind = "AS OF" 3455 expression = self._parse_type() 3456 3457 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3458 3459 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3460 if not self._match(TokenType.UNNEST): 3461 return None 3462 3463 expressions = self._parse_wrapped_csv(self._parse_equality) 3464 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3465 3466 alias = self._parse_table_alias() if with_alias else None 3467 3468 if alias: 3469 if self.dialect.UNNEST_COLUMN_ONLY: 3470 if alias.args.get("columns"): 3471 self.raise_error("Unexpected extra column alias in unnest.") 3472 3473 alias.set("columns", [alias.this]) 3474 alias.set("this", None) 3475 3476 columns = alias.args.get("columns") or [] 3477 if offset and len(expressions) < len(columns): 3478 offset = columns.pop() 3479 3480 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3481 self._match(TokenType.ALIAS) 3482 offset = self._parse_id_var( 3483 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3484 ) or exp.to_identifier("offset") 3485 3486 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3487 3488 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3489 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3490 if not is_derived and not self._match_text_seq("VALUES"): 3491 return None 3492 3493 expressions = self._parse_csv(self._parse_value) 3494 alias = self._parse_table_alias() 3495 3496 if is_derived: 3497 self._match_r_paren() 3498 3499 return self.expression( 3500 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3501 ) 3502 3503 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3504 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3505 as_modifier and self._match_text_seq("USING", "SAMPLE") 3506 ): 3507 return None 3508 3509 bucket_numerator = None 3510 bucket_denominator = None 3511 bucket_field = None 3512 percent = None 3513 size = None 3514 seed = None 3515 3516 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3517 matched_l_paren = self._match(TokenType.L_PAREN) 3518 3519 if self.TABLESAMPLE_CSV: 3520 num = None 3521 expressions = self._parse_csv(self._parse_primary) 3522 else: 3523 expressions = None 3524 num = ( 3525 self._parse_factor() 3526 if self._match(TokenType.NUMBER, advance=False) 3527 else self._parse_primary() or self._parse_placeholder() 3528 ) 3529 3530 if self._match_text_seq("BUCKET"): 3531 bucket_numerator = self._parse_number() 3532 self._match_text_seq("OUT", "OF") 3533 bucket_denominator = bucket_denominator = self._parse_number() 3534 self._match(TokenType.ON) 3535 bucket_field = self._parse_field() 3536 elif 
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3537 percent = num 3538 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3539 size = num 3540 else: 3541 percent = num 3542 3543 if matched_l_paren: 3544 self._match_r_paren() 3545 3546 if self._match(TokenType.L_PAREN): 3547 method = self._parse_var(upper=True) 3548 seed = self._match(TokenType.COMMA) and self._parse_number() 3549 self._match_r_paren() 3550 elif self._match_texts(("SEED", "REPEATABLE")): 3551 seed = self._parse_wrapped(self._parse_number) 3552 3553 if not method and self.DEFAULT_SAMPLING_METHOD: 3554 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3555 3556 return self.expression( 3557 exp.TableSample, 3558 expressions=expressions, 3559 method=method, 3560 bucket_numerator=bucket_numerator, 3561 bucket_denominator=bucket_denominator, 3562 bucket_field=bucket_field, 3563 percent=percent, 3564 size=size, 3565 seed=seed, 3566 ) 3567 3568 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3569 return list(iter(self._parse_pivot, None)) or None 3570 3571 def _parse_joins(self) -> t.Iterator[exp.Join]: 3572 return iter(self._parse_join, None) 3573 3574 # https://duckdb.org/docs/sql/statements/pivot 3575 def _parse_simplified_pivot(self) -> exp.Pivot: 3576 def _parse_on() -> t.Optional[exp.Expression]: 3577 this = self._parse_bitwise() 3578 return self._parse_in(this) if self._match(TokenType.IN) else this 3579 3580 this = self._parse_table() 3581 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3582 using = self._match(TokenType.USING) and self._parse_csv( 3583 lambda: self._parse_alias(self._parse_function()) 3584 ) 3585 group = self._parse_group() 3586 return self.expression( 3587 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3588 ) 3589 3590 def _parse_pivot_in(self) -> exp.In: 3591 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3592 this = self._parse_assignment() 3593 3594 self._match(TokenType.ALIAS) 3595 alias = self._parse_field() 3596 if alias: 3597 return self.expression(exp.PivotAlias, this=this, alias=alias) 3598 3599 return this 3600 3601 value = self._parse_column() 3602 3603 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3604 self.raise_error("Expecting IN (") 3605 3606 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3607 3608 self._match_r_paren() 3609 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3610 3611 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3612 index = self._index 3613 include_nulls = None 3614 3615 if self._match(TokenType.PIVOT): 3616 unpivot = False 3617 elif self._match(TokenType.UNPIVOT): 3618 unpivot = True 3619 3620 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3621 if self._match_text_seq("INCLUDE", "NULLS"): 3622 include_nulls = True 3623 elif self._match_text_seq("EXCLUDE", "NULLS"): 3624 include_nulls = False 3625 else: 3626 return None 3627 3628 expressions = [] 3629 3630 if not self._match(TokenType.L_PAREN): 3631 self._retreat(index) 3632 return None 3633 3634 if unpivot: 3635 expressions = self._parse_csv(self._parse_column) 3636 else: 3637 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3638 3639 if not expressions: 3640 self.raise_error("Failed to parse PIVOT's aggregation list") 3641 3642 if not self._match(TokenType.FOR): 3643 self.raise_error("Expecting FOR") 3644 3645 field = self._parse_pivot_in() 3646 3647 self._match_r_paren() 
3648 3649 pivot = self.expression( 3650 exp.Pivot, 3651 expressions=expressions, 3652 field=field, 3653 unpivot=unpivot, 3654 include_nulls=include_nulls, 3655 ) 3656 3657 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3658 pivot.set("alias", self._parse_table_alias()) 3659 3660 if not unpivot: 3661 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3662 3663 columns: t.List[exp.Expression] = [] 3664 for fld in pivot.args["field"].expressions: 3665 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3666 for name in names: 3667 if self.PREFIXED_PIVOT_COLUMNS: 3668 name = f"{name}_{field_name}" if name else field_name 3669 else: 3670 name = f"{field_name}_{name}" if name else field_name 3671 3672 columns.append(exp.to_identifier(name)) 3673 3674 pivot.set("columns", columns) 3675 3676 return pivot 3677 3678 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3679 return [agg.alias for agg in aggregations] 3680 3681 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3682 if not skip_where_token and not self._match(TokenType.PREWHERE): 3683 return None 3684 3685 return self.expression( 3686 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3687 ) 3688 3689 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3690 if not skip_where_token and not self._match(TokenType.WHERE): 3691 return None 3692 3693 return self.expression( 3694 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3695 ) 3696 3697 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3698 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3699 return None 3700 3701 elements: t.Dict[str, t.Any] = defaultdict(list) 3702 3703 if self._match(TokenType.ALL): 3704 elements["all"] = True 3705 elif self._match(TokenType.DISTINCT): 3706 elements["all"] = False 3707 3708 while True: 3709 expressions = self._parse_csv( 3710 lambda: None 3711 if self._match(TokenType.ROLLUP, advance=False) 3712 else self._parse_assignment() 3713 ) 3714 if expressions: 3715 elements["expressions"].extend(expressions) 3716 3717 grouping_sets = self._parse_grouping_sets() 3718 if grouping_sets: 3719 elements["grouping_sets"].extend(grouping_sets) 3720 3721 rollup = None 3722 cube = None 3723 totals = None 3724 3725 index = self._index 3726 with_ = self._match(TokenType.WITH) 3727 if self._match(TokenType.ROLLUP): 3728 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3729 elements["rollup"].extend(ensure_list(rollup)) 3730 3731 if self._match(TokenType.CUBE): 3732 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3733 elements["cube"].extend(ensure_list(cube)) 3734 3735 if self._match_text_seq("TOTALS"): 3736 totals = True 3737 elements["totals"] = True # type: ignore 3738 3739 if not (grouping_sets or rollup or cube or totals): 3740 if with_: 3741 self._retreat(index) 3742 break 3743 3744 return self.expression(exp.Group, **elements) # type: ignore 3745 3746 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3747 if not self._match(TokenType.GROUPING_SETS): 3748 return None 3749 3750 return self._parse_wrapped_csv(self._parse_grouping_set) 3751 3752 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3753 if self._match(TokenType.L_PAREN): 3754 grouping_set = self._parse_csv(self._parse_column) 3755 self._match_r_paren() 3756 return 
self.expression(exp.Tuple, expressions=grouping_set) 3757 3758 return self._parse_column() 3759 3760 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3761 if not skip_having_token and not self._match(TokenType.HAVING): 3762 return None 3763 return self.expression(exp.Having, this=self._parse_assignment()) 3764 3765 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3766 if not self._match(TokenType.QUALIFY): 3767 return None 3768 return self.expression(exp.Qualify, this=self._parse_assignment()) 3769 3770 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3771 if skip_start_token: 3772 start = None 3773 elif self._match(TokenType.START_WITH): 3774 start = self._parse_assignment() 3775 else: 3776 return None 3777 3778 self._match(TokenType.CONNECT_BY) 3779 nocycle = self._match_text_seq("NOCYCLE") 3780 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3781 exp.Prior, this=self._parse_bitwise() 3782 ) 3783 connect = self._parse_assignment() 3784 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3785 3786 if not start and self._match(TokenType.START_WITH): 3787 start = self._parse_assignment() 3788 3789 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3790 3791 def _parse_name_as_expression(self) -> exp.Alias: 3792 return self.expression( 3793 exp.Alias, 3794 alias=self._parse_id_var(any_token=True), 3795 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3796 ) 3797 3798 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3799 if self._match_text_seq("INTERPOLATE"): 3800 return self._parse_wrapped_csv(self._parse_name_as_expression) 3801 return None 3802 3803 def _parse_order( 3804 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3805 ) -> t.Optional[exp.Expression]: 3806 siblings = None 3807 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3808 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3809 return this 3810 3811 siblings = True 3812 3813 return self.expression( 3814 exp.Order, 3815 this=this, 3816 expressions=self._parse_csv(self._parse_ordered), 3817 interpolate=self._parse_interpolate(), 3818 siblings=siblings, 3819 ) 3820 3821 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3822 if not self._match(token): 3823 return None 3824 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3825 3826 def _parse_ordered( 3827 self, parse_method: t.Optional[t.Callable] = None 3828 ) -> t.Optional[exp.Ordered]: 3829 this = parse_method() if parse_method else self._parse_assignment() 3830 if not this: 3831 return None 3832 3833 asc = self._match(TokenType.ASC) 3834 desc = self._match(TokenType.DESC) or (asc and False) 3835 3836 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3837 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3838 3839 nulls_first = is_nulls_first or False 3840 explicitly_null_ordered = is_nulls_first or is_nulls_last 3841 3842 if ( 3843 not explicitly_null_ordered 3844 and ( 3845 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3846 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3847 ) 3848 and self.dialect.NULL_ORDERING != "nulls_are_last" 3849 ): 3850 nulls_first = True 3851 3852 if self._match_text_seq("WITH", "FILL"): 3853 with_fill = self.expression( 3854 exp.WithFill, 3855 **{ # type: ignore 3856 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3857 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3858 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3859 }, 3860 ) 3861 else: 3862 with_fill = None 3863 3864 return self.expression( 3865 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3866 ) 3867 3868 def _parse_limit( 3869 self, 3870 this: t.Optional[exp.Expression] = None, 3871 top: bool = False, 3872 skip_limit_token: bool = False, 3873 ) -> t.Optional[exp.Expression]: 3874 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3875 comments = self._prev_comments 3876 if top: 3877 limit_paren = self._match(TokenType.L_PAREN) 3878 expression = self._parse_term() if limit_paren else self._parse_number() 3879 3880 if limit_paren: 3881 self._match_r_paren() 3882 else: 3883 expression = self._parse_term() 3884 3885 if self._match(TokenType.COMMA): 3886 offset = expression 3887 expression = self._parse_term() 3888 else: 3889 offset = None 3890 3891 limit_exp = self.expression( 3892 exp.Limit, 3893 this=this, 3894 expression=expression, 3895 offset=offset, 3896 comments=comments, 3897 expressions=self._parse_limit_by(), 3898 ) 3899 3900 return limit_exp 3901 3902 if self._match(TokenType.FETCH): 3903 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3904 direction = self._prev.text.upper() if direction else "FIRST" 3905 3906 count = self._parse_field(tokens=self.FETCH_TOKENS) 3907 percent = self._match(TokenType.PERCENT) 3908 3909 self._match_set((TokenType.ROW, TokenType.ROWS)) 3910 3911 only = self._match_text_seq("ONLY") 3912 with_ties = self._match_text_seq("WITH", "TIES") 3913 3914 if only and with_ties: 3915 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3916 3917 return self.expression( 3918 exp.Fetch, 3919 direction=direction, 3920 count=count, 3921 percent=percent, 3922 with_ties=with_ties, 3923 ) 3924 3925 return this 3926 3927 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3928 if not self._match(TokenType.OFFSET): 3929 return this 3930 3931 count = self._parse_term() 3932 self._match_set((TokenType.ROW, TokenType.ROWS)) 3933 3934 return self.expression( 3935 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3936 ) 3937 3938 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3939 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3940 3941 def _parse_locks(self) -> t.List[exp.Lock]: 3942 locks = [] 3943 while True: 3944 if self._match_text_seq("FOR", "UPDATE"): 3945 update = True 3946 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3947 "LOCK", "IN", "SHARE", "MODE" 3948 ): 3949 update = False 3950 else: 3951 break 3952 3953 expressions = None 3954 if self._match_text_seq("OF"): 3955 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3956 3957 wait: t.Optional[bool | exp.Expression] = None 3958 if self._match_text_seq("NOWAIT"): 3959 wait = True 3960 elif self._match_text_seq("WAIT"): 3961 wait = self._parse_primary() 3962 elif self._match_text_seq("SKIP", "LOCKED"): 3963 wait = False 3964 3965 locks.append( 3966 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3967 ) 3968 3969 return locks 3970 3971 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3972 while this and self._match_set(self.SET_OPERATIONS): 3973 token_type = self._prev.token_type 3974 3975 if token_type == TokenType.UNION: 3976 
operation: t.Type[exp.SetOperation] = exp.Union 3977 elif token_type == TokenType.EXCEPT: 3978 operation = exp.Except 3979 else: 3980 operation = exp.Intersect 3981 3982 comments = self._prev.comments 3983 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3984 by_name = self._match_text_seq("BY", "NAME") 3985 expression = self._parse_select(nested=True, parse_set_operation=False) 3986 3987 this = self.expression( 3988 operation, 3989 comments=comments, 3990 this=this, 3991 distinct=distinct, 3992 by_name=by_name, 3993 expression=expression, 3994 ) 3995 3996 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3997 expression = this.expression 3998 3999 if expression: 4000 for arg in self.SET_OP_MODIFIERS: 4001 expr = expression.args.get(arg) 4002 if expr: 4003 this.set(arg, expr.pop()) 4004 4005 return this 4006 4007 def _parse_expression(self) -> t.Optional[exp.Expression]: 4008 return self._parse_alias(self._parse_assignment()) 4009 4010 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4011 this = self._parse_disjunction() 4012 4013 while self._match_set(self.ASSIGNMENT): 4014 this = self.expression( 4015 self.ASSIGNMENT[self._prev.token_type], 4016 this=this, 4017 comments=self._prev_comments, 4018 expression=self._parse_assignment(), 4019 ) 4020 4021 return this 4022 4023 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4024 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4025 4026 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4027 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4028 4029 def _parse_equality(self) -> t.Optional[exp.Expression]: 4030 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4031 4032 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4033 return self._parse_tokens(self._parse_range, self.COMPARISON) 4034 4035 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4036 this = this or self._parse_bitwise() 4037 negate = self._match(TokenType.NOT) 4038 4039 if self._match_set(self.RANGE_PARSERS): 4040 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4041 if not expression: 4042 return this 4043 4044 this = expression 4045 elif self._match(TokenType.ISNULL): 4046 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4047 4048 # Postgres supports ISNULL and NOTNULL for conditions. 
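# (Editor-added illustration: Postgres treats "x ISNULL" like "x IS NULL" and "x NOTNULL"
# like "x IS NOT NULL", hence the exp.Not wrapper around the exp.Is node in the NOTNULL
# branch below. A hedged sketch using sqlglot's public API:
#   sqlglot.parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres")
# produces a WHERE condition of the form NOT (x IS NULL).)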
4049 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4050 if self._match(TokenType.NOTNULL): 4051 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4052 this = self.expression(exp.Not, this=this) 4053 4054 if negate: 4055 this = self.expression(exp.Not, this=this) 4056 4057 if self._match(TokenType.IS): 4058 this = self._parse_is(this) 4059 4060 return this 4061 4062 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4063 index = self._index - 1 4064 negate = self._match(TokenType.NOT) 4065 4066 if self._match_text_seq("DISTINCT", "FROM"): 4067 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4068 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4069 4070 expression = self._parse_null() or self._parse_boolean() 4071 if not expression: 4072 self._retreat(index) 4073 return None 4074 4075 this = self.expression(exp.Is, this=this, expression=expression) 4076 return self.expression(exp.Not, this=this) if negate else this 4077 4078 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4079 unnest = self._parse_unnest(with_alias=False) 4080 if unnest: 4081 this = self.expression(exp.In, this=this, unnest=unnest) 4082 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4083 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4084 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4085 4086 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4087 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4088 else: 4089 this = self.expression(exp.In, this=this, expressions=expressions) 4090 4091 if matched_l_paren: 4092 self._match_r_paren(this) 4093 elif not self._match(TokenType.R_BRACKET, expression=this): 4094 self.raise_error("Expecting ]") 4095 else: 4096 this = self.expression(exp.In, this=this, field=self._parse_field()) 4097 4098 return this 4099 4100 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4101 low = self._parse_bitwise() 4102 self._match(TokenType.AND) 4103 high = self._parse_bitwise() 4104 return self.expression(exp.Between, this=this, low=low, high=high) 4105 4106 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4107 if not self._match(TokenType.ESCAPE): 4108 return this 4109 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4110 4111 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4112 index = self._index 4113 4114 if not self._match(TokenType.INTERVAL) and match_interval: 4115 return None 4116 4117 if self._match(TokenType.STRING, advance=False): 4118 this = self._parse_primary() 4119 else: 4120 this = self._parse_term() 4121 4122 if not this or ( 4123 isinstance(this, exp.Column) 4124 and not this.table 4125 and not this.this.quoted 4126 and this.name.upper() == "IS" 4127 ): 4128 self._retreat(index) 4129 return None 4130 4131 unit = self._parse_function() or ( 4132 not self._match(TokenType.ALIAS, advance=False) 4133 and self._parse_var(any_token=True, upper=True) 4134 ) 4135 4136 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4137 # each INTERVAL expression into this canonical form so it's easy to transpile 4138 if this and this.is_number: 4139 this = exp.Literal.string(this.to_py()) 4140 elif this and this.is_string: 4141 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4142 if 
len(parts) == 1: 4143 if unit: 4144 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4145 self._retreat(self._index - 1) 4146 4147 this = exp.Literal.string(parts[0][0]) 4148 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4149 4150 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4151 unit = self.expression( 4152 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4153 ) 4154 4155 interval = self.expression(exp.Interval, this=this, unit=unit) 4156 4157 index = self._index 4158 self._match(TokenType.PLUS) 4159 4160 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4161 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4162 return self.expression( 4163 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4164 ) 4165 4166 self._retreat(index) 4167 return interval 4168 4169 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4170 this = self._parse_term() 4171 4172 while True: 4173 if self._match_set(self.BITWISE): 4174 this = self.expression( 4175 self.BITWISE[self._prev.token_type], 4176 this=this, 4177 expression=self._parse_term(), 4178 ) 4179 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4180 this = self.expression( 4181 exp.DPipe, 4182 this=this, 4183 expression=self._parse_term(), 4184 safe=not self.dialect.STRICT_STRING_CONCAT, 4185 ) 4186 elif self._match(TokenType.DQMARK): 4187 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4188 elif self._match_pair(TokenType.LT, TokenType.LT): 4189 this = self.expression( 4190 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4191 ) 4192 elif self._match_pair(TokenType.GT, TokenType.GT): 4193 this = self.expression( 4194 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4195 ) 4196 else: 4197 break 4198 4199 return this 4200 4201 def _parse_term(self) -> t.Optional[exp.Expression]: 4202 return self._parse_tokens(self._parse_factor, self.TERM) 4203 4204 def _parse_factor(self) -> t.Optional[exp.Expression]: 4205 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4206 this = parse_method() 4207 4208 while self._match_set(self.FACTOR): 4209 klass = self.FACTOR[self._prev.token_type] 4210 comments = self._prev_comments 4211 expression = parse_method() 4212 4213 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4214 self._retreat(self._index - 1) 4215 return this 4216 4217 this = self.expression(klass, this=this, comments=comments, expression=expression) 4218 4219 if isinstance(this, exp.Div): 4220 this.args["typed"] = self.dialect.TYPED_DIVISION 4221 this.args["safe"] = self.dialect.SAFE_DIVISION 4222 4223 return this 4224 4225 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4226 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4227 4228 def _parse_unary(self) -> t.Optional[exp.Expression]: 4229 if self._match_set(self.UNARY_PARSERS): 4230 return self.UNARY_PARSERS[self._prev.token_type](self) 4231 return self._parse_at_time_zone(self._parse_type()) 4232 4233 def _parse_type( 4234 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4235 ) -> t.Optional[exp.Expression]: 4236 interval = parse_interval and self._parse_interval() 4237 if interval: 4238 return interval 4239 4240 index = self._index 4241 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4242 4243 if data_type: 4244 index2 = 
self._index 4245 this = self._parse_primary() 4246 4247 if isinstance(this, exp.Literal): 4248 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4249 if parser: 4250 return parser(self, this, data_type) 4251 4252 return self.expression(exp.Cast, this=this, to=data_type) 4253 4254 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4255 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4256 # 4257 # If the index difference here is greater than 1, that means the parser itself must have 4258 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4259 # 4260 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4261 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4262 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4263 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4264 # 4265 # In these cases, we don't really want to return the converted type, but instead retreat 4266 # and try to parse a Column or Identifier in the section below. 4267 if data_type.expressions and index2 - index > 1: 4268 self._retreat(index2) 4269 return self._parse_column_ops(data_type) 4270 4271 self._retreat(index) 4272 4273 if fallback_to_identifier: 4274 return self._parse_id_var() 4275 4276 this = self._parse_column() 4277 return this and self._parse_column_ops(this) 4278 4279 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4280 this = self._parse_type() 4281 if not this: 4282 return None 4283 4284 if isinstance(this, exp.Column) and not this.table: 4285 this = exp.var(this.name.upper()) 4286 4287 return self.expression( 4288 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4289 ) 4290 4291 def _parse_types( 4292 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4293 ) -> t.Optional[exp.Expression]: 4294 index = self._index 4295 4296 this: t.Optional[exp.Expression] = None 4297 prefix = self._match_text_seq("SYSUDTLIB", ".") 4298 4299 if not self._match_set(self.TYPE_TOKENS): 4300 identifier = allow_identifiers and self._parse_id_var( 4301 any_token=False, tokens=(TokenType.VAR,) 4302 ) 4303 if isinstance(identifier, exp.Identifier): 4304 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4305 4306 if len(tokens) != 1: 4307 self.raise_error("Unexpected identifier", self._prev) 4308 4309 if tokens[0].token_type in self.TYPE_TOKENS: 4310 self._prev = tokens[0] 4311 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4312 type_name = identifier.name 4313 4314 while self._match(TokenType.DOT): 4315 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4316 4317 this = exp.DataType.build(type_name, udt=True) 4318 else: 4319 self._retreat(self._index - 1) 4320 return None 4321 else: 4322 return None 4323 4324 type_token = self._prev.token_type 4325 4326 if type_token == TokenType.PSEUDO_TYPE: 4327 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4328 4329 if type_token == TokenType.OBJECT_IDENTIFIER: 4330 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4331 4332 # https://materialize.com/docs/sql/types/map/ 4333 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4334 key_type = self._parse_types( 4335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4336 ) 4337 if not
self._match(TokenType.FARROW): 4338 self._retreat(index) 4339 return None 4340 4341 value_type = self._parse_types( 4342 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4343 ) 4344 if not self._match(TokenType.R_BRACKET): 4345 self._retreat(index) 4346 return None 4347 4348 return exp.DataType( 4349 this=exp.DataType.Type.MAP, 4350 expressions=[key_type, value_type], 4351 nested=True, 4352 prefix=prefix, 4353 ) 4354 4355 nested = type_token in self.NESTED_TYPE_TOKENS 4356 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4357 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4358 expressions = None 4359 maybe_func = False 4360 4361 if self._match(TokenType.L_PAREN): 4362 if is_struct: 4363 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4364 elif nested: 4365 expressions = self._parse_csv( 4366 lambda: self._parse_types( 4367 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4368 ) 4369 ) 4370 elif type_token in self.ENUM_TYPE_TOKENS: 4371 expressions = self._parse_csv(self._parse_equality) 4372 elif is_aggregate: 4373 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4374 any_token=False, tokens=(TokenType.VAR,) 4375 ) 4376 if not func_or_ident or not self._match(TokenType.COMMA): 4377 return None 4378 expressions = self._parse_csv( 4379 lambda: self._parse_types( 4380 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4381 ) 4382 ) 4383 expressions.insert(0, func_or_ident) 4384 else: 4385 expressions = self._parse_csv(self._parse_type_size) 4386 4387 if not expressions or not self._match(TokenType.R_PAREN): 4388 self._retreat(index) 4389 return None 4390 4391 maybe_func = True 4392 4393 values: t.Optional[t.List[exp.Expression]] = None 4394 4395 if nested and self._match(TokenType.LT): 4396 if is_struct: 4397 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4398 else: 4399 expressions = self._parse_csv( 4400 lambda: self._parse_types( 4401 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4402 ) 4403 ) 4404 4405 if not self._match(TokenType.GT): 4406 self.raise_error("Expecting >") 4407 4408 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4409 values = self._parse_csv(self._parse_assignment) 4410 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4411 4412 if type_token in self.TIMESTAMPS: 4413 if self._match_text_seq("WITH", "TIME", "ZONE"): 4414 maybe_func = False 4415 tz_type = ( 4416 exp.DataType.Type.TIMETZ 4417 if type_token in self.TIMES 4418 else exp.DataType.Type.TIMESTAMPTZ 4419 ) 4420 this = exp.DataType(this=tz_type, expressions=expressions) 4421 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4422 maybe_func = False 4423 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4424 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4425 maybe_func = False 4426 elif type_token == TokenType.INTERVAL: 4427 unit = self._parse_var(upper=True) 4428 if unit: 4429 if self._match_text_seq("TO"): 4430 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4431 4432 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4433 else: 4434 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4435 4436 if maybe_func and check_func: 4437 index2 = self._index 4438 peek = self._parse_string() 4439 4440 if not peek: 4441 self._retreat(index) 4442 return None 4443 4444 
self._retreat(index2) 4445 4446 if not this: 4447 if self._match_text_seq("UNSIGNED"): 4448 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4449 if not unsigned_type_token: 4450 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4451 4452 type_token = unsigned_type_token or type_token 4453 4454 this = exp.DataType( 4455 this=exp.DataType.Type[type_token.value], 4456 expressions=expressions, 4457 nested=nested, 4458 values=values, 4459 prefix=prefix, 4460 ) 4461 elif expressions: 4462 this.set("expressions", expressions) 4463 4464 # https://materialize.com/docs/sql/types/list/#type-name 4465 while self._match(TokenType.LIST): 4466 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4467 4468 index = self._index 4469 4470 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4471 matched_array = self._match(TokenType.ARRAY) 4472 4473 while self._curr: 4474 matched_l_bracket = self._match(TokenType.L_BRACKET) 4475 if not matched_l_bracket and not matched_array: 4476 break 4477 4478 matched_array = False 4479 values = self._parse_csv(self._parse_assignment) or None 4480 if values and not schema: 4481 self._retreat(index) 4482 break 4483 4484 this = exp.DataType( 4485 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4486 ) 4487 self._match(TokenType.R_BRACKET) 4488 4489 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4490 converter = self.TYPE_CONVERTERS.get(this.this) 4491 if converter: 4492 this = converter(t.cast(exp.DataType, this)) 4493 4494 return this 4495 4496 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4497 index = self._index 4498 4499 if ( 4500 self._curr 4501 and self._next 4502 and self._curr.token_type in self.TYPE_TOKENS 4503 and self._next.token_type in self.TYPE_TOKENS 4504 ): 4505 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4506 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4507 this = self._parse_id_var() 4508 else: 4509 this = ( 4510 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4511 or self._parse_id_var() 4512 ) 4513 4514 self._match(TokenType.COLON) 4515 4516 if ( 4517 type_required 4518 and not isinstance(this, exp.DataType) 4519 and not self._match_set(self.TYPE_TOKENS, advance=False) 4520 ): 4521 self._retreat(index) 4522 return self._parse_types() 4523 4524 return self._parse_column_def(this) 4525 4526 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4527 if not self._match_text_seq("AT", "TIME", "ZONE"): 4528 return this 4529 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4530 4531 def _parse_column(self) -> t.Optional[exp.Expression]: 4532 this = self._parse_column_reference() 4533 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4534 4535 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4536 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4537 4538 return column 4539 4540 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4541 this = self._parse_field() 4542 if ( 4543 not this 4544 and self._match(TokenType.VALUES, advance=False) 4545 and self.VALUES_FOLLOWED_BY_PAREN 4546 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4547 ): 4548 this = self._parse_id_var() 4549 4550 if isinstance(this, exp.Identifier): 4551 # We bubble up comments from the Identifier to the Column 4552 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4553 4554 return this 4555 4556 def _parse_colon_as_json_extract( 4557 self, this: t.Optional[exp.Expression] 4558 ) -> t.Optional[exp.Expression]: 4559 casts = [] 4560 json_path = [] 4561 4562 while self._match(TokenType.COLON): 4563 start_index = self._index 4564 4565 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4566 path = self._parse_column_ops( 4567 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4568 ) 4569 4570 # The cast :: operator has a lower precedence than the extraction operator :, so 4571 # we rearrange the AST appropriately to avoid casting the JSON path 4572 while isinstance(path, exp.Cast): 4573 casts.append(path.to) 4574 path = path.this 4575 4576 if casts: 4577 dcolon_offset = next( 4578 i 4579 for i, t in enumerate(self._tokens[start_index:]) 4580 if t.token_type == TokenType.DCOLON 4581 ) 4582 end_token = self._tokens[start_index + dcolon_offset - 1] 4583 else: 4584 end_token = self._prev 4585 4586 if path: 4587 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4588 4589 if json_path: 4590 this = self.expression( 4591 exp.JSONExtract, 4592 this=this, 4593 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4594 ) 4595 4596 while casts: 4597 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4598 4599 return this 4600 4601 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4602 this = self._parse_bracket(this) 4603 4604 while self._match_set(self.COLUMN_OPERATORS): 4605 op_token = self._prev.token_type 4606 op = self.COLUMN_OPERATORS.get(op_token) 4607 4608 if op_token == TokenType.DCOLON: 4609 field = self._parse_types() 4610 if not field: 4611 self.raise_error("Expected type") 4612 elif op and self._curr: 4613 field = self._parse_column_reference() 4614 
else: 4615 field = self._parse_field(any_token=True, anonymous_func=True) 4616 4617 if isinstance(field, exp.Func) and this: 4618 # bigquery allows function calls like x.y.count(...) 4619 # SAFE.SUBSTR(...) 4620 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4621 this = exp.replace_tree( 4622 this, 4623 lambda n: ( 4624 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4625 if n.table 4626 else n.this 4627 ) 4628 if isinstance(n, exp.Column) 4629 else n, 4630 ) 4631 4632 if op: 4633 this = op(self, this, field) 4634 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4635 this = self.expression( 4636 exp.Column, 4637 this=field, 4638 table=this.this, 4639 db=this.args.get("table"), 4640 catalog=this.args.get("db"), 4641 ) 4642 else: 4643 this = self.expression(exp.Dot, this=this, expression=field) 4644 4645 this = self._parse_bracket(this) 4646 4647 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4648 4649 def _parse_primary(self) -> t.Optional[exp.Expression]: 4650 if self._match_set(self.PRIMARY_PARSERS): 4651 token_type = self._prev.token_type 4652 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4653 4654 if token_type == TokenType.STRING: 4655 expressions = [primary] 4656 while self._match(TokenType.STRING): 4657 expressions.append(exp.Literal.string(self._prev.text)) 4658 4659 if len(expressions) > 1: 4660 return self.expression(exp.Concat, expressions=expressions) 4661 4662 return primary 4663 4664 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4665 return exp.Literal.number(f"0.{self._prev.text}") 4666 4667 if self._match(TokenType.L_PAREN): 4668 comments = self._prev_comments 4669 query = self._parse_select() 4670 4671 if query: 4672 expressions = [query] 4673 else: 4674 expressions = self._parse_expressions() 4675 4676 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4677 4678 if not this and self._match(TokenType.R_PAREN, advance=False): 4679 this = self.expression(exp.Tuple) 4680 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4681 this = self._parse_subquery(this=this, parse_alias=False) 4682 elif isinstance(this, exp.Subquery): 4683 this = self._parse_subquery( 4684 this=self._parse_set_operations(this), parse_alias=False 4685 ) 4686 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4687 this = self.expression(exp.Tuple, expressions=expressions) 4688 else: 4689 this = self.expression(exp.Paren, this=this) 4690 4691 if this: 4692 this.add_comments(comments) 4693 4694 self._match_r_paren(expression=this) 4695 return this 4696 4697 return None 4698 4699 def _parse_field( 4700 self, 4701 any_token: bool = False, 4702 tokens: t.Optional[t.Collection[TokenType]] = None, 4703 anonymous_func: bool = False, 4704 ) -> t.Optional[exp.Expression]: 4705 if anonymous_func: 4706 field = ( 4707 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4708 or self._parse_primary() 4709 ) 4710 else: 4711 field = self._parse_primary() or self._parse_function( 4712 anonymous=anonymous_func, any_token=any_token 4713 ) 4714 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4715 4716 def _parse_function( 4717 self, 4718 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4719 anonymous: bool = False, 4720 optional_parens: bool = True, 4721 any_token: bool = False, 4722 ) -> t.Optional[exp.Expression]: 4723 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support 
this) 4724 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4725 fn_syntax = False 4726 if ( 4727 self._match(TokenType.L_BRACE, advance=False) 4728 and self._next 4729 and self._next.text.upper() == "FN" 4730 ): 4731 self._advance(2) 4732 fn_syntax = True 4733 4734 func = self._parse_function_call( 4735 functions=functions, 4736 anonymous=anonymous, 4737 optional_parens=optional_parens, 4738 any_token=any_token, 4739 ) 4740 4741 if fn_syntax: 4742 self._match(TokenType.R_BRACE) 4743 4744 return func 4745 4746 def _parse_function_call( 4747 self, 4748 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4749 anonymous: bool = False, 4750 optional_parens: bool = True, 4751 any_token: bool = False, 4752 ) -> t.Optional[exp.Expression]: 4753 if not self._curr: 4754 return None 4755 4756 comments = self._curr.comments 4757 token_type = self._curr.token_type 4758 this = self._curr.text 4759 upper = this.upper() 4760 4761 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4762 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4763 self._advance() 4764 return self._parse_window(parser(self)) 4765 4766 if not self._next or self._next.token_type != TokenType.L_PAREN: 4767 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4768 self._advance() 4769 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4770 4771 return None 4772 4773 if any_token: 4774 if token_type in self.RESERVED_TOKENS: 4775 return None 4776 elif token_type not in self.FUNC_TOKENS: 4777 return None 4778 4779 self._advance(2) 4780 4781 parser = self.FUNCTION_PARSERS.get(upper) 4782 if parser and not anonymous: 4783 this = parser(self) 4784 else: 4785 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4786 4787 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4788 this = self.expression(subquery_predicate, this=self._parse_select()) 4789 self._match_r_paren() 4790 return this 4791 4792 if functions is None: 4793 functions = self.FUNCTIONS 4794 4795 function = functions.get(upper) 4796 4797 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4798 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4799 4800 if alias: 4801 args = self._kv_to_prop_eq(args) 4802 4803 if function and not anonymous: 4804 if "dialect" in function.__code__.co_varnames: 4805 func = function(args, dialect=self.dialect) 4806 else: 4807 func = function(args) 4808 4809 func = self.validate_expression(func, args) 4810 if not self.dialect.NORMALIZE_FUNCTIONS: 4811 func.meta["name"] = this 4812 4813 this = func 4814 else: 4815 if token_type == TokenType.IDENTIFIER: 4816 this = exp.Identifier(this=this, quoted=True) 4817 this = self.expression(exp.Anonymous, this=this, expressions=args) 4818 4819 if isinstance(this, exp.Expression): 4820 this.add_comments(comments) 4821 4822 self._match_r_paren(this) 4823 return self._parse_window(this) 4824 4825 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4826 transformed = [] 4827 4828 for e in expressions: 4829 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4830 if isinstance(e, exp.Alias): 4831 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4832 4833 if not isinstance(e, exp.PropertyEQ): 4834 e = self.expression( 4835 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4836 ) 4837 4838 if isinstance(e.this, exp.Column): 4839 e.this.replace(e.this.this) 4840 4841 transformed.append(e) 4842 4843 
return transformed 4844 4845 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4846 return self._parse_column_def(self._parse_id_var()) 4847 4848 def _parse_user_defined_function( 4849 self, kind: t.Optional[TokenType] = None 4850 ) -> t.Optional[exp.Expression]: 4851 this = self._parse_id_var() 4852 4853 while self._match(TokenType.DOT): 4854 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4855 4856 if not self._match(TokenType.L_PAREN): 4857 return this 4858 4859 expressions = self._parse_csv(self._parse_function_parameter) 4860 self._match_r_paren() 4861 return self.expression( 4862 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4863 ) 4864 4865 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4866 literal = self._parse_primary() 4867 if literal: 4868 return self.expression(exp.Introducer, this=token.text, expression=literal) 4869 4870 return self.expression(exp.Identifier, this=token.text) 4871 4872 def _parse_session_parameter(self) -> exp.SessionParameter: 4873 kind = None 4874 this = self._parse_id_var() or self._parse_primary() 4875 4876 if this and self._match(TokenType.DOT): 4877 kind = this.name 4878 this = self._parse_var() or self._parse_primary() 4879 4880 return self.expression(exp.SessionParameter, this=this, kind=kind) 4881 4882 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4883 return self._parse_id_var() 4884 4885 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4886 index = self._index 4887 4888 if self._match(TokenType.L_PAREN): 4889 expressions = t.cast( 4890 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4891 ) 4892 4893 if not self._match(TokenType.R_PAREN): 4894 self._retreat(index) 4895 else: 4896 expressions = [self._parse_lambda_arg()] 4897 4898 if self._match_set(self.LAMBDAS): 4899 return self.LAMBDAS[self._prev.token_type](self, expressions) 4900 4901 self._retreat(index) 4902 4903 this: t.Optional[exp.Expression] 4904 4905 if self._match(TokenType.DISTINCT): 4906 this = self.expression( 4907 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4908 ) 4909 else: 4910 this = self._parse_select_or_expression(alias=alias) 4911 4912 return self._parse_limit( 4913 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4914 ) 4915 4916 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4917 index = self._index 4918 if not self._match(TokenType.L_PAREN): 4919 return this 4920 4921 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4922 # expr can be of both types 4923 if self._match_set(self.SELECT_START_TOKENS): 4924 self._retreat(index) 4925 return this 4926 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4927 self._match_r_paren() 4928 return self.expression(exp.Schema, this=this, expressions=args) 4929 4930 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4931 return self._parse_column_def(self._parse_field(any_token=True)) 4932 4933 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4934 # column defs are not really columns, they're identifiers 4935 if isinstance(this, exp.Column): 4936 this = this.this 4937 4938 kind = self._parse_types(schema=True) 4939 4940 if self._match_text_seq("FOR", "ORDINALITY"): 4941 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4942 4943 constraints: t.List[exp.Expression] = [] 4944 4945 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4946 ("ALIAS", "MATERIALIZED") 4947 ): 4948 persisted = self._prev.text.upper() == "MATERIALIZED" 4949 constraints.append( 4950 self.expression( 4951 exp.ComputedColumnConstraint, 4952 this=self._parse_assignment(), 4953 persisted=persisted or self._match_text_seq("PERSISTED"), 4954 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4955 ) 4956 ) 4957 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4958 self._match(TokenType.ALIAS) 4959 constraints.append( 4960 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4961 ) 4962 4963 while True: 4964 constraint = self._parse_column_constraint() 4965 if not constraint: 4966 break 4967 constraints.append(constraint) 4968 4969 if not kind and not constraints: 4970 return this 4971 4972 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4973 4974 def _parse_auto_increment( 4975 self, 4976 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4977 start = None 4978 increment = None 4979 4980 if self._match(TokenType.L_PAREN, advance=False): 4981 args = self._parse_wrapped_csv(self._parse_bitwise) 4982 start = seq_get(args, 0) 4983 increment = seq_get(args, 1) 4984 elif self._match_text_seq("START"): 4985 start = self._parse_bitwise() 4986 self._match_text_seq("INCREMENT") 4987 increment = self._parse_bitwise() 4988 4989 if start and increment: 4990 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4991 4992 return exp.AutoIncrementColumnConstraint() 4993 4994 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4995 if not self._match_text_seq("REFRESH"): 4996 self._retreat(self._index - 1) 4997 return None 4998 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4999 5000 def _parse_compress(self) -> exp.CompressColumnConstraint: 5001 if self._match(TokenType.L_PAREN, advance=False): 5002 return self.expression( 5003 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5004 ) 5005 5006 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5007 5008 def _parse_generated_as_identity( 5009 self, 5010 ) -> ( 5011 exp.GeneratedAsIdentityColumnConstraint 5012 | exp.ComputedColumnConstraint 5013 | exp.GeneratedAsRowColumnConstraint 5014 ): 5015 if self._match_text_seq("BY", "DEFAULT"): 5016 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5017 this = self.expression( 5018 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5019 ) 5020 else: 5021 self._match_text_seq("ALWAYS") 5022 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5023 5024 self._match(TokenType.ALIAS) 5025 5026 if self._match_text_seq("ROW"): 5027 start = self._match_text_seq("START") 5028 if not start: 5029 self._match(TokenType.END) 5030 hidden = self._match_text_seq("HIDDEN") 5031 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5032 5033 identity = self._match_text_seq("IDENTITY") 5034 5035 if self._match(TokenType.L_PAREN): 5036 if self._match(TokenType.START_WITH): 5037 this.set("start", self._parse_bitwise()) 5038 if self._match_text_seq("INCREMENT", "BY"): 5039 this.set("increment", self._parse_bitwise()) 5040 if self._match_text_seq("MINVALUE"): 5041 this.set("minvalue", self._parse_bitwise()) 5042 if self._match_text_seq("MAXVALUE"): 5043 this.set("maxvalue", self._parse_bitwise()) 5044 5045 if self._match_text_seq("CYCLE"): 5046 this.set("cycle", True) 5047 elif self._match_text_seq("NO", "CYCLE"): 5048 this.set("cycle", False) 5049 5050 if not identity: 5051 this.set("expression", self._parse_range()) 5052 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5053 args = self._parse_csv(self._parse_bitwise) 5054 this.set("start", seq_get(args, 0)) 5055 this.set("increment", seq_get(args, 1)) 5056 5057 self._match_r_paren() 5058 5059 return this 5060 5061 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5062 self._match_text_seq("LENGTH") 5063 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5064 5065 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5066 if self._match_text_seq("NULL"): 5067 return self.expression(exp.NotNullColumnConstraint) 5068 if self._match_text_seq("CASESPECIFIC"): 5069 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5070 if self._match_text_seq("FOR", "REPLICATION"): 5071 return self.expression(exp.NotForReplicationColumnConstraint) 5072 return None 5073 5074 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5075 if self._match(TokenType.CONSTRAINT): 5076 this = self._parse_id_var() 5077 else: 5078 this = None 5079 5080 if self._match_texts(self.CONSTRAINT_PARSERS): 5081 return self.expression( 5082 exp.ColumnConstraint, 5083 this=this, 5084 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5085 ) 5086 5087 return this 5088 5089 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5090 if not self._match(TokenType.CONSTRAINT): 5091 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5092 5093 return self.expression( 5094 exp.Constraint, 5095 this=self._parse_id_var(), 5096 expressions=self._parse_unnamed_constraints(), 5097 ) 5098 5099 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5100 constraints = [] 5101 while True: 5102 constraint = self._parse_unnamed_constraint() or self._parse_function() 5103 if not constraint: 5104 break 5105 constraints.append(constraint) 5106 5107 return constraints 5108 5109 def _parse_unnamed_constraint( 5110 self, constraints: t.Optional[t.Collection[str]] = None 5111 ) -> t.Optional[exp.Expression]: 5112 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5113 constraints or self.CONSTRAINT_PARSERS 5114 ): 5115 return None 5116 5117 constraint = self._prev.text.upper() 5118 if constraint not in self.CONSTRAINT_PARSERS: 5119 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5120 5121 return self.CONSTRAINT_PARSERS[constraint](self) 5122 5123 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5124 self._match_text_seq("KEY") 5125 return self.expression( 5126 exp.UniqueColumnConstraint, 5127 this=self._parse_schema(self._parse_id_var(any_token=False)), 5128 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5129 on_conflict=self._parse_on_conflict(), 5130 ) 5131 5132 def _parse_key_constraint_options(self) -> t.List[str]: 5133 options = [] 5134 while True: 5135 if not self._curr: 5136 break 5137 5138 if self._match(TokenType.ON): 5139 action = None 5140 on = self._advance_any() and self._prev.text 5141 5142 if self._match_text_seq("NO", "ACTION"): 5143 action = "NO ACTION" 5144 elif self._match_text_seq("CASCADE"): 5145 action = "CASCADE" 5146 elif self._match_text_seq("RESTRICT"): 5147 action = "RESTRICT" 5148 elif self._match_pair(TokenType.SET, TokenType.NULL): 5149 action = "SET NULL" 5150 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5151 action = "SET DEFAULT" 5152 else: 5153 self.raise_error("Invalid key constraint") 5154 5155 options.append(f"ON {on} {action}") 5156 elif self._match_text_seq("NOT", "ENFORCED"): 5157 options.append("NOT ENFORCED") 5158 elif self._match_text_seq("DEFERRABLE"): 5159 options.append("DEFERRABLE") 5160 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5161 options.append("INITIALLY DEFERRED") 5162 elif self._match_text_seq("NORELY"): 5163 options.append("NORELY") 5164 elif self._match_text_seq("MATCH", "FULL"): 5165 options.append("MATCH FULL") 5166 else: 5167 break 5168 5169 return options 5170 5171 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5172 if match and not self._match(TokenType.REFERENCES): 5173 return None 5174 5175 expressions = None 5176 this = self._parse_table(schema=True) 5177 options = self._parse_key_constraint_options() 5178 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5179 5180 def _parse_foreign_key(self) -> exp.ForeignKey: 5181 expressions = self._parse_wrapped_id_vars() 5182 reference = self._parse_references() 5183 options = {} 5184 5185 while self._match(TokenType.ON): 5186 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5187 self.raise_error("Expected DELETE or UPDATE") 5188 5189 kind = self._prev.text.lower() 5190 5191 if self._match_text_seq("NO", "ACTION"): 5192 action = "NO ACTION" 5193 elif self._match(TokenType.SET): 5194 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5195 action = "SET " + self._prev.text.upper() 5196 else: 5197 self._advance() 5198 action = self._prev.text.upper() 5199 5200 options[kind] = action 5201 5202 return self.expression( 5203 exp.ForeignKey, 5204 expressions=expressions, 5205 reference=reference, 5206 **options, # type: ignore 5207 ) 5208 5209 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5210 return self._parse_field() 5211 5212 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5213 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5214 self._retreat(self._index - 1) 5215 return None 5216 5217 id_vars = self._parse_wrapped_id_vars() 5218 return self.expression( 5219 exp.PeriodForSystemTimeConstraint, 5220 this=seq_get(id_vars, 0), 5221 expression=seq_get(id_vars, 1), 5222 ) 5223 5224 def _parse_primary_key( 5225 self, wrapped_optional: bool = False, in_props: bool = False 5226 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5227 desc = ( 5228 self._match_set((TokenType.ASC, TokenType.DESC)) 5229 and self._prev.token_type == TokenType.DESC 5230 ) 5231 5232 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5233 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5234 5235 expressions = self._parse_wrapped_csv( 5236 self._parse_primary_key_part, optional=wrapped_optional 5237 ) 5238 options = self._parse_key_constraint_options() 5239 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5240 5241 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5242 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5243 5244 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5245 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5246 return this 5247 5248 bracket_kind = self._prev.token_type 5249 expressions = self._parse_csv( 5250 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5251 ) 5252 5253 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5254 self.raise_error("Expected ]") 5255 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5256 self.raise_error("Expected }") 5257 5258 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5259 if bracket_kind == TokenType.L_BRACE: 5260 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5261 elif not this: 5262 this = self.expression(exp.Array, expressions=expressions) 5263 else: 5264 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5265 if constructor_type: 5266 return self.expression(constructor_type, expressions=expressions) 5267 5268 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5269 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5270 5271 self._add_comments(this) 5272 return self._parse_bracket(this) 5273 5274 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5275 if self._match(TokenType.COLON): 5276 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5277 return this 5278 5279 def _parse_case(self) -> t.Optional[exp.Expression]: 5280 ifs = [] 5281 default = None 5282 5283 comments = self._prev_comments 5284 expression = self._parse_assignment() 5285 5286 while self._match(TokenType.WHEN): 5287 this = self._parse_assignment() 5288 self._match(TokenType.THEN) 5289 then = self._parse_assignment() 5290 ifs.append(self.expression(exp.If, this=this, true=then)) 5291 5292 if self._match(TokenType.ELSE): 5293 default = self._parse_assignment() 5294 5295 if not self._match(TokenType.END): 5296 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5297 default = exp.column("interval") 5298 else: 5299 self.raise_error("Expected END after CASE", self._prev) 5300 5301 return self.expression( 5302 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5303 ) 5304 5305 def _parse_if(self) -> t.Optional[exp.Expression]: 5306 if self._match(TokenType.L_PAREN): 5307 args = self._parse_csv(self._parse_assignment) 5308 this = self.validate_expression(exp.If.from_arg_list(args), args) 5309 self._match_r_paren() 5310 else: 5311 index = self._index - 1 5312 5313 if self.NO_PAREN_IF_COMMANDS and index == 0: 5314 return 
self._parse_as_command(self._prev) 5315 5316 condition = self._parse_assignment() 5317 5318 if not condition: 5319 self._retreat(index) 5320 return None 5321 5322 self._match(TokenType.THEN) 5323 true = self._parse_assignment() 5324 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5325 self._match(TokenType.END) 5326 this = self.expression(exp.If, this=condition, true=true, false=false) 5327 5328 return this 5329 5330 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5331 if not self._match_text_seq("VALUE", "FOR"): 5332 self._retreat(self._index - 1) 5333 return None 5334 5335 return self.expression( 5336 exp.NextValueFor, 5337 this=self._parse_column(), 5338 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5339 ) 5340 5341 def _parse_extract(self) -> exp.Extract: 5342 this = self._parse_function() or self._parse_var_or_string(upper=True) 5343 5344 if self._match(TokenType.FROM): 5345 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5346 5347 if not self._match(TokenType.COMMA): 5348 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5349 5350 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5351 5352 def _parse_gap_fill(self) -> exp.GapFill: 5353 self._match(TokenType.TABLE) 5354 this = self._parse_table() 5355 5356 self._match(TokenType.COMMA) 5357 args = [this, *self._parse_csv(self._parse_lambda)] 5358 5359 gap_fill = exp.GapFill.from_arg_list(args) 5360 return self.validate_expression(gap_fill, args) 5361 5362 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5363 this = self._parse_assignment() 5364 5365 if not self._match(TokenType.ALIAS): 5366 if self._match(TokenType.COMMA): 5367 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5368 5369 self.raise_error("Expected AS after CAST") 5370 5371 fmt = None 5372 to = self._parse_types() 5373 5374 if self._match(TokenType.FORMAT): 5375 fmt_string = self._parse_string() 5376 fmt = self._parse_at_time_zone(fmt_string) 5377 5378 if not to: 5379 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5380 if to.this in exp.DataType.TEMPORAL_TYPES: 5381 this = self.expression( 5382 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5383 this=this, 5384 format=exp.Literal.string( 5385 format_time( 5386 fmt_string.this if fmt_string else "", 5387 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5388 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5389 ) 5390 ), 5391 safe=safe, 5392 ) 5393 5394 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5395 this.set("zone", fmt.args["zone"]) 5396 return this 5397 elif not to: 5398 self.raise_error("Expected TYPE after CAST") 5399 elif isinstance(to, exp.Identifier): 5400 to = exp.DataType.build(to.name, udt=True) 5401 elif to.this == exp.DataType.Type.CHAR: 5402 if self._match(TokenType.CHARACTER_SET): 5403 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5404 5405 return self.expression( 5406 exp.Cast if strict else exp.TryCast, 5407 this=this, 5408 to=to, 5409 format=fmt, 5410 safe=safe, 5411 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5412 ) 5413 5414 def _parse_string_agg(self) -> exp.Expression: 5415 if self._match(TokenType.DISTINCT): 5416 args: t.List[t.Optional[exp.Expression]] = [ 5417 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5418 ] 5419 if 
self._match(TokenType.COMMA): 5420 args.extend(self._parse_csv(self._parse_assignment)) 5421 else: 5422 args = self._parse_csv(self._parse_assignment) # type: ignore 5423 5424 index = self._index 5425 if not self._match(TokenType.R_PAREN) and args: 5426 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5427 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5428 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5429 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5430 5431 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5432 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5433 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5434 if not self._match_text_seq("WITHIN", "GROUP"): 5435 self._retreat(index) 5436 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5437 5438 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5439 order = self._parse_order(this=seq_get(args, 0)) 5440 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5441 5442 def _parse_convert( 5443 self, strict: bool, safe: t.Optional[bool] = None 5444 ) -> t.Optional[exp.Expression]: 5445 this = self._parse_bitwise() 5446 5447 if self._match(TokenType.USING): 5448 to: t.Optional[exp.Expression] = self.expression( 5449 exp.CharacterSet, this=self._parse_var() 5450 ) 5451 elif self._match(TokenType.COMMA): 5452 to = self._parse_types() 5453 else: 5454 to = None 5455 5456 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5457 5458 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5459 """ 5460 There are generally two variants of the DECODE function: 5461 5462 - DECODE(bin, charset) 5463 - DECODE(expression, search, result [, search, result] ... [, default]) 5464 5465 The second variant will always be parsed into a CASE expression. Note that NULL 5466 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5467 instead of relying on pattern matching. 
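Illustrative example (editor-added, not in the original docstring): under the second
variant, DECODE(x, 1, 'one', 2, 'two', 'other') is parsed roughly into
CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END, with NULL search
values compared via IS NULL as described above.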
5468 """ 5469 args = self._parse_csv(self._parse_assignment) 5470 5471 if len(args) < 3: 5472 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5473 5474 expression, *expressions = args 5475 if not expression: 5476 return None 5477 5478 ifs = [] 5479 for search, result in zip(expressions[::2], expressions[1::2]): 5480 if not search or not result: 5481 return None 5482 5483 if isinstance(search, exp.Literal): 5484 ifs.append( 5485 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5486 ) 5487 elif isinstance(search, exp.Null): 5488 ifs.append( 5489 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5490 ) 5491 else: 5492 cond = exp.or_( 5493 exp.EQ(this=expression.copy(), expression=search), 5494 exp.and_( 5495 exp.Is(this=expression.copy(), expression=exp.Null()), 5496 exp.Is(this=search.copy(), expression=exp.Null()), 5497 copy=False, 5498 ), 5499 copy=False, 5500 ) 5501 ifs.append(exp.If(this=cond, true=result)) 5502 5503 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5504 5505 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5506 self._match_text_seq("KEY") 5507 key = self._parse_column() 5508 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5509 self._match_text_seq("VALUE") 5510 value = self._parse_bitwise() 5511 5512 if not key and not value: 5513 return None 5514 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5515 5516 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5517 if not this or not self._match_text_seq("FORMAT", "JSON"): 5518 return this 5519 5520 return self.expression(exp.FormatJson, this=this) 5521 5522 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5523 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5524 for value in values: 5525 if self._match_text_seq(value, "ON", on): 5526 return f"{value} ON {on}" 5527 5528 return None 5529 5530 @t.overload 5531 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5532 5533 @t.overload 5534 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5535 5536 def _parse_json_object(self, agg=False): 5537 star = self._parse_star() 5538 expressions = ( 5539 [star] 5540 if star 5541 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5542 ) 5543 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5544 5545 unique_keys = None 5546 if self._match_text_seq("WITH", "UNIQUE"): 5547 unique_keys = True 5548 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5549 unique_keys = False 5550 5551 self._match_text_seq("KEYS") 5552 5553 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5554 self._parse_type() 5555 ) 5556 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5557 5558 return self.expression( 5559 exp.JSONObjectAgg if agg else exp.JSONObject, 5560 expressions=expressions, 5561 null_handling=null_handling, 5562 unique_keys=unique_keys, 5563 return_type=return_type, 5564 encoding=encoding, 5565 ) 5566 5567 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5568 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5569 if not self._match_text_seq("NESTED"): 5570 this = self._parse_id_var() 5571 kind = self._parse_types(allow_identifiers=False) 5572 nested = None 5573 else: 5574 this = None 5575 kind = None 5576 nested = True 5577 5578 path = self._match_text_seq("PATH") and self._parse_string() 5579 nested_schema = nested and self._parse_json_schema() 5580 5581 return self.expression( 5582 exp.JSONColumnDef, 5583 this=this, 5584 kind=kind, 5585 path=path, 5586 nested_schema=nested_schema, 5587 ) 5588 5589 def _parse_json_schema(self) -> exp.JSONSchema: 5590 self._match_text_seq("COLUMNS") 5591 return self.expression( 5592 exp.JSONSchema, 5593 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5594 ) 5595 5596 def _parse_json_table(self) -> exp.JSONTable: 5597 this = self._parse_format_json(self._parse_bitwise()) 5598 path = self._match(TokenType.COMMA) and self._parse_string() 5599 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5600 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5601 schema = self._parse_json_schema() 5602 5603 return exp.JSONTable( 5604 this=this, 5605 schema=schema, 5606 path=path, 5607 error_handling=error_handling, 5608 empty_handling=empty_handling, 5609 ) 5610 5611 def _parse_match_against(self) -> exp.MatchAgainst: 5612 expressions = self._parse_csv(self._parse_column) 5613 5614 self._match_text_seq(")", "AGAINST", "(") 5615 5616 this = self._parse_string() 5617 5618 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5619 modifier = "IN NATURAL LANGUAGE MODE" 5620 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5621 modifier = f"{modifier} WITH QUERY EXPANSION" 5622 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5623 modifier = "IN BOOLEAN MODE" 5624 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5625 modifier = "WITH QUERY EXPANSION" 5626 else: 5627 modifier = None 5628 5629 return self.expression( 5630 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5631 ) 5632 5633 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5634 def _parse_open_json(self) -> exp.OpenJSON: 5635 this = self._parse_bitwise() 5636 path = self._match(TokenType.COMMA) and self._parse_string() 5637 5638 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5639 this = self._parse_field(any_token=True) 5640 kind = self._parse_types() 5641 path = 
self._parse_string() 5642 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5643 5644 return self.expression( 5645 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5646 ) 5647 5648 expressions = None 5649 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5650 self._match_l_paren() 5651 expressions = self._parse_csv(_parse_open_json_column_def) 5652 5653 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5654 5655 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5656 args = self._parse_csv(self._parse_bitwise) 5657 5658 if self._match(TokenType.IN): 5659 return self.expression( 5660 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5661 ) 5662 5663 if haystack_first: 5664 haystack = seq_get(args, 0) 5665 needle = seq_get(args, 1) 5666 else: 5667 needle = seq_get(args, 0) 5668 haystack = seq_get(args, 1) 5669 5670 return self.expression( 5671 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5672 ) 5673 5674 def _parse_predict(self) -> exp.Predict: 5675 self._match_text_seq("MODEL") 5676 this = self._parse_table() 5677 5678 self._match(TokenType.COMMA) 5679 self._match_text_seq("TABLE") 5680 5681 return self.expression( 5682 exp.Predict, 5683 this=this, 5684 expression=self._parse_table(), 5685 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5686 ) 5687 5688 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5689 args = self._parse_csv(self._parse_table) 5690 return exp.JoinHint(this=func_name.upper(), expressions=args) 5691 5692 def _parse_substring(self) -> exp.Substring: 5693 # Postgres supports the form: substring(string [from int] [for int]) 5694 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5695 5696 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5697 5698 if self._match(TokenType.FROM): 5699 args.append(self._parse_bitwise()) 5700 if self._match(TokenType.FOR): 5701 if len(args) == 1: 5702 args.append(exp.Literal.number(1)) 5703 args.append(self._parse_bitwise()) 5704 5705 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5706 5707 def _parse_trim(self) -> exp.Trim: 5708 # https://www.w3resource.com/sql/character-functions/trim.php 5709 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5710 5711 position = None 5712 collation = None 5713 expression = None 5714 5715 if self._match_texts(self.TRIM_TYPES): 5716 position = self._prev.text.upper() 5717 5718 this = self._parse_bitwise() 5719 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5720 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5721 expression = self._parse_bitwise() 5722 5723 if invert_order: 5724 this, expression = expression, this 5725 5726 if self._match(TokenType.COLLATE): 5727 collation = self._parse_bitwise() 5728 5729 return self.expression( 5730 exp.Trim, this=this, position=position, expression=expression, collation=collation 5731 ) 5732 5733 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5734 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5735 5736 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5737 return self._parse_window(self._parse_id_var(), alias=True) 5738 5739 def _parse_respect_or_ignore_nulls( 5740 self, this: t.Optional[exp.Expression] 5741 ) -> t.Optional[exp.Expression]: 5742 if self._match_text_seq("IGNORE", "NULLS"): 
5743 return self.expression(exp.IgnoreNulls, this=this) 5744 if self._match_text_seq("RESPECT", "NULLS"): 5745 return self.expression(exp.RespectNulls, this=this) 5746 return this 5747 5748 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5749 if self._match(TokenType.HAVING): 5750 self._match_texts(("MAX", "MIN")) 5751 max = self._prev.text.upper() != "MIN" 5752 return self.expression( 5753 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5754 ) 5755 5756 return this 5757 5758 def _parse_window( 5759 self, this: t.Optional[exp.Expression], alias: bool = False 5760 ) -> t.Optional[exp.Expression]: 5761 func = this 5762 comments = func.comments if isinstance(func, exp.Expression) else None 5763 5764 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5765 self._match(TokenType.WHERE) 5766 this = self.expression( 5767 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5768 ) 5769 self._match_r_paren() 5770 5771 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5772 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5773 if self._match_text_seq("WITHIN", "GROUP"): 5774 order = self._parse_wrapped(self._parse_order) 5775 this = self.expression(exp.WithinGroup, this=this, expression=order) 5776 5777 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5778 # Some dialects choose to implement and some do not. 5779 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5780 5781 # There is some code above in _parse_lambda that handles 5782 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5783 5784 # The below changes handle 5785 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5786 5787 # Oracle allows both formats 5788 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5789 # and Snowflake chose to do the same for familiarity 5790 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5791 if isinstance(this, exp.AggFunc): 5792 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5793 5794 if ignore_respect and ignore_respect is not this: 5795 ignore_respect.replace(ignore_respect.this) 5796 this = self.expression(ignore_respect.__class__, this=this) 5797 5798 this = self._parse_respect_or_ignore_nulls(this) 5799 5800 # bigquery select from window x AS (partition by ...) 
5801 if alias: 5802 over = None 5803 self._match(TokenType.ALIAS) 5804 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5805 return this 5806 else: 5807 over = self._prev.text.upper() 5808 5809 if comments and isinstance(func, exp.Expression): 5810 func.pop_comments() 5811 5812 if not self._match(TokenType.L_PAREN): 5813 return self.expression( 5814 exp.Window, 5815 comments=comments, 5816 this=this, 5817 alias=self._parse_id_var(False), 5818 over=over, 5819 ) 5820 5821 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5822 5823 first = self._match(TokenType.FIRST) 5824 if self._match_text_seq("LAST"): 5825 first = False 5826 5827 partition, order = self._parse_partition_and_order() 5828 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5829 5830 if kind: 5831 self._match(TokenType.BETWEEN) 5832 start = self._parse_window_spec() 5833 self._match(TokenType.AND) 5834 end = self._parse_window_spec() 5835 5836 spec = self.expression( 5837 exp.WindowSpec, 5838 kind=kind, 5839 start=start["value"], 5840 start_side=start["side"], 5841 end=end["value"], 5842 end_side=end["side"], 5843 ) 5844 else: 5845 spec = None 5846 5847 self._match_r_paren() 5848 5849 window = self.expression( 5850 exp.Window, 5851 comments=comments, 5852 this=this, 5853 partition_by=partition, 5854 order=order, 5855 spec=spec, 5856 alias=window_alias, 5857 over=over, 5858 first=first, 5859 ) 5860 5861 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5862 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5863 return self._parse_window(window, alias=alias) 5864 5865 return window 5866 5867 def _parse_partition_and_order( 5868 self, 5869 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5870 return self._parse_partition_by(), self._parse_order() 5871 5872 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5873 self._match(TokenType.BETWEEN) 5874 5875 return { 5876 "value": ( 5877 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5878 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5879 or self._parse_bitwise() 5880 ), 5881 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5882 } 5883 5884 def _parse_alias( 5885 self, this: t.Optional[exp.Expression], explicit: bool = False 5886 ) -> t.Optional[exp.Expression]: 5887 any_token = self._match(TokenType.ALIAS) 5888 comments = self._prev_comments or [] 5889 5890 if explicit and not any_token: 5891 return this 5892 5893 if self._match(TokenType.L_PAREN): 5894 aliases = self.expression( 5895 exp.Aliases, 5896 comments=comments, 5897 this=this, 5898 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5899 ) 5900 self._match_r_paren(aliases) 5901 return aliases 5902 5903 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5904 self.STRING_ALIASES and self._parse_string_as_identifier() 5905 ) 5906 5907 if alias: 5908 comments.extend(alias.pop_comments()) 5909 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5910 column = this.this 5911 5912 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5913 if not this.comments and column and column.comments: 5914 this.comments = column.pop_comments() 5915 5916 return this 5917 5918 def _parse_id_var( 5919 self, 5920 any_token: bool = True, 5921 tokens: t.Optional[t.Collection[TokenType]] = None, 5922 ) -> t.Optional[exp.Expression]: 5923 expression = self._parse_identifier() 5924 if 
not expression and ( 5925 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5926 ): 5927 quoted = self._prev.token_type == TokenType.STRING 5928 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5929 5930 return expression 5931 5932 def _parse_string(self) -> t.Optional[exp.Expression]: 5933 if self._match_set(self.STRING_PARSERS): 5934 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5935 return self._parse_placeholder() 5936 5937 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5938 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5939 5940 def _parse_number(self) -> t.Optional[exp.Expression]: 5941 if self._match_set(self.NUMERIC_PARSERS): 5942 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5943 return self._parse_placeholder() 5944 5945 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5946 if self._match(TokenType.IDENTIFIER): 5947 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5948 return self._parse_placeholder() 5949 5950 def _parse_var( 5951 self, 5952 any_token: bool = False, 5953 tokens: t.Optional[t.Collection[TokenType]] = None, 5954 upper: bool = False, 5955 ) -> t.Optional[exp.Expression]: 5956 if ( 5957 (any_token and self._advance_any()) 5958 or self._match(TokenType.VAR) 5959 or (self._match_set(tokens) if tokens else False) 5960 ): 5961 return self.expression( 5962 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5963 ) 5964 return self._parse_placeholder() 5965 5966 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5967 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5968 self._advance() 5969 return self._prev 5970 return None 5971 5972 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 5973 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 5974 5975 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5976 return self._parse_primary() or self._parse_var(any_token=True) 5977 5978 def _parse_null(self) -> t.Optional[exp.Expression]: 5979 if self._match_set(self.NULL_TOKENS): 5980 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5981 return self._parse_placeholder() 5982 5983 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5984 if self._match(TokenType.TRUE): 5985 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5986 if self._match(TokenType.FALSE): 5987 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5988 return self._parse_placeholder() 5989 5990 def _parse_star(self) -> t.Optional[exp.Expression]: 5991 if self._match(TokenType.STAR): 5992 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5993 return self._parse_placeholder() 5994 5995 def _parse_parameter(self) -> exp.Parameter: 5996 this = self._parse_identifier() or self._parse_primary_or_var() 5997 return self.expression(exp.Parameter, this=this) 5998 5999 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6000 if self._match_set(self.PLACEHOLDER_PARSERS): 6001 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6002 if placeholder: 6003 return placeholder 6004 self._advance(-1) 6005 return None 6006 6007 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6008 if not self._match_texts(keywords): 6009 return None 6010 if self._match(TokenType.L_PAREN, 
advance=False): 6011 return self._parse_wrapped_csv(self._parse_expression) 6012 6013 expression = self._parse_expression() 6014 return [expression] if expression else None 6015 6016 def _parse_csv( 6017 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6018 ) -> t.List[exp.Expression]: 6019 parse_result = parse_method() 6020 items = [parse_result] if parse_result is not None else [] 6021 6022 while self._match(sep): 6023 self._add_comments(parse_result) 6024 parse_result = parse_method() 6025 if parse_result is not None: 6026 items.append(parse_result) 6027 6028 return items 6029 6030 def _parse_tokens( 6031 self, parse_method: t.Callable, expressions: t.Dict 6032 ) -> t.Optional[exp.Expression]: 6033 this = parse_method() 6034 6035 while self._match_set(expressions): 6036 this = self.expression( 6037 expressions[self._prev.token_type], 6038 this=this, 6039 comments=self._prev_comments, 6040 expression=parse_method(), 6041 ) 6042 6043 return this 6044 6045 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6046 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6047 6048 def _parse_wrapped_csv( 6049 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6050 ) -> t.List[exp.Expression]: 6051 return self._parse_wrapped( 6052 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6053 ) 6054 6055 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6056 wrapped = self._match(TokenType.L_PAREN) 6057 if not wrapped and not optional: 6058 self.raise_error("Expecting (") 6059 parse_result = parse_method() 6060 if wrapped: 6061 self._match_r_paren() 6062 return parse_result 6063 6064 def _parse_expressions(self) -> t.List[exp.Expression]: 6065 return self._parse_csv(self._parse_expression) 6066 6067 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6068 return self._parse_select() or self._parse_set_operations( 6069 self._parse_expression() if alias else self._parse_assignment() 6070 ) 6071 6072 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6073 return self._parse_query_modifiers( 6074 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6075 ) 6076 6077 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6078 this = None 6079 if self._match_texts(self.TRANSACTION_KIND): 6080 this = self._prev.text 6081 6082 self._match_texts(("TRANSACTION", "WORK")) 6083 6084 modes = [] 6085 while True: 6086 mode = [] 6087 while self._match(TokenType.VAR): 6088 mode.append(self._prev.text) 6089 6090 if mode: 6091 modes.append(" ".join(mode)) 6092 if not self._match(TokenType.COMMA): 6093 break 6094 6095 return self.expression(exp.Transaction, this=this, modes=modes) 6096 6097 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6098 chain = None 6099 savepoint = None 6100 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6101 6102 self._match_texts(("TRANSACTION", "WORK")) 6103 6104 if self._match_text_seq("TO"): 6105 self._match_text_seq("SAVEPOINT") 6106 savepoint = self._parse_id_var() 6107 6108 if self._match(TokenType.AND): 6109 chain = not self._match_text_seq("NO") 6110 self._match_text_seq("CHAIN") 6111 6112 if is_rollback: 6113 return self.expression(exp.Rollback, savepoint=savepoint) 6114 6115 return self.expression(exp.Commit, chain=chain) 6116 6117 def _parse_refresh(self) -> exp.Refresh: 6118 self._match(TokenType.TABLE) 6119 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6120 6121 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6122 if not self._match_text_seq("ADD"): 6123 return None 6124 6125 self._match(TokenType.COLUMN) 6126 exists_column = self._parse_exists(not_=True) 6127 expression = self._parse_field_def() 6128 6129 if expression: 6130 expression.set("exists", exists_column) 6131 6132 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6133 if self._match_texts(("FIRST", "AFTER")): 6134 position = self._prev.text 6135 column_position = self.expression( 6136 exp.ColumnPosition, this=self._parse_column(), position=position 6137 ) 6138 expression.set("position", column_position) 6139 6140 return expression 6141 6142 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6143 drop = self._match(TokenType.DROP) and self._parse_drop() 6144 if drop and not isinstance(drop, exp.Command): 6145 drop.set("kind", drop.args.get("kind", "COLUMN")) 6146 return drop 6147 6148 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6149 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6150 return self.expression( 6151 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6152 ) 6153 6154 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6155 index = self._index - 1 6156 6157 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6158 return self._parse_csv( 6159 lambda: self.expression( 6160 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6161 ) 6162 ) 6163 6164 self._retreat(index) 6165 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6166 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6167 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6168 6169 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6170 if self._match_texts(self.ALTER_ALTER_PARSERS): 6171 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6172 6173 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6174 # keyword after ALTER we default to parsing this statement 6175 self._match(TokenType.COLUMN) 6176 column = self._parse_field(any_token=True) 6177 6178 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6179 return self.expression(exp.AlterColumn, this=column, drop=True) 6180 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6181 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6182 if self._match(TokenType.COMMENT): 6183 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6184 if self._match_text_seq("DROP", "NOT", "NULL"): 6185 return self.expression( 6186 exp.AlterColumn, 6187 this=column, 6188 drop=True, 6189 allow_null=True, 6190 ) 6191 if self._match_text_seq("SET", "NOT", "NULL"): 6192 return self.expression( 6193 exp.AlterColumn, 6194 this=column, 6195 allow_null=False, 6196 ) 6197 self._match_text_seq("SET", "DATA") 6198 self._match_text_seq("TYPE") 6199 return self.expression( 6200 exp.AlterColumn, 6201 this=column, 6202 dtype=self._parse_types(), 6203 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6204 using=self._match(TokenType.USING) and self._parse_assignment(), 6205 ) 6206 6207 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6208 if self._match_texts(("ALL", "EVEN", "AUTO")): 6209 
return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6210 6211 self._match_text_seq("KEY", "DISTKEY") 6212 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6213 6214 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6215 if compound: 6216 self._match_text_seq("SORTKEY") 6217 6218 if self._match(TokenType.L_PAREN, advance=False): 6219 return self.expression( 6220 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6221 ) 6222 6223 self._match_texts(("AUTO", "NONE")) 6224 return self.expression( 6225 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6226 ) 6227 6228 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6229 index = self._index - 1 6230 6231 partition_exists = self._parse_exists() 6232 if self._match(TokenType.PARTITION, advance=False): 6233 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6234 6235 self._retreat(index) 6236 return self._parse_csv(self._parse_drop_column) 6237 6238 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6239 if self._match(TokenType.COLUMN): 6240 exists = self._parse_exists() 6241 old_column = self._parse_column() 6242 to = self._match_text_seq("TO") 6243 new_column = self._parse_column() 6244 6245 if old_column is None or to is None or new_column is None: 6246 return None 6247 6248 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6249 6250 self._match_text_seq("TO") 6251 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6252 6253 def _parse_alter_table_set(self) -> exp.AlterSet: 6254 alter_set = self.expression(exp.AlterSet) 6255 6256 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6257 "TABLE", "PROPERTIES" 6258 ): 6259 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6260 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6261 alter_set.set("expressions", [self._parse_assignment()]) 6262 elif self._match_texts(("LOGGED", "UNLOGGED")): 6263 alter_set.set("option", exp.var(self._prev.text.upper())) 6264 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6265 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6266 elif self._match_text_seq("LOCATION"): 6267 alter_set.set("location", self._parse_field()) 6268 elif self._match_text_seq("ACCESS", "METHOD"): 6269 alter_set.set("access_method", self._parse_field()) 6270 elif self._match_text_seq("TABLESPACE"): 6271 alter_set.set("tablespace", self._parse_field()) 6272 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6273 alter_set.set("file_format", [self._parse_field()]) 6274 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6275 alter_set.set("file_format", self._parse_wrapped_options()) 6276 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6277 alter_set.set("copy_options", self._parse_wrapped_options()) 6278 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6279 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6280 else: 6281 if self._match_text_seq("SERDE"): 6282 alter_set.set("serde", self._parse_field()) 6283 6284 alter_set.set("expressions", [self._parse_properties()]) 6285 6286 return alter_set 6287 6288 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6289 start = self._prev 6290 6291 if not self._match(TokenType.TABLE): 6292 return 
self._parse_as_command(start) 6293 6294 exists = self._parse_exists() 6295 only = self._match_text_seq("ONLY") 6296 this = self._parse_table(schema=True) 6297 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6298 6299 if self._next: 6300 self._advance() 6301 6302 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6303 if parser: 6304 actions = ensure_list(parser(self)) 6305 options = self._parse_csv(self._parse_property) 6306 6307 if not self._curr and actions: 6308 return self.expression( 6309 exp.AlterTable, 6310 this=this, 6311 exists=exists, 6312 actions=actions, 6313 only=only, 6314 options=options, 6315 cluster=cluster, 6316 ) 6317 6318 return self._parse_as_command(start) 6319 6320 def _parse_merge(self) -> exp.Merge: 6321 self._match(TokenType.INTO) 6322 target = self._parse_table() 6323 6324 if target and self._match(TokenType.ALIAS, advance=False): 6325 target.set("alias", self._parse_table_alias()) 6326 6327 self._match(TokenType.USING) 6328 using = self._parse_table() 6329 6330 self._match(TokenType.ON) 6331 on = self._parse_assignment() 6332 6333 return self.expression( 6334 exp.Merge, 6335 this=target, 6336 using=using, 6337 on=on, 6338 expressions=self._parse_when_matched(), 6339 ) 6340 6341 def _parse_when_matched(self) -> t.List[exp.When]: 6342 whens = [] 6343 6344 while self._match(TokenType.WHEN): 6345 matched = not self._match(TokenType.NOT) 6346 self._match_text_seq("MATCHED") 6347 source = ( 6348 False 6349 if self._match_text_seq("BY", "TARGET") 6350 else self._match_text_seq("BY", "SOURCE") 6351 ) 6352 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6353 6354 self._match(TokenType.THEN) 6355 6356 if self._match(TokenType.INSERT): 6357 _this = self._parse_star() 6358 if _this: 6359 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6360 else: 6361 then = self.expression( 6362 exp.Insert, 6363 this=self._parse_value(), 6364 expression=self._match_text_seq("VALUES") and self._parse_value(), 6365 ) 6366 elif self._match(TokenType.UPDATE): 6367 expressions = self._parse_star() 6368 if expressions: 6369 then = self.expression(exp.Update, expressions=expressions) 6370 else: 6371 then = self.expression( 6372 exp.Update, 6373 expressions=self._match(TokenType.SET) 6374 and self._parse_csv(self._parse_equality), 6375 ) 6376 elif self._match(TokenType.DELETE): 6377 then = self.expression(exp.Var, this=self._prev.text) 6378 else: 6379 then = None 6380 6381 whens.append( 6382 self.expression( 6383 exp.When, 6384 matched=matched, 6385 source=source, 6386 condition=condition, 6387 then=then, 6388 ) 6389 ) 6390 return whens 6391 6392 def _parse_show(self) -> t.Optional[exp.Expression]: 6393 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6394 if parser: 6395 return parser(self) 6396 return self._parse_as_command(self._prev) 6397 6398 def _parse_set_item_assignment( 6399 self, kind: t.Optional[str] = None 6400 ) -> t.Optional[exp.Expression]: 6401 index = self._index 6402 6403 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6404 return self._parse_set_transaction(global_=kind == "GLOBAL") 6405 6406 left = self._parse_primary() or self._parse_column() 6407 assignment_delimiter = self._match_texts(("=", "TO")) 6408 6409 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6410 self._retreat(index) 6411 return None 6412 6413 right = self._parse_statement() or self._parse_id_var() 6414 if isinstance(right, 
(exp.Column, exp.Identifier)): 6415 right = exp.var(right.name) 6416 6417 this = self.expression(exp.EQ, this=left, expression=right) 6418 return self.expression(exp.SetItem, this=this, kind=kind) 6419 6420 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6421 self._match_text_seq("TRANSACTION") 6422 characteristics = self._parse_csv( 6423 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6424 ) 6425 return self.expression( 6426 exp.SetItem, 6427 expressions=characteristics, 6428 kind="TRANSACTION", 6429 **{"global": global_}, # type: ignore 6430 ) 6431 6432 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6433 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6434 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6435 6436 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6437 index = self._index 6438 set_ = self.expression( 6439 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6440 ) 6441 6442 if self._curr: 6443 self._retreat(index) 6444 return self._parse_as_command(self._prev) 6445 6446 return set_ 6447 6448 def _parse_var_from_options( 6449 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6450 ) -> t.Optional[exp.Var]: 6451 start = self._curr 6452 if not start: 6453 return None 6454 6455 option = start.text.upper() 6456 continuations = options.get(option) 6457 6458 index = self._index 6459 self._advance() 6460 for keywords in continuations or []: 6461 if isinstance(keywords, str): 6462 keywords = (keywords,) 6463 6464 if self._match_text_seq(*keywords): 6465 option = f"{option} {' '.join(keywords)}" 6466 break 6467 else: 6468 if continuations or continuations is None: 6469 if raise_unmatched: 6470 self.raise_error(f"Unknown option {option}") 6471 6472 self._retreat(index) 6473 return None 6474 6475 return exp.var(option) 6476 6477 def _parse_as_command(self, start: Token) -> exp.Command: 6478 while self._curr: 6479 self._advance() 6480 text = self._find_sql(start, self._prev) 6481 size = len(start.text) 6482 self._warn_unsupported() 6483 return exp.Command(this=text[:size], expression=text[size:]) 6484 6485 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6486 settings = [] 6487 6488 self._match_l_paren() 6489 kind = self._parse_id_var() 6490 6491 if self._match(TokenType.L_PAREN): 6492 while True: 6493 key = self._parse_id_var() 6494 value = self._parse_primary() 6495 6496 if not key and value is None: 6497 break 6498 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6499 self._match(TokenType.R_PAREN) 6500 6501 self._match_r_paren() 6502 6503 return self.expression( 6504 exp.DictProperty, 6505 this=this, 6506 kind=kind.this if kind else None, 6507 settings=settings, 6508 ) 6509 6510 def _parse_dict_range(self, this: str) -> exp.DictRange: 6511 self._match_l_paren() 6512 has_min = self._match_text_seq("MIN") 6513 if has_min: 6514 min = self._parse_var() or self._parse_primary() 6515 self._match_text_seq("MAX") 6516 max = self._parse_var() or self._parse_primary() 6517 else: 6518 max = self._parse_var() or self._parse_primary() 6519 min = exp.Literal.number(0) 6520 self._match_r_paren() 6521 return self.expression(exp.DictRange, this=this, min=min, max=max) 6522 6523 def _parse_comprehension( 6524 self, this: t.Optional[exp.Expression] 6525 ) -> t.Optional[exp.Comprehension]: 6526 index = self._index 6527 expression = self._parse_column() 6528 if not 
self._match(TokenType.IN): 6529 self._retreat(index - 1) 6530 return None 6531 iterator = self._parse_column() 6532 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6533 return self.expression( 6534 exp.Comprehension, 6535 this=this, 6536 expression=expression, 6537 iterator=iterator, 6538 condition=condition, 6539 ) 6540 6541 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6542 if self._match(TokenType.HEREDOC_STRING): 6543 return self.expression(exp.Heredoc, this=self._prev.text) 6544 6545 if not self._match_text_seq("$"): 6546 return None 6547 6548 tags = ["$"] 6549 tag_text = None 6550 6551 if self._is_connected(): 6552 self._advance() 6553 tags.append(self._prev.text.upper()) 6554 else: 6555 self.raise_error("No closing $ found") 6556 6557 if tags[-1] != "$": 6558 if self._is_connected() and self._match_text_seq("$"): 6559 tag_text = tags[-1] 6560 tags.append("$") 6561 else: 6562 self.raise_error("No closing $ found") 6563 6564 heredoc_start = self._curr 6565 6566 while self._curr: 6567 if self._match_text_seq(*tags, advance=False): 6568 this = self._find_sql(heredoc_start, self._prev) 6569 self._advance(len(tags)) 6570 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6571 6572 self._advance() 6573 6574 self.raise_error(f"No closing {''.join(tags)} found") 6575 return None 6576 6577 def _find_parser( 6578 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6579 ) -> t.Optional[t.Callable]: 6580 if not self._curr: 6581 return None 6582 6583 index = self._index 6584 this = [] 6585 while True: 6586 # The current token might be multiple words 6587 curr = self._curr.text.upper() 6588 key = curr.split(" ") 6589 this.append(curr) 6590 6591 self._advance() 6592 result, trie = in_trie(trie, key) 6593 if result == TrieResult.FAILED: 6594 break 6595 6596 if result == TrieResult.EXISTS: 6597 subparser = parsers[" ".join(this)] 6598 return subparser 6599 6600 self._retreat(index) 6601 return None 6602 6603 def _match(self, token_type, advance=True, expression=None): 6604 if not self._curr: 6605 return None 6606 6607 if self._curr.token_type == token_type: 6608 if advance: 6609 self._advance() 6610 self._add_comments(expression) 6611 return True 6612 6613 return None 6614 6615 def _match_set(self, types, advance=True): 6616 if not self._curr: 6617 return None 6618 6619 if self._curr.token_type in types: 6620 if advance: 6621 self._advance() 6622 return True 6623 6624 return None 6625 6626 def _match_pair(self, token_type_a, token_type_b, advance=True): 6627 if not self._curr or not self._next: 6628 return None 6629 6630 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6631 if advance: 6632 self._advance(2) 6633 return True 6634 6635 return None 6636 6637 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6638 if not self._match(TokenType.L_PAREN, expression=expression): 6639 self.raise_error("Expecting (") 6640 6641 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6642 if not self._match(TokenType.R_PAREN, expression=expression): 6643 self.raise_error("Expecting )") 6644 6645 def _match_texts(self, texts, advance=True): 6646 if self._curr and self._curr.text.upper() in texts: 6647 if advance: 6648 self._advance() 6649 return True 6650 return None 6651 6652 def _match_text_seq(self, *texts, advance=True): 6653 index = self._index 6654 for text in texts: 6655 if self._curr and self._curr.text.upper() == text: 6656 self._advance() 6657 else: 6658 
self._retreat(index) 6659 return None 6660 6661 if not advance: 6662 self._retreat(index) 6663 6664 return True 6665 6666 def _replace_lambda( 6667 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6668 ) -> t.Optional[exp.Expression]: 6669 if not node: 6670 return node 6671 6672 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6673 6674 for column in node.find_all(exp.Column): 6675 typ = lambda_types.get(column.parts[0].name) 6676 if typ is not None: 6677 dot_or_id = column.to_dot() if column.table else column.this 6678 6679 if typ: 6680 dot_or_id = self.expression( 6681 exp.Cast, 6682 this=dot_or_id, 6683 to=typ, 6684 ) 6685 6686 parent = column.parent 6687 6688 while isinstance(parent, exp.Dot): 6689 if not isinstance(parent.parent, exp.Dot): 6690 parent.replace(dot_or_id) 6691 break 6692 parent = parent.parent 6693 else: 6694 if column is node: 6695 node = dot_or_id 6696 else: 6697 column.replace(dot_or_id) 6698 return node 6699 6700 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6701 start = self._prev 6702 6703 # Not to be confused with TRUNCATE(number, decimals) function call 6704 if self._match(TokenType.L_PAREN): 6705 self._retreat(self._index - 2) 6706 return self._parse_function() 6707 6708 # Clickhouse supports TRUNCATE DATABASE as well 6709 is_database = self._match(TokenType.DATABASE) 6710 6711 self._match(TokenType.TABLE) 6712 6713 exists = self._parse_exists(not_=False) 6714 6715 expressions = self._parse_csv( 6716 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6717 ) 6718 6719 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6720 6721 if self._match_text_seq("RESTART", "IDENTITY"): 6722 identity = "RESTART" 6723 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6724 identity = "CONTINUE" 6725 else: 6726 identity = None 6727 6728 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6729 option = self._prev.text 6730 else: 6731 option = None 6732 6733 partition = self._parse_partition() 6734 6735 # Fallback case 6736 if self._curr: 6737 return self._parse_as_command(start) 6738 6739 return self.expression( 6740 exp.TruncateTable, 6741 expressions=expressions, 6742 is_database=is_database, 6743 exists=exists, 6744 cluster=cluster, 6745 identity=identity, 6746 option=option, 6747 partition=partition, 6748 ) 6749 6750 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6751 this = self._parse_ordered(self._parse_opclass) 6752 6753 if not self._match(TokenType.WITH): 6754 return this 6755 6756 op = self._parse_var(any_token=True) 6757 6758 return self.expression(exp.WithOperator, this=this, op=op) 6759 6760 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6761 self._match(TokenType.EQ) 6762 self._match(TokenType.L_PAREN) 6763 6764 opts: t.List[t.Optional[exp.Expression]] = [] 6765 while self._curr and not self._match(TokenType.R_PAREN): 6766 if self._match_text_seq("FORMAT_NAME", "="): 6767 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6768 # so we parse it separately to use _parse_field() 6769 prop = self.expression( 6770 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6771 ) 6772 opts.append(prop) 6773 else: 6774 opts.append(self._parse_property()) 6775 6776 self._match(TokenType.COMMA) 6777 6778 return opts 6779 6780 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6781 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6782 6783 options = [] 6784 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6785 option = self._parse_var(any_token=True) 6786 prev = self._prev.text.upper() 6787 6788 # Different dialects might separate options and values by white space, "=" and "AS" 6789 self._match(TokenType.EQ) 6790 self._match(TokenType.ALIAS) 6791 6792 param = self.expression(exp.CopyParameter, this=option) 6793 6794 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6795 TokenType.L_PAREN, advance=False 6796 ): 6797 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6798 param.set("expressions", self._parse_wrapped_options()) 6799 elif prev == "FILE_FORMAT": 6800 # T-SQL's external file format case 6801 param.set("expression", self._parse_field()) 6802 else: 6803 param.set("expression", self._parse_unquoted_field()) 6804 6805 options.append(param) 6806 self._match(sep) 6807 6808 return options 6809 6810 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6811 expr = self.expression(exp.Credentials) 6812 6813 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6814 expr.set("storage", self._parse_field()) 6815 if self._match_text_seq("CREDENTIALS"): 6816 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6817 creds = ( 6818 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6819 ) 6820 expr.set("credentials", creds) 6821 if self._match_text_seq("ENCRYPTION"): 6822 expr.set("encryption", self._parse_wrapped_options()) 6823 if self._match_text_seq("IAM_ROLE"): 6824 expr.set("iam_role", self._parse_field()) 6825 if self._match_text_seq("REGION"): 6826 expr.set("region", self._parse_field()) 6827 6828 return expr 6829 6830 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6831 return self._parse_field() 6832 6833 def _parse_copy(self) -> exp.Copy | exp.Command: 6834 start = self._prev 6835 6836 self._match(TokenType.INTO) 6837 6838 this = ( 6839 self._parse_select(nested=True, parse_subquery_alias=False) 6840 if self._match(TokenType.L_PAREN, advance=False) 6841 else self._parse_table(schema=True) 6842 ) 6843 6844 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6845 6846 files = self._parse_csv(self._parse_file_location) 6847 credentials = self._parse_credentials() 6848 6849 self._match_text_seq("WITH") 6850 6851 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6852 6853 # Fallback case 6854 if self._curr: 6855 return self._parse_as_command(start) 6856 6857 return self.expression( 6858 exp.Copy, 6859 this=this, 6860 kind=kind, 6861 credentials=credentials, 6862 files=files, 6863 params=params, 6864 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
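A minimal construction sketch under those defaults (the dialect argument accepts a registered dialect name or instance, resolved via Dialect.get_or_raise):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Accumulate errors while parsing and raise one aggregated ParseError at the
# end, containing at most max_errors messages.
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,
    max_errors=5,
    dialect="duckdb",
)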
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
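A short usage sketch with the base Tokenizer, showing that one tree is produced per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)

trees = Parser().parse(tokens, sql=sql)
assert len(trees) == 2  # one syntax tree per SQL statement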
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
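For instance, a token list can be parsed directly into a registered target type; this sketch assumes exp.Condition is one of the keys in EXPRESSION_PARSERS, as it is in the base parser:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
tokens = Tokenizer().tokenize(sql)

# Returns a list like parse(); raises ParseError (with one recorded error per
# attempted type) if no candidate expression type succeeds.
condition = Parser().parse_into(exp.Condition, tokens, sql=sql)[0]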
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
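With ErrorLevel.RAISE, errors collected while parsing are surfaced here as one aggregated ParseError; with ErrorLevel.WARN they are only logged. A sketch, assuming the malformed input below records at least one error:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # FROM without a table name
parser = Parser(error_level=ErrorLevel.RAISE)

try:
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # check_errors runs at the end
except ParseError as e:
    print(len(e.errors))  # the individual error payloads are preserved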
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
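Each recorded error carries structured context (line, column, the highlighted token, and surrounding text), which survives in ParseError.errors. A sketch with the default IMMEDIATE level, where the first error is raised on the spot:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM (SELECT 1"  # missing closing parenthesis
try:
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["highlight"])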
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
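Parser subclasses use this method rather than instantiating expression classes directly, so that pending comments are attached and mandatory arguments are checked. A hypothetical subclass hook as a sketch (_parse_wrapped_term is illustrative, not part of the API):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_wrapped_term(self):
        # Hypothetical helper: wrap a parsed term in parentheses. Going through
        # self.expression (rather than exp.Paren(...)) attaches queued comments
        # and validates the node against its declared arg_types.
        return self.expression(exp.Paren, this=self._parse_term())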
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
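A sketch of the failure path: an expression missing a mandatory argument is reported through raise_error, so the outcome depends on error_level. This assumes exp.In, which declares its this argument as required:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # default error_level is IMMEDIATE

try:
    parser.validate_expression(exp.In())  # 'this' is mandatory but unset
except ParseError as e:
    print(e.errors[0]["description"])  # e.g. "Required keyword: 'this' missing for ..."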