sqlglot.dialects.bigquery
```python
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
```
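Before the class-level documentation below, a minimal sketch of how these pieces behave through the public API (the column and table names are illustrative):

```python
import sqlglot

# TO_HEX(MD5(..)) is collapsed into a single MD5 node by _build_to_hex, so
# dialects whose MD5 already returns a hex string can render it directly.
print(sqlglot.transpile("SELECT TO_HEX(MD5(col))", read="bigquery", write="duckdb")[0])

# A VALUES derived table is rewritten into an UNNEST of STRUCTs on the way
# out, per _derived_table_values_to_unnest.
print(
    sqlglot.transpile(
        "SELECT t.a FROM (VALUES (1), (2)) AS t(a)", read="duckdb", write="bigquery"
    )[0]
)
```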
class BigQuery(Dialect)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
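To make the BigQuery behavior concrete, the qualification pass can be run directly; a minimal sketch (the exact output shape may vary across sqlglot versions):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

# Under BigQuery semantics, the alias is forwarded to GROUP BY and HAVING, so
# my_id there resolves to the projection `id` rather than to data.my_id.
print(qualify(parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))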
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
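Since BigQuery requires the quantifier to be spelled out, a bare UNION read from another dialect is written out with an explicit DISTINCT; a small sketch:

import sqlglot

# BigQuery set operations must carry DISTINCT or ALL, so a bare UNION
# from another dialect should come out as UNION DISTINCT.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="bigquery")[0])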
def normalize_identifier(self, expression: E) -> E:
    if (
        isinstance(expression, exp.Identifier)
        and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
    ):
        parent = expression.parent
        while isinstance(parent, exp.Dot):
            parent = parent.parent

        # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
        # by default. The following check uses a heuristic to detect tables based on whether
        # they are qualified. This should generally be correct, because tables in BigQuery
        # must be qualified with at least a dataset, unless @@dataset_id is set.
        case_sensitive = (
            isinstance(parent, exp.UserDefinedFunction)
            or (
                isinstance(parent, exp.Table)
                and parent.db
                and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
            )
            or expression.meta.get("is_table")
        )
        if not case_sensitive:
            expression.set("this", expression.this.lower())

    return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
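In practice this hook is usually exercised through the normalize_identifiers optimizer pass; a minimal sketch (the identifiers used here are illustrative):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted column references are lowercased (they are case-insensitive in
# BigQuery), while the quoted, qualified table name keeps its casing.
expr = parse_one("SELECT Foo FROM `My-Project.DataSet.Tbl`", read="bigquery")
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))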
Mapping of an escaped sequence (e.g. the two-character sequence \n) to its unescaped version (the corresponding literal character, e.g. a newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    BYTE_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
    ]

    RAW_STRINGS = [
        (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTEINT": TokenType.INT,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "DATETIME": TokenType.TIMESTAMP,
        "DECLARE": TokenType.COMMAND,
        "ELSEIF": TokenType.COMMAND,
        "EXCEPTION": TokenType.COMMAND,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
    }
    KEYWORDS.pop("DIV")
    KEYWORDS.pop("VALUES")
    KEYWORDS.pop("/*+")
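A quick way to check these tokenizer settings is an identity transpile of a query that uses them; a sketch (the table and column names are illustrative):

import sqlglot

# Backtick identifiers, raw strings and byte strings should survive a
# BigQuery -> BigQuery round trip.
sql = r"SELECT r'\d+' AS pattern, b'abc' AS blob FROM `project.dataset.tbl`"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])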
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME": _build_datetime,
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": _build_regexp_extract,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        "FORMAT_DATETIME": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._is_connected() and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(
                this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
            )
            table.meta["quoted_table"] = True

        return table

    def _parse_column(self) -> t.Optional[exp.Expression]:
        column = super()._parse_column()
        if isinstance(column, exp.Column):
            parts = column.parts
            if any("." in p.name for p in parts):
                catalog, db, table, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                column.meta["quoted_column"] = True

        return column

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        unnest = super()._parse_unnest(with_alias=with_alias)

        if not unnest:
            return None

        unnest_expr = seq_get(unnest.expressions, 0)
        if unnest_expr:
            from sqlglot.optimizer.annotate_types import annotate_types

            unnest_expr = annotate_types(unnest_expr)

            # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
            # in contrast to other dialects such as DuckDB which flattens only the array by default
            if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                array_elem.is_type(exp.DataType.Type.STRUCT)
                for array_elem in unnest_expr._type.expressions
            ):
                unnest.set("explode_array", True)

        return unnest
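Two of the behaviors above, sketched (exact ASTs and quoting may vary across sqlglot versions; the table and column names are illustrative):

from sqlglot import parse_one

# Dashes in an unquoted project name are stitched back into a single
# identifier by _parse_table_part.
print(parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").sql("bigquery"))

# FUNCTIONS maps TIMESTAMP_MICROS to the dialect-agnostic exp.UnixToTime node.
print(repr(parse_one("SELECT TIMESTAMP_MICROS(1)", read="bigquery").selects[0]))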
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
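These options are normally supplied through the top-level API; a sketch, assuming keyword options are forwarded from parse_one to the dialect's Parser:

from sqlglot import parse_one
from sqlglot.errors import ErrorLevel

# With ErrorLevel.IGNORE, the parser returns whatever tree it managed to
# build for the incomplete query instead of raising.
tree = parse_one("SELECT 1 FROM", read="bigquery", error_level=ErrorLevel.IGNORE)
print(tree.sql("bigquery"))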
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"
    HEX_FUNC = "TO_HEX"
    WITH_PROPERTIES_PREFIX = "OPTIONS"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: _str_to_datetime_sql,
        exp.StrToTime: _str_to_datetime_sql,
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimestampFromParts: rename_func("DATETIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.TsOrDsToTimestamp: rename_func("DATETIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Uuid: lambda *_: "GENERATE_UUID()",
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.UUID: "STRING",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # WINDOW comes after QUALIFY
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
    AFTER_HAVING_MODIFIER_TRANSFORMS = {
        "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
        "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def mod_sql(self, expression: exp.Mod) -> str:
        this = expression.this
        expr = expression.expression
        return self.func(
            "MOD",
            this.unnest() if isinstance(this, exp.Paren) else this,
            expr.unnest() if isinstance(expr, exp.Paren) else expr,
        )

    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_DATETIME"
        else:
            func_name = "FORMAT_DATE"
        this = (
            expression.this
            if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
            else expression
        )
        return self.func(func_name, self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
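The effect of TYPE_MAPPING and TRANSFORMS is easiest to see via transpile; a couple of sketches (column names are illustrative):

import sqlglot

# TYPE_MAPPING rewrites generic type names to their BigQuery spellings,
# e.g. VARCHAR -> STRING and DOUBLE -> FLOAT64.
print(sqlglot.transpile("SELECT CAST(a AS VARCHAR), CAST(b AS DOUBLE)", write="bigquery")[0])

# TRANSFORMS renames functions, e.g. exp.CountIf -> COUNTIF.
print(sqlglot.transpile("SELECT COUNT_IF(x > 0)", write="bigquery")[0])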
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
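These keyword options also flow through the top-level helpers; for example, pretty-printing with the BigQuery generator:

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE x = 1", write="bigquery", pretty=True)[0])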
def column_parts(self, expression: exp.Column) -> str:
    if expression.meta.get("quoted_column"):
        # If a column reference is of the form `dataset.table`.name, we need
        # to preserve the quoted table path, otherwise the reference breaks
        table_parts = ".".join(p.name for p in expression.parts[:-1])
        table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
        return f"{table_path}.{self.sql(expression, 'this')}"

    return super().column_parts(expression)
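A round trip that should preserve the quoted table path (a sketch; the names are illustrative):

from sqlglot import parse_one

# The `dataset.table` prefix stays as a single quoted identifier.
sql = "SELECT `dataset.table`.col FROM `dataset.table`"
print(parse_one(sql, read="bigquery").sql("bigquery"))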
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
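The two quoting styles from the comment round-trip distinctly (a sketch; outputs may vary slightly by version):

from sqlglot import parse_one

print(parse_one("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`", read="bigquery").sql("bigquery"))
print(parse_one("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`", read="bigquery").sql("bigquery"))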
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    if isinstance(expression.this, exp.TsOrDsToTimestamp):
        func_name = "FORMAT_DATETIME"
    else:
        func_name = "FORMAT_DATE"
    this = (
        expression.this
        if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
        else expression
    )
    return self.func(func_name, self.format_time(expression), this.this)
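Round trips for both branches (a sketch; d and ts are illustrative columns):

import sqlglot

# FORMAT_DATE is kept for date-typed input, FORMAT_DATETIME for datetimes,
# matching the type check above.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d)", read="bigquery", write="bigquery")[0])
print(sqlglot.transpile("SELECT FORMAT_DATETIME('%Y', ts)", read="bigquery", write="bigquery")[0])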
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
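For example, converting a Postgres AT TIME ZONE expression (a sketch; ts is an illustrative column and output may vary by version):

import sqlglot

# Outside the special CAST ... FORMAT case, AT TIME ZONE becomes a
# TIMESTAMP(DATETIME(...)) conversion, per attimezone_sql above.
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])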
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
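Round trips for the offset wrappers (a sketch; arr and t are illustrative names):

import sqlglot

# OFFSET/ORDINAL/SAFE_OFFSET are folded into the bracket's offset/safe args
# at parse time and reconstructed here.
print(sqlglot.transpile("SELECT arr[OFFSET(0)] FROM t", read="bigquery", write="bigquery")[0])
print(sqlglot.transpile("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])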
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql