sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24 map_date_part, 25) 26from sqlglot.helper import flatten, is_float, is_int, seq_get 27from sqlglot.tokens import TokenType 28 29if t.TYPE_CHECKING: 30 from sqlglot._typing import E 31 32 33# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 34def _build_datetime( 35 name: str, kind: exp.DataType.Type, safe: bool = False 36) -> t.Callable[[t.List], exp.Func]: 37 def _builder(args: t.List) -> exp.Func: 38 value = seq_get(args, 0) 39 int_value = value is not None and is_int(value.name) 40 41 if isinstance(value, exp.Literal): 42 # Converts calls like `TO_TIME('01:02:03')` into casts 43 if len(args) == 1 and value.is_string and not int_value: 44 return exp.cast(value, kind) 45 46 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 47 # cases so we can transpile them, since they're relatively common 48 if kind == exp.DataType.Type.TIMESTAMP: 49 if int_value: 50 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 51 if not is_float(value.this): 52 return build_formatted_time(exp.StrToTime, "snowflake")(args) 53 54 if kind == exp.DataType.Type.DATE and not int_value: 55 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 56 formatted_exp.set("safe", safe) 57 return formatted_exp 58 59 return exp.Anonymous(this=name, expressions=args) 60 61 return _builder 62 63 64def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 65 expression = parser.build_var_map(args) 66 67 if isinstance(expression, exp.StarMap): 68 return expression 69 70 return exp.Struct( 71 expressions=[ 72 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 73 ] 74 ) 75 76 77def _build_datediff(args: t.List) -> exp.DateDiff: 78 return exp.DateDiff( 79 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 80 ) 81 82 83def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 84 def _builder(args: t.List) -> E: 85 return expr_type( 86 this=seq_get(args, 2), 87 expression=seq_get(args, 1), 88 unit=map_date_part(seq_get(args, 0)), 89 ) 90 91 return _builder 92 93 94# https://docs.snowflake.com/en/sql-reference/functions/div0 95def _build_if_from_div0(args: t.List) -> exp.If: 96 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 97 true = exp.Literal.number(0) 98 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 99 return exp.If(this=cond, true=true, false=false) 100 101 102# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 103def _build_if_from_zeroifnull(args: t.List) -> exp.If: 104 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 105 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 106 107 108# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 109def _build_if_from_nullifzero(args: t.List) -> exp.If: 110 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 111 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 112 113 114def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 115 flag = expression.text("flag") 116 117 if "i" not in flag: 118 flag += "i" 119 120 return self.func( 121 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 122 ) 123 124 125def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 126 if len(args) == 3: 127 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 128 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 129 130 131def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 132 regexp_replace = exp.RegexpReplace.from_arg_list(args) 133 134 if not regexp_replace.args.get("replacement"): 135 regexp_replace.set("replacement", exp.Literal.string("")) 136 137 return regexp_replace 138 139 140def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 141 def _parse(self: Snowflake.Parser) -> exp.Show: 142 return self._parse_show_snowflake(*args, **kwargs) 143 144 return _parse 145 146 147def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 148 trunc = date_trunc_to_time(args) 149 trunc.set("unit", map_date_part(trunc.args["unit"])) 150 return trunc 151 152 153def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 154 """ 155 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 156 so we need to unqualify them. 157 158 Example: 159 >>> from sqlglot import parse_one 160 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 161 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 162 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 163 """ 164 if isinstance(expression, exp.Pivot) and expression.unpivot: 165 expression = transforms.unqualify_columns(expression) 166 167 return expression 168 169 170def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 171 assert isinstance(expression, exp.Create) 172 173 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 174 if expression.this in exp.DataType.NESTED_TYPES: 175 expression.set("expressions", None) 176 return expression 177 178 props = expression.args.get("properties") 179 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 180 for schema_expression in expression.this.expressions: 181 if isinstance(schema_expression, exp.ColumnDef): 182 column_type = schema_expression.kind 183 if isinstance(column_type, exp.DataType): 184 column_type.transform(_flatten_structured_type, copy=False) 185 186 return expression 187 188 189class Snowflake(Dialect): 190 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 191 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 192 NULL_ORDERING = "nulls_are_large" 193 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 194 SUPPORTS_USER_DEFINED_TYPES = False 195 SUPPORTS_SEMI_ANTI_JOIN = False 196 PREFER_CTE_ALIAS_COLUMN = True 197 TABLESAMPLE_SIZE_IS_PERCENT = True 198 COPY_PARAMS_ARE_CSV = False 199 200 TIME_MAPPING = { 201 "YYYY": "%Y", 202 "yyyy": "%Y", 203 "YY": "%y", 204 "yy": "%y", 205 "MMMM": "%B", 206 "mmmm": "%B", 207 "MON": "%b", 208 "mon": "%b", 209 "MM": "%m", 210 "mm": "%m", 211 "DD": "%d", 212 "dd": "%-d", 213 "DY": "%a", 214 "dy": "%w", 215 "HH24": "%H", 216 "hh24": "%H", 217 "HH12": "%I", 218 "hh12": "%I", 219 "MI": "%M", 220 "mi": "%M", 221 "SS": "%S", 222 "ss": "%S", 223 "FF": "%f", 224 "ff": "%f", 225 "FF6": "%f", 226 "ff6": "%f", 227 } 228 229 def quote_identifier(self, expression: E, identify: bool = True) -> E: 230 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 231 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 232 if ( 233 isinstance(expression, exp.Identifier) 234 and isinstance(expression.parent, exp.Table) 235 and expression.name.lower() == "dual" 236 ): 237 return expression # type: ignore 238 239 return super().quote_identifier(expression, identify=identify) 240 241 class Parser(parser.Parser): 242 IDENTIFY_PIVOT_STRINGS = True 243 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 244 COLON_IS_VARIANT_EXTRACT = True 245 246 ID_VAR_TOKENS = { 247 *parser.Parser.ID_VAR_TOKENS, 248 TokenType.MATCH_CONDITION, 249 } 250 251 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 252 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 253 254 FUNCTIONS = { 255 **parser.Parser.FUNCTIONS, 256 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 257 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 258 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 259 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 260 this=seq_get(args, 1), expression=seq_get(args, 0) 261 ), 262 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 263 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 264 start=seq_get(args, 0), 265 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 266 step=seq_get(args, 2), 267 ), 268 "BITXOR": binary_from_function(exp.BitwiseXor), 269 "BIT_XOR": binary_from_function(exp.BitwiseXor), 270 "BOOLXOR": binary_from_function(exp.Xor), 271 "CONVERT_TIMEZONE": _build_convert_timezone, 272 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 273 "DATE_TRUNC": _date_trunc_to_time, 274 "DATEADD": _build_date_time_add(exp.DateAdd), 275 "DATEDIFF": _build_datediff, 276 "DIV0": _build_if_from_div0, 277 "FLATTEN": exp.Explode.from_arg_list, 278 "GET_PATH": lambda args, dialect: exp.JSONExtract( 279 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 280 ), 281 "IFF": exp.If.from_arg_list, 282 "LAST_DAY": lambda args: exp.LastDay( 283 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 284 ), 285 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 286 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LISTAGG": exp.GroupConcat.from_arg_list, 288 "MEDIAN": lambda args: exp.PercentileCont( 289 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 290 ), 291 "NULLIFZERO": _build_if_from_nullifzero, 292 "OBJECT_CONSTRUCT": _build_object_construct, 293 "REGEXP_REPLACE": _build_regexp_replace, 294 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 295 "RLIKE": exp.RegexpLike.from_arg_list, 296 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 297 "TIMEADD": _build_date_time_add(exp.TimeAdd), 298 "TIMEDIFF": _build_datediff, 299 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 300 "TIMESTAMPDIFF": _build_datediff, 301 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 302 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 303 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 304 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 305 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 306 "TO_NUMBER": lambda args: exp.ToNumber( 307 this=seq_get(args, 0), 308 format=seq_get(args, 1), 309 precision=seq_get(args, 2), 310 scale=seq_get(args, 3), 311 ), 312 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 313 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 314 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 315 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 316 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 317 "TO_VARCHAR": exp.ToChar.from_arg_list, 318 "ZEROIFNULL": _build_if_from_zeroifnull, 319 } 320 321 FUNCTION_PARSERS = { 322 **parser.Parser.FUNCTION_PARSERS, 323 "DATE_PART": lambda self: self._parse_date_part(), 324 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 325 } 326 FUNCTION_PARSERS.pop("TRIM") 327 328 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 329 330 RANGE_PARSERS = { 331 **parser.Parser.RANGE_PARSERS, 332 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 333 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 334 } 335 336 ALTER_PARSERS = { 337 **parser.Parser.ALTER_PARSERS, 338 "UNSET": lambda self: self.expression( 339 exp.Set, 340 tag=self._match_text_seq("TAG"), 341 expressions=self._parse_csv(self._parse_id_var), 342 unset=True, 343 ), 344 "SWAP": lambda self: self._parse_alter_table_swap(), 345 } 346 347 STATEMENT_PARSERS = { 348 **parser.Parser.STATEMENT_PARSERS, 349 TokenType.SHOW: lambda self: self._parse_show(), 350 } 351 352 PROPERTY_PARSERS = { 353 **parser.Parser.PROPERTY_PARSERS, 354 "LOCATION": lambda self: self._parse_location_property(), 355 } 356 357 TYPE_CONVERTERS = { 358 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 359 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 360 } 361 362 SHOW_PARSERS = { 363 "SCHEMAS": _show_parser("SCHEMAS"), 364 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 365 "OBJECTS": _show_parser("OBJECTS"), 366 "TERSE OBJECTS": _show_parser("OBJECTS"), 367 "TABLES": _show_parser("TABLES"), 368 "TERSE TABLES": _show_parser("TABLES"), 369 "VIEWS": _show_parser("VIEWS"), 370 "TERSE VIEWS": _show_parser("VIEWS"), 371 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 372 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 374 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 376 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "SEQUENCES": _show_parser("SEQUENCES"), 378 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 379 "COLUMNS": _show_parser("COLUMNS"), 380 "USERS": _show_parser("USERS"), 381 "TERSE USERS": _show_parser("USERS"), 382 } 383 384 CONSTRAINT_PARSERS = { 385 **parser.Parser.CONSTRAINT_PARSERS, 386 "WITH": lambda self: self._parse_with_constraint(), 387 "MASKING": lambda self: self._parse_with_constraint(), 388 "PROJECTION": lambda self: self._parse_with_constraint(), 389 "TAG": lambda self: self._parse_with_constraint(), 390 } 391 392 STAGED_FILE_SINGLE_TOKENS = { 393 TokenType.DOT, 394 TokenType.MOD, 395 TokenType.SLASH, 396 } 397 398 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 399 400 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 401 402 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 403 404 LAMBDAS = { 405 **parser.Parser.LAMBDAS, 406 TokenType.ARROW: lambda self, expressions: self.expression( 407 exp.Lambda, 408 this=self._replace_lambda( 409 self._parse_assignment(), 410 expressions, 411 ), 412 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 413 ), 414 } 415 416 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 417 if self._prev.token_type != TokenType.WITH: 418 self._retreat(self._index - 1) 419 420 if self._match_text_seq("MASKING", "POLICY"): 421 policy = self._parse_column() 422 return self.expression( 423 exp.MaskingPolicyColumnConstraint, 424 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 425 expressions=self._match(TokenType.USING) 426 and self._parse_wrapped_csv(self._parse_id_var), 427 ) 428 if self._match_text_seq("PROJECTION", "POLICY"): 429 policy = self._parse_column() 430 return self.expression( 431 exp.ProjectionPolicyColumnConstraint, 432 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 433 ) 434 if self._match(TokenType.TAG): 435 return self.expression( 436 exp.TagColumnConstraint, 437 expressions=self._parse_wrapped_csv(self._parse_property), 438 ) 439 440 return None 441 442 def _parse_create(self) -> exp.Create | exp.Command: 443 expression = super()._parse_create() 444 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 445 # Replace the Table node with the enclosed Identifier 446 expression.this.replace(expression.this.this) 447 448 return expression 449 450 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 451 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 452 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 453 this = self._parse_var() or self._parse_type() 454 455 if not this: 456 return None 457 458 self._match(TokenType.COMMA) 459 expression = self._parse_bitwise() 460 this = map_date_part(this) 461 name = this.name.upper() 462 463 if name.startswith("EPOCH"): 464 if name == "EPOCH_MILLISECOND": 465 scale = 10**3 466 elif name == "EPOCH_MICROSECOND": 467 scale = 10**6 468 elif name == "EPOCH_NANOSECOND": 469 scale = 10**9 470 else: 471 scale = None 472 473 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 474 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 475 476 if scale: 477 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 478 479 return to_unix 480 481 return self.expression(exp.Extract, this=this, expression=expression) 482 483 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 484 if is_map: 485 # Keys are strings in Snowflake's objects, see also: 486 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 487 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 488 return self._parse_slice(self._parse_string()) 489 490 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 491 492 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 493 lateral = super()._parse_lateral() 494 if not lateral: 495 return lateral 496 497 if isinstance(lateral.this, exp.Explode): 498 table_alias = lateral.args.get("alias") 499 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 500 if table_alias and not table_alias.args.get("columns"): 501 table_alias.set("columns", columns) 502 elif not table_alias: 503 exp.alias_(lateral, "_flattened", table=columns, copy=False) 504 505 return lateral 506 507 def _parse_table_parts( 508 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 509 ) -> exp.Table: 510 # https://docs.snowflake.com/en/user-guide/querying-stage 511 if self._match(TokenType.STRING, advance=False): 512 table = self._parse_string() 513 elif self._match_text_seq("@", advance=False): 514 table = self._parse_location_path() 515 else: 516 table = None 517 518 if table: 519 file_format = None 520 pattern = None 521 522 wrapped = self._match(TokenType.L_PAREN) 523 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 524 if self._match_text_seq("FILE_FORMAT", "=>"): 525 file_format = self._parse_string() or super()._parse_table_parts( 526 is_db_reference=is_db_reference 527 ) 528 elif self._match_text_seq("PATTERN", "=>"): 529 pattern = self._parse_string() 530 else: 531 break 532 533 self._match(TokenType.COMMA) 534 535 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 536 else: 537 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 538 539 return table 540 541 def _parse_id_var( 542 self, 543 any_token: bool = True, 544 tokens: t.Optional[t.Collection[TokenType]] = None, 545 ) -> t.Optional[exp.Expression]: 546 if self._match_text_seq("IDENTIFIER", "("): 547 identifier = ( 548 super()._parse_id_var(any_token=any_token, tokens=tokens) 549 or self._parse_string() 550 ) 551 self._match_r_paren() 552 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 553 554 return super()._parse_id_var(any_token=any_token, tokens=tokens) 555 556 def _parse_show_snowflake(self, this: str) -> exp.Show: 557 scope = None 558 scope_kind = None 559 560 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 561 # which is syntactically valid but has no effect on the output 562 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 563 564 history = self._match_text_seq("HISTORY") 565 566 like = self._parse_string() if self._match(TokenType.LIKE) else None 567 568 if self._match(TokenType.IN): 569 if self._match_text_seq("ACCOUNT"): 570 scope_kind = "ACCOUNT" 571 elif self._match_set(self.DB_CREATABLES): 572 scope_kind = self._prev.text.upper() 573 if self._curr: 574 scope = self._parse_table_parts() 575 elif self._curr: 576 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 577 scope = self._parse_table_parts() 578 579 return self.expression( 580 exp.Show, 581 **{ 582 "terse": terse, 583 "this": this, 584 "history": history, 585 "like": like, 586 "scope": scope, 587 "scope_kind": scope_kind, 588 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 589 "limit": self._parse_limit(), 590 "from": self._parse_string() if self._match(TokenType.FROM) else None, 591 }, 592 ) 593 594 def _parse_alter_table_swap(self) -> exp.SwapTable: 595 self._match_text_seq("WITH") 596 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 597 598 def _parse_location_property(self) -> exp.LocationProperty: 599 self._match(TokenType.EQ) 600 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 601 602 def _parse_file_location(self) -> t.Optional[exp.Expression]: 603 # Parse either a subquery or a staged file 604 return ( 605 self._parse_select(table=True, parse_subquery_alias=False) 606 if self._match(TokenType.L_PAREN, advance=False) 607 else self._parse_table_parts() 608 ) 609 610 def _parse_location_path(self) -> exp.Var: 611 parts = [self._advance_any(ignore_reserved=True)] 612 613 # We avoid consuming a comma token because external tables like @foo and @bar 614 # can be joined in a query with a comma separator, as well as closing paren 615 # in case of subqueries 616 while self._is_connected() and not self._match_set( 617 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 618 ): 619 parts.append(self._advance_any(ignore_reserved=True)) 620 621 return exp.var("".join(part.text for part in parts if part)) 622 623 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 624 this = super()._parse_lambda_arg() 625 626 if not this: 627 return this 628 629 typ = self._parse_types() 630 631 if typ: 632 return self.expression(exp.Cast, this=this, to=typ) 633 634 return this 635 636 class Tokenizer(tokens.Tokenizer): 637 STRING_ESCAPES = ["\\", "'"] 638 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 639 RAW_STRINGS = ["$$"] 640 COMMENTS = ["--", "//", ("/*", "*/")] 641 642 KEYWORDS = { 643 **tokens.Tokenizer.KEYWORDS, 644 "BYTEINT": TokenType.INT, 645 "CHAR VARYING": TokenType.VARCHAR, 646 "CHARACTER VARYING": TokenType.VARCHAR, 647 "EXCLUDE": TokenType.EXCEPT, 648 "ILIKE ANY": TokenType.ILIKE_ANY, 649 "LIKE ANY": TokenType.LIKE_ANY, 650 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 651 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 652 "MINUS": TokenType.EXCEPT, 653 "NCHAR VARYING": TokenType.VARCHAR, 654 "PUT": TokenType.COMMAND, 655 "REMOVE": TokenType.COMMAND, 656 "RM": TokenType.COMMAND, 657 "SAMPLE": TokenType.TABLE_SAMPLE, 658 "SQL_DOUBLE": TokenType.DOUBLE, 659 "SQL_VARCHAR": TokenType.VARCHAR, 660 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 661 "TAG": TokenType.TAG, 662 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 663 "TOP": TokenType.TOP, 664 "WAREHOUSE": TokenType.WAREHOUSE, 665 "STREAMLIT": TokenType.STREAMLIT, 666 } 667 KEYWORDS.pop("/*+") 668 669 SINGLE_TOKENS = { 670 **tokens.Tokenizer.SINGLE_TOKENS, 671 "$": TokenType.PARAMETER, 672 } 673 674 VAR_SINGLE_TOKENS = {"$"} 675 676 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 677 678 class Generator(generator.Generator): 679 PARAMETER_TOKEN = "$" 680 MATCHED_BY_SOURCE = False 681 SINGLE_STRING_INTERVAL = True 682 JOIN_HINTS = False 683 TABLE_HINTS = False 684 QUERY_HINTS = False 685 AGGREGATE_FILTER_SUPPORTED = False 686 SUPPORTS_TABLE_COPY = False 687 COLLATE_IS_FUNC = True 688 LIMIT_ONLY_LITERALS = True 689 JSON_KEY_VALUE_PAIR_SEP = "," 690 INSERT_OVERWRITE = " OVERWRITE INTO" 691 STRUCT_DELIMITER = ("(", ")") 692 COPY_PARAMS_ARE_WRAPPED = False 693 COPY_PARAMS_EQ_REQUIRED = True 694 STAR_EXCEPT = "EXCLUDE" 695 SUPPORTS_EXPLODING_PROJECTIONS = False 696 ARRAY_CONCAT_IS_VAR_LEN = False 697 698 TRANSFORMS = { 699 **generator.Generator.TRANSFORMS, 700 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 701 exp.ArgMax: rename_func("MAX_BY"), 702 exp.ArgMin: rename_func("MIN_BY"), 703 exp.Array: inline_array_sql, 704 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 705 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 706 exp.AtTimeZone: lambda self, e: self.func( 707 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 708 ), 709 exp.BitwiseXor: rename_func("BITXOR"), 710 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 711 exp.DateAdd: date_delta_sql("DATEADD"), 712 exp.DateDiff: date_delta_sql("DATEDIFF"), 713 exp.DateStrToDate: datestrtodate_sql, 714 exp.DayOfMonth: rename_func("DAYOFMONTH"), 715 exp.DayOfWeek: rename_func("DAYOFWEEK"), 716 exp.DayOfYear: rename_func("DAYOFYEAR"), 717 exp.Explode: rename_func("FLATTEN"), 718 exp.Extract: rename_func("DATE_PART"), 719 exp.FromTimeZone: lambda self, e: self.func( 720 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 721 ), 722 exp.GenerateSeries: lambda self, e: self.func( 723 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 724 ), 725 exp.GroupConcat: rename_func("LISTAGG"), 726 exp.If: if_sql(name="IFF", false_value="NULL"), 727 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 728 exp.JSONExtractScalar: lambda self, e: self.func( 729 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 730 ), 731 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 732 exp.JSONPathRoot: lambda *_: "", 733 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 734 exp.LogicalOr: rename_func("BOOLOR_AGG"), 735 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 736 exp.Max: max_or_greatest, 737 exp.Min: min_or_least, 738 exp.ParseJSON: lambda self, e: self.func( 739 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 740 ), 741 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 742 exp.PercentileCont: transforms.preprocess( 743 [transforms.add_within_group_for_percentiles] 744 ), 745 exp.PercentileDisc: transforms.preprocess( 746 [transforms.add_within_group_for_percentiles] 747 ), 748 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 749 exp.RegexpILike: _regexpilike_sql, 750 exp.Rand: rename_func("RANDOM"), 751 exp.Select: transforms.preprocess( 752 [ 753 transforms.eliminate_distinct_on, 754 transforms.explode_to_unnest(), 755 transforms.eliminate_semi_and_anti_joins, 756 ] 757 ), 758 exp.SHA: rename_func("SHA1"), 759 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 760 exp.StartsWith: rename_func("STARTSWITH"), 761 exp.StrPosition: lambda self, e: self.func( 762 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 763 ), 764 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 765 exp.Stuff: rename_func("INSERT"), 766 exp.TimeAdd: date_delta_sql("TIMEADD"), 767 exp.TimestampDiff: lambda self, e: self.func( 768 "TIMESTAMPDIFF", e.unit, e.expression, e.this 769 ), 770 exp.TimestampTrunc: timestamptrunc_sql(), 771 exp.TimeStrToTime: timestrtotime_sql, 772 exp.TimeToStr: lambda self, e: self.func( 773 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 774 ), 775 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 776 exp.ToArray: rename_func("TO_ARRAY"), 777 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 778 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 779 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 780 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 781 exp.TsOrDsToDate: lambda self, e: self.func( 782 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 783 ), 784 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 785 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 786 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 787 exp.Xor: rename_func("BOOLXOR"), 788 } 789 790 SUPPORTED_JSON_PATH_PARTS = { 791 exp.JSONPathKey, 792 exp.JSONPathRoot, 793 exp.JSONPathSubscript, 794 } 795 796 TYPE_MAPPING = { 797 **generator.Generator.TYPE_MAPPING, 798 exp.DataType.Type.NESTED: "OBJECT", 799 exp.DataType.Type.STRUCT: "OBJECT", 800 } 801 802 PROPERTIES_LOCATION = { 803 **generator.Generator.PROPERTIES_LOCATION, 804 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 805 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 806 } 807 808 UNSUPPORTED_VALUES_EXPRESSIONS = { 809 exp.Map, 810 exp.StarMap, 811 exp.Struct, 812 exp.VarMap, 813 } 814 815 def with_properties(self, properties: exp.Properties) -> str: 816 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 817 818 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 819 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 820 values_as_table = False 821 822 return super().values_sql(expression, values_as_table=values_as_table) 823 824 def datatype_sql(self, expression: exp.DataType) -> str: 825 expressions = expression.expressions 826 if ( 827 expressions 828 and expression.is_type(*exp.DataType.STRUCT_TYPES) 829 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 830 ): 831 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 832 return "OBJECT" 833 834 return super().datatype_sql(expression) 835 836 def tonumber_sql(self, expression: exp.ToNumber) -> str: 837 return self.func( 838 "TO_NUMBER", 839 expression.this, 840 expression.args.get("format"), 841 expression.args.get("precision"), 842 expression.args.get("scale"), 843 ) 844 845 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 846 milli = expression.args.get("milli") 847 if milli is not None: 848 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 849 expression.set("nano", milli_to_nano) 850 851 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 852 853 def trycast_sql(self, expression: exp.TryCast) -> str: 854 value = expression.this 855 856 if value.type is None: 857 from sqlglot.optimizer.annotate_types import annotate_types 858 859 value = annotate_types(value) 860 861 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 862 return super().trycast_sql(expression) 863 864 # TRY_CAST only works for string values in Snowflake 865 return self.cast_sql(expression) 866 867 def log_sql(self, expression: exp.Log) -> str: 868 if not expression.expression: 869 return self.func("LN", expression.this) 870 871 return super().log_sql(expression) 872 873 def unnest_sql(self, expression: exp.Unnest) -> str: 874 unnest_alias = expression.args.get("alias") 875 offset = expression.args.get("offset") 876 877 columns = [ 878 exp.to_identifier("seq"), 879 exp.to_identifier("key"), 880 exp.to_identifier("path"), 881 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 882 seq_get(unnest_alias.columns if unnest_alias else [], 0) 883 or exp.to_identifier("value"), 884 exp.to_identifier("this"), 885 ] 886 887 if unnest_alias: 888 unnest_alias.set("columns", columns) 889 else: 890 unnest_alias = exp.TableAlias(this="_u", columns=columns) 891 892 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 893 alias = self.sql(unnest_alias) 894 alias = f" AS {alias}" if alias else "" 895 return f"{explode}{alias}" 896 897 def show_sql(self, expression: exp.Show) -> str: 898 terse = "TERSE " if expression.args.get("terse") else "" 899 history = " HISTORY" if expression.args.get("history") else "" 900 like = self.sql(expression, "like") 901 like = f" LIKE {like}" if like else "" 902 903 scope = self.sql(expression, "scope") 904 scope = f" {scope}" if scope else "" 905 906 scope_kind = self.sql(expression, "scope_kind") 907 if scope_kind: 908 scope_kind = f" IN {scope_kind}" 909 910 starts_with = self.sql(expression, "starts_with") 911 if starts_with: 912 starts_with = f" STARTS WITH {starts_with}" 913 914 limit = self.sql(expression, "limit") 915 916 from_ = self.sql(expression, "from") 917 if from_: 918 from_ = f" FROM {from_}" 919 920 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 921 922 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 923 # Other dialects don't support all of the following parameters, so we need to 924 # generate default values as necessary to ensure the transpilation is correct 925 group = expression.args.get("group") 926 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 927 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 928 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 929 930 return self.func( 931 "REGEXP_SUBSTR", 932 expression.this, 933 expression.expression, 934 position, 935 occurrence, 936 parameters, 937 group, 938 ) 939 940 def except_op(self, expression: exp.Except) -> str: 941 if not expression.args.get("distinct"): 942 self.unsupported("EXCEPT with All is not supported in Snowflake") 943 return super().except_op(expression) 944 945 def intersect_op(self, expression: exp.Intersect) -> str: 946 if not expression.args.get("distinct"): 947 self.unsupported("INTERSECT with All is not supported in Snowflake") 948 return super().intersect_op(expression) 949 950 def describe_sql(self, expression: exp.Describe) -> str: 951 # Default to table if kind is unknown 952 kind_value = expression.args.get("kind") or "TABLE" 953 kind = f" {kind_value}" if kind_value else "" 954 this = f" {self.sql(expression, 'this')}" 955 expressions = self.expressions(expression, flat=True) 956 expressions = f" {expressions}" if expressions else "" 957 return f"DESCRIBE{kind}{this}{expressions}" 958 959 def generatedasidentitycolumnconstraint_sql( 960 self, expression: exp.GeneratedAsIdentityColumnConstraint 961 ) -> str: 962 start = expression.args.get("start") 963 start = f" START {start}" if start else "" 964 increment = expression.args.get("increment") 965 increment = f" INCREMENT {increment}" if increment else "" 966 return f"AUTOINCREMENT{start}{increment}" 967 968 def swaptable_sql(self, expression: exp.SwapTable) -> str: 969 this = self.sql(expression, "this") 970 return f"SWAP WITH {this}" 971 972 def cluster_sql(self, expression: exp.Cluster) -> str: 973 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 974 975 def struct_sql(self, expression: exp.Struct) -> str: 976 keys = [] 977 values = [] 978 979 for i, e in enumerate(expression.expressions): 980 if isinstance(e, exp.PropertyEQ): 981 keys.append( 982 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 983 ) 984 values.append(e.expression) 985 else: 986 keys.append(exp.Literal.string(f"_{i}")) 987 values.append(e) 988 989 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 990 991 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 992 if expression.args.get("weight") or expression.args.get("accuracy"): 993 self.unsupported( 994 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 995 ) 996 997 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 998 999 def alterset_sql(self, expression: exp.AlterSet) -> str: 1000 exprs = self.expressions(expression, flat=True) 1001 exprs = f" {exprs}" if exprs else "" 1002 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1003 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1004 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1005 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1006 tag = self.expressions(expression, key="tag", flat=True) 1007 tag = f" TAG {tag}" if tag else "" 1008 1009 return f"SET{exprs}{file_format}{copy_options}{tag}"
190class Snowflake(Dialect): 191 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 192 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 193 NULL_ORDERING = "nulls_are_large" 194 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 195 SUPPORTS_USER_DEFINED_TYPES = False 196 SUPPORTS_SEMI_ANTI_JOIN = False 197 PREFER_CTE_ALIAS_COLUMN = True 198 TABLESAMPLE_SIZE_IS_PERCENT = True 199 COPY_PARAMS_ARE_CSV = False 200 201 TIME_MAPPING = { 202 "YYYY": "%Y", 203 "yyyy": "%Y", 204 "YY": "%y", 205 "yy": "%y", 206 "MMMM": "%B", 207 "mmmm": "%B", 208 "MON": "%b", 209 "mon": "%b", 210 "MM": "%m", 211 "mm": "%m", 212 "DD": "%d", 213 "dd": "%-d", 214 "DY": "%a", 215 "dy": "%w", 216 "HH24": "%H", 217 "hh24": "%H", 218 "HH12": "%I", 219 "hh12": "%I", 220 "MI": "%M", 221 "mi": "%M", 222 "SS": "%S", 223 "ss": "%S", 224 "FF": "%f", 225 "ff": "%f", 226 "FF6": "%f", 227 "ff6": "%f", 228 } 229 230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify) 241 242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_VARIANT_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": _build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, "_flattened", table=columns, copy=False) 505 506 return lateral 507 508 def _parse_table_parts( 509 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 510 ) -> exp.Table: 511 # https://docs.snowflake.com/en/user-guide/querying-stage 512 if self._match(TokenType.STRING, advance=False): 513 table = self._parse_string() 514 elif self._match_text_seq("@", advance=False): 515 table = self._parse_location_path() 516 else: 517 table = None 518 519 if table: 520 file_format = None 521 pattern = None 522 523 wrapped = self._match(TokenType.L_PAREN) 524 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 525 if self._match_text_seq("FILE_FORMAT", "=>"): 526 file_format = self._parse_string() or super()._parse_table_parts( 527 is_db_reference=is_db_reference 528 ) 529 elif self._match_text_seq("PATTERN", "=>"): 530 pattern = self._parse_string() 531 else: 532 break 533 534 self._match(TokenType.COMMA) 535 536 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 537 else: 538 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 539 540 return table 541 542 def _parse_id_var( 543 self, 544 any_token: bool = True, 545 tokens: t.Optional[t.Collection[TokenType]] = None, 546 ) -> t.Optional[exp.Expression]: 547 if self._match_text_seq("IDENTIFIER", "("): 548 identifier = ( 549 super()._parse_id_var(any_token=any_token, tokens=tokens) 550 or self._parse_string() 551 ) 552 self._match_r_paren() 553 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 554 555 return super()._parse_id_var(any_token=any_token, tokens=tokens) 556 557 def _parse_show_snowflake(self, this: str) -> exp.Show: 558 scope = None 559 scope_kind = None 560 561 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 562 # which is syntactically valid but has no effect on the output 563 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 564 565 history = self._match_text_seq("HISTORY") 566 567 like = self._parse_string() if self._match(TokenType.LIKE) else None 568 569 if self._match(TokenType.IN): 570 if self._match_text_seq("ACCOUNT"): 571 scope_kind = "ACCOUNT" 572 elif self._match_set(self.DB_CREATABLES): 573 scope_kind = self._prev.text.upper() 574 if self._curr: 575 scope = self._parse_table_parts() 576 elif self._curr: 577 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 578 scope = self._parse_table_parts() 579 580 return self.expression( 581 exp.Show, 582 **{ 583 "terse": terse, 584 "this": this, 585 "history": history, 586 "like": like, 587 "scope": scope, 588 "scope_kind": scope_kind, 589 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 590 "limit": self._parse_limit(), 591 "from": self._parse_string() if self._match(TokenType.FROM) else None, 592 }, 593 ) 594 595 def _parse_alter_table_swap(self) -> exp.SwapTable: 596 self._match_text_seq("WITH") 597 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 598 599 def _parse_location_property(self) -> exp.LocationProperty: 600 self._match(TokenType.EQ) 601 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 602 603 def _parse_file_location(self) -> t.Optional[exp.Expression]: 604 # Parse either a subquery or a staged file 605 return ( 606 self._parse_select(table=True, parse_subquery_alias=False) 607 if self._match(TokenType.L_PAREN, advance=False) 608 else self._parse_table_parts() 609 ) 610 611 def _parse_location_path(self) -> exp.Var: 612 parts = [self._advance_any(ignore_reserved=True)] 613 614 # We avoid consuming a comma token because external tables like @foo and @bar 615 # can be joined in a query with a comma separator, as well as closing paren 616 # in case of subqueries 617 while self._is_connected() and not self._match_set( 618 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 619 ): 620 parts.append(self._advance_any(ignore_reserved=True)) 621 622 return exp.var("".join(part.text for part in parts if part)) 623 624 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 625 this = super()._parse_lambda_arg() 626 627 if not this: 628 return this 629 630 typ = self._parse_types() 631 632 if typ: 633 return self.expression(exp.Cast, this=this, to=typ) 634 635 return this 636 637 class Tokenizer(tokens.Tokenizer): 638 STRING_ESCAPES = ["\\", "'"] 639 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 640 RAW_STRINGS = ["$$"] 641 COMMENTS = ["--", "//", ("/*", "*/")] 642 643 KEYWORDS = { 644 **tokens.Tokenizer.KEYWORDS, 645 "BYTEINT": TokenType.INT, 646 "CHAR VARYING": TokenType.VARCHAR, 647 "CHARACTER VARYING": TokenType.VARCHAR, 648 "EXCLUDE": TokenType.EXCEPT, 649 "ILIKE ANY": TokenType.ILIKE_ANY, 650 "LIKE ANY": TokenType.LIKE_ANY, 651 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 652 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 653 "MINUS": TokenType.EXCEPT, 654 "NCHAR VARYING": TokenType.VARCHAR, 655 "PUT": TokenType.COMMAND, 656 "REMOVE": TokenType.COMMAND, 657 "RM": TokenType.COMMAND, 658 "SAMPLE": TokenType.TABLE_SAMPLE, 659 "SQL_DOUBLE": TokenType.DOUBLE, 660 "SQL_VARCHAR": TokenType.VARCHAR, 661 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 662 "TAG": TokenType.TAG, 663 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 664 "TOP": TokenType.TOP, 665 "WAREHOUSE": TokenType.WAREHOUSE, 666 "STREAMLIT": TokenType.STREAMLIT, 667 } 668 KEYWORDS.pop("/*+") 669 670 SINGLE_TOKENS = { 671 **tokens.Tokenizer.SINGLE_TOKENS, 672 "$": TokenType.PARAMETER, 673 } 674 675 VAR_SINGLE_TOKENS = {"$"} 676 677 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 678 679 class Generator(generator.Generator): 680 PARAMETER_TOKEN = "$" 681 MATCHED_BY_SOURCE = False 682 SINGLE_STRING_INTERVAL = True 683 JOIN_HINTS = False 684 TABLE_HINTS = False 685 QUERY_HINTS = False 686 AGGREGATE_FILTER_SUPPORTED = False 687 SUPPORTS_TABLE_COPY = False 688 COLLATE_IS_FUNC = True 689 LIMIT_ONLY_LITERALS = True 690 JSON_KEY_VALUE_PAIR_SEP = "," 691 INSERT_OVERWRITE = " OVERWRITE INTO" 692 STRUCT_DELIMITER = ("(", ")") 693 COPY_PARAMS_ARE_WRAPPED = False 694 COPY_PARAMS_EQ_REQUIRED = True 695 STAR_EXCEPT = "EXCLUDE" 696 SUPPORTS_EXPLODING_PROJECTIONS = False 697 ARRAY_CONCAT_IS_VAR_LEN = False 698 699 TRANSFORMS = { 700 **generator.Generator.TRANSFORMS, 701 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 702 exp.ArgMax: rename_func("MAX_BY"), 703 exp.ArgMin: rename_func("MIN_BY"), 704 exp.Array: inline_array_sql, 705 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 706 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 707 exp.AtTimeZone: lambda self, e: self.func( 708 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 709 ), 710 exp.BitwiseXor: rename_func("BITXOR"), 711 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 712 exp.DateAdd: date_delta_sql("DATEADD"), 713 exp.DateDiff: date_delta_sql("DATEDIFF"), 714 exp.DateStrToDate: datestrtodate_sql, 715 exp.DayOfMonth: rename_func("DAYOFMONTH"), 716 exp.DayOfWeek: rename_func("DAYOFWEEK"), 717 exp.DayOfYear: rename_func("DAYOFYEAR"), 718 exp.Explode: rename_func("FLATTEN"), 719 exp.Extract: rename_func("DATE_PART"), 720 exp.FromTimeZone: lambda self, e: self.func( 721 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 722 ), 723 exp.GenerateSeries: lambda self, e: self.func( 724 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 725 ), 726 exp.GroupConcat: rename_func("LISTAGG"), 727 exp.If: if_sql(name="IFF", false_value="NULL"), 728 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 729 exp.JSONExtractScalar: lambda self, e: self.func( 730 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 731 ), 732 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 733 exp.JSONPathRoot: lambda *_: "", 734 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 735 exp.LogicalOr: rename_func("BOOLOR_AGG"), 736 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 737 exp.Max: max_or_greatest, 738 exp.Min: min_or_least, 739 exp.ParseJSON: lambda self, e: self.func( 740 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 741 ), 742 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 743 exp.PercentileCont: transforms.preprocess( 744 [transforms.add_within_group_for_percentiles] 745 ), 746 exp.PercentileDisc: transforms.preprocess( 747 [transforms.add_within_group_for_percentiles] 748 ), 749 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 750 exp.RegexpILike: _regexpilike_sql, 751 exp.Rand: rename_func("RANDOM"), 752 exp.Select: transforms.preprocess( 753 [ 754 transforms.eliminate_distinct_on, 755 transforms.explode_to_unnest(), 756 transforms.eliminate_semi_and_anti_joins, 757 ] 758 ), 759 exp.SHA: rename_func("SHA1"), 760 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 761 exp.StartsWith: rename_func("STARTSWITH"), 762 exp.StrPosition: lambda self, e: self.func( 763 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 764 ), 765 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 766 exp.Stuff: rename_func("INSERT"), 767 exp.TimeAdd: date_delta_sql("TIMEADD"), 768 exp.TimestampDiff: lambda self, e: self.func( 769 "TIMESTAMPDIFF", e.unit, e.expression, e.this 770 ), 771 exp.TimestampTrunc: timestamptrunc_sql(), 772 exp.TimeStrToTime: timestrtotime_sql, 773 exp.TimeToStr: lambda self, e: self.func( 774 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 775 ), 776 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 777 exp.ToArray: rename_func("TO_ARRAY"), 778 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 779 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 780 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 781 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 782 exp.TsOrDsToDate: lambda self, e: self.func( 783 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 784 ), 785 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 786 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 787 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 788 exp.Xor: rename_func("BOOLXOR"), 789 } 790 791 SUPPORTED_JSON_PATH_PARTS = { 792 exp.JSONPathKey, 793 exp.JSONPathRoot, 794 exp.JSONPathSubscript, 795 } 796 797 TYPE_MAPPING = { 798 **generator.Generator.TYPE_MAPPING, 799 exp.DataType.Type.NESTED: "OBJECT", 800 exp.DataType.Type.STRUCT: "OBJECT", 801 } 802 803 PROPERTIES_LOCATION = { 804 **generator.Generator.PROPERTIES_LOCATION, 805 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 806 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 807 } 808 809 UNSUPPORTED_VALUES_EXPRESSIONS = { 810 exp.Map, 811 exp.StarMap, 812 exp.Struct, 813 exp.VarMap, 814 } 815 816 def with_properties(self, properties: exp.Properties) -> str: 817 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 818 819 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 820 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 821 values_as_table = False 822 823 return super().values_sql(expression, values_as_table=values_as_table) 824 825 def datatype_sql(self, expression: exp.DataType) -> str: 826 expressions = expression.expressions 827 if ( 828 expressions 829 and expression.is_type(*exp.DataType.STRUCT_TYPES) 830 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 831 ): 832 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 833 return "OBJECT" 834 835 return super().datatype_sql(expression) 836 837 def tonumber_sql(self, expression: exp.ToNumber) -> str: 838 return self.func( 839 "TO_NUMBER", 840 expression.this, 841 expression.args.get("format"), 842 expression.args.get("precision"), 843 expression.args.get("scale"), 844 ) 845 846 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 847 milli = expression.args.get("milli") 848 if milli is not None: 849 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 850 expression.set("nano", milli_to_nano) 851 852 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 853 854 def trycast_sql(self, expression: exp.TryCast) -> str: 855 value = expression.this 856 857 if value.type is None: 858 from sqlglot.optimizer.annotate_types import annotate_types 859 860 value = annotate_types(value) 861 862 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 863 return super().trycast_sql(expression) 864 865 # TRY_CAST only works for string values in Snowflake 866 return self.cast_sql(expression) 867 868 def log_sql(self, expression: exp.Log) -> str: 869 if not expression.expression: 870 return self.func("LN", expression.this) 871 872 return super().log_sql(expression) 873 874 def unnest_sql(self, expression: exp.Unnest) -> str: 875 unnest_alias = expression.args.get("alias") 876 offset = expression.args.get("offset") 877 878 columns = [ 879 exp.to_identifier("seq"), 880 exp.to_identifier("key"), 881 exp.to_identifier("path"), 882 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 883 seq_get(unnest_alias.columns if unnest_alias else [], 0) 884 or exp.to_identifier("value"), 885 exp.to_identifier("this"), 886 ] 887 888 if unnest_alias: 889 unnest_alias.set("columns", columns) 890 else: 891 unnest_alias = exp.TableAlias(this="_u", columns=columns) 892 893 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 894 alias = self.sql(unnest_alias) 895 alias = f" AS {alias}" if alias else "" 896 return f"{explode}{alias}" 897 898 def show_sql(self, expression: exp.Show) -> str: 899 terse = "TERSE " if expression.args.get("terse") else "" 900 history = " HISTORY" if expression.args.get("history") else "" 901 like = self.sql(expression, "like") 902 like = f" LIKE {like}" if like else "" 903 904 scope = self.sql(expression, "scope") 905 scope = f" {scope}" if scope else "" 906 907 scope_kind = self.sql(expression, "scope_kind") 908 if scope_kind: 909 scope_kind = f" IN {scope_kind}" 910 911 starts_with = self.sql(expression, "starts_with") 912 if starts_with: 913 starts_with = f" STARTS WITH {starts_with}" 914 915 limit = self.sql(expression, "limit") 916 917 from_ = self.sql(expression, "from") 918 if from_: 919 from_ = f" FROM {from_}" 920 921 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 922 923 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 924 # Other dialects don't support all of the following parameters, so we need to 925 # generate default values as necessary to ensure the transpilation is correct 926 group = expression.args.get("group") 927 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 928 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 929 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 930 931 return self.func( 932 "REGEXP_SUBSTR", 933 expression.this, 934 expression.expression, 935 position, 936 occurrence, 937 parameters, 938 group, 939 ) 940 941 def except_op(self, expression: exp.Except) -> str: 942 if not expression.args.get("distinct"): 943 self.unsupported("EXCEPT with All is not supported in Snowflake") 944 return super().except_op(expression) 945 946 def intersect_op(self, expression: exp.Intersect) -> str: 947 if not expression.args.get("distinct"): 948 self.unsupported("INTERSECT with All is not supported in Snowflake") 949 return super().intersect_op(expression) 950 951 def describe_sql(self, expression: exp.Describe) -> str: 952 # Default to table if kind is unknown 953 kind_value = expression.args.get("kind") or "TABLE" 954 kind = f" {kind_value}" if kind_value else "" 955 this = f" {self.sql(expression, 'this')}" 956 expressions = self.expressions(expression, flat=True) 957 expressions = f" {expressions}" if expressions else "" 958 return f"DESCRIBE{kind}{this}{expressions}" 959 960 def generatedasidentitycolumnconstraint_sql( 961 self, expression: exp.GeneratedAsIdentityColumnConstraint 962 ) -> str: 963 start = expression.args.get("start") 964 start = f" START {start}" if start else "" 965 increment = expression.args.get("increment") 966 increment = f" INCREMENT {increment}" if increment else "" 967 return f"AUTOINCREMENT{start}{increment}" 968 969 def swaptable_sql(self, expression: exp.SwapTable) -> str: 970 this = self.sql(expression, "this") 971 return f"SWAP WITH {this}" 972 973 def cluster_sql(self, expression: exp.Cluster) -> str: 974 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 975 976 def struct_sql(self, expression: exp.Struct) -> str: 977 keys = [] 978 values = [] 979 980 for i, e in enumerate(expression.expressions): 981 if isinstance(e, exp.PropertyEQ): 982 keys.append( 983 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 984 ) 985 values.append(e.expression) 986 else: 987 keys.append(exp.Literal.string(f"_{i}")) 988 values.append(e) 989 990 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 991 992 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 993 if expression.args.get("weight") or expression.args.get("accuracy"): 994 self.unsupported( 995 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 996 ) 997 998 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 999 1000 def alterset_sql(self, expression: exp.AlterSet) -> str: 1001 exprs = self.expressions(expression, flat=True) 1002 exprs = f" {exprs}" if exprs else "" 1003 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1004 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1005 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1006 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1007 tag = self.expressions(expression, key="tag", flat=True) 1008 tag = f" TAG {tag}" if tag else "" 1009 1010 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_VARIANT_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": _build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, "_flattened", table=columns, copy=False) 505 506 return lateral 507 508 def _parse_table_parts( 509 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 510 ) -> exp.Table: 511 # https://docs.snowflake.com/en/user-guide/querying-stage 512 if self._match(TokenType.STRING, advance=False): 513 table = self._parse_string() 514 elif self._match_text_seq("@", advance=False): 515 table = self._parse_location_path() 516 else: 517 table = None 518 519 if table: 520 file_format = None 521 pattern = None 522 523 wrapped = self._match(TokenType.L_PAREN) 524 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 525 if self._match_text_seq("FILE_FORMAT", "=>"): 526 file_format = self._parse_string() or super()._parse_table_parts( 527 is_db_reference=is_db_reference 528 ) 529 elif self._match_text_seq("PATTERN", "=>"): 530 pattern = self._parse_string() 531 else: 532 break 533 534 self._match(TokenType.COMMA) 535 536 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 537 else: 538 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 539 540 return table 541 542 def _parse_id_var( 543 self, 544 any_token: bool = True, 545 tokens: t.Optional[t.Collection[TokenType]] = None, 546 ) -> t.Optional[exp.Expression]: 547 if self._match_text_seq("IDENTIFIER", "("): 548 identifier = ( 549 super()._parse_id_var(any_token=any_token, tokens=tokens) 550 or self._parse_string() 551 ) 552 self._match_r_paren() 553 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 554 555 return super()._parse_id_var(any_token=any_token, tokens=tokens) 556 557 def _parse_show_snowflake(self, this: str) -> exp.Show: 558 scope = None 559 scope_kind = None 560 561 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 562 # which is syntactically valid but has no effect on the output 563 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 564 565 history = self._match_text_seq("HISTORY") 566 567 like = self._parse_string() if self._match(TokenType.LIKE) else None 568 569 if self._match(TokenType.IN): 570 if self._match_text_seq("ACCOUNT"): 571 scope_kind = "ACCOUNT" 572 elif self._match_set(self.DB_CREATABLES): 573 scope_kind = self._prev.text.upper() 574 if self._curr: 575 scope = self._parse_table_parts() 576 elif self._curr: 577 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 578 scope = self._parse_table_parts() 579 580 return self.expression( 581 exp.Show, 582 **{ 583 "terse": terse, 584 "this": this, 585 "history": history, 586 "like": like, 587 "scope": scope, 588 "scope_kind": scope_kind, 589 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 590 "limit": self._parse_limit(), 591 "from": self._parse_string() if self._match(TokenType.FROM) else None, 592 }, 593 ) 594 595 def _parse_alter_table_swap(self) -> exp.SwapTable: 596 self._match_text_seq("WITH") 597 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 598 599 def _parse_location_property(self) -> exp.LocationProperty: 600 self._match(TokenType.EQ) 601 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 602 603 def _parse_file_location(self) -> t.Optional[exp.Expression]: 604 # Parse either a subquery or a staged file 605 return ( 606 self._parse_select(table=True, parse_subquery_alias=False) 607 if self._match(TokenType.L_PAREN, advance=False) 608 else self._parse_table_parts() 609 ) 610 611 def _parse_location_path(self) -> exp.Var: 612 parts = [self._advance_any(ignore_reserved=True)] 613 614 # We avoid consuming a comma token because external tables like @foo and @bar 615 # can be joined in a query with a comma separator, as well as closing paren 616 # in case of subqueries 617 while self._is_connected() and not self._match_set( 618 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 619 ): 620 parts.append(self._advance_any(ignore_reserved=True)) 621 622 return exp.var("".join(part.text for part in parts if part)) 623 624 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 625 this = super()._parse_lambda_arg() 626 627 if not this: 628 return this 629 630 typ = self._parse_types() 631 632 if typ: 633 return self.expression(exp.Cast, this=this, to=typ) 634 635 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
637 class Tokenizer(tokens.Tokenizer): 638 STRING_ESCAPES = ["\\", "'"] 639 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 640 RAW_STRINGS = ["$$"] 641 COMMENTS = ["--", "//", ("/*", "*/")] 642 643 KEYWORDS = { 644 **tokens.Tokenizer.KEYWORDS, 645 "BYTEINT": TokenType.INT, 646 "CHAR VARYING": TokenType.VARCHAR, 647 "CHARACTER VARYING": TokenType.VARCHAR, 648 "EXCLUDE": TokenType.EXCEPT, 649 "ILIKE ANY": TokenType.ILIKE_ANY, 650 "LIKE ANY": TokenType.LIKE_ANY, 651 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 652 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 653 "MINUS": TokenType.EXCEPT, 654 "NCHAR VARYING": TokenType.VARCHAR, 655 "PUT": TokenType.COMMAND, 656 "REMOVE": TokenType.COMMAND, 657 "RM": TokenType.COMMAND, 658 "SAMPLE": TokenType.TABLE_SAMPLE, 659 "SQL_DOUBLE": TokenType.DOUBLE, 660 "SQL_VARCHAR": TokenType.VARCHAR, 661 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 662 "TAG": TokenType.TAG, 663 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 664 "TOP": TokenType.TOP, 665 "WAREHOUSE": TokenType.WAREHOUSE, 666 "STREAMLIT": TokenType.STREAMLIT, 667 } 668 KEYWORDS.pop("/*+") 669 670 SINGLE_TOKENS = { 671 **tokens.Tokenizer.SINGLE_TOKENS, 672 "$": TokenType.PARAMETER, 673 } 674 675 VAR_SINGLE_TOKENS = {"$"} 676 677 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
679 class Generator(generator.Generator): 680 PARAMETER_TOKEN = "$" 681 MATCHED_BY_SOURCE = False 682 SINGLE_STRING_INTERVAL = True 683 JOIN_HINTS = False 684 TABLE_HINTS = False 685 QUERY_HINTS = False 686 AGGREGATE_FILTER_SUPPORTED = False 687 SUPPORTS_TABLE_COPY = False 688 COLLATE_IS_FUNC = True 689 LIMIT_ONLY_LITERALS = True 690 JSON_KEY_VALUE_PAIR_SEP = "," 691 INSERT_OVERWRITE = " OVERWRITE INTO" 692 STRUCT_DELIMITER = ("(", ")") 693 COPY_PARAMS_ARE_WRAPPED = False 694 COPY_PARAMS_EQ_REQUIRED = True 695 STAR_EXCEPT = "EXCLUDE" 696 SUPPORTS_EXPLODING_PROJECTIONS = False 697 ARRAY_CONCAT_IS_VAR_LEN = False 698 699 TRANSFORMS = { 700 **generator.Generator.TRANSFORMS, 701 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 702 exp.ArgMax: rename_func("MAX_BY"), 703 exp.ArgMin: rename_func("MIN_BY"), 704 exp.Array: inline_array_sql, 705 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 706 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 707 exp.AtTimeZone: lambda self, e: self.func( 708 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 709 ), 710 exp.BitwiseXor: rename_func("BITXOR"), 711 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 712 exp.DateAdd: date_delta_sql("DATEADD"), 713 exp.DateDiff: date_delta_sql("DATEDIFF"), 714 exp.DateStrToDate: datestrtodate_sql, 715 exp.DayOfMonth: rename_func("DAYOFMONTH"), 716 exp.DayOfWeek: rename_func("DAYOFWEEK"), 717 exp.DayOfYear: rename_func("DAYOFYEAR"), 718 exp.Explode: rename_func("FLATTEN"), 719 exp.Extract: rename_func("DATE_PART"), 720 exp.FromTimeZone: lambda self, e: self.func( 721 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 722 ), 723 exp.GenerateSeries: lambda self, e: self.func( 724 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 725 ), 726 exp.GroupConcat: rename_func("LISTAGG"), 727 exp.If: if_sql(name="IFF", false_value="NULL"), 728 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 729 exp.JSONExtractScalar: lambda self, e: self.func( 730 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 731 ), 732 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 733 exp.JSONPathRoot: lambda *_: "", 734 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 735 exp.LogicalOr: rename_func("BOOLOR_AGG"), 736 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 737 exp.Max: max_or_greatest, 738 exp.Min: min_or_least, 739 exp.ParseJSON: lambda self, e: self.func( 740 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 741 ), 742 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 743 exp.PercentileCont: transforms.preprocess( 744 [transforms.add_within_group_for_percentiles] 745 ), 746 exp.PercentileDisc: transforms.preprocess( 747 [transforms.add_within_group_for_percentiles] 748 ), 749 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 750 exp.RegexpILike: _regexpilike_sql, 751 exp.Rand: rename_func("RANDOM"), 752 exp.Select: transforms.preprocess( 753 [ 754 transforms.eliminate_distinct_on, 755 transforms.explode_to_unnest(), 756 transforms.eliminate_semi_and_anti_joins, 757 ] 758 ), 759 exp.SHA: rename_func("SHA1"), 760 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 761 exp.StartsWith: rename_func("STARTSWITH"), 762 exp.StrPosition: lambda self, e: self.func( 763 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 764 ), 765 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 766 exp.Stuff: rename_func("INSERT"), 767 exp.TimeAdd: date_delta_sql("TIMEADD"), 768 exp.TimestampDiff: lambda self, e: self.func( 769 "TIMESTAMPDIFF", e.unit, e.expression, e.this 770 ), 771 exp.TimestampTrunc: timestamptrunc_sql(), 772 exp.TimeStrToTime: timestrtotime_sql, 773 exp.TimeToStr: lambda self, e: self.func( 774 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 775 ), 776 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 777 exp.ToArray: rename_func("TO_ARRAY"), 778 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 779 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 780 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 781 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 782 exp.TsOrDsToDate: lambda self, e: self.func( 783 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 784 ), 785 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 786 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 787 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 788 exp.Xor: rename_func("BOOLXOR"), 789 } 790 791 SUPPORTED_JSON_PATH_PARTS = { 792 exp.JSONPathKey, 793 exp.JSONPathRoot, 794 exp.JSONPathSubscript, 795 } 796 797 TYPE_MAPPING = { 798 **generator.Generator.TYPE_MAPPING, 799 exp.DataType.Type.NESTED: "OBJECT", 800 exp.DataType.Type.STRUCT: "OBJECT", 801 } 802 803 PROPERTIES_LOCATION = { 804 **generator.Generator.PROPERTIES_LOCATION, 805 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 806 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 807 } 808 809 UNSUPPORTED_VALUES_EXPRESSIONS = { 810 exp.Map, 811 exp.StarMap, 812 exp.Struct, 813 exp.VarMap, 814 } 815 816 def with_properties(self, properties: exp.Properties) -> str: 817 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 818 819 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 820 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 821 values_as_table = False 822 823 return super().values_sql(expression, values_as_table=values_as_table) 824 825 def datatype_sql(self, expression: exp.DataType) -> str: 826 expressions = expression.expressions 827 if ( 828 expressions 829 and expression.is_type(*exp.DataType.STRUCT_TYPES) 830 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 831 ): 832 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 833 return "OBJECT" 834 835 return super().datatype_sql(expression) 836 837 def tonumber_sql(self, expression: exp.ToNumber) -> str: 838 return self.func( 839 "TO_NUMBER", 840 expression.this, 841 expression.args.get("format"), 842 expression.args.get("precision"), 843 expression.args.get("scale"), 844 ) 845 846 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 847 milli = expression.args.get("milli") 848 if milli is not None: 849 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 850 expression.set("nano", milli_to_nano) 851 852 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 853 854 def trycast_sql(self, expression: exp.TryCast) -> str: 855 value = expression.this 856 857 if value.type is None: 858 from sqlglot.optimizer.annotate_types import annotate_types 859 860 value = annotate_types(value) 861 862 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 863 return super().trycast_sql(expression) 864 865 # TRY_CAST only works for string values in Snowflake 866 return self.cast_sql(expression) 867 868 def log_sql(self, expression: exp.Log) -> str: 869 if not expression.expression: 870 return self.func("LN", expression.this) 871 872 return super().log_sql(expression) 873 874 def unnest_sql(self, expression: exp.Unnest) -> str: 875 unnest_alias = expression.args.get("alias") 876 offset = expression.args.get("offset") 877 878 columns = [ 879 exp.to_identifier("seq"), 880 exp.to_identifier("key"), 881 exp.to_identifier("path"), 882 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 883 seq_get(unnest_alias.columns if unnest_alias else [], 0) 884 or exp.to_identifier("value"), 885 exp.to_identifier("this"), 886 ] 887 888 if unnest_alias: 889 unnest_alias.set("columns", columns) 890 else: 891 unnest_alias = exp.TableAlias(this="_u", columns=columns) 892 893 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 894 alias = self.sql(unnest_alias) 895 alias = f" AS {alias}" if alias else "" 896 return f"{explode}{alias}" 897 898 def show_sql(self, expression: exp.Show) -> str: 899 terse = "TERSE " if expression.args.get("terse") else "" 900 history = " HISTORY" if expression.args.get("history") else "" 901 like = self.sql(expression, "like") 902 like = f" LIKE {like}" if like else "" 903 904 scope = self.sql(expression, "scope") 905 scope = f" {scope}" if scope else "" 906 907 scope_kind = self.sql(expression, "scope_kind") 908 if scope_kind: 909 scope_kind = f" IN {scope_kind}" 910 911 starts_with = self.sql(expression, "starts_with") 912 if starts_with: 913 starts_with = f" STARTS WITH {starts_with}" 914 915 limit = self.sql(expression, "limit") 916 917 from_ = self.sql(expression, "from") 918 if from_: 919 from_ = f" FROM {from_}" 920 921 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 922 923 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 924 # Other dialects don't support all of the following parameters, so we need to 925 # generate default values as necessary to ensure the transpilation is correct 926 group = expression.args.get("group") 927 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 928 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 929 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 930 931 return self.func( 932 "REGEXP_SUBSTR", 933 expression.this, 934 expression.expression, 935 position, 936 occurrence, 937 parameters, 938 group, 939 ) 940 941 def except_op(self, expression: exp.Except) -> str: 942 if not expression.args.get("distinct"): 943 self.unsupported("EXCEPT with All is not supported in Snowflake") 944 return super().except_op(expression) 945 946 def intersect_op(self, expression: exp.Intersect) -> str: 947 if not expression.args.get("distinct"): 948 self.unsupported("INTERSECT with All is not supported in Snowflake") 949 return super().intersect_op(expression) 950 951 def describe_sql(self, expression: exp.Describe) -> str: 952 # Default to table if kind is unknown 953 kind_value = expression.args.get("kind") or "TABLE" 954 kind = f" {kind_value}" if kind_value else "" 955 this = f" {self.sql(expression, 'this')}" 956 expressions = self.expressions(expression, flat=True) 957 expressions = f" {expressions}" if expressions else "" 958 return f"DESCRIBE{kind}{this}{expressions}" 959 960 def generatedasidentitycolumnconstraint_sql( 961 self, expression: exp.GeneratedAsIdentityColumnConstraint 962 ) -> str: 963 start = expression.args.get("start") 964 start = f" START {start}" if start else "" 965 increment = expression.args.get("increment") 966 increment = f" INCREMENT {increment}" if increment else "" 967 return f"AUTOINCREMENT{start}{increment}" 968 969 def swaptable_sql(self, expression: exp.SwapTable) -> str: 970 this = self.sql(expression, "this") 971 return f"SWAP WITH {this}" 972 973 def cluster_sql(self, expression: exp.Cluster) -> str: 974 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 975 976 def struct_sql(self, expression: exp.Struct) -> str: 977 keys = [] 978 values = [] 979 980 for i, e in enumerate(expression.expressions): 981 if isinstance(e, exp.PropertyEQ): 982 keys.append( 983 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 984 ) 985 values.append(e.expression) 986 else: 987 keys.append(exp.Literal.string(f"_{i}")) 988 values.append(e) 989 990 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 991 992 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 993 if expression.args.get("weight") or expression.args.get("accuracy"): 994 self.unsupported( 995 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 996 ) 997 998 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 999 1000 def alterset_sql(self, expression: exp.AlterSet) -> str: 1001 exprs = self.expressions(expression, flat=True) 1002 exprs = f" {exprs}" if exprs else "" 1003 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1004 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1005 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1006 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1007 tag = self.expressions(expression, key="tag", flat=True) 1008 tag = f" TAG {tag}" if tag else "" 1009 1010 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
825 def datatype_sql(self, expression: exp.DataType) -> str: 826 expressions = expression.expressions 827 if ( 828 expressions 829 and expression.is_type(*exp.DataType.STRUCT_TYPES) 830 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 831 ): 832 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 833 return "OBJECT" 834 835 return super().datatype_sql(expression)
846 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 847 milli = expression.args.get("milli") 848 if milli is not None: 849 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 850 expression.set("nano", milli_to_nano) 851 852 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
854 def trycast_sql(self, expression: exp.TryCast) -> str: 855 value = expression.this 856 857 if value.type is None: 858 from sqlglot.optimizer.annotate_types import annotate_types 859 860 value = annotate_types(value) 861 862 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 863 return super().trycast_sql(expression) 864 865 # TRY_CAST only works for string values in Snowflake 866 return self.cast_sql(expression)
874 def unnest_sql(self, expression: exp.Unnest) -> str: 875 unnest_alias = expression.args.get("alias") 876 offset = expression.args.get("offset") 877 878 columns = [ 879 exp.to_identifier("seq"), 880 exp.to_identifier("key"), 881 exp.to_identifier("path"), 882 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 883 seq_get(unnest_alias.columns if unnest_alias else [], 0) 884 or exp.to_identifier("value"), 885 exp.to_identifier("this"), 886 ] 887 888 if unnest_alias: 889 unnest_alias.set("columns", columns) 890 else: 891 unnest_alias = exp.TableAlias(this="_u", columns=columns) 892 893 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 894 alias = self.sql(unnest_alias) 895 alias = f" AS {alias}" if alias else "" 896 return f"{explode}{alias}"
898 def show_sql(self, expression: exp.Show) -> str: 899 terse = "TERSE " if expression.args.get("terse") else "" 900 history = " HISTORY" if expression.args.get("history") else "" 901 like = self.sql(expression, "like") 902 like = f" LIKE {like}" if like else "" 903 904 scope = self.sql(expression, "scope") 905 scope = f" {scope}" if scope else "" 906 907 scope_kind = self.sql(expression, "scope_kind") 908 if scope_kind: 909 scope_kind = f" IN {scope_kind}" 910 911 starts_with = self.sql(expression, "starts_with") 912 if starts_with: 913 starts_with = f" STARTS WITH {starts_with}" 914 915 limit = self.sql(expression, "limit") 916 917 from_ = self.sql(expression, "from") 918 if from_: 919 from_ = f" FROM {from_}" 920 921 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
923 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 924 # Other dialects don't support all of the following parameters, so we need to 925 # generate default values as necessary to ensure the transpilation is correct 926 group = expression.args.get("group") 927 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 928 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 929 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 930 931 return self.func( 932 "REGEXP_SUBSTR", 933 expression.this, 934 expression.expression, 935 position, 936 occurrence, 937 parameters, 938 group, 939 )
951 def describe_sql(self, expression: exp.Describe) -> str: 952 # Default to table if kind is unknown 953 kind_value = expression.args.get("kind") or "TABLE" 954 kind = f" {kind_value}" if kind_value else "" 955 this = f" {self.sql(expression, 'this')}" 956 expressions = self.expressions(expression, flat=True) 957 expressions = f" {expressions}" if expressions else "" 958 return f"DESCRIBE{kind}{this}{expressions}"
960 def generatedasidentitycolumnconstraint_sql( 961 self, expression: exp.GeneratedAsIdentityColumnConstraint 962 ) -> str: 963 start = expression.args.get("start") 964 start = f" START {start}" if start else "" 965 increment = expression.args.get("increment") 966 increment = f" INCREMENT {increment}" if increment else "" 967 return f"AUTOINCREMENT{start}{increment}"
976 def struct_sql(self, expression: exp.Struct) -> str: 977 keys = [] 978 values = [] 979 980 for i, e in enumerate(expression.expressions): 981 if isinstance(e, exp.PropertyEQ): 982 keys.append( 983 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 984 ) 985 values.append(e.expression) 986 else: 987 keys.append(exp.Literal.string(f"_{i}")) 988 values.append(e) 989 990 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
992 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 993 if expression.args.get("weight") or expression.args.get("accuracy"): 994 self.unsupported( 995 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 996 ) 997 998 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1000 def alterset_sql(self, expression: exp.AlterSet) -> str: 1001 exprs = self.expressions(expression, flat=True) 1002 exprs = f" {exprs}" if exprs else "" 1003 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1004 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1005 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1006 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1007 tag = self.expressions(expression, key="tag", flat=True) 1008 tag = f" TAG {tag}" if tag else "" 1009 1010 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql