sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Type TIMESTAMP / TIME WITH TIME ZONE does not support any modifiers
    if expression.is_type("timestamptz", "timetz"):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_date_cast(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    return exp.cast(arg, exp.DataType.Type.DATE) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_date_cast(expression.this)
    expr = _implicit_date_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            sep: str = " AS ",
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(
                expression, sep=sep, tablesample_keyword=tablesample_keyword
            )

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def generatedatearray_sql(self, expression: exp.GenerateDateArray) -> str:
            start = _implicit_date_cast(expression.args.get("start"))
            end = _implicit_date_cast(expression.args.get("end"))

            # BQ's GENERATE_DATE_ARRAY is transformed to DuckDB'S GENERATE_SERIES
            gen_series = exp.GenerateSeries(
                start=start, end=end, step=expression.args.get("interval")
            )

            # The result is TIMESTAMP array, so to match BQ's semantics we must cast it back to DATE array
            return self.sql(exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")))
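The transforms above can be exercised end to end through sqlglot's public API. A minimal sketch (illustrative queries; exact output strings depend on the installed sqlglot version):

import sqlglot

# GENERATE_SERIES has an inclusive end bound, RANGE an exclusive one; both
# parse into exp.GenerateSeries with "is_end_exclusive" set by
# _build_generate_series and re-emitted accordingly by generateseries_sql.
for fn in ("GENERATE_SERIES(0, 5)", "RANGE(0, 5)"):
    print(sqlglot.transpile(f"SELECT {fn}", read="duckdb", write="duckdb")[0])

# BigQuery's TIME_DIFF inverts the sign relative to DuckDB's DATE_DIFF, so
# _timediff_sql flips the start/end operands when converting.
print(
    sqlglot.transpile(
        "SELECT TIME_DIFF(TIME '10:00:00', TIME '08:00:00', HOUR)",
        read="bigquery",
        write="duckdb",
    )[0]
)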
class DuckDB(sqlglot.dialects.dialect.Dialect):
NULL_ORDERING = "nulls_are_last"

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
CONCAT_COALESCE = True

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
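A sketch of the observable effect when targeting a dialect with strict CONCAT semantics (Presto chosen for illustration; column names a and b are hypothetical):

import sqlglot

# DuckDB's CONCAT skips NULL arguments, so a strict target dialect may
# need the operands coalesced to preserve the semantics.
print(sqlglot.transpile("SELECT CONCAT(a, b)", read="duckdb", write="presto")[0])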
SUPPORTS_ORDER_BY_ALL = True

Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
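For example, ORDER BY ALL round-trips through the DuckDB dialect rather than being mistaken for a column reference:

import sqlglot

# "ALL" is preserved as the ordering target on a DuckDB round trip.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])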
SUPPORTS_FIXED_SIZE_ARRAYS = True

Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects which don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
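A small sketch of the parse-time effect (illustrative column/table names):

import sqlglot
from sqlglot import exp

# In DuckDB, INT[5] denotes a fixed-size array type, so x::INT[5] parses
# as a single Cast node rather than as a subscript over x::INT.
ast = sqlglot.parse_one("SELECT x::INT[5] FROM t", read="duckdb")
print(ast.find(exp.Cast))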
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
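Since DuckDB treats identifiers as case-insensitive, normalization can lowercase them; a minimal sketch:

import sqlglot
from sqlglot.dialects.duckdb import DuckDB

# Under NormalizationStrategy.CASE_INSENSITIVE, identifiers can be safely
# normalized regardless of the original casing (expected: "foobar").
print(DuckDB().normalize_identifier(sqlglot.exp.to_identifier("FooBar")).name)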
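The to_json_path override shown in the source above leaves DuckDB's JSON pointer syntax and [#-i] back-of-list subscripts unparsed. A short illustration (column name j is hypothetical):

import sqlglot

# A regular path goes through sqlglot's JSONPath machinery as usual.
print(sqlglot.transpile("SELECT j -> '$.a[0]' FROM t", read="duckdb", write="duckdb")[0])

# A JSON pointer path is returned verbatim instead of being parsed, which
# avoids a noisy warning or a mangled representation of the path.
print(sqlglot.transpile("SELECT j -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])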
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class DuckDB.Tokenizer(sqlglot.tokens.Tokenizer):
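Two tokenizer-level behaviors, sketched through the public API:

import sqlglot

# "//" is tokenized as integer division (TokenType.DIV) and round-trips.
print(sqlglot.transpile("SELECT 7 // 2", read="duckdb", write="duckdb")[0])

# Dollar-quoted heredoc strings are recognized via HEREDOC_STRINGS = ["$"].
print(sqlglot.transpile("SELECT $$hello$$", read="duckdb", write="duckdb")[0])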
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
288 class Parser(parser.Parser): 289 BITWISE = { 290 **parser.Parser.BITWISE, 291 TokenType.TILDA: exp.RegexpLike, 292 } 293 294 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 295 296 FUNCTIONS = { 297 **parser.Parser.FUNCTIONS, 298 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 299 "ARRAY_SORT": exp.SortArray.from_arg_list, 300 "DATEDIFF": _build_date_diff, 301 "DATE_DIFF": _build_date_diff, 302 "DATE_TRUNC": date_trunc_to_time, 303 "DATETRUNC": date_trunc_to_time, 304 "DECODE": lambda args: exp.Decode( 305 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 306 ), 307 "ENCODE": lambda args: exp.Encode( 308 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 309 ), 310 "EPOCH": exp.TimeToUnix.from_arg_list, 311 "EPOCH_MS": lambda args: exp.UnixToTime( 312 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 313 ), 314 "JSON": exp.ParseJSON.from_arg_list, 315 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 316 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 317 "LIST_HAS": exp.ArrayContains.from_arg_list, 318 "LIST_REVERSE_SORT": _build_sort_array_desc, 319 "LIST_SORT": exp.SortArray.from_arg_list, 320 "LIST_VALUE": lambda args: exp.Array(expressions=args), 321 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 322 "MAKE_TIMESTAMP": _build_make_timestamp, 323 "MEDIAN": lambda args: exp.PercentileCont( 324 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 325 ), 326 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 327 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 328 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 329 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 330 ), 331 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 332 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 333 this=seq_get(args, 0), 334 expression=seq_get(args, 1), 335 replacement=seq_get(args, 2), 336 modifiers=seq_get(args, 3), 337 ), 338 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 339 "STRING_SPLIT": exp.Split.from_arg_list, 340 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 341 "STRING_TO_ARRAY": exp.Split.from_arg_list, 342 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 343 "STRUCT_PACK": exp.Struct.from_arg_list, 344 "STR_SPLIT": exp.Split.from_arg_list, 345 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 346 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 347 "UNNEST": exp.Explode.from_arg_list, 348 "XOR": binary_from_function(exp.BitwiseXor), 349 "GENERATE_SERIES": _build_generate_series(), 350 "RANGE": _build_generate_series(end_exclusive=True), 351 } 352 353 FUNCTIONS.pop("DATE_SUB") 354 355 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 356 FUNCTION_PARSERS.pop("DECODE") 357 358 NO_PAREN_FUNCTION_PARSERS = { 359 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 360 "MAP": lambda self: self._parse_map(), 361 } 362 363 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 364 TokenType.SEMI, 365 TokenType.ANTI, 366 } 367 368 PLACEHOLDER_PARSERS = { 369 **parser.Parser.PLACEHOLDER_PARSERS, 370 TokenType.PARAMETER: lambda self: ( 371 self.expression(exp.Placeholder, this=self._prev.text) 372 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 373 else None 374 ), 375 } 376 377 TYPE_CONVERTERS = { 378 # https://duckdb.org/docs/sql/data_types/numeric 379 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 380 # https://duckdb.org/docs/sql/data_types/text 381 
exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 382 } 383 384 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 385 # https://duckdb.org/docs/sql/samples.html 386 sample = super()._parse_table_sample(as_modifier=as_modifier) 387 if sample and not sample.args.get("method"): 388 if sample.args.get("size"): 389 sample.set("method", exp.var("RESERVOIR")) 390 else: 391 sample.set("method", exp.var("SYSTEM")) 392 393 return sample 394 395 def _parse_bracket( 396 self, this: t.Optional[exp.Expression] = None 397 ) -> t.Optional[exp.Expression]: 398 bracket = super()._parse_bracket(this) 399 if isinstance(bracket, exp.Bracket): 400 bracket.set("returns_list_for_maps", True) 401 402 return bracket 403 404 def _parse_map(self) -> exp.ToMap | exp.Map: 405 if self._match(TokenType.L_BRACE, advance=False): 406 return self.expression(exp.ToMap, this=self._parse_bracket()) 407 408 args = self._parse_wrapped_csv(self._parse_assignment) 409 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 410 411 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 412 return self._parse_field_def() 413 414 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 415 if len(aggregations) == 1: 416 return super()._pivot_column_names(aggregations) 417 return pivot_column_names(aggregations, dialect="duckdb")
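A hedged sketch of this FUNCTIONS table in action (the exact trees and output are indicative):

import sqlglot
from sqlglot import exp

# MEDIAN(x) parses into a PercentileCont node over the 0.5 fraction.
print(repr(sqlglot.parse_one("SELECT MEDIAN(x) FROM t", read="duckdb").find(exp.PercentileCont)))

# EPOCH_MS(...) parses into a UnixToTime node with millisecond scale.
print(repr(sqlglot.parse_one("SELECT EPOCH_MS(1609459200000)", read="duckdb")))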
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
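For example (a sketch; these constructor options are forwarded to the Parser by the top-level helpers):

import sqlglot
from sqlglot import ErrorLevel

# Fail fast on the first parse error instead of collecting messages.
sqlglot.parse_one("SELECT 1", read="duckdb", error_level=ErrorLevel.IMMEDIATE)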
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
419 class Generator(generator.Generator): 420 PARAMETER_TOKEN = "$" 421 NAMED_PLACEHOLDER_TOKEN = "$" 422 JOIN_HINTS = False 423 TABLE_HINTS = False 424 QUERY_HINTS = False 425 LIMIT_FETCH = "LIMIT" 426 STRUCT_DELIMITER = ("(", ")") 427 RENAME_TABLE_WITH_DB = False 428 NVL2_SUPPORTED = False 429 SEMI_ANTI_JOIN_WITH_SIDE = False 430 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 431 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 432 LAST_DAY_SUPPORTS_DATE_PART = False 433 JSON_KEY_VALUE_PAIR_SEP = "," 434 IGNORE_NULLS_IN_FUNC = True 435 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 436 SUPPORTS_CREATE_TABLE_LIKE = False 437 MULTI_ARG_DISTINCT = False 438 CAN_IMPLEMENT_ARRAY_ANY = True 439 SUPPORTS_TO_NUMBER = False 440 COPY_HAS_INTO_KEYWORD = False 441 STAR_EXCEPT = "EXCLUDE" 442 PAD_FILL_PATTERN_IS_REQUIRED = True 443 ARRAY_CONCAT_IS_VAR_LEN = False 444 445 TRANSFORMS = { 446 **generator.Generator.TRANSFORMS, 447 exp.ApproxDistinct: approx_count_distinct_sql, 448 exp.Array: inline_array_unless_query, 449 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 450 exp.ArrayFilter: rename_func("LIST_FILTER"), 451 exp.ArraySize: rename_func("ARRAY_LENGTH"), 452 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 453 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 454 exp.ArraySort: _array_sort_sql, 455 exp.ArraySum: rename_func("LIST_SUM"), 456 exp.BitwiseXor: rename_func("XOR"), 457 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 458 exp.CurrentDate: lambda *_: "CURRENT_DATE", 459 exp.CurrentTime: lambda *_: "CURRENT_TIME", 460 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 461 exp.DayOfMonth: rename_func("DAYOFMONTH"), 462 exp.DayOfWeek: rename_func("DAYOFWEEK"), 463 exp.DayOfYear: rename_func("DAYOFYEAR"), 464 exp.DataType: _datatype_sql, 465 exp.Date: _date_sql, 466 exp.DateAdd: _date_delta_sql, 467 exp.DateFromParts: rename_func("MAKE_DATE"), 468 exp.DateSub: _date_delta_sql, 469 exp.DateDiff: _date_diff_sql, 470 exp.DateStrToDate: datestrtodate_sql, 471 exp.Datetime: no_datetime_sql, 472 exp.DatetimeSub: _date_delta_sql, 473 exp.DatetimeAdd: _date_delta_sql, 474 exp.DateToDi: lambda self, 475 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 476 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 477 exp.DiToDate: lambda self, 478 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 479 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 480 exp.Explode: rename_func("UNNEST"), 481 exp.IntDiv: lambda self, e: self.binary(e, "//"), 482 exp.IsInf: rename_func("ISINF"), 483 exp.IsNan: rename_func("ISNAN"), 484 exp.JSONExtract: _arrow_json_extract_sql, 485 exp.JSONExtractScalar: _arrow_json_extract_sql, 486 exp.JSONFormat: _json_format_sql, 487 exp.LogicalOr: rename_func("BOOL_OR"), 488 exp.LogicalAnd: rename_func("BOOL_AND"), 489 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 490 exp.MonthsBetween: lambda self, e: self.func( 491 "DATEDIFF", 492 "'month'", 493 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 494 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 495 ), 496 exp.PercentileCont: rename_func("QUANTILE_CONT"), 497 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 498 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
499 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 500 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 501 exp.RegexpExtract: regexp_extract_sql, 502 exp.RegexpReplace: lambda self, e: self.func( 503 "REGEXP_REPLACE", 504 e.this, 505 e.expression, 506 e.args.get("replacement"), 507 e.args.get("modifiers"), 508 ), 509 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 510 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 511 exp.Return: lambda self, e: self.sql(e, "this"), 512 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 513 exp.Rand: rename_func("RANDOM"), 514 exp.SafeDivide: no_safe_divide_sql, 515 exp.Split: rename_func("STR_SPLIT"), 516 exp.SortArray: _sort_array_sql, 517 exp.StrPosition: str_position_sql, 518 exp.StrToUnix: lambda self, e: self.func( 519 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 520 ), 521 exp.Struct: _struct_sql, 522 exp.TimeAdd: _date_delta_sql, 523 exp.Time: no_time_sql, 524 exp.TimeDiff: _timediff_sql, 525 exp.Timestamp: no_timestamp_sql, 526 exp.TimestampDiff: lambda self, e: self.func( 527 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 528 ), 529 exp.TimestampTrunc: timestamptrunc_sql(), 530 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 531 exp.TimeStrToTime: timestrtotime_sql, 532 exp.TimeStrToUnix: lambda self, e: self.func( 533 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 534 ), 535 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 536 exp.TimeToUnix: rename_func("EPOCH"), 537 exp.TsOrDiToDi: lambda self, 538 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 539 exp.TsOrDsAdd: _date_delta_sql, 540 exp.TsOrDsDiff: lambda self, e: self.func( 541 "DATE_DIFF", 542 f"'{e.args.get('unit') or 'DAY'}'", 543 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 544 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 545 ), 546 exp.UnixToStr: lambda self, e: self.func( 547 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 548 ), 549 exp.UnixToTime: _unix_to_time_sql, 550 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 551 exp.VariancePop: rename_func("VAR_POP"), 552 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 553 exp.Xor: bool_xor_sql, 554 } 555 556 SUPPORTED_JSON_PATH_PARTS = { 557 exp.JSONPathKey, 558 exp.JSONPathRoot, 559 exp.JSONPathSubscript, 560 exp.JSONPathWildcard, 561 } 562 563 TYPE_MAPPING = { 564 **generator.Generator.TYPE_MAPPING, 565 exp.DataType.Type.BINARY: "BLOB", 566 exp.DataType.Type.BPCHAR: "TEXT", 567 exp.DataType.Type.CHAR: "TEXT", 568 exp.DataType.Type.FLOAT: "REAL", 569 exp.DataType.Type.NCHAR: "TEXT", 570 exp.DataType.Type.NVARCHAR: "TEXT", 571 exp.DataType.Type.UINT: "UINTEGER", 572 exp.DataType.Type.VARBINARY: "BLOB", 573 exp.DataType.Type.ROWVERSION: "BLOB", 574 exp.DataType.Type.VARCHAR: "TEXT", 575 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 576 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 577 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 578 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 579 } 580 581 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 582 RESERVED_KEYWORDS = { 583 "array", 584 "analyse", 585 "union", 586 "all", 587 "when", 588 "in_p", 589 "default", 590 "create_p", 591 
"window", 592 "asymmetric", 593 "to", 594 "else", 595 "localtime", 596 "from", 597 "end_p", 598 "select", 599 "current_date", 600 "foreign", 601 "with", 602 "grant", 603 "session_user", 604 "or", 605 "except", 606 "references", 607 "fetch", 608 "limit", 609 "group_p", 610 "leading", 611 "into", 612 "collate", 613 "offset", 614 "do", 615 "then", 616 "localtimestamp", 617 "check_p", 618 "lateral_p", 619 "current_role", 620 "where", 621 "asc_p", 622 "placing", 623 "desc_p", 624 "user", 625 "unique", 626 "initially", 627 "column", 628 "both", 629 "some", 630 "as", 631 "any", 632 "only", 633 "deferrable", 634 "null_p", 635 "current_time", 636 "true_p", 637 "table", 638 "case", 639 "trailing", 640 "variadic", 641 "for", 642 "on", 643 "distinct", 644 "false_p", 645 "not", 646 "constraint", 647 "current_timestamp", 648 "returning", 649 "primary", 650 "intersect", 651 "having", 652 "analyze", 653 "current_user", 654 "and", 655 "cast", 656 "symmetric", 657 "using", 658 "order", 659 "current_catalog", 660 } 661 662 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 663 664 # DuckDB doesn't generally support CREATE TABLE .. properties 665 # https://duckdb.org/docs/sql/statements/create_table.html 666 PROPERTIES_LOCATION = { 667 prop: exp.Properties.Location.UNSUPPORTED 668 for prop in generator.Generator.PROPERTIES_LOCATION 669 } 670 671 # There are a few exceptions (e.g. temporary tables) which are supported or 672 # can be transpiled to DuckDB, so we explicitly override them accordingly 673 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 674 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 675 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 676 677 def strtotime_sql(self, expression: exp.StrToTime) -> str: 678 if expression.args.get("safe"): 679 formatted_time = self.format_time(expression) 680 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 681 return str_to_time_sql(self, expression) 682 683 def strtodate_sql(self, expression: exp.StrToDate) -> str: 684 if expression.args.get("safe"): 685 formatted_time = self.format_time(expression) 686 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 687 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 688 689 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 690 arg = expression.this 691 if expression.args.get("safe"): 692 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 693 return self.func("JSON", arg) 694 695 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 696 nano = expression.args.get("nano") 697 if nano is not None: 698 expression.set( 699 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 700 ) 701 702 return rename_func("MAKE_TIME")(self, expression) 703 704 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 705 sec = expression.args["sec"] 706 707 milli = expression.args.get("milli") 708 if milli is not None: 709 sec += milli.pop() / exp.Literal.number(1000.0) 710 711 nano = expression.args.get("nano") 712 if nano is not None: 713 sec += nano.pop() / exp.Literal.number(1000000000.0) 714 715 if milli or nano: 716 expression.set("sec", sec) 717 718 return rename_func("MAKE_TIMESTAMP")(self, expression) 719 720 def tablesample_sql( 721 self, 722 expression: exp.TableSample, 723 sep: str = " AS ", 724 tablesample_keyword: t.Optional[str] = None, 725 ) -> str: 726 
if not isinstance(expression.parent, exp.Select): 727 # This sample clause only applies to a single source, not the entire resulting relation 728 tablesample_keyword = "TABLESAMPLE" 729 730 if expression.args.get("size"): 731 method = expression.args.get("method") 732 if method and method.name.upper() != "RESERVOIR": 733 self.unsupported( 734 f"Sampling method {method} is not supported with a discrete sample count, " 735 "defaulting to reservoir sampling" 736 ) 737 expression.set("method", exp.var("RESERVOIR")) 738 739 return super().tablesample_sql( 740 expression, sep=sep, tablesample_keyword=tablesample_keyword 741 ) 742 743 def interval_sql(self, expression: exp.Interval) -> str: 744 multiplier: t.Optional[int] = None 745 unit = expression.text("unit").lower() 746 747 if unit.startswith("week"): 748 multiplier = 7 749 if unit.startswith("quarter"): 750 multiplier = 90 751 752 if multiplier: 753 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 754 755 return super().interval_sql(expression) 756 757 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 758 if isinstance(expression.parent, exp.UserDefinedFunction): 759 return self.sql(expression, "this") 760 return super().columndef_sql(expression, sep) 761 762 def join_sql(self, expression: exp.Join) -> str: 763 if ( 764 expression.side == "LEFT" 765 and not expression.args.get("on") 766 and isinstance(expression.this, exp.Unnest) 767 ): 768 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 769 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 770 return super().join_sql(expression.on(exp.true())) 771 772 return super().join_sql(expression) 773 774 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 775 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 776 if expression.args.get("is_end_exclusive"): 777 return rename_func("RANGE")(self, expression) 778 779 return self.function_fallback_sql(expression) 780 781 def bracket_sql(self, expression: exp.Bracket) -> str: 782 this = expression.this 783 if isinstance(this, exp.Array): 784 this.replace(exp.paren(this)) 785 786 bracket = super().bracket_sql(expression) 787 788 if not expression.args.get("returns_list_for_maps"): 789 if not this.type: 790 from sqlglot.optimizer.annotate_types import annotate_types 791 792 this = annotate_types(this) 793 794 if this.is_type(exp.DataType.Type.MAP): 795 bracket = f"({bracket})[1]" 796 797 return bracket 798 799 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 800 expression_sql = self.sql(expression, "expression") 801 802 func = expression.this 803 if isinstance(func, exp.PERCENTILES): 804 # Make the order key the first arg and slide the fraction to the right 805 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 806 order_col = expression.find(exp.Ordered) 807 if order_col: 808 func.set("expression", func.this) 809 func.set("this", order_col.this) 810 811 this = self.sql(expression, "this").rstrip(")") 812 813 return f"{this}{expression_sql})" 814 815 def length_sql(self, expression: exp.Length) -> str: 816 arg = expression.this 817 818 # Dialects like BQ and Snowflake also accept binary values as args, so 819 # DDB will attempt to infer the type or resort to case/when resolution 820 if not expression.args.get("binary") or arg.is_string: 821 return self.func("LENGTH", arg) 822 823 if not arg.type: 824 from sqlglot.optimizer.annotate_types import annotate_types 
825 826 arg = annotate_types(arg) 827 828 if arg.is_type(*exp.DataType.TEXT_TYPES): 829 return self.func("LENGTH", arg) 830 831 # We need these casts to make duckdb's static type checker happy 832 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 833 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 834 835 case = ( 836 exp.case(self.func("TYPEOF", arg)) 837 .when( 838 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 839 ) # anonymous to break length_sql recursion 840 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 841 ) 842 843 return self.sql(case) 844 845 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 846 this = expression.this 847 key = expression.args.get("key") 848 key_sql = key.name if isinstance(key, exp.Expression) else "" 849 value_sql = self.sql(expression, "value") 850 851 kv_sql = f"{key_sql} := {value_sql}" 852 853 # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake, 854 # then we can generate STRUCT_PACK, which will build it, since STRUCT_INSERT({}, key := value) is not valid in DuckDB 855 if isinstance(this, exp.Struct) and not this.expressions: 856 return self.func("STRUCT_PACK", kv_sql) 857 858 return self.func("STRUCT_INSERT", this, kv_sql) 859 860 def generatedatearray_sql(self, expression: exp.GenerateDateArray) -> str: 861 start = _implicit_date_cast(expression.args.get("start")) 862 end = _implicit_date_cast(expression.args.get("end")) 863 864 # BQ's GENERATE_DATE_ARRAY is transformed to DuckDB's GENERATE_SERIES 865 gen_series = exp.GenerateSeries( 866 start=start, end=end, step=expression.args.get("interval") 867 ) 868 869 # The result is a TIMESTAMP array, so to match BQ's semantics we must cast it back to a DATE array 870 return self.sql(exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")))
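To make a couple of these generator settings concrete (a sketch; outputs indicative rather than guaranteed):

import sqlglot

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT (...) becomes EXCLUDE (...).
print(sqlglot.transpile("SELECT * EXCEPT (c) FROM t", read="bigquery", write="duckdb")[0])

# TRANSFORMS renames Spark's EXPLODE to DuckDB's UNNEST.
print(sqlglot.transpile("SELECT EXPLODE(arr) FROM t", read="spark", write="duckdb")[0])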
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
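For instance (a minimal sketch; formatting details are indicative), the pretty/indent options apply when generating DuckDB SQL through the top-level API:

import sqlglot

# pretty=True formats the output; indent/pad control its indentation.
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="duckdb", pretty=True)[0])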
683 def strtodate_sql(self, expression: exp.StrToDate) -> str: 684 if expression.args.get("safe"): 685 formatted_time = self.format_time(expression) 686 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 687 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
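A usage sketch for the safe branch (this assumes Snowflake's TRY_TO_DATE parses into exp.StrToDate with safe=True; output indicative):

import sqlglot

# The safe variant renders with TRY_STRPTIME, so unparseable inputs yield NULL.
print(sqlglot.transpile("SELECT TRY_TO_DATE('2024-13-01', 'yyyy-mm-dd')", read="snowflake", write="duckdb")[0])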
695 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 696 nano = expression.args.get("nano") 697 if nano is not None: 698 expression.set( 699 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 700 ) 701 702 return rename_func("MAKE_TIME")(self, expression)
704 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 705 sec = expression.args["sec"] 706 707 milli = expression.args.get("milli") 708 if milli is not None: 709 sec += milli.pop() / exp.Literal.number(1000.0) 710 711 nano = expression.args.get("nano") 712 if nano is not None: 713 sec += nano.pop() / exp.Literal.number(1000000000.0) 714 715 if milli or nano: 716 expression.set("sec", sec) 717 718 return rename_func("MAKE_TIMESTAMP")(self, expression)
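A sketch of how the fractional components above are folded into the seconds argument (assuming Snowflake's TIMESTAMP_FROM_PARTS parses into exp.TimestampFromParts; output indicative):

import sqlglot

# The nano component becomes sec + nano / 1000000000.0 before MAKE_TIMESTAMP.
sql = "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 15, 500000000)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])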
720 def tablesample_sql( 721 self, 722 expression: exp.TableSample, 723 sep: str = " AS ", 724 tablesample_keyword: t.Optional[str] = None, 725 ) -> str: 726 if not isinstance(expression.parent, exp.Select): 727 # This sample clause only applies to a single source, not the entire resulting relation 728 tablesample_keyword = "TABLESAMPLE" 729 730 if expression.args.get("size"): 731 method = expression.args.get("method") 732 if method and method.name.upper() != "RESERVOIR": 733 self.unsupported( 734 f"Sampling method {method} is not supported with a discrete sample count, " 735 "defaulting to reservoir sampling" 736 ) 737 expression.set("method", exp.var("RESERVOIR")) 738 739 return super().tablesample_sql( 740 expression, sep=sep, tablesample_keyword=tablesample_keyword 741 )
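Roughly (a sketch; the exact sample clause rendered is indicative), a discrete row count forces reservoir sampling:

import sqlglot

# A row-count sample with a non-reservoir method triggers an unsupported warning
# and is rewritten to RESERVOIR.
print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 10", read="duckdb", write="duckdb")[0])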
743 def interval_sql(self, expression: exp.Interval) -> str: 744 multiplier: t.Optional[int] = None 745 unit = expression.text("unit").lower() 746 747 if unit.startswith("week"): 748 multiplier = 7 749 if unit.startswith("quarter"): 750 multiplier = 90 751 752 if multiplier: 753 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 754 755 return super().interval_sql(expression)
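For example (a sketch; output indicative), week and quarter intervals are lowered to multiples of DAY:

import sqlglot

# INTERVAL ... WEEK becomes (7 * INTERVAL ... DAY); QUARTER uses a 90-day multiplier.
print(sqlglot.transpile("SELECT d + INTERVAL '2' WEEK FROM t", read="duckdb", write="duckdb")[0])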
762 def join_sql(self, expression: exp.Join) -> str: 763 if ( 764 expression.side == "LEFT" 765 and not expression.args.get("on") 766 and isinstance(expression.this, exp.Unnest) 767 ): 768 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 769 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 770 return super().join_sql(expression.on(exp.true())) 771 772 return super().join_sql(expression)
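A sketch of the dummy ON clause (output indicative) when the input comes from a dialect like BigQuery that allows LEFT JOIN UNNEST without ON:

import sqlglot

# DuckDB requires an ON clause here, so the generator adds ON TRUE.
sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])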
781 def bracket_sql(self, expression: exp.Bracket) -> str: 782 this = expression.this 783 if isinstance(this, exp.Array): 784 this.replace(exp.paren(this)) 785 786 bracket = super().bracket_sql(expression) 787 788 if not expression.args.get("returns_list_for_maps"): 789 if not this.type: 790 from sqlglot.optimizer.annotate_types import annotate_types 791 792 this = annotate_types(this) 793 794 if this.is_type(exp.DataType.Type.MAP): 795 bracket = f"({bracket})[1]" 796 797 return bracket
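A sketch of the map-subscript adjustment (this relies on the type annotator recognizing the MAP; output indicative):

import sqlglot

# In Spark, m[k] yields the value itself; since the corresponding DuckDB bracket
# yields a list here, the generator wraps the access as (m[k])[1].
print(sqlglot.transpile("SELECT MAP('a', 1)['a']", read="spark", write="duckdb")[0])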
799 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 800 expression_sql = self.sql(expression, "expression") 801 802 func = expression.this 803 if isinstance(func, exp.PERCENTILES): 804 # Make the order key the first arg and slide the fraction to the right 805 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 806 order_col = expression.find(exp.Ordered) 807 if order_col: 808 func.set("expression", func.this) 809 func.set("this", order_col.this) 810 811 this = self.sql(expression, "this").rstrip(")") 812 813 return f"{this}{expression_sql})"
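A sketch of the argument reordering for ordered-set aggregates (output indicative):

import sqlglot

# PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x) -> roughly QUANTILE_DISC(x, 0.5):
# the order key becomes the first argument and the fraction slides right.
sql = "SELECT PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x) FROM t"
print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])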
815 def length_sql(self, expression: exp.Length) -> str: 816 arg = expression.this 817 818 # Dialects like BQ and Snowflake also accept binary values as args, so 819 # DDB will attempt to infer the type or resort to case/when resolution 820 if not expression.args.get("binary") or arg.is_string: 821 return self.func("LENGTH", arg) 822 823 if not arg.type: 824 from sqlglot.optimizer.annotate_types import annotate_types 825 826 arg = annotate_types(arg) 827 828 if arg.is_type(*exp.DataType.TEXT_TYPES): 829 return self.func("LENGTH", arg) 830 831 # We need these casts to make duckdb's static type checker happy 832 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 833 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 834 835 case = ( 836 exp.case(self.func("TYPEOF", arg)) 837 .when( 838 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 839 ) # anonymous to break length_sql recursion 840 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 841 ) 842 843 return self.sql(case)
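A sketch of the binary-aware LENGTH handling (this assumes the source dialect marks LENGTH arguments as possibly binary, as BigQuery does for BYTES; output indicative):

import sqlglot

# A plain string literal stays LENGTH(...); a column of unknown type falls back
# to the CASE TYPEOF(...) WHEN 'VARCHAR' ... WHEN 'BLOB' ... resolution above.
print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])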
845 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 846 this = expression.this 847 key = expression.args.get("key") 848 key_sql = key.name if isinstance(key, exp.Expression) else "" 849 value_sql = self.sql(expression, "value") 850 851 kv_sql = f"{key_sql} := {value_sql}" 852 853 # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake, 854 # then we can generate STRUCT_PACK, which will build it, since STRUCT_INSERT({}, key := value) is not valid in DuckDB 855 if isinstance(this, exp.Struct) and not this.expressions: 856 return self.func("STRUCT_PACK", kv_sql) 857 858 return self.func("STRUCT_INSERT", this, kv_sql)
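A sketch for the empty-struct case described in the comment above (output indicative):

import sqlglot

# Empty input struct -> STRUCT_PACK(k := v); a non-empty one -> STRUCT_INSERT(...).
sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])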
860 def generatedatearray_sql(self, expression: exp.GenerateDateArray) -> str: 861 start = _implicit_date_cast(expression.args.get("start")) 862 end = _implicit_date_cast(expression.args.get("end")) 863 864 # BQ's GENERATE_DATE_ARRAY is transformed to DuckDB's GENERATE_SERIES 865 gen_series = exp.GenerateSeries( 866 start=start, end=end, step=expression.args.get("interval") 867 ) 868 869 # The result is a TIMESTAMP array, so to match BQ's semantics we must cast it back to a DATE array 870 return self.sql(exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")))
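A sketch of the BigQuery GENERATE_DATE_ARRAY mapping described above (output indicative):

import sqlglot

# GENERATE_DATE_ARRAY(start, end, step) becomes GENERATE_SERIES(...), and its
# TIMESTAMP[] result is cast back to DATE[] to preserve BigQuery's semantics.
sql = "SELECT GENERATE_DATE_ARRAY('2024-01-01', '2024-01-07', INTERVAL 1 DAY)"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])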
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql