sqlglot.dialects.dialect

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from enum import Enum, auto
   6from functools import reduce
   7
   8from sqlglot import exp
   9from sqlglot.errors import ParseError
  10from sqlglot.generator import Generator, unsupported_args
  11from sqlglot.helper import AutoName, flatten, is_int, seq_get, subclasses
  12from sqlglot.jsonpath import JSONPathTokenizer, parse as parse_json_path
  13from sqlglot.parser import Parser
  14from sqlglot.time import TIMEZONES, format_time, subsecond_precision
  15from sqlglot.tokens import Token, Tokenizer, TokenType
  16from sqlglot.trie import new_trie
  17
  18DATE_ADD_OR_DIFF = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateDiff, exp.TsOrDsDiff]
  19DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
  20JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
  21
  22
  23if t.TYPE_CHECKING:
  24    from sqlglot._typing import B, E, F
  25
  26    from sqlglot.optimizer.annotate_types import TypeAnnotator
  27
  28    AnnotatorsType = t.Dict[t.Type[E], t.Callable[[TypeAnnotator, E], E]]
  29
  30logger = logging.getLogger("sqlglot")
  31
  32UNESCAPED_SEQUENCES = {
  33    "\\a": "\a",
  34    "\\b": "\b",
  35    "\\f": "\f",
  36    "\\n": "\n",
  37    "\\r": "\r",
  38    "\\t": "\t",
  39    "\\v": "\v",
  40    "\\\\": "\\",
  41}
  42
  43
  44def _annotate_with_type_lambda(data_type: exp.DataType.Type) -> t.Callable[[TypeAnnotator, E], E]:
  45    return lambda self, e: self._annotate_with_type(e, data_type)
  46
  47
  48class Dialects(str, Enum):
  49    """Dialects supported by SQLGLot."""
  50
  51    DIALECT = ""
  52
  53    ATHENA = "athena"
  54    BIGQUERY = "bigquery"
  55    CLICKHOUSE = "clickhouse"
  56    DATABRICKS = "databricks"
  57    DORIS = "doris"
  58    DRILL = "drill"
  59    DUCKDB = "duckdb"
  60    HIVE = "hive"
  61    MATERIALIZE = "materialize"
  62    MYSQL = "mysql"
  63    ORACLE = "oracle"
  64    POSTGRES = "postgres"
  65    PRESTO = "presto"
  66    PRQL = "prql"
  67    REDSHIFT = "redshift"
  68    RISINGWAVE = "risingwave"
  69    SNOWFLAKE = "snowflake"
  70    SPARK = "spark"
  71    SPARK2 = "spark2"
  72    SQLITE = "sqlite"
  73    STARROCKS = "starrocks"
  74    TABLEAU = "tableau"
  75    TERADATA = "teradata"
  76    TRINO = "trino"
  77    TSQL = "tsql"
  78
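Since Dialects subclasses str, each member compares equal to its lowercase name, so it can be passed anywhere a dialect name is accepted. A minimal usage sketch (assumes sqlglot is installed):

    import sqlglot
    from sqlglot.dialects.dialect import Dialects

    assert Dialects.DUCKDB == "duckdb"  # str-enum: a member equals its value
    assert sqlglot.transpile("SELECT 1", read=Dialects.DUCKDB) == ["SELECT 1"]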
  79
  80class NormalizationStrategy(str, AutoName):
  81    """Specifies the strategy according to which identifiers should be normalized."""
  82
  83    LOWERCASE = auto()
  84    """Unquoted identifiers are lowercased."""
  85
  86    UPPERCASE = auto()
  87    """Unquoted identifiers are uppercased."""
  88
  89    CASE_SENSITIVE = auto()
  90    """Always case-sensitive, regardless of quotes."""
  91
  92    CASE_INSENSITIVE = auto()
  93    """Always case-insensitive, regardless of quotes."""
  94
  95
  96class _Dialect(type):
  97    classes: t.Dict[str, t.Type[Dialect]] = {}
  98
  99    def __eq__(cls, other: t.Any) -> bool:
 100        if cls is other:
 101            return True
 102        if isinstance(other, str):
 103            return cls is cls.get(other)
 104        if isinstance(other, Dialect):
 105            return cls is type(other)
 106
 107        return False
 108
 109    def __hash__(cls) -> int:
 110        return hash(cls.__name__.lower())
 111
 112    @classmethod
 113    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 114        return cls.classes[key]
 115
 116    @classmethod
 117    def get(
 118        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 119    ) -> t.Optional[t.Type[Dialect]]:
 120        return cls.classes.get(key, default)
 121
 122    def __new__(cls, clsname, bases, attrs):
 123        klass = super().__new__(cls, clsname, bases, attrs)
 124        enum = Dialects.__members__.get(clsname.upper())
 125        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 126
 127        klass.TIME_TRIE = new_trie(klass.TIME_MAPPING)
 128        klass.FORMAT_TRIE = (
 129            new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE
 130        )
 131        klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()}
 132        klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING)
 133        klass.INVERSE_FORMAT_MAPPING = {v: k for k, v in klass.FORMAT_MAPPING.items()}
 134        klass.INVERSE_FORMAT_TRIE = new_trie(klass.INVERSE_FORMAT_MAPPING)
 135
 136        klass.INVERSE_CREATABLE_KIND_MAPPING = {
 137            v: k for k, v in klass.CREATABLE_KIND_MAPPING.items()
 138        }
 139
 140        base = seq_get(bases, 0)
 141        base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
 142        base_jsonpath_tokenizer = (getattr(base, "jsonpath_tokenizer_class", JSONPathTokenizer),)
 143        base_parser = (getattr(base, "parser_class", Parser),)
 144        base_generator = (getattr(base, "generator_class", Generator),)
 145
 146        klass.tokenizer_class = klass.__dict__.get(
 147            "Tokenizer", type("Tokenizer", base_tokenizer, {})
 148        )
 149        klass.jsonpath_tokenizer_class = klass.__dict__.get(
 150            "JSONPathTokenizer", type("JSONPathTokenizer", base_jsonpath_tokenizer, {})
 151        )
 152        klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {}))
 153        klass.generator_class = klass.__dict__.get(
 154            "Generator", type("Generator", base_generator, {})
 155        )
 156
 157        klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0]
 158        klass.IDENTIFIER_START, klass.IDENTIFIER_END = list(
 159            klass.tokenizer_class._IDENTIFIERS.items()
 160        )[0]
 161
 162        def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]:
 163            return next(
 164                (
 165                    (s, e)
 166                    for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items()
 167                    if t == token_type
 168                ),
 169                (None, None),
 170            )
 171
 172        klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING)
 173        klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING)
 174        klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
 175        klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)
 176
 177        if "\\" in klass.tokenizer_class.STRING_ESCAPES:
 178            klass.UNESCAPED_SEQUENCES = {
 179                **UNESCAPED_SEQUENCES,
 180                **klass.UNESCAPED_SEQUENCES,
 181            }
 182
 183        klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()}
 184
 185        klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS
 186
 187        if enum not in ("", "bigquery"):
 188            klass.generator_class.SELECT_KINDS = ()
 189
 190        if enum not in ("", "athena", "presto", "trino"):
 191            klass.generator_class.TRY_SUPPORTED = False
 192            klass.generator_class.SUPPORTS_UESCAPE = False
 193
 194        if enum not in ("", "databricks", "hive", "spark", "spark2"):
 195            modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()
 196            for modifier in ("cluster", "distribute", "sort"):
 197                modifier_transforms.pop(modifier, None)
 198
 199            klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms
 200
 201        if enum not in ("", "doris", "mysql"):
 202            klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | {
 203                TokenType.STRAIGHT_JOIN,
 204            }
 205            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 206                TokenType.STRAIGHT_JOIN,
 207            }
 208
 209        if not klass.SUPPORTS_SEMI_ANTI_JOIN:
 210            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 211                TokenType.ANTI,
 212                TokenType.SEMI,
 213            }
 214
 215        return klass
 216
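A sketch of what this metaclass provides: each Dialect subclass registers itself by name at class-creation time, enabling string lookups and string comparisons (assumes sqlglot is installed):

    import sqlglot.dialects  # importing the package registers the bundled dialects
    from sqlglot.dialects.dialect import Dialect

    duckdb = Dialect["duckdb"]                   # __getitem__ reads the registry
    assert duckdb == "duckdb"                    # _Dialect.__eq__ accepts strings
    assert Dialect.get("not_a_dialect") is None  # .get() falls back to a default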
 217
 218class Dialect(metaclass=_Dialect):
 219    INDEX_OFFSET = 0
 220    """The base index offset for arrays."""
 221
 222    WEEK_OFFSET = 0
 223    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
 224
 225    UNNEST_COLUMN_ONLY = False
 226    """Whether `UNNEST` table aliases are treated as column aliases."""
 227
 228    ALIAS_POST_TABLESAMPLE = False
 229    """Whether the table alias comes after tablesample."""
 230
 231    TABLESAMPLE_SIZE_IS_PERCENT = False
 232    """Whether a size in the table sample clause represents percentage."""
 233
 234    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
 235    """Specifies the strategy according to which identifiers should be normalized."""
 236
 237    IDENTIFIERS_CAN_START_WITH_DIGIT = False
 238    """Whether an unquoted identifier can start with a digit."""
 239
 240    DPIPE_IS_STRING_CONCAT = True
 241    """Whether the DPIPE token (`||`) is a string concatenation operator."""
 242
 243    STRICT_STRING_CONCAT = False
 244    """Whether `CONCAT`'s arguments must be strings."""
 245
 246    SUPPORTS_USER_DEFINED_TYPES = True
 247    """Whether user-defined data types are supported."""
 248
 249    SUPPORTS_SEMI_ANTI_JOIN = True
 250    """Whether `SEMI` or `ANTI` joins are supported."""
 251
 252    SUPPORTS_COLUMN_JOIN_MARKS = False
 253    """Whether the old-style outer join (+) syntax is supported."""
 254
 255    COPY_PARAMS_ARE_CSV = True
 256    """Separator of COPY statement parameters."""
 257
 258    NORMALIZE_FUNCTIONS: bool | str = "upper"
 259    """
 260    Determines how function names are going to be normalized.
 261    Possible values:
 262        "upper" or True: Convert names to uppercase.
 263        "lower": Convert names to lowercase.
 264        False: Disables function name normalization.
 265    """
 266
 267    LOG_BASE_FIRST: t.Optional[bool] = True
 268    """
 269    Whether the base comes first in the `LOG` function.
 270    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
 271    """
 272
 273    NULL_ORDERING = "nulls_are_small"
 274    """
 275    Default `NULL` ordering method to use if not explicitly set.
 276    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
 277    """
 278
 279    TYPED_DIVISION = False
 280    """
 281    Whether the behavior of `a / b` depends on the types of `a` and `b`.
 282    False means `a / b` is always float division.
 283    True means `a / b` is integer division if both `a` and `b` are integers.
 284    """
 285
 286    SAFE_DIVISION = False
 287    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
 288
 289    CONCAT_COALESCE = False
 290    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
 291
 292    HEX_LOWERCASE = False
 293    """Whether the `HEX` function returns a lowercase hexadecimal string."""
 294
 295    DATE_FORMAT = "'%Y-%m-%d'"
 296    DATEINT_FORMAT = "'%Y%m%d'"
 297    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
 298
 299    TIME_MAPPING: t.Dict[str, str] = {}
 300    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
 301
 302    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
 303    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
 304    FORMAT_MAPPING: t.Dict[str, str] = {}
 305    """
 306    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
  307    If empty, the corresponding trie will be constructed from `TIME_MAPPING`.
 308    """
 309
 310    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
 311    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
 312
 313    PSEUDOCOLUMNS: t.Set[str] = set()
 314    """
 315    Columns that are auto-generated by the engine corresponding to this dialect.
 316    For example, such columns may be excluded from `SELECT *` queries.
 317    """
 318
 319    PREFER_CTE_ALIAS_COLUMN = False
 320    """
 321    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
 322    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
 323    any projection aliases in the subquery.
 324
 325    For example,
 326        WITH y(c) AS (
 327            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
 328        ) SELECT c FROM y;
 329
 330        will be rewritten as
 331
 332        WITH y(c) AS (
 333            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
 334        ) SELECT c FROM y;
 335    """
 336
 342    FORCE_EARLY_ALIAS_REF_EXPANSION = False
 343    """
 344    Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
 345
 346    For example:
 347        WITH data AS (
 348        SELECT
 349            1 AS id,
 350            2 AS my_id
 351        )
 352        SELECT
 353            id AS my_id
 354        FROM
 355            data
 356        WHERE
 357            my_id = 1
 358        GROUP BY
  359            my_id
  360        HAVING
  361            my_id = 1
  362
  363    In most dialects, "my_id" would refer to "data.my_id" across the query, except:
  364        - BigQuery, which only forwards the alias to the GROUP BY and HAVING clauses, i.e.
  365          it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
  366        - Clickhouse, which forwards the alias across the whole query, i.e. it resolves
  367          to "WHERE id = 1 GROUP BY id HAVING id = 1"
  368    """
 369
 370    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
 371    """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
 372
 373    SUPPORTS_ORDER_BY_ALL = False
 374    """
  375    Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
 376    """
 377
 378    HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
 379    """
  380    Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3),
  381    as the former is of type INT[] while the latter is of type SUPER
 382    """
 383
 384    SUPPORTS_FIXED_SIZE_ARRAYS = False
 385    """
  386    Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts,
  387    e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this
  388    should be interpreted as a subscript/index operator.
 389    """
 390
 391    STRICT_JSON_PATH_SYNTAX = True
 392    """Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning."""
 393
 394    ON_CONDITION_EMPTY_BEFORE_ERROR = True
 395    """Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle)."""
 396
 397    ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True
 398    """Whether ArrayAgg needs to filter NULL values."""
 399
 400    REGEXP_EXTRACT_DEFAULT_GROUP = 0
 401    """The default value for the capturing group."""
 402
 403    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
 404        exp.Except: True,
 405        exp.Intersect: True,
 406        exp.Union: True,
 407    }
 408    """
 409    Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL`
 410    must be explicitly specified.
 411    """
 412
 413    CREATABLE_KIND_MAPPING: dict[str, str] = {}
 414    """
 415    Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
 416    equivalent of CREATE SCHEMA is CREATE DATABASE.
 417    """
 418
 419    # --- Autofilled ---
 420
 421    tokenizer_class = Tokenizer
 422    jsonpath_tokenizer_class = JSONPathTokenizer
 423    parser_class = Parser
 424    generator_class = Generator
 425
 426    # A trie of the time_mapping keys
 427    TIME_TRIE: t.Dict = {}
 428    FORMAT_TRIE: t.Dict = {}
 429
 430    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
 431    INVERSE_TIME_TRIE: t.Dict = {}
 432    INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
 433    INVERSE_FORMAT_TRIE: t.Dict = {}
 434
 435    INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
 436
 437    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
 438
 439    # Delimiters for string literals and identifiers
 440    QUOTE_START = "'"
 441    QUOTE_END = "'"
 442    IDENTIFIER_START = '"'
 443    IDENTIFIER_END = '"'
 444
 445    # Delimiters for bit, hex, byte and unicode literals
 446    BIT_START: t.Optional[str] = None
 447    BIT_END: t.Optional[str] = None
 448    HEX_START: t.Optional[str] = None
 449    HEX_END: t.Optional[str] = None
 450    BYTE_START: t.Optional[str] = None
 451    BYTE_END: t.Optional[str] = None
 452    UNICODE_START: t.Optional[str] = None
 453    UNICODE_END: t.Optional[str] = None
 454
 455    DATE_PART_MAPPING = {
 456        "Y": "YEAR",
 457        "YY": "YEAR",
 458        "YYY": "YEAR",
 459        "YYYY": "YEAR",
 460        "YR": "YEAR",
 461        "YEARS": "YEAR",
 462        "YRS": "YEAR",
 463        "MM": "MONTH",
 464        "MON": "MONTH",
 465        "MONS": "MONTH",
 466        "MONTHS": "MONTH",
 467        "D": "DAY",
 468        "DD": "DAY",
 469        "DAYS": "DAY",
 470        "DAYOFMONTH": "DAY",
 471        "DAY OF WEEK": "DAYOFWEEK",
 472        "WEEKDAY": "DAYOFWEEK",
 473        "DOW": "DAYOFWEEK",
 474        "DW": "DAYOFWEEK",
 475        "WEEKDAY_ISO": "DAYOFWEEKISO",
 476        "DOW_ISO": "DAYOFWEEKISO",
 477        "DW_ISO": "DAYOFWEEKISO",
 478        "DAY OF YEAR": "DAYOFYEAR",
 479        "DOY": "DAYOFYEAR",
 480        "DY": "DAYOFYEAR",
 481        "W": "WEEK",
 482        "WK": "WEEK",
 483        "WEEKOFYEAR": "WEEK",
 484        "WOY": "WEEK",
 485        "WY": "WEEK",
 486        "WEEK_ISO": "WEEKISO",
 487        "WEEKOFYEARISO": "WEEKISO",
 488        "WEEKOFYEAR_ISO": "WEEKISO",
 489        "Q": "QUARTER",
 490        "QTR": "QUARTER",
 491        "QTRS": "QUARTER",
 492        "QUARTERS": "QUARTER",
 493        "H": "HOUR",
 494        "HH": "HOUR",
 495        "HR": "HOUR",
 496        "HOURS": "HOUR",
 497        "HRS": "HOUR",
 498        "M": "MINUTE",
 499        "MI": "MINUTE",
 500        "MIN": "MINUTE",
 501        "MINUTES": "MINUTE",
 502        "MINS": "MINUTE",
 503        "S": "SECOND",
 504        "SEC": "SECOND",
 505        "SECONDS": "SECOND",
 506        "SECS": "SECOND",
 507        "MS": "MILLISECOND",
 508        "MSEC": "MILLISECOND",
 509        "MSECS": "MILLISECOND",
 510        "MSECOND": "MILLISECOND",
 511        "MSECONDS": "MILLISECOND",
 512        "MILLISEC": "MILLISECOND",
 513        "MILLISECS": "MILLISECOND",
 514        "MILLISECON": "MILLISECOND",
 515        "MILLISECONDS": "MILLISECOND",
 516        "US": "MICROSECOND",
 517        "USEC": "MICROSECOND",
 518        "USECS": "MICROSECOND",
 519        "MICROSEC": "MICROSECOND",
 520        "MICROSECS": "MICROSECOND",
 521        "USECOND": "MICROSECOND",
 522        "USECONDS": "MICROSECOND",
 523        "MICROSECONDS": "MICROSECOND",
 524        "NS": "NANOSECOND",
 525        "NSEC": "NANOSECOND",
 526        "NANOSEC": "NANOSECOND",
 527        "NSECOND": "NANOSECOND",
 528        "NSECONDS": "NANOSECOND",
 529        "NANOSECS": "NANOSECOND",
 530        "EPOCH_SECOND": "EPOCH",
 531        "EPOCH_SECONDS": "EPOCH",
 532        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
 533        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
 534        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
 535        "TZH": "TIMEZONE_HOUR",
 536        "TZM": "TIMEZONE_MINUTE",
 537        "DEC": "DECADE",
 538        "DECS": "DECADE",
 539        "DECADES": "DECADE",
 540        "MIL": "MILLENIUM",
 541        "MILS": "MILLENIUM",
 542        "MILLENIA": "MILLENIUM",
 543        "C": "CENTURY",
 544        "CENT": "CENTURY",
 545        "CENTS": "CENTURY",
 546        "CENTURIES": "CENTURY",
 547    }
 548
 549    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
 550        exp.DataType.Type.BIGINT: {
 551            exp.ApproxDistinct,
 552            exp.ArraySize,
 553            exp.Length,
 554        },
 555        exp.DataType.Type.BOOLEAN: {
 556            exp.Between,
 557            exp.Boolean,
 558            exp.In,
 559            exp.RegexpLike,
 560        },
 561        exp.DataType.Type.DATE: {
 562            exp.CurrentDate,
 563            exp.Date,
 564            exp.DateFromParts,
 565            exp.DateStrToDate,
 566            exp.DiToDate,
 567            exp.StrToDate,
 568            exp.TimeStrToDate,
 569            exp.TsOrDsToDate,
 570        },
 571        exp.DataType.Type.DATETIME: {
 572            exp.CurrentDatetime,
 573            exp.Datetime,
 574            exp.DatetimeAdd,
 575            exp.DatetimeSub,
 576        },
 577        exp.DataType.Type.DOUBLE: {
 578            exp.ApproxQuantile,
 579            exp.Avg,
 580            exp.Exp,
 581            exp.Ln,
 582            exp.Log,
 583            exp.Pow,
 584            exp.Quantile,
 585            exp.Round,
 586            exp.SafeDivide,
 587            exp.Sqrt,
 588            exp.Stddev,
 589            exp.StddevPop,
 590            exp.StddevSamp,
 591            exp.Variance,
 592            exp.VariancePop,
 593        },
 594        exp.DataType.Type.INT: {
 595            exp.Ceil,
 596            exp.DatetimeDiff,
 597            exp.DateDiff,
 598            exp.TimestampDiff,
 599            exp.TimeDiff,
 600            exp.DateToDi,
 601            exp.Levenshtein,
 602            exp.Sign,
 603            exp.StrPosition,
 604            exp.TsOrDiToDi,
 605        },
 606        exp.DataType.Type.JSON: {
 607            exp.ParseJSON,
 608        },
 609        exp.DataType.Type.TIME: {
 610            exp.Time,
 611        },
 612        exp.DataType.Type.TIMESTAMP: {
 613            exp.CurrentTime,
 614            exp.CurrentTimestamp,
 615            exp.StrToTime,
 616            exp.TimeAdd,
 617            exp.TimeStrToTime,
 618            exp.TimeSub,
 619            exp.TimestampAdd,
 620            exp.TimestampSub,
 621            exp.UnixToTime,
 622        },
 623        exp.DataType.Type.TINYINT: {
 624            exp.Day,
 625            exp.Month,
 626            exp.Week,
 627            exp.Year,
 628            exp.Quarter,
 629        },
 630        exp.DataType.Type.VARCHAR: {
 631            exp.ArrayConcat,
 632            exp.Concat,
 633            exp.ConcatWs,
 634            exp.DateToDateStr,
 635            exp.GroupConcat,
 636            exp.Initcap,
 637            exp.Lower,
 638            exp.Substring,
 639            exp.TimeToStr,
 640            exp.TimeToTimeStr,
 641            exp.Trim,
 642            exp.TsOrDsToDateStr,
 643            exp.UnixToStr,
 644            exp.UnixToTimeStr,
 645            exp.Upper,
 646        },
 647    }
 648
 649    ANNOTATORS: AnnotatorsType = {
 650        **{
 651            expr_type: lambda self, e: self._annotate_unary(e)
 652            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
 653        },
 654        **{
 655            expr_type: lambda self, e: self._annotate_binary(e)
 656            for expr_type in subclasses(exp.__name__, exp.Binary)
 657        },
 658        **{
 659            expr_type: _annotate_with_type_lambda(data_type)
 660            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
 661            for expr_type in expressions
 662        },
 663        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
 664        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 665        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
 666        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
 667        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 668        exp.Bracket: lambda self, e: self._annotate_bracket(e),
 669        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 670        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
 671        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 672        exp.Count: lambda self, e: self._annotate_with_type(
 673            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
 674        ),
 675        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
 676        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
 677        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
 678        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
 679        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
 680        exp.Div: lambda self, e: self._annotate_div(e),
 681        exp.Dot: lambda self, e: self._annotate_dot(e),
 682        exp.Explode: lambda self, e: self._annotate_explode(e),
 683        exp.Extract: lambda self, e: self._annotate_extract(e),
 684        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
 685        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
 686            e, exp.DataType.build("ARRAY<DATE>")
 687        ),
 688        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
 689            e, exp.DataType.build("ARRAY<TIMESTAMP>")
 690        ),
 691        exp.Greatest: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 692        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
 693        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
 694        exp.Least: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 695        exp.Literal: lambda self, e: self._annotate_literal(e),
 696        exp.Map: lambda self, e: self._annotate_map(e),
 697        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 698        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 699        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
 700        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
 701        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
 702        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 703        exp.Struct: lambda self, e: self._annotate_struct(e),
 704        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
 705        exp.Timestamp: lambda self, e: self._annotate_with_type(
 706            e,
 707            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
 708        ),
 709        exp.ToMap: lambda self, e: self._annotate_to_map(e),
 710        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 711        exp.Unnest: lambda self, e: self._annotate_unnest(e),
 712        exp.VarMap: lambda self, e: self._annotate_map(e),
 713    }
 714
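These two tables drive type inference in the optimizer: TYPE_TO_EXPRESSIONS maps a result type to the expressions that produce it, and ANNOTATORS maps each expression class to a callable that annotates it. A small sketch of the effect (assumes sqlglot is installed):

    import sqlglot
    from sqlglot import exp
    from sqlglot.optimizer.annotate_types import annotate_types

    # exp.Lower appears under DataType.Type.VARCHAR in TYPE_TO_EXPRESSIONS above
    ast = annotate_types(sqlglot.parse_one("SELECT LOWER('abc') AS x"))
    assert ast.selects[0].type.is_type(exp.DataType.Type.VARCHAR)
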
 715    @classmethod
 716    def get_or_raise(cls, dialect: DialectType) -> Dialect:
 717        """
 718        Look up a dialect in the global dialect registry and return it if it exists.
 719
 720        Args:
 721            dialect: The target dialect. If this is a string, it can be optionally followed by
 722                additional key-value pairs that are separated by commas and are used to specify
 723                dialect settings, such as whether the dialect's identifiers are case-sensitive.
 724
 725        Example:
  726            >>> dialect = Dialect.get_or_raise("duckdb")
  727            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
 728
 729        Returns:
 730            The corresponding Dialect instance.
 731        """
 732
 733        if not dialect:
 734            return cls()
 735        if isinstance(dialect, _Dialect):
 736            return dialect()
 737        if isinstance(dialect, Dialect):
 738            return dialect
 739        if isinstance(dialect, str):
 740            try:
 741                dialect_name, *kv_strings = dialect.split(",")
 742                kv_pairs = (kv.split("=") for kv in kv_strings)
 743                kwargs = {}
 744                for pair in kv_pairs:
 745                    key = pair[0].strip()
  746                    value: t.Optional[bool | str] = None
 747
 748                    if len(pair) == 1:
 749                        # Default initialize standalone settings to True
 750                        value = True
 751                    elif len(pair) == 2:
 752                        value = pair[1].strip()
 753
  754                        # Coerce the value to a boolean if it matches one of the truthy/falsy values below
 755                        value_lower = value.lower()
 756                        if value_lower in ("true", "1"):
 757                            value = True
 758                        elif value_lower in ("false", "0"):
 759                            value = False
 760
 761                    kwargs[key] = value
 762
 763            except ValueError:
 764                raise ValueError(
 765                    f"Invalid dialect format: '{dialect}'. "
  766                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
 767                )
 768
 769            result = cls.get(dialect_name.strip())
 770            if not result:
 771                from difflib import get_close_matches
 772
 773                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
 774                if similar:
 775                    similar = f" Did you mean {similar}?"
 776
 777                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
 778
 779            return result(**kwargs)
 780
 781        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
 782
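A usage sketch of the settings-string form described in the docstring (assumes sqlglot is installed):

    import sqlglot.dialects  # ensure the bundled dialects are registered
    from sqlglot.dialects.dialect import Dialect, NormalizationStrategy

    d = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
    assert type(d).__name__ == "MySQL"
    assert d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
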
 783    @classmethod
 784    def format_time(
 785        cls, expression: t.Optional[str | exp.Expression]
 786    ) -> t.Optional[exp.Expression]:
 787        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
 788        if isinstance(expression, str):
 789            return exp.Literal.string(
 790                # the time formats are quoted
 791                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
 792            )
 793
 794        if expression and expression.is_string:
 795            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
 796
 797        return expression
 798
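For example, converting a Hive-style format literal into its Python strftime equivalent; a sketch that assumes the bundled Hive dialect's TIME_MAPPING (which covers these tokens):

    import sqlglot.dialects  # registers the bundled dialects
    from sqlglot.dialects.dialect import Dialect

    lit = Dialect["hive"].format_time("'yyyy-MM-dd'")  # string input keeps its quotes
    assert lit is not None and lit.name == "%Y-%m-%d"
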
 799    def __init__(self, **kwargs) -> None:
 800        normalization_strategy = kwargs.pop("normalization_strategy", None)
 801
 802        if normalization_strategy is None:
 803            self.normalization_strategy = self.NORMALIZATION_STRATEGY
 804        else:
 805            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
 806
 807        self.settings = kwargs
 808
 809    def __eq__(self, other: t.Any) -> bool:
 810        # Does not currently take dialect state into account
 811        return type(self) == other
 812
 813    def __hash__(self) -> int:
 814        # Does not currently take dialect state into account
 815        return hash(type(self))
 816
 817    def normalize_identifier(self, expression: E) -> E:
 818        """
 819        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
 820
 821        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
 822        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
 823        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
 824        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
 825
 826        There are also dialects like Spark, which are case-insensitive even when quotes are
 827        present, and dialects like MySQL, whose resolution rules match those employed by the
  828        underlying operating system; for example, they may always be case-sensitive on Linux.
 829
 830        Finally, the normalization behavior of some engines can even be controlled through flags,
 831        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
 832
 833        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
 834        that it can analyze queries in the optimizer and successfully capture their semantics.
 835        """
 836        if (
 837            isinstance(expression, exp.Identifier)
 838            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
 839            and (
 840                not expression.quoted
 841                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
 842            )
 843        ):
 844            expression.set(
 845                "this",
 846                (
 847                    expression.this.upper()
 848                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 849                    else expression.this.lower()
 850                ),
 851            )
 852
 853        return expression
 854
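A sketch of the normalization strategies in action (assumes sqlglot is installed):

    import sqlglot
    from sqlglot import exp

    snowflake = sqlglot.Dialect.get_or_raise("snowflake")  # UPPERCASE strategy
    assert snowflake.normalize_identifier(exp.to_identifier("FoO")).name == "FOO"

    postgres = sqlglot.Dialect.get_or_raise("postgres")    # LOWERCASE strategy
    assert postgres.normalize_identifier(exp.to_identifier("FoO")).name == "foo"
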
 855    def case_sensitive(self, text: str) -> bool:
 856        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
 857        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
 858            return False
 859
 860        unsafe = (
 861            str.islower
 862            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 863            else str.isupper
 864        )
 865        return any(unsafe(char) for char in text)
 866
 867    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
 868        """Checks if text can be identified given an identify option.
 869
 870        Args:
 871            text: The text to check.
 872            identify:
 873                `"always"` or `True`: Always returns `True`.
 874                `"safe"`: Only returns `True` if the identifier is case-insensitive.
 875
 876        Returns:
 877            Whether the given text can be identified.
 878        """
 879        if identify is True or identify == "always":
 880            return True
 881
 882        if identify == "safe":
 883            return not self.case_sensitive(text)
 884
 885        return False
 886
 887    def quote_identifier(self, expression: E, identify: bool = True) -> E:
 888        """
 889        Adds quotes to a given identifier.
 890
 891        Args:
 892            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
 893            identify: If set to `False`, the quotes will only be added if the identifier is deemed
 894                "unsafe", with respect to its characters and this dialect's normalization strategy.
 895        """
 896        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
 897            name = expression.this
 898            expression.set(
 899                "quoted",
 900                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
 901            )
 902
 903        return expression
 904
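For example, under Postgres's lowercasing rule an identifier containing uppercase characters is deemed "unsafe" and gets quoted even with identify=False (a sketch; assumes sqlglot is installed):

    import sqlglot
    from sqlglot import exp

    d = sqlglot.Dialect.get_or_raise("postgres")
    ident = d.quote_identifier(exp.to_identifier("FoO"), identify=False)
    assert ident.quoted  # "FoO" is case-sensitive under the LOWERCASE strategy
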
 905    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
 906        if isinstance(path, exp.Literal):
 907            path_text = path.name
 908            if path.is_number:
 909                path_text = f"[{path_text}]"
 910            try:
 911                return parse_json_path(path_text, self)
 912            except ParseError as e:
 913                if self.STRICT_JSON_PATH_SYNTAX:
 914                    logger.warning(f"Invalid JSON path syntax. {str(e)}")
 915
 916        return path
 917
 918    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
 919        return self.parser(**opts).parse(self.tokenize(sql), sql)
 920
 921    def parse_into(
 922        self, expression_type: exp.IntoType, sql: str, **opts
 923    ) -> t.List[t.Optional[exp.Expression]]:
 924        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
 925
 926    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
 927        return self.generator(**opts).generate(expression, copy=copy)
 928
 929    def transpile(self, sql: str, **opts) -> t.List[str]:
 930        return [
 931            self.generate(expression, copy=False, **opts) if expression else ""
 932            for expression in self.parse(sql)
 933        ]
 934
 935    def tokenize(self, sql: str) -> t.List[Token]:
 936        return self.tokenizer.tokenize(sql)
 937
 938    @property
 939    def tokenizer(self) -> Tokenizer:
 940        return self.tokenizer_class(dialect=self)
 941
 942    @property
 943    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
 944        return self.jsonpath_tokenizer_class(dialect=self)
 945
 946    def parser(self, **opts) -> Parser:
 947        return self.parser_class(dialect=self, **opts)
 948
 949    def generator(self, **opts) -> Generator:
 950        return self.generator_class(dialect=self, **opts)
 951
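An end-to-end sketch tying the pieces together: a Dialect instance wires its tokenizer, parser and generator classes into a parse/generate round trip (assumes sqlglot is installed):

    import sqlglot

    d = sqlglot.Dialect.get_or_raise("duckdb")
    tree = d.parse("SELECT 1 AS x")[0]
    assert tree is not None and d.generate(tree) == "SELECT 1 AS x"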
 952
 953DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
 954
 955
 956def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
 957    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
 958
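rename_func is typically used inside a generator's TRANSFORMS mapping to emit an expression under a different function name. A sketch with a hypothetical custom dialect (MyDuckDB is illustrative, not part of sqlglot; the TRANSFORMS override pattern is the library's convention):

    from sqlglot import exp
    from sqlglot.dialects.dialect import rename_func
    from sqlglot.dialects.duckdb import DuckDB

    class MyDuckDB(DuckDB):
        class Generator(DuckDB.Generator):
            TRANSFORMS = {
                **DuckDB.Generator.TRANSFORMS,
                # re-emit ApproxDistinct under a different function name
                exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            }

    sql = MyDuckDB().generate(exp.ApproxDistinct(this=exp.column("x")))
    assert sql == "APPROX_COUNT_DISTINCT(x)"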
 959
 960@unsupported_args("accuracy")
 961def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
 962    return self.func("APPROX_COUNT_DISTINCT", expression.this)
 963
 964
 965def if_sql(
 966    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
 967) -> t.Callable[[Generator, exp.If], str]:
 968    def _if_sql(self: Generator, expression: exp.If) -> str:
 969        return self.func(
 970            name,
 971            expression.this,
 972            expression.args.get("true"),
 973            expression.args.get("false") or false_value,
 974        )
 975
 976    return _if_sql
 977
 978
 979def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
 980    this = expression.this
 981    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
 982        this.replace(exp.cast(this, exp.DataType.Type.JSON))
 983
 984    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
 985
 986
 987def inline_array_sql(self: Generator, expression: exp.Array) -> str:
 988    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
 989
 990
 991def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
 992    elem = seq_get(expression.expressions, 0)
 993    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
 994        return self.func("ARRAY", elem)
 995    return inline_array_sql(self, expression)
 996
 997
 998def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
 999    return self.like_sql(
1000        exp.Like(
1001            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
1002        )
1003    )
1004
1005
1006def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
1007    zone = self.sql(expression, "this")
1008    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
1009
1010
1011def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
1012    if expression.args.get("recursive"):
1013        self.unsupported("Recursive CTEs are unsupported")
1014        expression.args["recursive"] = False
1015    return self.with_sql(expression)
1016
1017
1018def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
1019    n = self.sql(expression, "this")
1020    d = self.sql(expression, "expression")
1021    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
1022
1023
1024def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1025    self.unsupported("TABLESAMPLE unsupported")
1026    return self.sql(expression.this)
1027
1028
1029def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1030    self.unsupported("PIVOT unsupported")
1031    return ""
1032
1033
1034def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1035    return self.cast_sql(expression)
1036
1037
1038def no_comment_column_constraint_sql(
1039    self: Generator, expression: exp.CommentColumnConstraint
1040) -> str:
1041    self.unsupported("CommentColumnConstraint unsupported")
1042    return ""
1043
1044
1045def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1046    self.unsupported("MAP_FROM_ENTRIES unsupported")
1047    return ""
1048
1049
1050def property_sql(self: Generator, expression: exp.Property) -> str:
1051    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
1052
1053
1054def str_position_sql(
1055    self: Generator,
1056    expression: exp.StrPosition,
1057    generate_instance: bool = False,
1058    str_position_func_name: str = "STRPOS",
1059) -> str:
1060    this = self.sql(expression, "this")
1061    substr = self.sql(expression, "substr")
1062    position = self.sql(expression, "position")
1063    instance = expression.args.get("instance") if generate_instance else None
1064    position_offset = ""
1065
1066    if position:
1067        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1068        this = self.func("SUBSTR", this, position)
1069        position_offset = f" + {position} - 1"
1070
1071    return self.func(str_position_func_name, this, substr, instance) + position_offset
1072
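The SUBSTR rewrite above emulates a search start position for engines whose string-position function lacks a third argument: search the suffix, then add the offset back. A hedged sketch of the resulting shape (exact output may vary across sqlglot versions):

    import sqlglot

    sql = sqlglot.transpile("SELECT LOCATE('b', 'abc', 2)", read="mysql", write="duckdb")[0]
    print(sql)  # expected shape: SELECT STRPOS(SUBSTR('abc', 2), 'b') + 2 - 1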
1073
1074def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1075    return (
1076        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1077    )
1078
1079
1080def var_map_sql(
1081    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1082) -> str:
1083    keys = expression.args["keys"]
1084    values = expression.args["values"]
1085
1086    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1087        self.unsupported("Cannot convert array columns into map.")
1088        return self.func(map_func_name, keys, values)
1089
1090    args = []
1091    for key, value in zip(keys.expressions, values.expressions):
1092        args.append(self.sql(key))
1093        args.append(self.sql(value))
1094
1095    return self.func(map_func_name, *args)
1096
1097
1098def build_formatted_time(
1099    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1100) -> t.Callable[[t.List], E]:
1101    """Helper used for time expressions.
1102
1103    Args:
1104        exp_class: the expression class to instantiate.
1105        dialect: target sql dialect.
1106        default: the default format, True being time.
1107
1108    Returns:
1109        A callable that can be used to return the appropriately formatted time expression.
1110    """
1111
1112    def _builder(args: t.List):
1113        return exp_class(
1114            this=seq_get(args, 0),
1115            format=Dialect[dialect].format_time(
1116                seq_get(args, 1)
1117                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1118            ),
1119        )
1120
1121    return _builder
1122
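For instance, Hive wires DATE_FORMAT through build_formatted_time(exp.TimeToStr, "hive"), so the format literal is normalized to strftime tokens at parse time (a sketch; assumes the bundled Hive dialect):

    from sqlglot import parse_one

    ast = parse_one("DATE_FORMAT(col, 'yyyy-MM-dd')", read="hive")
    assert ast.args["format"].name == "%Y-%m-%d"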
1123
1124def time_format(
1125    dialect: DialectType = None,
1126) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1127    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1128        """
1129        Returns the time format for a given expression, unless it's equivalent
1130        to the default time format of the dialect of interest.
1131        """
1132        time_format = self.format_time(expression)
1133        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1134
1135    return _time_format
1136
1137
1138def build_date_delta(
1139    exp_class: t.Type[E],
1140    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1141    default_unit: t.Optional[str] = "DAY",
1142) -> t.Callable[[t.List], E]:
1143    def _builder(args: t.List) -> E:
1144        unit_based = len(args) == 3
1145        this = args[2] if unit_based else seq_get(args, 0)
1146        unit = None
1147        if unit_based or default_unit:
1148            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1149            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1150        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1151
1152    return _builder
1153
1154
1155def build_date_delta_with_interval(
1156    expression_class: t.Type[E],
1157) -> t.Callable[[t.List], t.Optional[E]]:
1158    def _builder(args: t.List) -> t.Optional[E]:
1159        if len(args) < 2:
1160            return None
1161
1162        interval = args[1]
1163
1164        if not isinstance(interval, exp.Interval):
1165            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1166
1167        return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval))
1168
1169    return _builder
1170
1171
1172def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1173    unit = seq_get(args, 0)
1174    this = seq_get(args, 1)
1175
1176    if isinstance(this, exp.Cast) and this.is_type("date"):
1177        return exp.DateTrunc(unit=unit, this=this)
1178    return exp.TimestampTrunc(this=this, unit=unit)
1179
1180
1181def date_add_interval_sql(
1182    data_type: str, kind: str
1183) -> t.Callable[[Generator, exp.Expression], str]:
1184    def func(self: Generator, expression: exp.Expression) -> str:
1185        this = self.sql(expression, "this")
1186        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1187        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1188
1189    return func
1190
1191
1192def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1193    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1194        args = [unit_to_str(expression), expression.this]
1195        if zone:
1196            args.append(expression.args.get("zone"))
1197        return self.func("DATE_TRUNC", *args)
1198
1199    return _timestamptrunc_sql
1200
1201
1202def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1203    zone = expression.args.get("zone")
1204    if not zone:
1205        from sqlglot.optimizer.annotate_types import annotate_types
1206
1207        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1208        return self.sql(exp.cast(expression.this, target_type))
1209    if zone.name.lower() in TIMEZONES:
1210        return self.sql(
1211            exp.AtTimeZone(
1212                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1213                zone=zone,
1214            )
1215        )
1216    return self.func("TIMESTAMP", expression.this, zone)
1217
1218
1219def no_time_sql(self: Generator, expression: exp.Time) -> str:
1220    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1221    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1222    expr = exp.cast(
1223        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1224    )
1225    return self.sql(expr)
1226
1227
1228def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1229    this = expression.this
1230    expr = expression.expression
1231
1232    if expr.name.lower() in TIMEZONES:
1233        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1234        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1235        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1236        return self.sql(this)
1237
1238    this = exp.cast(this, exp.DataType.Type.DATE)
1239    expr = exp.cast(expr, exp.DataType.Type.TIME)
1240
1241    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
1242
1243
1244def locate_to_strposition(args: t.List) -> exp.Expression:
1245    return exp.StrPosition(
1246        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1247    )
1248
1249
1250def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1251    return self.func(
1252        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1253    )
1254
1255
1256def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1257    return self.sql(
1258        exp.Substring(
1259            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1260        )
1261    )
1262
1263
1264def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
1265    return self.sql(
1266        exp.Substring(
1267            this=expression.this,
1268            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1269        )
1270    )
1271
1272
1273def timestrtotime_sql(
1274    self: Generator,
1275    expression: exp.TimeStrToTime,
1276    include_precision: bool = False,
1277) -> str:
1278    datatype = exp.DataType.build(
1279        exp.DataType.Type.TIMESTAMPTZ
1280        if expression.args.get("zone")
1281        else exp.DataType.Type.TIMESTAMP
1282    )
1283
1284    if isinstance(expression.this, exp.Literal) and include_precision:
1285        precision = subsecond_precision(expression.this.name)
1286        if precision > 0:
1287            datatype = exp.DataType.build(
1288                datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))]
1289            )
1290
1291    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
1292
1293
1294def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1295    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
1296
1297
1298# Used for Presto and DuckDB, whose corresponding functions don't support a charset argument and assume UTF-8
1299def encode_decode_sql(
1300    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1301) -> str:
1302    charset = expression.args.get("charset")
1303    if charset and charset.name.lower() != "utf-8":
1304        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1305
1306    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
1307
1308
1309def min_or_least(self: Generator, expression: exp.Min) -> str:
1310    name = "LEAST" if expression.expressions else "MIN"
1311    return rename_func(name)(self, expression)
1312
1313
1314def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1315    name = "GREATEST" if expression.expressions else "MAX"
1316    return rename_func(name)(self, expression)
1317
1318
1319def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1320    cond = expression.this
1321
1322    if isinstance(expression.this, exp.Distinct):
1323        cond = expression.this.expressions[0]
1324        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1325
1326    return self.func("sum", exp.func("if", cond, 1, 0))
1327
1328
1329def trim_sql(self: Generator, expression: exp.Trim) -> str:
1330    target = self.sql(expression, "this")
1331    trim_type = self.sql(expression, "position")
1332    remove_chars = self.sql(expression, "expression")
1333    collation = self.sql(expression, "collation")
1334
1335    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1336    if not remove_chars:
1337        return self.trim_sql(expression)
1338
1339    trim_type = f"{trim_type} " if trim_type else ""
1340    remove_chars = f"{remove_chars} " if remove_chars else ""
1341    from_part = "FROM " if trim_type or remove_chars else ""
1342    collation = f" COLLATE {collation}" if collation else ""
1343    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1344
1345
1346def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1347    return self.func("STRPTIME", expression.this, self.format_time(expression))
1348
1349
1350def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1351    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
1352
1353
1354def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1355    delim, *rest_args = expression.expressions
1356    return self.sql(
1357        reduce(
1358            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1359            rest_args,
1360        )
1361    )
1362
1363
1364@unsupported_args("position", "occurrence", "parameters")
1365def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1366    group = expression.args.get("group")
1367
1368    # Do not render group if it's the default value for this dialect
1369    if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP):
1370        group = None
1371
1372    return self.func("REGEXP_EXTRACT", expression.this, expression.expression, group)
1373
1374
1375@unsupported_args("position", "occurrence", "modifiers")
1376def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1377    return self.func(
1378        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1379    )
1380
1381
1382def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1383    names = []
1384    for agg in aggregations:
1385        if isinstance(agg, exp.Alias):
1386            names.append(agg.alias)
1387        else:
1388            """
1389            This case corresponds to aggregations without aliases being used as suffixes
1390            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1391            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1392            Otherwise, we'd end up with `col_avg(`foo`)` (note the nested backtick quotes).
1393            """
1394            agg_all_unquoted = agg.transform(
1395                lambda node: (
1396                    exp.Identifier(this=node.name, quoted=False)
1397                    if isinstance(node, exp.Identifier)
1398                    else node
1399                )
1400            )
1401            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1402
1403    return names
1404
1405
1406def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1407    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
1408
1409
1410# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects
1411def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1412    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
1413
1414
1415def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1416    return self.func("MAX", expression.this)
1417
1418
1419def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1420    a = self.sql(expression.left)
1421    b = self.sql(expression.right)
1422    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
1423
1424
1425def is_parse_json(expression: exp.Expression) -> bool:
1426    return isinstance(expression, exp.ParseJSON) or (
1427        isinstance(expression, exp.Cast) and expression.is_type("json")
1428    )
1429
1430
1431def isnull_to_is_null(args: t.List) -> exp.Expression:
1432    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
1433
1434
1435def generatedasidentitycolumnconstraint_sql(
1436    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1437) -> str:
1438    start = self.sql(expression, "start") or "1"
1439    increment = self.sql(expression, "increment") or "1"
1440    return f"IDENTITY({start}, {increment})"
1441
1442
1443def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1444    @unsupported_args("count")
1445    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1446        return self.func(name, expression.this, expression.expression)
1447
1448    return _arg_max_or_min_sql
1449
1450
1451def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1452    this = expression.this.copy()
1453
1454    return_type = expression.return_type
1455    if return_type.is_type(exp.DataType.Type.DATE):
1456        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1457        # can truncate timestamp strings, because some dialects can't cast them to DATE
1458        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1459
1460    expression.this.replace(exp.cast(this, return_type))
1461    return expression
1462
1463
1464def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1465    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1466        if cast and isinstance(expression, exp.TsOrDsAdd):
1467            expression = ts_or_ds_add_cast(expression)
1468
1469        return self.func(
1470            name,
1471            unit_to_var(expression),
1472            expression.expression,
1473            expression.this,
1474        )
1475
1476    return _delta_sql
1477
1478
1479def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1480    unit = expression.args.get("unit")
1481
1482    if isinstance(unit, exp.Placeholder):
1483        return unit
1484    if unit:
1485        return exp.Literal.string(unit.name)
1486    return exp.Literal.string(default) if default else None
1487
1488
1489def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1490    unit = expression.args.get("unit")
1491
1492    if isinstance(unit, (exp.Var, exp.Placeholder)):
1493        return unit
1494    return exp.Var(this=default) if default else None
1495
1496
1497@t.overload
1498def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var:
1499    pass
1500
1501
1502@t.overload
1503def map_date_part(
1504    part: t.Optional[exp.Expression], dialect: DialectType = Dialect
1505) -> t.Optional[exp.Expression]:
1506    pass
1507
1508
1509def map_date_part(part, dialect: DialectType = Dialect):
1510    mapped = (
1511        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1512    )
1513    return exp.var(mapped) if mapped else part
1514
1515
1516def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1517    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1518    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1519    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1520
1521    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
1522
1523
1524def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1525    """Remove table refs from columns in when statements."""
1526    alias = expression.this.args.get("alias")
1527
1528    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1529        return self.dialect.normalize_identifier(identifier).name if identifier else None
1530
1531    targets = {normalize(expression.this.this)}
1532
1533    if alias:
1534        targets.add(normalize(alias.this))
1535
1536    for when in expression.expressions:
1537        # only remove the target names from the THEN clause
1538        # they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
1539        # ref: https://github.com/TobikoData/sqlmesh/issues/2934
1540        then = when.args.get("then")
1541        if then:
1542            then.transform(
1543                lambda node: (
1544                    exp.column(node.this)
1545                    if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1546                    else node
1547                ),
1548                copy=False,
1549            )
1550
1551    return self.merge_sql(expression)
1552
1553
1554def build_json_extract_path(
1555    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1556) -> t.Callable[[t.List], F]:
1557    def _builder(args: t.List) -> F:
1558        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1559        for arg in args[1:]:
1560            if not isinstance(arg, exp.Literal):
1561                # We use the fallback parser because we can't really transpile non-literals safely
1562                return expr_type.from_arg_list(args)
1563
1564            text = arg.name
1565            if is_int(text):
1566                index = int(text)
1567                segments.append(
1568                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1569                )
1570            else:
1571                segments.append(exp.JSONPathKey(this=text))
1572
1573        # This is done to avoid failing in the expression validator due to the arg count
1574        del args[2:]
1575        return expr_type(
1576            this=seq_get(args, 0),
1577            expression=exp.JSONPath(expressions=segments),
1578            only_json_types=arrow_req_json_type,
1579        )
1580
1581    return _builder
1582
1583
1584def json_extract_segments(
1585    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1586) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1587    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1588        path = expression.expression
1589        if not isinstance(path, exp.JSONPath):
1590            return rename_func(name)(self, expression)
1591
1592        escape = path.args.get("escape")
1593
1594        segments = []
1595        for segment in path.expressions:
1596            path = self.sql(segment)
1597            if path:
1598                if isinstance(segment, exp.JSONPathPart) and (
1599                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1600                ):
1601                    if escape:
1602                        path = self.escape_str(path)
1603
1604                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1605
1606                segments.append(path)
1607
1608        if op:
1609            return f" {op} ".join([self.sql(expression.this), *segments])
1610        return self.func(name, expression.this, *segments)
1611
1612    return _json_extract_segments
1613
1614
1615def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1616    if isinstance(expression.this, exp.JSONPathWildcard):
1617        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1618
1619    return expression.name
1620
1621
1622def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1623    cond = expression.expression
1624    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1625        alias = cond.expressions[0]
1626        cond = cond.this
1627    elif isinstance(cond, exp.Predicate):
1628        alias = "_u"
1629    else:
1630        self.unsupported("Unsupported filter condition")
1631        return ""
1632
1633    unnest = exp.Unnest(expressions=[expression.this])
1634    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1635    return self.sql(exp.Array(expressions=[filtered]))
1636
1637
1638def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1639    return self.func(
1640        "TO_NUMBER",
1641        expression.this,
1642        expression.args.get("format"),
1643        expression.args.get("nlsparam"),
1644    )
1645
1646
1647def build_default_decimal_type(
1648    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1649) -> t.Callable[[exp.DataType], exp.DataType]:
1650    def _builder(dtype: exp.DataType) -> exp.DataType:
1651        if dtype.expressions or precision is None:
1652            return dtype
1653
1654        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1655        return exp.DataType.build(f"DECIMAL({params})")
1656
1657    return _builder
1658
1659
1660def build_timestamp_from_parts(args: t.List) -> exp.Func:
1661    if len(args) == 2:
1662        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1663        # so we parse this into Anonymous for now instead of introducing complexity
1664        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1665
1666    return exp.TimestampFromParts.from_arg_list(args)
1667
1668
1669def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1670    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
1671
1672
1673def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
1674    start = expression.args.get("start")
1675    end = expression.args.get("end")
1676    step = expression.args.get("step")
1677
1678    if isinstance(start, exp.Cast):
1679        target_type = start.to
1680    elif isinstance(end, exp.Cast):
1681        target_type = end.to
1682    else:
1683        target_type = None
1684
1685    if start and end and target_type and target_type.is_type("date", "timestamp"):
1686        if isinstance(start, exp.Cast) and target_type is start.to:
1687            end = exp.cast(end, target_type)
1688        else:
1689            start = exp.cast(start, target_type)
1690
1691    return self.func("SEQUENCE", start, end, step)
1692
1693
1694def build_regexp_extract(args: t.List, dialect: Dialect) -> exp.RegexpExtract:
1695    return exp.RegexpExtract(
1696        this=seq_get(args, 0),
1697        expression=seq_get(args, 1),
1698        group=seq_get(args, 2) or exp.Literal.number(dialect.REGEXP_EXTRACT_DEFAULT_GROUP),
1699    )
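
The helpers above are the building blocks that concrete dialects assemble. As a rough, hypothetical sketch (MyDialect is not a real sqlglot dialect), a builder is registered in the parser's FUNCTIONS mapping and a generator callable in TRANSFORMS, mirroring the pattern used by the bundled dialects:

from sqlglot import exp, generator, parser
from sqlglot.dialects.dialect import (
    Dialect,
    any_value_to_max_sql,
    build_timestamp_trunc,
)


class MyDialect(Dialect):
    class Parser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # Parse DATE_TRUNC(unit, this) into exp.TimestampTrunc
            "DATE_TRUNC": build_timestamp_trunc,
        }

    class Generator(generator.Generator):
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            # Render ANY_VALUE with MAX, for engines that lack ANY_VALUE
            exp.AnyValue: any_value_to_max_sql,
        }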
logger = <Logger sqlglot (WARNING)>
UNESCAPED_SEQUENCES = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}
class Dialects(builtins.str, enum.Enum):

Dialects supported by SQLGlot.

DIALECT = <Dialects.DIALECT: ''>
ATHENA = <Dialects.ATHENA: 'athena'>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DORIS = <Dialects.DORIS: 'doris'>
DRILL = <Dialects.DRILL: 'drill'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MATERIALIZE = <Dialects.MATERIALIZE: 'materialize'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
PRQL = <Dialects.PRQL: 'prql'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
RISINGWAVE = <Dialects.RISINGWAVE: 'risingwave'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SPARK2 = <Dialects.SPARK2: 'spark2'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TERADATA = <Dialects.TERADATA: 'teradata'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
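
Since the enum members are strings, they can be used anywhere a dialect name is expected, and dialect classes compare equal to their names through the _Dialect metaclass. A small sketch (assuming the standard registry of bundled dialects):

>>> import sqlglot  # importing sqlglot registers the bundled dialects
>>> from sqlglot.dialects.dialect import Dialect, Dialects
>>> Dialect[Dialects.DUCKDB.value] == "duckdb"
True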
class NormalizationStrategy(builtins.str, sqlglot.helper.AutoName):

Specifies the strategy according to which identifiers should be normalized.

LOWERCASE = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Unquoted identifiers are lowercased.

UPPERCASE = <NormalizationStrategy.UPPERCASE: 'UPPERCASE'>

Unquoted identifiers are uppercased.

CASE_SENSITIVE = <NormalizationStrategy.CASE_SENSITIVE: 'CASE_SENSITIVE'>

Always case-sensitive, regardless of quotes.

CASE_INSENSITIVE = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Always case-insensitive, regardless of quotes.
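
A hedged illustration of the strategies in action, assuming the stock Postgres and Snowflake dialects (which default to LOWERCASE and UPPERCASE, respectively):

>>> import sqlglot  # registers the bundled dialects
>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
'foo'
>>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
'FOO'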

class Dialect:
Dialect(**kwargs)
Creates a Dialect instance. The optional normalization_strategy keyword argument overrides the class-level NORMALIZATION_STRATEGY (case-insensitively, e.g. "case_sensitive"); all remaining keyword arguments are stored on the instance as settings.
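
For illustration, a small sketch of passing a setting through get_or_raise's comma-separated string form (outputs assume the bundled MySQL dialect):

>>> import sqlglot  # registers the bundled dialects
>>> from sqlglot.dialects.dialect import Dialect
>>> d = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
>>> d.normalization_strategy
<NormalizationStrategy.CASE_SENSITIVE: 'CASE_SENSITIVE'>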
INDEX_OFFSET = 0

The base index offset for arrays.

WEEK_OFFSET = 0

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = False

Whether UNNEST table aliases are treated as column aliases.

ALIAS_POST_TABLESAMPLE = False

Whether the table alias comes after tablesample.

TABLESAMPLE_SIZE_IS_PERCENT = False

Whether a size in the table sample clause represents percentage.

NORMALIZATION_STRATEGY = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Specifies the strategy according to which identifiers should be normalized.

IDENTIFIERS_CAN_START_WITH_DIGIT = False

Whether an unquoted identifier can start with a digit.
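
Hive enables this flag, for example; a hedged round-trip sketch (exact output may vary by version):

>>> import sqlglot
>>> sqlglot.parse_one("SELECT * FROM 12a", read="hive").sql(dialect="hive")
'SELECT * FROM 12a'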

DPIPE_IS_STRING_CONCAT = True

Whether the DPIPE token (||) is a string concatenation operator.

STRICT_STRING_CONCAT = False

Whether CONCAT's arguments must be strings.

SUPPORTS_USER_DEFINED_TYPES = True

Whether user-defined data types are supported.

SUPPORTS_SEMI_ANTI_JOIN = True

Whether SEMI or ANTI joins are supported.

SUPPORTS_COLUMN_JOIN_MARKS = False

Whether the old-style outer join (+) syntax is supported.

COPY_PARAMS_ARE_CSV = True

Whether COPY statement parameters are separated by comma or whitespace

NORMALIZE_FUNCTIONS: bool | str = 'upper'

Determines how function names are going to be normalized.

Possible values:

"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.

LOG_BASE_FIRST: Optional[bool] = True

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG)

NULL_ORDERING = 'nulls_are_small'

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

TYPED_DIVISION = False

Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division. True means a / b is integer division if both a and b are integers.

SAFE_DIVISION = False

Whether division by zero throws an error (False) or returns NULL (True).

CONCAT_COALESCE = False

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

HEX_LOWERCASE = False

Whether the HEX function returns a lowercase hexadecimal string.

DATE_FORMAT = "'%Y-%m-%d'"
DATEINT_FORMAT = "'%Y%m%d'"
TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
TIME_MAPPING: Dict[str, str] = {}

Associates this dialect's time formats with their equivalent Python strftime formats.
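
TIME_MAPPING is what Dialect.format_time consults; for example, assuming Hive's stock mapping (yyyy -> %Y, MM -> %m, dd -> %d):

>>> from sqlglot import exp
>>> from sqlglot.dialects.hive import Hive
>>> Hive.format_time(exp.Literal.string("yyyy-MM-dd")).name
'%Y-%m-%d'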

FORMAT_MAPPING: Dict[str, str] = {}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

UNESCAPED_SEQUENCES: Dict[str, str] = {}

Mapping of an escaped sequence (the two characters \n) to its unescaped version (the literal newline character).

PSEUDOCOLUMNS: Set[str] = set()

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.

PREFER_CTE_ALIAS_COLUMN = False

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;

FORCE_EARLY_ALIAS_REF_EXPANSION = False

Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

WITH data AS (
    SELECT
        1 AS id,
        2 AS my_id
)
SELECT
    id AS my_id
FROM
    data
WHERE
    my_id = 1
GROUP BY
    my_id
HAVING
    my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:

- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"

EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False

Whether alias reference expansion before qualification should only happen for the GROUP BY clause.

SUPPORTS_ORDER_BY_ALL = False

Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks

HAS_DISTINCT_ARRAY_CONSTRUCTORS = False

Whether the ARRAY constructor is context-sensitive, i.e in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3) as the former is of type INT[] vs the latter which is SUPER

SUPPORTS_FIXED_SIZE_ARRAYS = False

Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.

STRICT_JSON_PATH_SYNTAX = True

Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.

ON_CONDITION_EMPTY_BEFORE_ERROR = True

Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle).

ARRAY_AGG_INCLUDES_NULLS: Optional[bool] = True

Whether ArrayAgg needs to filter NULL values.

REGEXP_EXTRACT_DEFAULT_GROUP = 0

The default value for the capturing group.

SET_OP_DISTINCT_BY_DEFAULT: Dict[Type[Expression], Optional[bool]] = {Except: True, Intersect: True, Union: True}

Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.

CREATABLE_KIND_MAPPING: dict[str, str] = {}

Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
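
For instance, this is how CREATE SCHEMA can be rendered for Clickhouse (a hedged sketch; exact output may vary by version):

>>> import sqlglot
>>> sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0]
'CREATE DATABASE foo'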

tokenizer_class = <class 'sqlglot.tokens.Tokenizer'>
jsonpath_tokenizer_class = <class 'sqlglot.tokens.JSONPathTokenizer'>
parser_class = <class 'sqlglot.parser.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
INVERSE_FORMAT_MAPPING: Dict[str, str] = {}
INVERSE_FORMAT_TRIE: Dict = {}
INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
DATE_PART_MAPPING = {'Y': 'YEAR', 'YY': 'YEAR', 'YYY': 'YEAR', 'YYYY': 'YEAR', 'YR': 'YEAR', 'YEARS': 'YEAR', 'YRS': 'YEAR', 'MM': 'MONTH', 'MON': 'MONTH', 'MONS': 'MONTH', 'MONTHS': 'MONTH', 'D': 'DAY', 'DD': 'DAY', 'DAYS': 'DAY', 'DAYOFMONTH': 'DAY', 'DAY OF WEEK': 'DAYOFWEEK', 'WEEKDAY': 'DAYOFWEEK', 'DOW': 'DAYOFWEEK', 'DW': 'DAYOFWEEK', 'WEEKDAY_ISO': 'DAYOFWEEKISO', 'DOW_ISO': 'DAYOFWEEKISO', 'DW_ISO': 'DAYOFWEEKISO', 'DAY OF YEAR': 'DAYOFYEAR', 'DOY': 'DAYOFYEAR', 'DY': 'DAYOFYEAR', 'W': 'WEEK', 'WK': 'WEEK', 'WEEKOFYEAR': 'WEEK', 'WOY': 'WEEK', 'WY': 'WEEK', 'WEEK_ISO': 'WEEKISO', 'WEEKOFYEARISO': 'WEEKISO', 'WEEKOFYEAR_ISO': 'WEEKISO', 'Q': 'QUARTER', 'QTR': 'QUARTER', 'QTRS': 'QUARTER', 'QUARTERS': 'QUARTER', 'H': 'HOUR', 'HH': 'HOUR', 'HR': 'HOUR', 'HOURS': 'HOUR', 'HRS': 'HOUR', 'M': 'MINUTE', 'MI': 'MINUTE', 'MIN': 'MINUTE', 'MINUTES': 'MINUTE', 'MINS': 'MINUTE', 'S': 'SECOND', 'SEC': 'SECOND', 'SECONDS': 'SECOND', 'SECS': 'SECOND', 'MS': 'MILLISECOND', 'MSEC': 'MILLISECOND', 'MSECS': 'MILLISECOND', 'MSECOND': 'MILLISECOND', 'MSECONDS': 'MILLISECOND', 'MILLISEC': 'MILLISECOND', 'MILLISECS': 'MILLISECOND', 'MILLISECON': 'MILLISECOND', 'MILLISECONDS': 'MILLISECOND', 'US': 'MICROSECOND', 'USEC': 'MICROSECOND', 'USECS': 'MICROSECOND', 'MICROSEC': 'MICROSECOND', 'MICROSECS': 'MICROSECOND', 'USECOND': 'MICROSECOND', 'USECONDS': 'MICROSECOND', 'MICROSECONDS': 'MICROSECOND', 'NS': 'NANOSECOND', 'NSEC': 'NANOSECOND', 'NANOSEC': 'NANOSECOND', 'NSECOND': 'NANOSECOND', 'NSECONDS': 'NANOSECOND', 'NANOSECS': 'NANOSECOND', 'EPOCH_SECOND': 'EPOCH', 'EPOCH_SECONDS': 'EPOCH', 'EPOCH_MILLISECONDS': 'EPOCH_MILLISECOND', 'EPOCH_MICROSECONDS': 'EPOCH_MICROSECOND', 'EPOCH_NANOSECONDS': 'EPOCH_NANOSECOND', 'TZH': 'TIMEZONE_HOUR', 'TZM': 'TIMEZONE_MINUTE', 'DEC': 'DECADE', 'DECS': 'DECADE', 'DECADES': 'DECADE', 'MIL': 'MILLENIUM', 'MILS': 'MILLENIUM', 'MILLENIA': 'MILLENIUM', 'C': 'CENTURY', 'CENT': 'CENTURY', 'CENTS': 'CENTURY', 'CENTURIES': 'CENTURY'}
TYPE_TO_EXPRESSIONS: Dict[sqlglot.expressions.DataType.Type, Set[Type[sqlglot.expressions.Expression]]] = {<Type.BIGINT: 'BIGINT'>: {<class 'sqlglot.expressions.Length'>, <class 'sqlglot.expressions.ApproxDistinct'>, <class 'sqlglot.expressions.ArraySize'>}, <Type.BOOLEAN: 'BOOLEAN'>: {<class 'sqlglot.expressions.In'>, <class 'sqlglot.expressions.RegexpLike'>, <class 'sqlglot.expressions.Between'>, <class 'sqlglot.expressions.Boolean'>}, <Type.DATE: 'DATE'>: {<class 'sqlglot.expressions.CurrentDate'>, <class 'sqlglot.expressions.Date'>, <class 'sqlglot.expressions.StrToDate'>, <class 'sqlglot.expressions.TimeStrToDate'>, <class 'sqlglot.expressions.DateStrToDate'>, <class 'sqlglot.expressions.TsOrDsToDate'>, <class 'sqlglot.expressions.DateFromParts'>, <class 'sqlglot.expressions.DiToDate'>}, <Type.DATETIME: 'DATETIME'>: {<class 'sqlglot.expressions.CurrentDatetime'>, <class 'sqlglot.expressions.DatetimeSub'>, <class 'sqlglot.expressions.DatetimeAdd'>, <class 'sqlglot.expressions.Datetime'>}, <Type.DOUBLE: 'DOUBLE'>: {<class 'sqlglot.expressions.Avg'>, <class 'sqlglot.expressions.StddevSamp'>, <class 'sqlglot.expressions.VariancePop'>, <class 'sqlglot.expressions.StddevPop'>, <class 'sqlglot.expressions.Sqrt'>, <class 'sqlglot.expressions.Variance'>, <class 'sqlglot.expressions.Log'>, <class 'sqlglot.expressions.ApproxQuantile'>, <class 'sqlglot.expressions.Stddev'>, <class 'sqlglot.expressions.Ln'>, <class 'sqlglot.expressions.Exp'>, <class 'sqlglot.expressions.Quantile'>, <class 'sqlglot.expressions.SafeDivide'>, <class 'sqlglot.expressions.Round'>, <class 'sqlglot.expressions.Pow'>}, <Type.INT: 'INT'>: {<class 'sqlglot.expressions.TimeDiff'>, <class 'sqlglot.expressions.DatetimeDiff'>, <class 'sqlglot.expressions.Sign'>, <class 'sqlglot.expressions.Ceil'>, <class 'sqlglot.expressions.TsOrDiToDi'>, <class 'sqlglot.expressions.DateToDi'>, <class 'sqlglot.expressions.StrPosition'>, <class 'sqlglot.expressions.TimestampDiff'>, <class 'sqlglot.expressions.Levenshtein'>, <class 'sqlglot.expressions.DateDiff'>}, <Type.JSON: 'JSON'>: {<class 'sqlglot.expressions.ParseJSON'>}, <Type.TIME: 'TIME'>: {<class 'sqlglot.expressions.Time'>}, <Type.TIMESTAMP: 'TIMESTAMP'>: {<class 'sqlglot.expressions.StrToTime'>, <class 'sqlglot.expressions.TimeAdd'>, <class 'sqlglot.expressions.TimeSub'>, <class 'sqlglot.expressions.TimeStrToTime'>, <class 'sqlglot.expressions.TimestampSub'>, <class 'sqlglot.expressions.UnixToTime'>, <class 'sqlglot.expressions.TimestampAdd'>, <class 'sqlglot.expressions.CurrentTime'>, <class 'sqlglot.expressions.CurrentTimestamp'>}, <Type.TINYINT: 'TINYINT'>: {<class 'sqlglot.expressions.Day'>, <class 'sqlglot.expressions.Quarter'>, <class 'sqlglot.expressions.Year'>, <class 'sqlglot.expressions.Month'>, <class 'sqlglot.expressions.Week'>}, <Type.VARCHAR: 'VARCHAR'>: {<class 'sqlglot.expressions.GroupConcat'>, <class 'sqlglot.expressions.Lower'>, <class 'sqlglot.expressions.Trim'>, <class 'sqlglot.expressions.TsOrDsToDateStr'>, <class 'sqlglot.expressions.UnixToStr'>, <class 'sqlglot.expressions.DateToDateStr'>, <class 'sqlglot.expressions.ArrayConcat'>, <class 'sqlglot.expressions.Initcap'>, <class 'sqlglot.expressions.Upper'>, <class 'sqlglot.expressions.Substring'>, <class 'sqlglot.expressions.ConcatWs'>, <class 'sqlglot.expressions.TimeToTimeStr'>, <class 'sqlglot.expressions.Concat'>, <class 'sqlglot.expressions.TimeToStr'>, <class 'sqlglot.expressions.UnixToTimeStr'>}}
ANNOTATORS: Dict[Type[~E], Callable[[sqlglot.optimizer.annotate_types.TypeAnnotator, ~E], ~E]]

Maps each expression class to the callable the type annotator uses to infer its type. Unary and binary operators (Add, And, EQ, Like, Mod, Or, Sub, ...) largely share generic lambdas built in dict comprehensions; the expressions listed in TYPE_TO_EXPRESSIONS get annotators produced by _annotate_with_type_lambda; and expressions that need bespoke inference (Abs, Anonymous, Array, ArrayConcat, Bracket, Case, Cast, Coalesce, Count, DateAdd, DateSub, DateTrunc, Div, Dot, Extract, If, Interval, Least, Literal, Map, Max, Min, Null, Slice, Struct, Sum, Timestamp, TryCast, Unnest, VarMap, among others) have dedicated Dialect-level lambdas.
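These tables drive sqlglot's type inference. As a quick illustrative sketch (not part of this module's docs), annotating a parsed query assigns the types listed above; for instance, exp.Length yields BIGINT:

>>> import sqlglot
>>> from sqlglot.optimizer.annotate_types import annotate_types
>>> annotate_types(sqlglot.parse_one("SELECT LENGTH('abc') AS n")).selects[0].type.sql()
'BIGINT'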
@classmethod
def get_or_raise( cls, dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> Dialect:
716    @classmethod
717    def get_or_raise(cls, dialect: DialectType) -> Dialect:
718        """
719        Look up a dialect in the global dialect registry and return it if it exists.
720
721        Args:
722            dialect: The target dialect. If this is a string, it can be optionally followed by
723                additional key-value pairs that are separated by commas and are used to specify
724                dialect settings, such as whether the dialect's identifiers are case-sensitive.
725
726        Example:
727            >>> dialect = dialect_class = get_or_raise("duckdb")
728            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
729
730        Returns:
731            The corresponding Dialect instance.
732        """
733
734        if not dialect:
735            return cls()
736        if isinstance(dialect, _Dialect):
737            return dialect()
738        if isinstance(dialect, Dialect):
739            return dialect
740        if isinstance(dialect, str):
741            try:
742                dialect_name, *kv_strings = dialect.split(",")
743                kv_pairs = (kv.split("=") for kv in kv_strings)
744                kwargs = {}
745                for pair in kv_pairs:
746                    key = pair[0].strip()
747                    value: t.Union[bool, str, None] = None
748
749                    if len(pair) == 1:
750                        # Default initialize standalone settings to True
751                        value = True
752                    elif len(pair) == 2:
753                        value = pair[1].strip()
754
755                        # Coerce the value to boolean if it matches to the truthy/falsy values below
756                        value_lower = value.lower()
757                        if value_lower in ("true", "1"):
758                            value = True
759                        elif value_lower in ("false", "0"):
760                            value = False
761
762                    kwargs[key] = value
763
764            except ValueError:
765                raise ValueError(
766                    f"Invalid dialect format: '{dialect}'. "
767                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
768                )
769
770            result = cls.get(dialect_name.strip())
771            if not result:
772                from difflib import get_close_matches
773
774                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
775                if similar:
776                    similar = f" Did you mean {similar}?"
777
778                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
779
780            return result(**kwargs)
781
782        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")

Look up a dialect in the global dialect registry and return it if it exists.

Arguments:
  • dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
>>> dialect = dialect_class = get_or_raise("duckdb")
>>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
Returns:

The corresponding Dialect instance.
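A short illustrative sketch of the settings parsing described above (standalone keys default to True, and "true"/"1" and "false"/"0" are coerced to booleans; assumes NormalizationStrategy is imported from sqlglot.dialects.dialect):

>>> from sqlglot.dialects.dialect import Dialect, NormalizationStrategy
>>> d = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
>>> d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
True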

@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
784    @classmethod
785    def format_time(
786        cls, expression: t.Optional[str | exp.Expression]
787    ) -> t.Optional[exp.Expression]:
788        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
789        if isinstance(expression, str):
790            return exp.Literal.string(
791                # the time formats are quoted
792                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
793            )
794
795        if expression and expression.is_string:
796            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
797
798        return expression

Converts a time format in this dialect to its equivalent Python strftime format.
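For example (an illustrative sketch that assumes Snowflake's default TIME_MAPPING; note the input keeps its surrounding quotes, which the method strips):

>>> Dialect.get_or_raise("snowflake").format_time("'yyyy-mm-dd'").sql()
"'%Y-%m-%d'"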

settings
def normalize_identifier(self, expression: ~E) -> ~E:
818    def normalize_identifier(self, expression: E) -> E:
819        """
820        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
821
822        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
823        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
824        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
825        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
826
827        There are also dialects like Spark, which are case-insensitive even when quotes are
828        present, and dialects like MySQL, whose resolution rules match those employed by the
829        underlying operating system, for example they may always be case-sensitive in Linux.
830
831        Finally, the normalization behavior of some engines can even be controlled through flags,
832        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
833
834        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
835        that it can analyze queries in the optimizer and successfully capture their semantics.
836        """
837        if (
838            isinstance(expression, exp.Identifier)
839            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
840            and (
841                not expression.quoted
842                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
843            )
844        ):
845            expression.set(
846                "this",
847                (
848                    expression.this.upper()
849                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
850                    else expression.this.lower()
851                ),
852            )
853
854        return expression

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
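A minimal sketch of the behaviors described above, assuming the default normalization strategies of these dialects:

>>> from sqlglot import exp
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
'foo'
>>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
'FOO'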

def case_sensitive(self, text: str) -> bool:
856    def case_sensitive(self, text: str) -> bool:
857        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
858        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
859            return False
860
861        unsafe = (
862            str.islower
863            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
864            else str.isupper
865        )
866        return any(unsafe(char) for char in text)

Checks if text contains any case sensitive characters, based on the dialect's rules.
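Illustratively, under Snowflake's UPPERCASE strategy the lowercase characters are the "unsafe" ones, and vice versa for lowercasing dialects such as Postgres:

>>> Dialect.get_or_raise("snowflake").case_sensitive("foo")
True
>>> Dialect.get_or_raise("postgres").case_sensitive("foo")
False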

def can_identify(self, text: str, identify: str | bool = 'safe') -> bool:
868    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
869        """Checks if text can be identified given an identify option.
870
871        Args:
872            text: The text to check.
873            identify:
874                `"always"` or `True`: Always returns `True`.
875                `"safe"`: Only returns `True` if the identifier is case-insensitive.
876
877        Returns:
878            Whether the given text can be identified.
879        """
880        if identify is True or identify == "always":
881            return True
882
883        if identify == "safe":
884            return not self.case_sensitive(text)
885
886        return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: Always returns True. "safe": Only returns True if the identifier is case-insensitive.
Returns:

Whether the given text can be identified.
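For instance, a sketch building on case_sensitive above (Postgres lowercases unquoted identifiers, so mixed case is unsafe):

>>> d = Dialect.get_or_raise("postgres")
>>> d.can_identify("foo", "safe")
True
>>> d.can_identify("FoO", "safe")
False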

def quote_identifier(self, expression: ~E, identify: bool = True) -> ~E:
888    def quote_identifier(self, expression: E, identify: bool = True) -> E:
889        """
890        Adds quotes to a given identifier.
891
892        Args:
893            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
894            identify: If set to `False`, the quotes will only be added if the identifier is deemed
895                "unsafe", with respect to its characters and this dialect's normalization strategy.
896        """
897        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
898            name = expression.this
899            expression.set(
900                "quoted",
901                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
902            )
903
904        return expression

Adds quotes to a given identifier.

Arguments:
  • expression: The expression of interest. If it's not an Identifier, this method is a no-op.
  • identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
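A small sketch (assuming DuckDB's default lowercase normalization): with identify=False, quotes are only added when the identifier would otherwise change meaning:

>>> from sqlglot import exp
>>> Dialect.get_or_raise("duckdb").quote_identifier(exp.to_identifier("foo"), identify=False).sql()
'foo'
>>> Dialect.get_or_raise("duckdb").quote_identifier(exp.to_identifier("FoO"), identify=False).sql()
'"FoO"'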
def to_json_path( self, path: Optional[sqlglot.expressions.Expression]) -> Optional[sqlglot.expressions.Expression]:
906    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
907        if isinstance(path, exp.Literal):
908            path_text = path.name
909            if path.is_number:
910                path_text = f"[{path_text}]"
911            try:
912                return parse_json_path(path_text, self)
913            except ParseError as e:
914                if self.STRICT_JSON_PATH_SYNTAX:
915                    logger.warning(f"Invalid JSON path syntax. {str(e)}")
916
917        return path
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
919    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
920        return self.parser(**opts).parse(self.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
922    def parse_into(
923        self, expression_type: exp.IntoType, sql: str, **opts
924    ) -> t.List[t.Optional[exp.Expression]]:
925        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
def generate( self, expression: sqlglot.expressions.Expression, copy: bool = True, **opts) -> str:
927    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
928        return self.generator(**opts).generate(expression, copy=copy)
def transpile(self, sql: str, **opts) -> List[str]:
930    def transpile(self, sql: str, **opts) -> t.List[str]:
931        return [
932            self.generate(expression, copy=False, **opts) if expression else ""
933            for expression in self.parse(sql)
934        ]
def tokenize(self, sql: str) -> List[sqlglot.tokens.Token]:
936    def tokenize(self, sql: str) -> t.List[Token]:
937        return self.tokenizer.tokenize(sql)
tokenizer: sqlglot.tokens.Tokenizer
939    @property
940    def tokenizer(self) -> Tokenizer:
941        return self.tokenizer_class(dialect=self)
jsonpath_tokenizer: sqlglot.jsonpath.JSONPathTokenizer
943    @property
944    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
945        return self.jsonpath_tokenizer_class(dialect=self)
def parser(self, **opts) -> sqlglot.parser.Parser:
947    def parser(self, **opts) -> Parser:
948        return self.parser_class(dialect=self, **opts)
def generator(self, **opts) -> sqlglot.generator.Generator:
950    def generator(self, **opts) -> Generator:
951        return self.generator_class(dialect=self, **opts)
DialectType = typing.Union[str, Dialect, typing.Type[Dialect], NoneType]
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
957def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
958    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
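This is the workhorse for one-to-one function renames in dialect generators. An illustrative TRANSFORMS entry, following the pattern used throughout the bundled dialects:

    TRANSFORMS = {
        **Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
    }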
@unsupported_args('accuracy')
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
961@unsupported_args("accuracy")
962def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
963    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( name: str = 'IF', false_value: Union[str, sqlglot.expressions.Expression, NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.If], str]:
966def if_sql(
967    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
968) -> t.Callable[[Generator, exp.If], str]:
969    def _if_sql(self: Generator, expression: exp.If) -> str:
970        return self.func(
971            name,
972            expression.this,
973            expression.args.get("true"),
974            expression.args.get("false") or false_value,
975        )
976
977    return _if_sql
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]) -> str:
980def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
981    this = expression.this
982    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
983        this.replace(exp.cast(this, exp.DataType.Type.JSON))
984
985    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
988def inline_array_sql(self: Generator, expression: exp.Array) -> str:
989    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
def inline_array_unless_query( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
992def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
993    elem = seq_get(expression.expressions, 0)
994    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
995        return self.func("ARRAY", elem)
996    return inline_array_sql(self, expression)
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
 999def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
1000    return self.like_sql(
1001        exp.Like(
1002            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
1003        )
1004    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
1007def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
1008    zone = self.sql(expression, "this")
1009    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
1012def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
1013    if expression.args.get("recursive"):
1014        self.unsupported("Recursive CTEs are unsupported")
1015        expression.args["recursive"] = False
1016    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
1019def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
1020    n = self.sql(expression, "this")
1021    d = self.sql(expression, "expression")
1022    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
1025def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1026    self.unsupported("TABLESAMPLE unsupported")
1027    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
1030def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1031    self.unsupported("PIVOT unsupported")
1032    return ""
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
1035def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1036    return self.cast_sql(expression)
def no_comment_column_constraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CommentColumnConstraint) -> str:
1039def no_comment_column_constraint_sql(
1040    self: Generator, expression: exp.CommentColumnConstraint
1041) -> str:
1042    self.unsupported("CommentColumnConstraint unsupported")
1043    return ""
def no_map_from_entries_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.MapFromEntries) -> str:
1046def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1047    self.unsupported("MAP_FROM_ENTRIES unsupported")
1048    return ""
def property_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Property) -> str:
1051def property_sql(self: Generator, expression: exp.Property) -> str:
1052    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition, generate_instance: bool = False, str_position_func_name: str = 'STRPOS') -> str:
1055def str_position_sql(
1056    self: Generator,
1057    expression: exp.StrPosition,
1058    generate_instance: bool = False,
1059    str_position_func_name: str = "STRPOS",
1060) -> str:
1061    this = self.sql(expression, "this")
1062    substr = self.sql(expression, "substr")
1063    position = self.sql(expression, "position")
1064    instance = expression.args.get("instance") if generate_instance else None
1065    position_offset = ""
1066
1067    if position:
1068        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1069        this = self.func("SUBSTR", this, position)
1070        position_offset = f" + {position} - 1"
1071
1072    return self.func(str_position_func_name, this, substr, instance) + position_offset
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
1075def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1076    return (
1077        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1078    )
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
1081def var_map_sql(
1082    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1083) -> str:
1084    keys = expression.args["keys"]
1085    values = expression.args["values"]
1086
1087    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1088        self.unsupported("Cannot convert array columns into map.")
1089        return self.func(map_func_name, keys, values)
1090
1091    args = []
1092    for key, value in zip(keys.expressions, values.expressions):
1093        args.append(self.sql(key))
1094        args.append(self.sql(value))
1095
1096    return self.func(map_func_name, *args)
def build_formatted_time( exp_class: Type[~E], dialect: str, default: Union[str, bool, NoneType] = None) -> Callable[[List], ~E]:
1099def build_formatted_time(
1100    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1101) -> t.Callable[[t.List], E]:
1102    """Helper used for time expressions.
1103
1104    Args:
1105        exp_class: the expression class to instantiate.
1106        dialect: target sql dialect.
1107        default: the default format, True being time.
1108
1109    Returns:
1110        A callable that can be used to return the appropriately formatted time expression.
1111    """
1112
1113    def _builder(args: t.List):
1114        return exp_class(
1115            this=seq_get(args, 0),
1116            format=Dialect[dialect].format_time(
1117                seq_get(args, 1)
1118                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1119            ),
1120        )
1121
1122    return _builder

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format, True being time.
Returns:

A callable that can be used to return the appropriately formatted time expression.
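An illustrative registration in a dialect parser's FUNCTIONS mapping (mirroring how the bundled MySQL dialect wires it up):

    FUNCTIONS = {
        **Parser.FUNCTIONS,
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "mysql"),
    }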

def time_format( dialect: Union[str, Dialect, Type[Dialect], NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.UnixToStr | sqlglot.expressions.StrToUnix], Optional[str]]:
1125def time_format(
1126    dialect: DialectType = None,
1127) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1128    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1129        """
1130        Returns the time format for a given expression, unless it's equivalent
1131        to the default time format of the dialect of interest.
1132        """
1133        time_format = self.format_time(expression)
1134        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1135
1136    return _time_format
def build_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None, default_unit: Optional[str] = 'DAY') -> Callable[[List], ~E]:
1139def build_date_delta(
1140    exp_class: t.Type[E],
1141    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1142    default_unit: t.Optional[str] = "DAY",
1143) -> t.Callable[[t.List], E]:
1144    def _builder(args: t.List) -> E:
1145        unit_based = len(args) == 3
1146        this = args[2] if unit_based else seq_get(args, 0)
1147        unit = None
1148        if unit_based or default_unit:
1149            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1150            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1151        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1152
1153    return _builder
def build_date_delta_with_interval(expression_class: Type[~E]) -> Callable[[List], Optional[~E]]:
1156def build_date_delta_with_interval(
1157    expression_class: t.Type[E],
1158) -> t.Callable[[t.List], t.Optional[E]]:
1159    def _builder(args: t.List) -> t.Optional[E]:
1160        if len(args) < 2:
1161            return None
1162
1163        interval = args[1]
1164
1165        if not isinstance(interval, exp.Interval):
1166            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1167
1168        return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval))
1169
1170    return _builder
def date_trunc_to_time( args: List) -> sqlglot.expressions.DateTrunc | sqlglot.expressions.TimestampTrunc:
1173def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1174    unit = seq_get(args, 0)
1175    this = seq_get(args, 1)
1176
1177    if isinstance(this, exp.Cast) and this.is_type("date"):
1178        return exp.DateTrunc(unit=unit, this=this)
1179    return exp.TimestampTrunc(this=this, unit=unit)
def date_add_interval_sql( data_type: str, kind: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
1182def date_add_interval_sql(
1183    data_type: str, kind: str
1184) -> t.Callable[[Generator, exp.Expression], str]:
1185    def func(self: Generator, expression: exp.Expression) -> str:
1186        this = self.sql(expression, "this")
1187        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1188        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1189
1190    return func
def timestamptrunc_sql( zone: bool = False) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.TimestampTrunc], str]:
1193def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1194    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1195        args = [unit_to_str(expression), expression.this]
1196        if zone:
1197            args.append(expression.args.get("zone"))
1198        return self.func("DATE_TRUNC", *args)
1199
1200    return _timestamptrunc_sql
def no_timestamp_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Timestamp) -> str:
1203def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1204    zone = expression.args.get("zone")
1205    if not zone:
1206        from sqlglot.optimizer.annotate_types import annotate_types
1207
1208        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1209        return self.sql(exp.cast(expression.this, target_type))
1210    if zone.name.lower() in TIMEZONES:
1211        return self.sql(
1212            exp.AtTimeZone(
1213                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1214                zone=zone,
1215            )
1216        )
1217    return self.func("TIMESTAMP", expression.this, zone)
def no_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Time) -> str:
1220def no_time_sql(self: Generator, expression: exp.Time) -> str:
1221    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1222    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1223    expr = exp.cast(
1224        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1225    )
1226    return self.sql(expr)
def no_datetime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Datetime) -> str:
1229def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1230    this = expression.this
1231    expr = expression.expression
1232
1233    if expr.name.lower() in TIMEZONES:
1234        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1235        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1236        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1237        return self.sql(this)
1238
1239    this = exp.cast(this, exp.DataType.Type.DATE)
1240    expr = exp.cast(expr, exp.DataType.Type.TIME)
1241
1242    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
def locate_to_strposition(args: List) -> sqlglot.expressions.Expression:
1245def locate_to_strposition(args: t.List) -> exp.Expression:
1246    return exp.StrPosition(
1247        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1248    )
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
1251def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1252    return self.func(
1253        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1254    )
def left_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Left) -> str:
1257def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1258    return self.sql(
1259        exp.Substring(
1260            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1261        )
1262    )
def right_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Right) -> str:
1265def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
1266    return self.sql(
1267        exp.Substring(
1268            this=expression.this,
1269            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1270        )
1271    )
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime, include_precision: bool = False) -> str:
1274def timestrtotime_sql(
1275    self: Generator,
1276    expression: exp.TimeStrToTime,
1277    include_precision: bool = False,
1278) -> str:
1279    datatype = exp.DataType.build(
1280        exp.DataType.Type.TIMESTAMPTZ
1281        if expression.args.get("zone")
1282        else exp.DataType.Type.TIMESTAMP
1283    )
1284
1285    if isinstance(expression.this, exp.Literal) and include_precision:
1286        precision = subsecond_precision(expression.this.name)
1287        if precision > 0:
1288            datatype = exp.DataType.build(
1289                datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))]
1290            )
1291
1292    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
1295def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1296    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
def encode_decode_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression, name: str, replace: bool = True) -> str:
1300def encode_decode_sql(
1301    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1302) -> str:
1303    charset = expression.args.get("charset")
1304    if charset and charset.name.lower() != "utf-8":
1305        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1306
1307    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
def min_or_least( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Min) -> str:
1310def min_or_least(self: Generator, expression: exp.Min) -> str:
1311    name = "LEAST" if expression.expressions else "MIN"
1312    return rename_func(name)(self, expression)
def max_or_greatest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Max) -> str:
1315def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1316    name = "GREATEST" if expression.expressions else "MAX"
1317    return rename_func(name)(self, expression)
def count_if_to_sum( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CountIf) -> str:
1320def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1321    cond = expression.this
1322
1323    if isinstance(expression.this, exp.Distinct):
1324        cond = expression.this.expressions[0]
1325        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1326
1327    return self.func("sum", exp.func("if", cond, 1, 0))
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
1330def trim_sql(self: Generator, expression: exp.Trim) -> str:
1331    target = self.sql(expression, "this")
1332    trim_type = self.sql(expression, "position")
1333    remove_chars = self.sql(expression, "expression")
1334    collation = self.sql(expression, "collation")
1335
1336    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1337    if not remove_chars:
1338        return self.trim_sql(expression)
1339
1340    trim_type = f"{trim_type} " if trim_type else ""
1341    remove_chars = f"{remove_chars} " if remove_chars else ""
1342    from_part = "FROM " if trim_type or remove_chars else ""
1343    collation = f" COLLATE {collation}" if collation else ""
1344    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
def str_to_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression) -> str:
1347def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1348    return self.func("STRPTIME", expression.this, self.format_time(expression))
def concat_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Concat) -> str:
1351def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1352    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
def concat_ws_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ConcatWs) -> str:
1355def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1356    delim, *rest_args = expression.expressions
1357    return self.sql(
1358        reduce(
1359            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1360            rest_args,
1361        )
1362    )
@unsupported_args('position', 'occurrence', 'parameters')
def regexp_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpExtract) -> str:
1365@unsupported_args("position", "occurrence", "parameters")
1366def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1367    group = expression.args.get("group")
1368
1369    # Do not render group if it's the default value for this dialect
1370    if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP):
1371        group = None
1372
1373    return self.func("REGEXP_EXTRACT", expression.this, expression.expression, group)
@unsupported_args('position', 'occurrence', 'modifiers')
def regexp_replace_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpReplace) -> str:
1376@unsupported_args("position", "occurrence", "modifiers")
1377def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1378    return self.func(
1379        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1380    )
def pivot_column_names( aggregations: List[sqlglot.expressions.Expression], dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> List[str]:
1383def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1384    names = []
1385    for agg in aggregations:
1386        if isinstance(agg, exp.Alias):
1387            names.append(agg.alias)
1388        else:
1389            """
1390            This case corresponds to aggregations without aliases being used as suffixes
1391            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1392            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1393            Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes).
1394            """
1395            agg_all_unquoted = agg.transform(
1396                lambda node: (
1397                    exp.Identifier(this=node.name, quoted=False)
1398                    if isinstance(node, exp.Identifier)
1399                    else node
1400                )
1401            )
1402            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1403
1404    return names
def binary_from_function(expr_type: Type[~B]) -> Callable[[List], ~B]:
1407def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1408    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
def build_timestamp_trunc(args: List) -> sqlglot.expressions.TimestampTrunc:
1412def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1413    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
def any_value_to_max_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.AnyValue) -> str:
1416def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1417    return self.func("MAX", expression.this)
def bool_xor_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Xor) -> str:
1420def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1421    a = self.sql(expression.left)
1422    b = self.sql(expression.right)
1423    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
def is_parse_json(expression: sqlglot.expressions.Expression) -> bool:
1426def is_parse_json(expression: exp.Expression) -> bool:
1427    return isinstance(expression, exp.ParseJSON) or (
1428        isinstance(expression, exp.Cast) and expression.is_type("json")
1429    )
def isnull_to_is_null(args: List) -> sqlglot.expressions.Expression:
1432def isnull_to_is_null(args: t.List) -> exp.Expression:
1433    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
def generatedasidentitycolumnconstraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
1436def generatedasidentitycolumnconstraint_sql(
1437    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1438) -> str:
1439    start = self.sql(expression, "start") or "1"
1440    increment = self.sql(expression, "increment") or "1"
1441    return f"IDENTITY({start}, {increment})"
def arg_max_or_min_no_count( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.ArgMax | sqlglot.expressions.ArgMin], str]:
1444def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1445    @unsupported_args("count")
1446    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1447        return self.func(name, expression.this, expression.expression)
1448
1449    return _arg_max_or_min_sql
def ts_or_ds_add_cast( expression: sqlglot.expressions.TsOrDsAdd) -> sqlglot.expressions.TsOrDsAdd:
1452def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1453    this = expression.this.copy()
1454
1455    return_type = expression.return_type
1456    if return_type.is_type(exp.DataType.Type.DATE):
1457        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1458        # can truncate timestamp strings, because some dialects can't cast them to DATE
1459        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1460
1461    expression.this.replace(exp.cast(this, return_type))
1462    return expression
def date_delta_sql( name: str, cast: bool = False) -> Callable[[sqlglot.generator.Generator, Union[sqlglot.expressions.DateAdd, sqlglot.expressions.TsOrDsAdd, sqlglot.expressions.DateDiff, sqlglot.expressions.TsOrDsDiff]], str]:
1465def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1466    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1467        if cast and isinstance(expression, exp.TsOrDsAdd):
1468            expression = ts_or_ds_add_cast(expression)
1469
1470        return self.func(
1471            name,
1472            unit_to_var(expression),
1473            expression.expression,
1474            expression.this,
1475        )
1476
1477    return _delta_sql
def unit_to_str( expression: sqlglot.expressions.Expression, default: str = 'DAY') -> Optional[sqlglot.expressions.Expression]:
1480def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1481    unit = expression.args.get("unit")
1482
1483    if isinstance(unit, exp.Placeholder):
1484        return unit
1485    if unit:
1486        return exp.Literal.string(unit.name)
1487    return exp.Literal.string(default) if default else None
def unit_to_var( expression: sqlglot.expressions.Expression, default: str = 'DAY') -> Optional[sqlglot.expressions.Expression]:
1490def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1491    unit = expression.args.get("unit")
1492
1493    if isinstance(unit, (exp.Var, exp.Placeholder)):
1494        return unit
1495    return exp.Var(this=default) if default else None
def map_date_part( part, dialect: Union[str, Dialect, Type[Dialect], NoneType] = Dialect):
1510def map_date_part(part, dialect: DialectType = Dialect):
1511    mapped = (
1512        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1513    )
1514    return exp.var(mapped) if mapped else part
def no_last_day_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.LastDay) -> str:
1517def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1518    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1519    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1520    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1521
1522    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
def merge_without_target_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Merge) -> str:
1525def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1526    """Remove table refs from columns in when statements."""
1527    alias = expression.this.args.get("alias")
1528
1529    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1530        return self.dialect.normalize_identifier(identifier).name if identifier else None
1531
1532    targets = {normalize(expression.this.this)}
1533
1534    if alias:
1535        targets.add(normalize(alias.this))
1536
1537    for when in expression.expressions:
1538        # only remove the target names from the THEN clause
1539        # they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
1540        # ref: https://github.com/TobikoData/sqlmesh/issues/2934
1541        then = when.args.get("then")
1542        if then:
1543            then.transform(
1544                lambda node: (
1545                    exp.column(node.this)
1546                    if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1547                    else node
1548                ),
1549                copy=False,
1550            )
1551
1552    return self.merge_sql(expression)

Remove table refs from columns in when statements.

def build_json_extract_path( expr_type: Type[~F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False) -> Callable[[List], ~F]:
1555def build_json_extract_path(
1556    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1557) -> t.Callable[[t.List], F]:
1558    def _builder(args: t.List) -> F:
1559        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1560        for arg in args[1:]:
1561            if not isinstance(arg, exp.Literal):
1562                # We use the fallback parser because we can't really transpile non-literals safely
1563                return expr_type.from_arg_list(args)
1564
1565            text = arg.name
1566            if is_int(text):
1567                index = int(text)
1568                segments.append(
1569                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1570                )
1571            else:
1572                segments.append(exp.JSONPathKey(this=text))
1573
1574        # This is done to avoid failing in the expression validator due to the arg count
1575        del args[2:]
1576        return expr_type(
1577            this=seq_get(args, 0),
1578            expression=exp.JSONPath(expressions=segments),
1579            only_json_types=arrow_req_json_type,
1580        )
1581
1582    return _builder
def json_extract_segments( name: str, quoted_index: bool = True, op: Optional[str] = None) -> Callable[[sqlglot.generator.Generator, Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]], str]:
def json_extract_segments(
    name: str, quoted_index: bool = True, op: t.Optional[str] = None
) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
        path = expression.expression
        if not isinstance(path, exp.JSONPath):
            return rename_func(name)(self, expression)

        escape = path.args.get("escape")

        segments = []
        for segment in path.expressions:
            path = self.sql(segment)
            if path:
                if isinstance(segment, exp.JSONPathPart) and (
                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
                ):
                    if escape:
                        path = self.escape_str(path)

                    # Wrap the segment in the dialect's string delimiters
                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"

                segments.append(path)

        if op:
            # Render as an operator chain, e.g. col -> 'a' -> 'b'
            return f" {op} ".join([self.sql(expression.this), *segments])
        return self.func(name, expression.this, *segments)

    return _json_extract_segments
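
A hypothetical wiring into a custom dialect's generator; JSON_EXTRACT_PATH is just one plausible function name:

from sqlglot import exp
from sqlglot.dialects.dialect import json_extract_segments
from sqlglot.dialects.postgres import Postgres

class MyPostgres(Postgres):
    class Generator(Postgres.Generator):
        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH"),
        }
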
def json_path_key_only_name( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONPathKey) -> str:
def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
    if isinstance(expression.this, exp.JSONPathWildcard):
        self.unsupported("Unsupported wildcard in JSONPathKey expression")

    return expression.name
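
This is intended as a generator transform for exp.JSONPathKey in dialects whose JSON path syntax accepts only bare key names; a hypothetical wiring:

from sqlglot import exp
from sqlglot.dialects.dialect import json_path_key_only_name
from sqlglot.dialects.mysql import MySQL

class KeyOnlyPaths(MySQL):
    class Generator(MySQL.Generator):
        TRANSFORMS = {
            **MySQL.Generator.TRANSFORMS,
            exp.JSONPathKey: json_path_key_only_name,
        }
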
def filter_array_using_unnest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ArrayFilter) -> str:
def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
    cond = expression.expression
    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
        alias = cond.expressions[0]
        cond = cond.this
    elif isinstance(cond, exp.Predicate):
        alias = "_u"
    else:
        self.unsupported("Unsupported filter condition")
        return ""

    # Rewrite FILTER(arr, x -> cond) as ARRAY(SELECT x FROM UNNEST(arr) ... WHERE cond)
    unnest = exp.Unnest(expressions=[expression.this])
    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
    return self.sql(exp.Array(expressions=[filtered]))
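
The effect, sketched via transpilation (the dialect wiring is an assumption; BigQuery is one engine without a native array FILTER function):

import sqlglot

# LIST_FILTER(xs, x -> x > 0) parses to exp.ArrayFilter; a generator using this
# helper emits roughly ARRAY(SELECT x FROM UNNEST(xs) AS x WHERE x > 0).
print(sqlglot.transpile("SELECT LIST_FILTER(xs, x -> x > 0)", read="duckdb", write="bigquery")[0])
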
def to_number_with_nls_param( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ToNumber) -> str:
def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
    return self.func(
        "TO_NUMBER",
        expression.this,
        expression.args.get("format"),
        expression.args.get("nlsparam"),
    )
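
For example, an Oracle-style TO_NUMBER call with an NLS parameter keeps all three arguments (a hedged round trip; self.func drops the optional arguments when they are absent):

import sqlglot

sql = "SELECT TO_NUMBER(x, '9G999D99', 'NLS_NUMERIC_CHARACTERS = '',.''')"
print(sqlglot.transpile(sql, read="oracle", write="oracle")[0])
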
def build_default_decimal_type( precision: Optional[int] = None, scale: Optional[int] = None) -> Callable[[sqlglot.expressions.DataType], sqlglot.expressions.DataType]:
def build_default_decimal_type(
    precision: t.Optional[int] = None, scale: t.Optional[int] = None
) -> t.Callable[[exp.DataType], exp.DataType]:
    def _builder(dtype: exp.DataType) -> exp.DataType:
        if dtype.expressions or precision is None:
            return dtype

        params = f"{precision}{f', {scale}' if scale is not None else ''}"
        return exp.DataType.build(f"DECIMAL({params})")

    return _builder
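
For instance:

from sqlglot import exp
from sqlglot.dialects.dialect import build_default_decimal_type

builder = build_default_decimal_type(precision=38, scale=9)
print(builder(exp.DataType.build("DECIMAL")).sql())         # DECIMAL(38, 9)
print(builder(exp.DataType.build("DECIMAL(10, 2)")).sql())  # unchanged: DECIMAL(10, 2)
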
def build_timestamp_from_parts(args: List) -> sqlglot.expressions.Func:
def build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)
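
For example, assuming a dialect that wires this builder in (Snowflake's TIMESTAMP_FROM_PARTS is the motivating case):

from sqlglot import exp, parse_one

full = parse_one("SELECT TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 30, 0)", read="snowflake")
pair = parse_one("SELECT TIMESTAMP_FROM_PARTS(d, t)", read="snowflake")
# The 6-argument form becomes exp.TimestampFromParts; the 2-argument
# (date, time) form stays exp.Anonymous, so it only round-trips as-is.
print(full.find(exp.TimestampFromParts) is not None)  # True
print(pair.find(exp.Anonymous) is not None)           # True
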
def sha256_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SHA2) -> str:
def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
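
So SHA2(x, 256) becomes SHA256(x) in dialects that adopt this helper, e.g. (the write dialect is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT SHA2(x, 256)", read="snowflake", write="duckdb")[0])
# e.g. SELECT SHA256(x)
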
def sequence_sql( self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.GenerateSeries, sqlglot.expressions.GenerateDateArray]) -> str:
def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
    start = expression.args.get("start")
    end = expression.args.get("end")
    step = expression.args.get("step")

    if isinstance(start, exp.Cast):
        target_type = start.to
    elif isinstance(end, exp.Cast):
        target_type = end.to
    else:
        target_type = None

    if start and end and target_type and target_type.is_type("date", "timestamp"):
        # Propagate an explicit DATE/TIMESTAMP cast on one endpoint to the
        # other, so SEQUENCE receives consistently typed bounds
        if isinstance(start, exp.Cast) and target_type is start.to:
            end = exp.cast(end, target_type)
        else:
            start = exp.cast(start, target_type)

    return self.func("SEQUENCE", start, end, step)
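
For instance, when only one endpoint carries an explicit cast (a hedged sketch; Presto/Trino are typical SEQUENCE targets):

import sqlglot

sql = "SELECT GENERATE_SERIES(CAST('2024-01-01' AS DATE), end_date)"
print(sqlglot.transpile(sql, read="postgres", write="trino")[0])
# Roughly: SELECT SEQUENCE(CAST('2024-01-01' AS DATE), CAST(end_date AS DATE))
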
def build_regexp_extract( args: List, dialect: Dialect) -> sqlglot.expressions.RegexpExtract:
def build_regexp_extract(args: t.List, dialect: Dialect) -> exp.RegexpExtract:
    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        group=seq_get(args, 2) or exp.Literal.number(dialect.REGEXP_EXTRACT_DEFAULT_GROUP),
    )
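
The dialect hook matters because engines disagree on the default capture group (whole match vs first group); a minimal sketch, assuming the reading dialect routes REGEXP_EXTRACT through this builder:

from sqlglot import exp, parse_one

node = parse_one("SELECT REGEXP_EXTRACT(s, 'a(b)')", read="duckdb")
extract = node.find(exp.RegexpExtract)
# With no explicit third argument, the group is materialized from
# dialect.REGEXP_EXTRACT_DEFAULT_GROUP as a literal.
print(extract.args.get("group"))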