sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder

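# Illustrative only, not part of the module: Snowflake's DATEADD/TIMEADD take
# (unit, increment, expr), so the builders above map args[2] -> this and
# args[1] -> expression. A minimal sketch (exact output depends on the sqlglot
# version):
#
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT DATEADD(day, 5, d)", read="snowflake", write="duckdb")[0]
# e.g. "SELECT d + INTERVAL '5' DAY"
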
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc

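# Illustrative only, not part of the module: DIV0(a, b) is parsed into an IF so
# it can be transpiled; round-tripping through Snowflake renders it as IFF.
# A minimal sketch (exact output depends on the sqlglot version):
#
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0]
# e.g. "SELECT IFF(b = 0 AND NOT a IS NULL, 0, a / b)"
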
def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )

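# Illustrative only, not part of the module: GENERATE_DATE_ARRAY reaches this
# dialect from sources like BigQuery, and the helpers above rewrite it in terms
# of ARRAY_GENERATE_RANGE + DATEADD. A minimal sketch:
#
# >>> import sqlglot
# >>> sqlglot.transpile(
# ...     "SELECT d FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')) AS d",
# ...     read="bigquery",
# ...     write="snowflake",
# ... )
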
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)

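# Illustrative only, not part of the module: because a group of 0 is treated as
# "no group", REGEXP_SUBSTR with only two arguments round-trips without the
# chain of default position/occurrence/parameters values. Sketch:
#
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT REGEXP_SUBSTR(s, 'a(b)')", read="snowflake", write="snowflake")[0]
# "SELECT REGEXP_SUBSTR(s, 'a(b)')"  (defaults are omitted because group is 0)
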
def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression

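# Illustrative only, not part of the module: when a BigQuery-style UNNEST of an
# array of structs is transpiled, the pass above infers the struct keys and
# qualifies bare columns with the unnest alias so they resolve against the
# flattened value; _eliminate_dot_variant_lookup below is the companion rewrite
# for alias.key lookups. Sketch:
#
# >>> import sqlglot
# >>> sqlglot.transpile(
# ...     "SELECT v.a FROM UNNEST([STRUCT(1 AS a)]) AS v",
# ...     read="bigquery",
# ...     write="snowflake",
# ... )
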
def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and (unnest_alias.args.get("column_only") or not unnest_alias.this)
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

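    # Illustrative only, not part of the module: TIME_MAPPING converts Snowflake
    # format tokens to strftime-style ones during transpilation. Sketch (exact
    # output depends on the sqlglot version):
    #
    # >>> import sqlglot
    # >>> sqlglot.transpile("SELECT TO_CHAR(ts, 'YYYY-MM-DD HH24:MI:SS')", read="snowflake", write="duckdb")[0]
    # e.g. "SELECT STRFTIME(ts, '%Y-%m-%d %H:%M:%S')"
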
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

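        # Illustrative only, not part of the module: COLON_IS_VARIANT_EXTRACT makes
        # `col:path.to.field` parse as a VARIANT extraction rather than a cast.
        # Sketch (exact output depends on the sqlglot version and target dialect):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT v:a.b FROM t", read="snowflake", write="duckdb")[0]
        # e.g. "SELECT v -> '$.a.b' FROM t"
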
"SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 615 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 616 "TIMEADD": _build_date_time_add(exp.TimeAdd), 617 "TIMEDIFF": _build_datediff, 618 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 619 "TIMESTAMPDIFF": _build_datediff, 620 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 621 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 622 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 623 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 624 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 625 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 626 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 627 "TRY_TO_TIMESTAMP": _build_datetime( 628 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 629 ), 630 "TO_CHAR": build_timetostr_or_tochar, 631 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 632 "TO_NUMBER": lambda args: exp.ToNumber( 633 this=seq_get(args, 0), 634 format=seq_get(args, 1), 635 precision=seq_get(args, 2), 636 scale=seq_get(args, 3), 637 ), 638 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 639 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 640 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 641 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 642 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 643 "TO_VARCHAR": exp.ToChar.from_arg_list, 644 "ZEROIFNULL": _build_if_from_zeroifnull, 645 } 646 647 FUNCTION_PARSERS = { 648 **parser.Parser.FUNCTION_PARSERS, 649 "DATE_PART": lambda self: self._parse_date_part(), 650 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 651 "LISTAGG": lambda self: self._parse_string_agg(), 652 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 653 } 654 FUNCTION_PARSERS.pop("TRIM") 655 656 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 657 658 RANGE_PARSERS = { 659 **parser.Parser.RANGE_PARSERS, 660 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 661 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 662 } 663 664 ALTER_PARSERS = { 665 **parser.Parser.ALTER_PARSERS, 666 "UNSET": lambda self: self.expression( 667 exp.Set, 668 tag=self._match_text_seq("TAG"), 669 expressions=self._parse_csv(self._parse_id_var), 670 unset=True, 671 ), 672 } 673 674 STATEMENT_PARSERS = { 675 **parser.Parser.STATEMENT_PARSERS, 676 TokenType.GET: lambda self: self._parse_get(), 677 TokenType.PUT: lambda self: self._parse_put(), 678 TokenType.SHOW: lambda self: self._parse_show(), 679 } 680 681 PROPERTY_PARSERS = { 682 **parser.Parser.PROPERTY_PARSERS, 683 "CREDENTIALS": lambda self: self._parse_credentials_property(), 684 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 685 "LOCATION": lambda self: self._parse_location_property(), 686 "TAG": lambda self: self._parse_tag(), 687 "USING": lambda self: self._match_text_seq("TEMPLATE") 688 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 689 } 690 691 TYPE_CONVERTERS = { 692 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 693 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 694 } 695 696 SHOW_PARSERS = { 697 "DATABASES": _show_parser("DATABASES"), 698 "TERSE DATABASES": 
_show_parser("DATABASES"), 699 "SCHEMAS": _show_parser("SCHEMAS"), 700 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 701 "OBJECTS": _show_parser("OBJECTS"), 702 "TERSE OBJECTS": _show_parser("OBJECTS"), 703 "TABLES": _show_parser("TABLES"), 704 "TERSE TABLES": _show_parser("TABLES"), 705 "VIEWS": _show_parser("VIEWS"), 706 "TERSE VIEWS": _show_parser("VIEWS"), 707 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 708 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 709 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 710 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 711 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 712 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 713 "SEQUENCES": _show_parser("SEQUENCES"), 714 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 715 "STAGES": _show_parser("STAGES"), 716 "COLUMNS": _show_parser("COLUMNS"), 717 "USERS": _show_parser("USERS"), 718 "TERSE USERS": _show_parser("USERS"), 719 "FILE FORMATS": _show_parser("FILE FORMATS"), 720 "FUNCTIONS": _show_parser("FUNCTIONS"), 721 "PROCEDURES": _show_parser("PROCEDURES"), 722 "WAREHOUSES": _show_parser("WAREHOUSES"), 723 } 724 725 CONSTRAINT_PARSERS = { 726 **parser.Parser.CONSTRAINT_PARSERS, 727 "WITH": lambda self: self._parse_with_constraint(), 728 "MASKING": lambda self: self._parse_with_constraint(), 729 "PROJECTION": lambda self: self._parse_with_constraint(), 730 "TAG": lambda self: self._parse_with_constraint(), 731 } 732 733 STAGED_FILE_SINGLE_TOKENS = { 734 TokenType.DOT, 735 TokenType.MOD, 736 TokenType.SLASH, 737 } 738 739 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 740 741 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 742 743 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 744 745 LAMBDAS = { 746 **parser.Parser.LAMBDAS, 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 754 ), 755 } 756 757 def _parse_use(self) -> exp.Use: 758 if self._match_text_seq("SECONDARY", "ROLES"): 759 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 760 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 761 return self.expression( 762 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 763 ) 764 765 return super()._parse_use() 766 767 def _negate_range( 768 self, this: t.Optional[exp.Expression] = None 769 ) -> t.Optional[exp.Expression]: 770 if not this: 771 return this 772 773 query = this.args.get("query") 774 if isinstance(this, exp.In) and isinstance(query, exp.Query): 775 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 776 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 777 # which can produce different results (most likely a SnowFlake bug). 
        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

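        # Illustrative only, not part of the module: EPOCH date parts parse into a
        # Unix-time expression (scaled for milli/micro/nanoseconds), which this
        # dialect's generator renders via EXTRACT. Sketch (output depends on the
        # sqlglot version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT DATE_PART(epoch_second, ts)", read="snowflake", write="snowflake")[0]
        # e.g. "SELECT EXTRACT(epoch_second FROM CAST(ts AS TIMESTAMP))"
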
        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

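        # Illustrative only, not part of the module: staged-file references parse
        # as tables, including FILE_FORMAT/PATTERN options. Sketch:
        #
        # >>> import sqlglot
        # >>> sqlglot.parse_one(
        # ...     "SELECT * FROM @my_stage (PATTERN => '.*[.]csv')",
        # ...     read="snowflake",
        # ... )
        # an exp.Select whose FROM clause is an exp.Table over the stage path
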
        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

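        # Illustrative only, not part of the module: a quoted source path parses
        # into exp.Put, while an unquoted one falls back to exp.Command. Sketch:
        #
        # >>> import sqlglot
        # >>> sqlglot.parse_one("PUT 'file:///tmp/data.csv' @my_stage", read="snowflake")
        # an exp.Put whose target is the @my_stage location path
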
        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

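        # Illustrative only, not part of the module: STAR_EXCEPT = "EXCLUDE" renders
        # BigQuery's SELECT * EXCEPT with Snowflake's keyword. Sketch (output depends
        # on the sqlglot version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0]
        # e.g. "SELECT * EXCLUDE (a) FROM t"
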
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.FileFormatProperty: lambda self, e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

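        # Illustrative only, not part of the module: exp.If renders as IFF, so
        # conditional expressions from other dialects become IFF here. Sketch
        # (output depends on the sqlglot version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT IF(cond, 1, 2)", read="duckdb", write="snowflake")[0]
        # e.g. "SELECT IFF(cond, 1, 2)"
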
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

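        # Illustrative only, not part of the module: STRUCT types map to OBJECT, and
        # datatype_sql below drops typed fields because Snowflake's plain OBJECT takes
        # no parameters outside Iceberg tables. Sketch (output depends on the version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("CREATE TABLE t (c STRUCT<a INT>)", read="bigquery", write="snowflake")[0]
        # e.g. "CREATE TABLE t (c OBJECT)"
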
        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string.
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets
            # "requires_string") or if we can deduce that the value is a string, then we can
            # generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

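        # Illustrative only, not part of the module: non-Snowflake UNNEST nodes are
        # rendered via TABLE(FLATTEN(INPUT => ...)) with the canonical FLATTEN output
        # columns. Sketch (alias names and exact output depend on the sqlglot version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT x FROM UNNEST([1, 2]) AS x", read="bigquery", write="snowflake")[0]
        # e.g. a query of the form
        # "SELECT x FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, x, this)"
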
"scope") 1467 scope = f" {scope}" if scope else "" 1468 1469 scope_kind = self.sql(expression, "scope_kind") 1470 if scope_kind: 1471 scope_kind = f" IN {scope_kind}" 1472 1473 starts_with = self.sql(expression, "starts_with") 1474 if starts_with: 1475 starts_with = f" STARTS WITH {starts_with}" 1476 1477 limit = self.sql(expression, "limit") 1478 1479 from_ = self.sql(expression, "from") 1480 if from_: 1481 from_ = f" FROM {from_}" 1482 1483 privileges = self.expressions(expression, key="privileges", flat=True) 1484 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1485 1486 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1487 1488 def describe_sql(self, expression: exp.Describe) -> str: 1489 # Default to table if kind is unknown 1490 kind_value = expression.args.get("kind") or "TABLE" 1491 kind = f" {kind_value}" if kind_value else "" 1492 this = f" {self.sql(expression, 'this')}" 1493 expressions = self.expressions(expression, flat=True) 1494 expressions = f" {expressions}" if expressions else "" 1495 return f"DESCRIBE{kind}{this}{expressions}" 1496 1497 def generatedasidentitycolumnconstraint_sql( 1498 self, expression: exp.GeneratedAsIdentityColumnConstraint 1499 ) -> str: 1500 start = expression.args.get("start") 1501 start = f" START {start}" if start else "" 1502 increment = expression.args.get("increment") 1503 increment = f" INCREMENT {increment}" if increment else "" 1504 1505 order = expression.args.get("order") 1506 if order is not None: 1507 order_clause = " ORDER" if order else " NOORDER" 1508 else: 1509 order_clause = "" 1510 1511 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1512 1513 def cluster_sql(self, expression: exp.Cluster) -> str: 1514 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1515 1516 def struct_sql(self, expression: exp.Struct) -> str: 1517 keys = [] 1518 values = [] 1519 1520 for i, e in enumerate(expression.expressions): 1521 if isinstance(e, exp.PropertyEQ): 1522 keys.append( 1523 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1524 ) 1525 values.append(e.expression) 1526 else: 1527 keys.append(exp.Literal.string(f"_{i}")) 1528 values.append(e) 1529 1530 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1531 1532 @unsupported_args("weight", "accuracy") 1533 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1534 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1535 1536 def alterset_sql(self, expression: exp.AlterSet) -> str: 1537 exprs = self.expressions(expression, flat=True) 1538 exprs = f" {exprs}" if exprs else "" 1539 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1540 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1541 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1542 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1543 tag = self.expressions(expression, key="tag", flat=True) 1544 tag = f" TAG {tag}" if tag else "" 1545 1546 return f"SET{exprs}{file_format}{copy_options}{tag}" 1547 1548 def strtotime_sql(self, expression: exp.StrToTime): 1549 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1550 return self.func( 1551 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1552 ) 1553 1554 def timestampsub_sql(self, expression: exp.TimestampSub): 1555 return self.sql( 1556 
        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ, so
            # for these cases we PARSE_JSON preemptively
            if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
                "requires_json"
            ):
                this = exp.ParseJSON(this=this)

            return self.func(
                "GET_PATH",
                this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if this.is_string:
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            is_materialized = expression.find(exp.MaterializedProperty)
            copy_grants_property = expression.find(exp.CopyGrantsProperty)

            if expression.kind == "VIEW" and is_materialized and copy_grants_property:
                # For materialized views, COPY GRANTS is located *before* the columns list.
                # This is in contrast to normal views, where COPY GRANTS is located *after* the
                # columns list. We default CopyGrantsProperty to POST_SCHEMA, which means we need
                # to output it POST_NAME if a materialized view is detected.
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
                # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
                post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
                post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

                this_name = self.sql(expression.this, "this")
                copy_grants = self.sql(copy_grants_property)
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

            return super().createable_sql(expression, locations)

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            this = expression.this

            # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
            # and add it later as part of the WITHIN GROUP clause
            order = this if isinstance(this, exp.Order) else None
            if order:
                expression.set("this", order.this.pop())

            expr_sql = super().arrayagg_sql(expression)

            if order:
                expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

            return expr_sql

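        # Illustrative only, not part of the module: Snowflake requires a LIMIT when
        # OFFSET is present, so select_sql above injects LIMIT NULL. Sketch (output
        # depends on the sqlglot version):
        #
        # >>> import sqlglot
        # >>> sqlglot.transpile("SELECT c FROM t OFFSET 10", read="duckdb", write="snowflake")[0]
        # e.g. "SELECT c FROM t LIMIT NULL OFFSET 10"
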
ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1646 if first_expr.text("kind").upper() == "STRUCT": 1647 object_construct_args = [] 1648 for expr in first_expr.expressions: 1649 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1650 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1651 name = expr.this if isinstance(expr, exp.Alias) else expr 1652 1653 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1654 1655 array_agg = exp.ArrayAgg( 1656 this=_build_object_construct(args=object_construct_args) 1657 ) 1658 1659 first_expr.set("kind", None) 1660 first_expr.set("expressions", [array_agg]) 1661 1662 return self.sql(first_expr.subquery()) 1663 1664 return inline_array_sql(self, expression) 1665 1666 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1667 zone = self.sql(expression, "this") 1668 if not zone: 1669 return super().currentdate_sql(expression) 1670 1671 expr = exp.Cast( 1672 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1673 to=exp.DataType(this=exp.DataType.Type.DATE), 1674 ) 1675 return self.sql(expr) 1676 1677 def dot_sql(self, expression: exp.Dot) -> str: 1678 this = expression.this 1679 1680 if not this.type: 1681 from sqlglot.optimizer.annotate_types import annotate_types 1682 1683 this = annotate_types(this, dialect=self.dialect) 1684 1685 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1686 # Generate colon notation for the top level STRUCT 1687 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1688 1689 return super().dot_sql(expression)
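Several of the Generator methods above can be exercised end to end through sqlglot's transpile API. The following is a minimal sketch; the printed SQL shapes follow from the methods shown, but the exact output may vary between sqlglot versions:

import sqlglot

# datesub_sql: DATE_SUB from another dialect should come out as DATEADD with a
# negated amount, since the generator rewrites DateSub into DATEADD.
print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="mysql", write="snowflake")[0])

# jsonextract_sql: JSON extraction is emitted as GET_PATH, wrapping the operand
# in PARSE_JSON when it is not already known to be JSON.
print(sqlglot.transpile("SELECT JSON_EXTRACT(j, '$.a')", read="mysql", write="snowflake")[0])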
class Snowflake(Dialect):

NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

Specifies the strategy according to which identifiers should be normalized. Snowflake resolves unquoted identifiers as uppercase.

NULL_ORDERING = "nulls_are_large"

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

PREFER_CTE_ALIAS_COLUMN = True

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag causes the CTE alias columns to override any projection aliases in the subquery. For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;

TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats. A usage sketch of both behaviors follows below.
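A minimal sketch of both flags in action, assuming the PREFER_CTE_ALIAS_COLUMN rewrite is applied by the optimizer's qualify step; output formatting may differ between sqlglot versions:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
expression = sqlglot.parse_one(sql, read="snowflake")

# The CTE alias column `c` overrides the projection alias inside the CTE.
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))

# TIME_MAPPING drives format-string translation: Snowflake's 'YYYY-MM-DD' is
# mapped through the strftime-style equivalents when transpiling to a dialect
# such as DuckDB.
print(sqlglot.transpile("SELECT TO_CHAR(ts, 'YYYY-MM-DD') FROM t", read="snowflake", write="duckdb")[0])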
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe" with respect to its characters and this dialect's normalization strategy.

A short usage sketch follows below.
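A usage sketch of this behavior; the query and identifier names are illustrative:

import sqlglot
from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

snowflake = Snowflake()
query = sqlglot.parse_one("SELECT col FROM dual", read="snowflake")

# Quote every identifier in the tree; the DUAL table identifier is the
# documented exception and is returned unchanged.
for ident in query.find_all(exp.Identifier):
    snowflake.quote_identifier(ident)

print(query.sql(dialect="snowflake"))  # "col" is quoted, dual stays unquoted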
Mapping of an escaped sequence (such as the two characters \n) to its unescaped version (the corresponding literal character, e.g. a newline). A minimal sketch follows below.
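A minimal sketch of what such a mapping contains; the values here are illustrative, not the library's literal table:

# Each two-character escaped sequence maps to the single character it denotes.
unescaped_sequences = {
    "\\n": "\n",  # backslash + n -> newline
    "\\t": "\t",  # backslash + t -> tab
}
assert unescaped_sequences["\\n"] == "\n"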
    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif
self._match_set(self.DB_CREATABLES): 989 scope_kind = self._prev.text.upper() 990 if self._curr: 991 scope = self._parse_table_parts() 992 elif self._curr: 993 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 994 scope = self._parse_table_parts() 995 996 return self.expression( 997 exp.Show, 998 **{ 999 "terse": terse, 1000 "this": this, 1001 "history": history, 1002 "like": like, 1003 "scope": scope, 1004 "scope_kind": scope_kind, 1005 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1006 "limit": self._parse_limit(), 1007 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1008 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1009 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1010 }, 1011 ) 1012 1013 def _parse_put(self) -> exp.Put | exp.Command: 1014 if self._curr.token_type != TokenType.STRING: 1015 return self._parse_as_command(self._prev) 1016 1017 return self.expression( 1018 exp.Put, 1019 this=self._parse_string(), 1020 target=self._parse_location_path(), 1021 properties=self._parse_properties(), 1022 ) 1023 1024 def _parse_get(self) -> t.Optional[exp.Expression]: 1025 start = self._prev 1026 1027 # If we detect GET( then we need to parse a function, not a statement 1028 if self._match(TokenType.L_PAREN): 1029 self._retreat(self._index - 2) 1030 return self._parse_expression() 1031 1032 target = self._parse_location_path() 1033 1034 # Parse as command if unquoted file path 1035 if self._curr.token_type == TokenType.URI_START: 1036 return self._parse_as_command(start) 1037 1038 return self.expression( 1039 exp.Get, 1040 this=self._parse_string(), 1041 target=target, 1042 properties=self._parse_properties(), 1043 ) 1044 1045 def _parse_location_property(self) -> exp.LocationProperty: 1046 self._match(TokenType.EQ) 1047 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1048 1049 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1050 # Parse either a subquery or a staged file 1051 return ( 1052 self._parse_select(table=True, parse_subquery_alias=False) 1053 if self._match(TokenType.L_PAREN, advance=False) 1054 else self._parse_table_parts() 1055 ) 1056 1057 def _parse_location_path(self) -> exp.Var: 1058 start = self._curr 1059 self._advance_any(ignore_reserved=True) 1060 1061 # We avoid consuming a comma token because external tables like @foo and @bar 1062 # can be joined in a query with a comma separator, as well as closing paren 1063 # in case of subqueries 1064 while self._is_connected() and not self._match_set( 1065 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1066 ): 1067 self._advance_any(ignore_reserved=True) 1068 1069 return exp.var(self._find_sql(start, self._prev)) 1070 1071 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1072 this = super()._parse_lambda_arg() 1073 1074 if not this: 1075 return this 1076 1077 typ = self._parse_types() 1078 1079 if typ: 1080 return self.expression(exp.Cast, this=this, to=typ) 1081 1082 return this 1083 1084 def _parse_foreign_key(self) -> exp.ForeignKey: 1085 # inlineFK, the REFERENCES columns are implied 1086 if self._match(TokenType.REFERENCES, advance=False): 1087 return self.expression(exp.ForeignKey) 1088 1089 # outoflineFK, explicitly names the columns 1090 return super()._parse_foreign_key() 1091 1092 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1093 self._match(TokenType.EQ) 1094 if self._match(TokenType.L_PAREN, advance=False): 1095 
expressions = self._parse_wrapped_options() 1096 else: 1097 expressions = [self._parse_format_name()] 1098 1099 return self.expression( 1100 exp.FileFormatProperty, 1101 expressions=expressions, 1102 ) 1103 1104 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1105 return self.expression( 1106 exp.CredentialsProperty, 1107 expressions=self._parse_wrapped_options(), 1108 ) 1109 1110 def _parse_semantic_view(self) -> exp.SemanticView: 1111 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1112 1113 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1114 if self._match_text_seq("DIMENSIONS"): 1115 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1116 if self._match_text_seq("METRICS"): 1117 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1118 if self._match_text_seq("WHERE"): 1119 kwargs["where"] = self._parse_expression() 1120 1121 return self.expression(exp.SemanticView, **kwargs)
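The parser overrides above are easiest to see through sqlglot's public API. A minimal sketch (assuming a recent sqlglot; the exact rendered strings may differ between versions):

import sqlglot
from sqlglot import exp

# _negate_range: `value NOT IN (subquery)` is parsed as `value <> ALL (subquery)`,
# so the round-trip should keep the ALL comparison rather than a NOT ... IN.
ast = sqlglot.parse_one("SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake")
print(ast.sql(dialect="snowflake"))

# _parse_show_snowflake: SHOW statements become structured exp.Show nodes
# instead of opaque commands.
show = sqlglot.parse_one("SHOW TERSE TABLES LIKE '%orders%' IN SCHEMA db1.s1", read="snowflake")
assert isinstance(show, exp.Show)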
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
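For illustration, these constructor arguments can be exercised by instantiating the dialect's parser directly; a sketch (the error payload shape is summarized in comments, not verbatim output):

from sqlglot.dialects.snowflake import Snowflake
from sqlglot.errors import ErrorLevel, ParseError

dialect = Snowflake()

# With ErrorLevel.RAISE, up to max_errors messages are collected into one ParseError.
parser = Snowflake.Parser(error_level=ErrorLevel.RAISE, max_errors=3, dialect=dialect)
expressions = parser.parse(dialect.tokenize("SELECT 1 FROM t"))

try:
    parser.parse(dialect.tokenize("SELECT FROM WHERE"))
except ParseError as e:
    print(e.errors)  # structured error details, one entry per collected error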
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "FILE://": TokenType.URI_START,
        "BYTEINT": TokenType.INT,
        "EXCLUDE": TokenType.EXCEPT,
        "FILE FORMAT": TokenType.FILE_FORMAT,
        "GET": TokenType.GET,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.PUT,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STAGE": TokenType.STAGE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
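A small sketch of the tokenizer settings above in action (the resulting token stream is paraphrased in the comments):

from sqlglot.dialects.snowflake import Snowflake

sql = "SELECT $$a 'raw' string$$, $1 FROM t // Snowflake line comment"
for token in Snowflake().tokenize(sql):
    print(token.token_type, repr(token.text))

# $$...$$ tokenizes as a raw string (RAW_STRINGS above), `//` starts a line
# comment (COMMENTS above), and `$` is a single-character PARAMETER token
# (SINGLE_TOKENS above), so `$1` lexes as a positional parameter reference.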
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = True
    ARRAY_SIZE_NAME = "ARRAY_SIZE"
    SUPPORTS_DECODE_CASE = True
    IS_BOOL_ALLOWED = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseOr: rename_func("BITOR"),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
        exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
        exp.DatetimeDiff: timestampdiff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: lambda self, e: self.func(
            "DATE_PART", map_date_part(e.this, self.dialect), e.expression
        ),
        exp.FileFormatProperty: lambda self, e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
            rename_func("EDITDISTANCE")
        ),
        exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.MakeInterval: no_make_interval_sql,
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ParseJSON: lambda self, e: self.func(
            "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
        ),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
        exp.RegexpExtract: _regexpextract_sql,
        exp.RegexpExtractAll: _regexpextract_sql,
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
                _transform_generate_date_array,
                _qualify_unnested_columns,
                _eliminate_dot_variant_lookup,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.EndsWith: rename_func("ENDSWITH"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="CHARINDEX", supports_position=True
        ),
        exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
        exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
        exp.Stuff: rename_func("INSERT"),
        exp.StPoint: rename_func("ST_MAKEPOINT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.ToDouble: rename_func("TO_DOUBLE"),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.TsOrDsToTime: lambda self, e: self.func(
            "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
        ),
        exp.Unhex: rename_func("HEX_DECODE_BINARY"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.Uuid: rename_func("UUID_STRING"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
        exp.DataType.Type.BIGDECIMAL: "DOUBLE",
    }

    TOKEN_MAPPING = {
        TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
        exp.LocationProperty: exp.Properties.Location.POST_WITH,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value, dialect=self.dialect)

        # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
        # roundtripped (since Snowflake is the only dialect that sets "requires_string"),
        # or if we can deduce that the value is a string, then we can generate TRY_CAST
        if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
            return super().trycast_sql(expression)

        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        unnest_alias_columns = unnest_alias.columns if unnest_alias else []
        value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            value,
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        expression_parent = expression.parent

        explode = (
            f"FLATTEN({table_input})"
            if isinstance(expression_parent, exp.Lateral)
            else f"TABLE(FLATTEN({table_input}))"
        )
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        value = (
            ""
            if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
            else f"{value} FROM "
        )

        return f"{value}{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ, so
        # for these cases we apply PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func(
            "GET_PATH",
            this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list,
            # in contrast to normal views where COPY GRANTS is located *after* it.
            # We default CopyGrantsProperty to POST_SCHEMA, which means we need to
            # output it POST_NAME if a materialized view is detected.
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql

    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(
                    this=_build_object_construct(args=object_construct_args)
                )

                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)

    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        zone = self.sql(expression, "this")
        if not zone:
            return super().currentdate_sql(expression)

        expr = exp.Cast(
            this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)

    def dot_sql(self, expression: exp.Dot) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
            # Generate colon notation for the top level STRUCT
            return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

        return super().dot_sql(expression)
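The TRANSFORMS table above drives transpilation into Snowflake. A minimal sketch (the targets are chosen from the mappings shown; assuming the default read dialect parses IF and LEVENSHTEIN into exp.If and exp.Levenshtein, and outputs are paraphrased):

import sqlglot

# exp.If renders through if_sql(name="IFF", ...), so IF(...) should come out as IFF(...)
print(sqlglot.transpile("SELECT IF(a > 1, 'big', 'small') FROM t", write="snowflake")[0])

# exp.Levenshtein renders as EDITDISTANCE, with the cost arguments marked unsupported
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b) FROM t", write="snowflake")[0])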
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
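These options are forwarded to the Generator when serializing an expression; for example (a sketch, with the formatted output paraphrased in the comment):

import sqlglot

ast = sqlglot.parse_one("select a, sum(b) as total from t group by a", read="snowflake")

# pretty/identify/normalize_functions are passed through Expression.sql to the
# Generator constructor, yielding an indented, fully quoted, upper-cased rendering.
print(ast.sql(dialect="snowflake", pretty=True, identify=True, normalize_functions="upper"))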