sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
    space_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated


def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    array_args = expression.expressions

    # BigQuery behaves as follows:
    #
    # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t                -- foo, STRUCT<STRING>
    # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo'))     -- foo, ARRAY<STRING>
    if (
        len(array_args) == 1
        and isinstance(select := array_args[0].unnest(), exp.Select)
        and (query_type := select.meta.get("query_type")) is not None
        and query_type.is_type(exp.DataType.Type.STRUCT)
        and len(query_type.expressions) == 1
    ):
        projection_type = query_type.expressions[0].kind.copy()
        array_type = exp.DataType(
            this=exp.DataType.Type.ARRAY,
            expressions=[projection_type],
            nested=True,
        )
        return self._annotate_with_type(expression, array_type)

    return self._annotate_by_args(expression, "expressions", array=True)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Array: _annotate_array,
        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.Concat: _annotate_concat,
        exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<VARCHAR>")
        ),
        exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
        exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.DATETIME
        ),
        exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier
        ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
            TokenType.ASC,
            TokenType.DESC,
        }
        UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Space: space_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            unit = expression.unit
            unit_sql = unit.name if unit.is_string else self.sql(unit)
            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def declareitem_sql(self, expression: exp.DeclareItem) -> str:
            variables = self.expressions(expression, "this")
            default = self.sql(expression, "default")
            default = f" DEFAULT {default}" if default else ""
            kind = self.sql(expression, "kind")
            kind = f" {kind}" if kind else ""

            return f"{variables}{kind}{default}"
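
For orientation, a minimal usage sketch (not part of the module source above): the dialect is selected through sqlglot's public parse/transpile API, and the identifier/table names below are illustrative only.

    import sqlglot

    # Parse BigQuery-specific syntax (backticked identifiers, dashed project names)
    # into a sqlglot expression tree, handled by the Parser defined above.
    ast = sqlglot.parse_one("SELECT col FROM `my-project.dataset.tbl`", read="bigquery")
    print(ast.sql(dialect="bigquery"))  # round-trips back to BigQuery SQL via the Generator

    # Transpile BigQuery SQL into another dialect; the FUNCTIONS/TRANSFORMS mappings above
    # drive the rewrite (exact output depends on the sqlglot version).
    print(sqlglot.transpile("SELECT TIMESTAMP_SECONDS(1)", read="bigquery", write="duckdb")[0])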
763 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 764 else: 765 previous_this = table.this 766 parts = table.name.split(".") 767 if len(parts) == 2 and not table.this.quoted: 768 table.set( 769 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 770 ) 771 table.set( 772 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 773 ) 774 775 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 776 alias = table.this 777 catalog, db, this, *rest = ( 778 exp.to_identifier(p, quoted=True) 779 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 780 ) 781 782 for part in (catalog, db, this): 783 if part: 784 part.update_positions(table.this) 785 786 if rest and this: 787 this = exp.Dot.build([this, *rest]) # type: ignore 788 789 table = exp.Table( 790 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 791 ) 792 table.meta["quoted_table"] = True 793 else: 794 alias = None 795 796 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 797 # dataset, so if the project identifier is omitted we need to fix the ast so that 798 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 799 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 800 # views, because it would seem like the "catalog" part is set, when it'd actually 801 # be the region/dataset. Merging the two identifiers into a single one is done to 802 # avoid producing a 4-part Table reference, which would cause issues in the schema 803 # module, when there are 3-part table names mixed with information schema views. 804 # 805 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 806 table_parts = table.parts 807 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 808 # We need to alias the table here to avoid breaking existing qualified columns. 809 # This is expected to be safe, because if there's an actual alias coming up in 810 # the token stream, it will overwrite this one. If there isn't one, we are only 811 # exposing the name that can be used to reference the view explicitly (a no-op). 812 exp.alias_( 813 table, 814 t.cast(exp.Identifier, alias or table_parts[-1]), 815 table=True, 816 copy=False, 817 ) 818 819 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 820 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 821 line=table_parts[-2].meta.get("line"), 822 col=table_parts[-1].meta.get("col"), 823 start=table_parts[-2].meta.get("start"), 824 end=table_parts[-1].meta.get("end"), 825 ) 826 table.set("this", new_this) 827 table.set("db", seq_get(table_parts, -3)) 828 table.set("catalog", seq_get(table_parts, -4)) 829 830 return table 831 832 def _parse_column(self) -> t.Optional[exp.Expression]: 833 column = super()._parse_column() 834 if isinstance(column, exp.Column): 835 parts = column.parts 836 if any("." in p.name for p in parts): 837 catalog, db, table, this, *rest = ( 838 exp.to_identifier(p, quoted=True) 839 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 840 ) 841 842 if rest and this: 843 this = exp.Dot.build([this, *rest]) # type: ignore 844 845 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 846 column.meta["quoted_column"] = True 847 848 return column 849 850 @t.overload 851 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
852 853 @t.overload 854 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 855 856 def _parse_json_object(self, agg=False): 857 json_object = super()._parse_json_object() 858 array_kv_pair = seq_get(json_object.expressions, 0) 859 860 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 861 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 862 if ( 863 array_kv_pair 864 and isinstance(array_kv_pair.this, exp.Array) 865 and isinstance(array_kv_pair.expression, exp.Array) 866 ): 867 keys = array_kv_pair.this.expressions 868 values = array_kv_pair.expression.expressions 869 870 json_object.set( 871 "expressions", 872 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 873 ) 874 875 return json_object 876 877 def _parse_bracket( 878 self, this: t.Optional[exp.Expression] = None 879 ) -> t.Optional[exp.Expression]: 880 bracket = super()._parse_bracket(this) 881 882 if this is bracket: 883 return bracket 884 885 if isinstance(bracket, exp.Bracket): 886 for expression in bracket.expressions: 887 name = expression.name.upper() 888 889 if name not in self.BRACKET_OFFSETS: 890 break 891 892 offset, safe = self.BRACKET_OFFSETS[name] 893 bracket.set("offset", offset) 894 bracket.set("safe", safe) 895 expression.replace(expression.expressions[0]) 896 897 return bracket 898 899 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 900 unnest = super()._parse_unnest(with_alias=with_alias) 901 902 if not unnest: 903 return None 904 905 unnest_expr = seq_get(unnest.expressions, 0) 906 if unnest_expr: 907 from sqlglot.optimizer.annotate_types import annotate_types 908 909 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 910 911 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 912 # in contrast to other dialects such as DuckDB which flattens only the array by default 913 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 914 array_elem.is_type(exp.DataType.Type.STRUCT) 915 for array_elem in unnest_expr._type.expressions 916 ): 917 unnest.set("explode_array", True) 918 919 return unnest 920 921 def _parse_make_interval(self) -> exp.MakeInterval: 922 expr = exp.MakeInterval() 923 924 for arg_key in expr.arg_types: 925 value = self._parse_lambda() 926 927 if not value: 928 break 929 930 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 931 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 932 if isinstance(value, exp.Kwarg): 933 arg_key = value.this.name 934 935 expr.set(arg_key, value) 936 937 self._match(TokenType.COMMA) 938 939 return expr 940 941 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 942 expr = self.expression( 943 exp.FeaturesAtTime, 944 this=(self._match(TokenType.TABLE) and self._parse_table()) 945 or self._parse_select(nested=True), 946 ) 947 948 while self._match(TokenType.COMMA): 949 arg = self._parse_lambda() 950 951 # Get the LHS of the Kwarg and set the arg to that value, e.g 952 # "num_rows => 1" sets the expr's `num_rows` arg 953 if arg: 954 expr.set(arg.this.name, arg) 955 956 return expr 957 958 def _parse_export_data(self) -> exp.Export: 959 self._match_text_seq("DATA") 960 961 return self.expression( 962 exp.Export, 963 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 964 options=self._parse_properties(), 965 this=self._match_text_seq("AS") and self._parse_select(), 966 ) 
967 968 class Generator(generator.Generator): 969 INTERVAL_ALLOWS_PLURAL_FORM = False 970 JOIN_HINTS = False 971 QUERY_HINTS = False 972 TABLE_HINTS = False 973 LIMIT_FETCH = "LIMIT" 974 RENAME_TABLE_WITH_DB = False 975 NVL2_SUPPORTED = False 976 UNNEST_WITH_ORDINALITY = False 977 COLLATE_IS_FUNC = True 978 LIMIT_ONLY_LITERALS = True 979 SUPPORTS_TABLE_ALIAS_COLUMNS = False 980 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 981 JSON_KEY_VALUE_PAIR_SEP = "," 982 NULL_ORDERING_SUPPORTED = False 983 IGNORE_NULLS_IN_FUNC = True 984 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 985 CAN_IMPLEMENT_ARRAY_ANY = True 986 SUPPORTS_TO_NUMBER = False 987 NAMED_PLACEHOLDER_TOKEN = "@" 988 HEX_FUNC = "TO_HEX" 989 WITH_PROPERTIES_PREFIX = "OPTIONS" 990 SUPPORTS_EXPLODING_PROJECTIONS = False 991 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 992 SUPPORTS_UNIX_SECONDS = True 993 994 TRANSFORMS = { 995 **generator.Generator.TRANSFORMS, 996 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 997 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 998 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 999 exp.Array: inline_array_unless_query, 1000 exp.ArrayContains: _array_contains_sql, 1001 exp.ArrayFilter: filter_array_using_unnest, 1002 exp.ArrayRemove: filter_array_using_unnest, 1003 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1004 exp.CollateProperty: lambda self, e: ( 1005 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1006 if e.args.get("default") 1007 else f"COLLATE {self.sql(e, 'this')}" 1008 ), 1009 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1010 exp.CountIf: rename_func("COUNTIF"), 1011 exp.Create: _create_sql, 1012 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1013 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1014 exp.DateDiff: lambda self, e: self.func( 1015 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1016 ), 1017 exp.DateFromParts: rename_func("DATE"), 1018 exp.DateStrToDate: datestrtodate_sql, 1019 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1020 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1021 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1022 exp.FromTimeZone: lambda self, e: self.func( 1023 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1024 ), 1025 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1026 exp.GroupConcat: lambda self, e: groupconcat_sql( 1027 self, e, func_name="STRING_AGG", within_group=False 1028 ), 1029 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1030 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1031 exp.If: if_sql(false_value="NULL"), 1032 exp.ILike: no_ilike_sql, 1033 exp.IntDiv: rename_func("DIV"), 1034 exp.Int64: rename_func("INT64"), 1035 exp.JSONExtract: _json_extract_sql, 1036 exp.JSONExtractArray: _json_extract_sql, 1037 exp.JSONExtractScalar: _json_extract_sql, 1038 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1039 exp.Levenshtein: _levenshtein_sql, 1040 exp.Max: max_or_greatest, 1041 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1042 exp.MD5Digest: rename_func("MD5"), 1043 exp.Min: min_or_least, 1044 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1045 exp.RegexpExtract: lambda self, e: self.func( 1046 "REGEXP_EXTRACT", 1047 e.this, 1048 e.expression, 1049 e.args.get("position"), 1050 e.args.get("occurrence"), 1051 ), 1052 exp.RegexpExtractAll: lambda self, e: self.func( 1053 "REGEXP_EXTRACT_ALL", e.this, e.expression 
1054 ), 1055 exp.RegexpReplace: regexp_replace_sql, 1056 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1057 exp.ReturnsProperty: _returnsproperty_sql, 1058 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1059 exp.Select: transforms.preprocess( 1060 [ 1061 transforms.explode_projection_to_unnest(), 1062 transforms.unqualify_unnest, 1063 transforms.eliminate_distinct_on, 1064 _alias_ordered_group, 1065 transforms.eliminate_semi_and_anti_joins, 1066 ] 1067 ), 1068 exp.SHA: rename_func("SHA1"), 1069 exp.SHA2: sha256_sql, 1070 exp.Space: space_sql, 1071 exp.StabilityProperty: lambda self, e: ( 1072 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1073 ), 1074 exp.String: rename_func("STRING"), 1075 exp.StrPosition: lambda self, e: ( 1076 strposition_sql( 1077 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1078 ) 1079 ), 1080 exp.StrToDate: _str_to_datetime_sql, 1081 exp.StrToTime: _str_to_datetime_sql, 1082 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1083 exp.TimeFromParts: rename_func("TIME"), 1084 exp.TimestampFromParts: rename_func("DATETIME"), 1085 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1086 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1087 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1088 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1089 exp.TimeStrToTime: timestrtotime_sql, 1090 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1091 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1092 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1093 exp.TsOrDsToTime: rename_func("TIME"), 1094 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1095 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1096 exp.Unhex: rename_func("FROM_HEX"), 1097 exp.UnixDate: rename_func("UNIX_DATE"), 1098 exp.UnixToTime: _unix_to_time_sql, 1099 exp.Uuid: lambda *_: "GENERATE_UUID()", 1100 exp.Values: _derived_table_values_to_unnest, 1101 exp.VariancePop: rename_func("VAR_POP"), 1102 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1103 } 1104 1105 SUPPORTED_JSON_PATH_PARTS = { 1106 exp.JSONPathKey, 1107 exp.JSONPathRoot, 1108 exp.JSONPathSubscript, 1109 } 1110 1111 TYPE_MAPPING = { 1112 **generator.Generator.TYPE_MAPPING, 1113 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1114 exp.DataType.Type.BIGINT: "INT64", 1115 exp.DataType.Type.BINARY: "BYTES", 1116 exp.DataType.Type.BLOB: "BYTES", 1117 exp.DataType.Type.BOOLEAN: "BOOL", 1118 exp.DataType.Type.CHAR: "STRING", 1119 exp.DataType.Type.DECIMAL: "NUMERIC", 1120 exp.DataType.Type.DOUBLE: "FLOAT64", 1121 exp.DataType.Type.FLOAT: "FLOAT64", 1122 exp.DataType.Type.INT: "INT64", 1123 exp.DataType.Type.NCHAR: "STRING", 1124 exp.DataType.Type.NVARCHAR: "STRING", 1125 exp.DataType.Type.SMALLINT: "INT64", 1126 exp.DataType.Type.TEXT: "STRING", 1127 exp.DataType.Type.TIMESTAMP: "DATETIME", 1128 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1129 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1130 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1131 exp.DataType.Type.TINYINT: "INT64", 1132 exp.DataType.Type.ROWVERSION: "BYTES", 1133 exp.DataType.Type.UUID: "STRING", 1134 exp.DataType.Type.VARBINARY: "BYTES", 1135 exp.DataType.Type.VARCHAR: "STRING", 1136 exp.DataType.Type.VARIANT: "ANY TYPE", 1137 } 1138 1139 PROPERTIES_LOCATION = { 1140 **generator.Generator.PROPERTIES_LOCATION, 1141 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1142 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1143 } 1144 1145 # WINDOW comes after QUALIFY 1146 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1147 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1148 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1149 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1150 } 1151 1152 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1153 RESERVED_KEYWORDS = { 1154 "all", 1155 "and", 1156 "any", 1157 "array", 1158 "as", 1159 "asc", 1160 "assert_rows_modified", 1161 "at", 1162 "between", 1163 "by", 1164 "case", 1165 "cast", 1166 "collate", 1167 "contains", 1168 "create", 1169 "cross", 1170 "cube", 1171 "current", 1172 "default", 1173 "define", 1174 "desc", 1175 "distinct", 1176 "else", 1177 "end", 1178 "enum", 1179 "escape", 1180 "except", 1181 "exclude", 1182 "exists", 1183 "extract", 1184 "false", 1185 "fetch", 1186 "following", 1187 "for", 1188 "from", 1189 "full", 1190 "group", 1191 "grouping", 1192 "groups", 1193 "hash", 1194 "having", 1195 "if", 1196 "ignore", 1197 "in", 1198 "inner", 1199 "intersect", 1200 "interval", 1201 "into", 1202 "is", 1203 "join", 1204 "lateral", 1205 "left", 1206 "like", 1207 "limit", 1208 "lookup", 1209 "merge", 1210 "natural", 1211 "new", 1212 "no", 1213 "not", 1214 "null", 1215 "nulls", 1216 "of", 1217 "on", 1218 "or", 1219 "order", 1220 "outer", 1221 "over", 1222 "partition", 1223 "preceding", 1224 "proto", 1225 "qualify", 1226 "range", 1227 "recursive", 1228 "respect", 1229 "right", 1230 "rollup", 1231 "rows", 1232 "select", 1233 "set", 1234 "some", 1235 "struct", 1236 "tablesample", 1237 "then", 1238 "to", 1239 "treat", 1240 "true", 1241 "unbounded", 1242 "union", 1243 "unnest", 1244 "using", 1245 "when", 1246 "where", 1247 "window", 1248 "with", 1249 "within", 1250 } 1251 1252 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1253 unit = expression.unit 1254 unit_sql = unit.name if unit.is_string else self.sql(unit) 1255 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1256 1257 def mod_sql(self, expression: exp.Mod) -> str: 1258 this = expression.this 1259 expr = expression.expression 1260 return self.func( 1261 "MOD", 1262 this.unnest() if isinstance(this, exp.Paren) else this, 1263 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1264 ) 1265 1266 def column_parts(self, expression: exp.Column) -> str: 1267 if expression.meta.get("quoted_column"): 1268 # If a column reference is of the form `dataset.table`.name, we need 1269 # to preserve the quoted table path, otherwise the reference breaks 1270 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1271 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1272 return f"{table_path}.{self.sql(expression, 'this')}" 1273 1274 return super().column_parts(expression) 1275 1276 def table_parts(self, expression: exp.Table) -> str: 1277 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1278 # we need to make sure the correct quoting is used in each case. 
1279 # 1280 # For example, if there is a CTE x that clashes with a schema name, then the former will 1281 # return the table y in that schema, whereas the latter will return the CTE's y column: 1282 # 1283 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1284 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1285 if expression.meta.get("quoted_table"): 1286 table_parts = ".".join(p.name for p in expression.parts) 1287 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1288 1289 return super().table_parts(expression) 1290 1291 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1292 this = expression.this 1293 if isinstance(this, exp.TsOrDsToDatetime): 1294 func_name = "FORMAT_DATETIME" 1295 elif isinstance(this, exp.TsOrDsToTimestamp): 1296 func_name = "FORMAT_TIMESTAMP" 1297 else: 1298 func_name = "FORMAT_DATE" 1299 1300 time_expr = ( 1301 this 1302 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1303 else expression 1304 ) 1305 return self.func( 1306 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1307 ) 1308 1309 def eq_sql(self, expression: exp.EQ) -> str: 1310 # Operands of = cannot be NULL in BigQuery 1311 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1312 if not isinstance(expression.parent, exp.Update): 1313 return "NULL" 1314 1315 return self.binary(expression, "=") 1316 1317 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1318 parent = expression.parent 1319 1320 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1321 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1322 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1323 return self.func( 1324 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1325 ) 1326 1327 return super().attimezone_sql(expression) 1328 1329 def trycast_sql(self, expression: exp.TryCast) -> str: 1330 return self.cast_sql(expression, safe_prefix="SAFE_") 1331 1332 def bracket_sql(self, expression: exp.Bracket) -> str: 1333 this = expression.this 1334 expressions = expression.expressions 1335 1336 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1337 arg = expressions[0] 1338 if arg.type is None: 1339 from sqlglot.optimizer.annotate_types import annotate_types 1340 1341 arg = annotate_types(arg, dialect=self.dialect) 1342 1343 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1344 # BQ doesn't support bracket syntax with string values for structs 1345 return f"{self.sql(this)}.{arg.name}" 1346 1347 expressions_sql = self.expressions(expression, flat=True) 1348 offset = expression.args.get("offset") 1349 1350 if offset == 0: 1351 expressions_sql = f"OFFSET({expressions_sql})" 1352 elif offset == 1: 1353 expressions_sql = f"ORDINAL({expressions_sql})" 1354 elif offset is not None: 1355 self.unsupported(f"Unsupported array offset: {offset}") 1356 1357 if expression.args.get("safe"): 1358 expressions_sql = f"SAFE_{expressions_sql}" 1359 1360 return f"{self.sql(this)}[{expressions_sql}]" 1361 1362 def in_unnest_op(self, expression: exp.Unnest) -> str: 1363 return self.sql(expression) 1364 1365 def version_sql(self, expression: exp.Version) -> str: 1366 if expression.name == "TIMESTAMP": 1367 expression.set("this", "SYSTEM_TIME") 1368 return super().version_sql(expression) 1369 1370 def contains_sql(self, 
expression: exp.Contains) -> str: 1371 this = expression.this 1372 expr = expression.expression 1373 1374 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1375 this = this.this 1376 expr = expr.this 1377 1378 return self.func("CONTAINS_SUBSTR", this, expr) 1379 1380 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1381 this = expression.this 1382 1383 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1384 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1385 # because they aren't literals and so the above syntax is invalid BigQuery. 1386 if isinstance(this, exp.Array): 1387 elem = seq_get(this.expressions, 0) 1388 if not (elem and elem.find(exp.Query)): 1389 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1390 1391 return super().cast_sql(expression, safe_prefix=safe_prefix) 1392 1393 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1394 variables = self.expressions(expression, "this") 1395 default = self.sql(expression, "default") 1396 default = f" DEFAULT {default}" if default else "" 1397 kind = self.sql(expression, "kind") 1398 kind = f" {kind}" if kind else "" 1399 1400 return f"{variables}{kind}{default}"
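The classes above are registered as the "bigquery" dialect, so they are normally exercised through sqlglot's top-level helpers rather than instantiated directly. A minimal sketch (illustrative only; the exact output depends on the installed sqlglot version):

import sqlglot

# Parse with the BigQuery tokenizer/parser above and re-generate BigQuery SQL;
# dashed table parts such as my-project are handled by _parse_table_part.
expr = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
print(expr.sql(dialect="bigquery"))

# Transpile to another dialect; the tokenizer maps BigQuery's DATETIME to the internal
# timestamp-without-timezone type, which the target dialect renders in its own syntax.
print(sqlglot.transpile("SELECT CAST(x AS DATETIME)", read="bigquery", write="duckdb")[0])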
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
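Relatedly, the BigQuery Parser above sets LOG_DEFAULTS_TO_LN = True, so a single-argument LOG is parsed as a natural logarithm. A minimal sketch, indicative only (the exact node layout can vary by sqlglot version):

import sqlglot
from sqlglot import exp

# With LOG_DEFAULTS_TO_LN = True, LOG(x) parses to an exp.Ln node...
print(repr(sqlglot.parse_one("SELECT LOG(x) FROM t", read="bigquery").find(exp.Ln)))

# ...while LOG(x, 2) keeps both arguments; their ordering on the node follows
# the base-position setting documented above.
print(repr(sqlglot.parse_one("SELECT LOG(x, 2) FROM t", read="bigquery").find(exp.Log)))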
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
The sketch below shows one way to inspect this behavior with the optimizer.
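A rough sketch using the optimizer's qualify pass (the printed SQL is dialect- and version-dependent, so treat it as illustrative):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"

# Qualifying the query shows how each dialect resolves the alias references described above.
for dialect in ("bigquery", "clickhouse", "duckdb"):
    print(dialect, qualify(sqlglot.parse_one(sql, read=dialect), dialect=dialect).sql(dialect=dialect))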
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
504 def normalize_identifier(self, expression: E) -> E: 505 if ( 506 isinstance(expression, exp.Identifier) 507 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 508 ): 509 parent = expression.parent 510 while isinstance(parent, exp.Dot): 511 parent = parent.parent 512 513 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 514 # by default. The following check uses a heuristic to detect tables based on whether 515 # they are qualified. This should generally be correct, because tables in BigQuery 516 # must be qualified with at least a dataset, unless @@dataset_id is set. 517 case_sensitive = ( 518 isinstance(parent, exp.UserDefinedFunction) 519 or ( 520 isinstance(parent, exp.Table) 521 and parent.db 522 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 523 ) 524 or expression.meta.get("is_table") 525 ) 526 if not case_sensitive: 527 expression.set("this", expression.this.lower()) 528 529 return t.cast(E, expression) 530 531 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
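A minimal sketch of the method in use (assumptions: Dialect.get_or_raise returns a dialect instance in the installed version, and results may differ across versions):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

ident = exp.to_identifier("FoO")  # unquoted identifier

# Each dialect applies its own resolution rules; BigQuery's override above also
# inspects the identifier's parent to decide whether it names a table or UDF.
for name in ("postgres", "snowflake", "bigquery"):
    print(name, Dialect.get_or_raise(name).normalize_identifier(ident.copy()).name)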
Mapping of an escaped sequence (e.g. the two-character sequence \n) to its unescaped version (the corresponding literal character, such as an actual newline).
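In other words, such a mapping has entries of the shape shown in this purely illustrative snippet (the name below is hypothetical, not the actual attribute name):

# Hypothetical illustration: each two-character escape sequence maps to the literal character.
EXAMPLE_UNESCAPED_SEQUENCES = {"\\n": "\n", "\\t": "\t"}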
533 class Tokenizer(tokens.Tokenizer): 534 QUOTES = ["'", '"', '"""', "'''"] 535 COMMENTS = ["--", "#", ("/*", "*/")] 536 IDENTIFIERS = ["`"] 537 STRING_ESCAPES = ["\\"] 538 539 HEX_STRINGS = [("0x", ""), ("0X", "")] 540 541 BYTE_STRINGS = [ 542 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 543 ] 544 545 RAW_STRINGS = [ 546 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 547 ] 548 549 NESTED_COMMENTS = False 550 551 KEYWORDS = { 552 **tokens.Tokenizer.KEYWORDS, 553 "ANY TYPE": TokenType.VARIANT, 554 "BEGIN": TokenType.COMMAND, 555 "BEGIN TRANSACTION": TokenType.BEGIN, 556 "BYTEINT": TokenType.INT, 557 "BYTES": TokenType.BINARY, 558 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 559 "DATETIME": TokenType.TIMESTAMP, 560 "DECLARE": TokenType.DECLARE, 561 "ELSEIF": TokenType.COMMAND, 562 "EXCEPTION": TokenType.COMMAND, 563 "EXPORT": TokenType.EXPORT, 564 "FLOAT64": TokenType.DOUBLE, 565 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 566 "MODEL": TokenType.MODEL, 567 "NOT DETERMINISTIC": TokenType.VOLATILE, 568 "RECORD": TokenType.STRUCT, 569 "TIMESTAMP": TokenType.TIMESTAMPTZ, 570 } 571 KEYWORDS.pop("DIV") 572 KEYWORDS.pop("VALUES") 573 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
575 class Parser(parser.Parser): 576 PREFIXED_PIVOT_COLUMNS = True 577 LOG_DEFAULTS_TO_LN = True 578 SUPPORTS_IMPLICIT_UNNEST = True 579 JOINS_HAVE_EQUAL_PRECEDENCE = True 580 581 # BigQuery does not allow ASC/DESC to be used as an identifier 582 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 583 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 584 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 585 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 586 TokenType.ASC, 587 TokenType.DESC, 588 } 589 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 590 591 FUNCTIONS = { 592 **parser.Parser.FUNCTIONS, 593 "CONTAINS_SUBSTR": _build_contains_substring, 594 "DATE": _build_date, 595 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 596 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 597 "DATE_TRUNC": lambda args: exp.DateTrunc( 598 unit=seq_get(args, 1), 599 this=seq_get(args, 0), 600 zone=seq_get(args, 2), 601 ), 602 "DATETIME": _build_datetime, 603 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 604 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 605 "DIV": binary_from_function(exp.IntDiv), 606 "EDIT_DISTANCE": _build_levenshtein, 607 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 608 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 609 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 610 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 611 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 612 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 613 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 614 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 615 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 616 "MD5": exp.MD5Digest.from_arg_list, 617 "TO_HEX": _build_to_hex, 618 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 619 [seq_get(args, 1), seq_get(args, 0)] 620 ), 621 "PARSE_TIMESTAMP": _build_parse_timestamp, 622 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 623 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 624 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 625 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 626 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 627 ), 628 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 629 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 630 "SPLIT": lambda args: exp.Split( 631 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 632 this=seq_get(args, 0), 633 expression=seq_get(args, 1) or exp.Literal.string(","), 634 ), 635 "STRPOS": exp.StrPosition.from_arg_list, 636 "TIME": _build_time, 637 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 638 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 639 "TIMESTAMP": _build_timestamp, 640 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 641 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 642 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 643 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 644 ), 645 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 646 this=seq_get(args, 0), 
scale=exp.UnixToTime.MILLIS 647 ), 648 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 649 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 650 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 651 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 652 } 653 654 FUNCTION_PARSERS = { 655 **parser.Parser.FUNCTION_PARSERS, 656 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 657 "JSON_ARRAY": lambda self: self.expression( 658 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 659 ), 660 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 661 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 662 } 663 FUNCTION_PARSERS.pop("TRIM") 664 665 NO_PAREN_FUNCTIONS = { 666 **parser.Parser.NO_PAREN_FUNCTIONS, 667 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 668 } 669 670 NESTED_TYPE_TOKENS = { 671 *parser.Parser.NESTED_TYPE_TOKENS, 672 TokenType.TABLE, 673 } 674 675 PROPERTY_PARSERS = { 676 **parser.Parser.PROPERTY_PARSERS, 677 "NOT DETERMINISTIC": lambda self: self.expression( 678 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 679 ), 680 "OPTIONS": lambda self: self._parse_with_property(), 681 } 682 683 CONSTRAINT_PARSERS = { 684 **parser.Parser.CONSTRAINT_PARSERS, 685 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 686 } 687 688 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 689 RANGE_PARSERS.pop(TokenType.OVERLAPS) 690 691 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 692 693 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 694 695 STATEMENT_PARSERS = { 696 **parser.Parser.STATEMENT_PARSERS, 697 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 698 TokenType.END: lambda self: self._parse_as_command(self._prev), 699 TokenType.FOR: lambda self: self._parse_for_in(), 700 TokenType.EXPORT: lambda self: self._parse_export_data(), 701 TokenType.DECLARE: lambda self: self._parse_declare(), 702 } 703 704 BRACKET_OFFSETS = { 705 "OFFSET": (0, False), 706 "ORDINAL": (1, False), 707 "SAFE_OFFSET": (0, True), 708 "SAFE_ORDINAL": (1, True), 709 } 710 711 def _parse_for_in(self) -> exp.ForIn: 712 this = self._parse_range() 713 self._match_text_seq("DO") 714 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 715 716 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 717 this = super()._parse_table_part(schema=schema) or self._parse_number() 718 719 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 720 if isinstance(this, exp.Identifier): 721 table_name = this.name 722 while self._match(TokenType.DASH, advance=False) and self._next: 723 start = self._curr 724 while self._is_connected() and not self._match_set( 725 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 726 ): 727 self._advance() 728 729 if start == self._curr: 730 break 731 732 table_name += self._find_sql(start, self._prev) 733 734 this = exp.Identifier( 735 this=table_name, quoted=this.args.get("quoted") 736 ).update_positions(this) 737 elif isinstance(this, exp.Literal): 738 table_name = this.name 739 740 if self._is_connected() and self._parse_var(any_token=True): 741 table_name += self._prev.text 742 743 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 744 745 return this 746 747 def _parse_table_parts( 748 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 749 ) -> 
exp.Table: 750 table = super()._parse_table_parts( 751 schema=schema, is_db_reference=is_db_reference, wildcard=True 752 ) 753 754 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 755 if not table.catalog: 756 if table.db: 757 previous_db = table.args["db"] 758 parts = table.db.split(".") 759 if len(parts) == 2 and not table.args["db"].quoted: 760 table.set( 761 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 762 ) 763 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 764 else: 765 previous_this = table.this 766 parts = table.name.split(".") 767 if len(parts) == 2 and not table.this.quoted: 768 table.set( 769 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 770 ) 771 table.set( 772 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 773 ) 774 775 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 776 alias = table.this 777 catalog, db, this, *rest = ( 778 exp.to_identifier(p, quoted=True) 779 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 780 ) 781 782 for part in (catalog, db, this): 783 if part: 784 part.update_positions(table.this) 785 786 if rest and this: 787 this = exp.Dot.build([this, *rest]) # type: ignore 788 789 table = exp.Table( 790 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 791 ) 792 table.meta["quoted_table"] = True 793 else: 794 alias = None 795 796 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 797 # dataset, so if the project identifier is omitted we need to fix the ast so that 798 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 799 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 800 # views, because it would seem like the "catalog" part is set, when it'd actually 801 # be the region/dataset. Merging the two identifiers into a single one is done to 802 # avoid producing a 4-part Table reference, which would cause issues in the schema 803 # module, when there are 3-part table names mixed with information schema views. 804 # 805 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 806 table_parts = table.parts 807 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 808 # We need to alias the table here to avoid breaking existing qualified columns. 809 # This is expected to be safe, because if there's an actual alias coming up in 810 # the token stream, it will overwrite this one. If there isn't one, we are only 811 # exposing the name that can be used to reference the view explicitly (a no-op). 812 exp.alias_( 813 table, 814 t.cast(exp.Identifier, alias or table_parts[-1]), 815 table=True, 816 copy=False, 817 ) 818 819 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 820 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 821 line=table_parts[-2].meta.get("line"), 822 col=table_parts[-1].meta.get("col"), 823 start=table_parts[-2].meta.get("start"), 824 end=table_parts[-1].meta.get("end"), 825 ) 826 table.set("this", new_this) 827 table.set("db", seq_get(table_parts, -3)) 828 table.set("catalog", seq_get(table_parts, -4)) 829 830 return table 831 832 def _parse_column(self) -> t.Optional[exp.Expression]: 833 column = super()._parse_column() 834 if isinstance(column, exp.Column): 835 parts = column.parts 836 if any("." 
in p.name for p in parts): 837 catalog, db, table, this, *rest = ( 838 exp.to_identifier(p, quoted=True) 839 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 840 ) 841 842 if rest and this: 843 this = exp.Dot.build([this, *rest]) # type: ignore 844 845 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 846 column.meta["quoted_column"] = True 847 848 return column 849 850 @t.overload 851 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 852 853 @t.overload 854 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 855 856 def _parse_json_object(self, agg=False): 857 json_object = super()._parse_json_object() 858 array_kv_pair = seq_get(json_object.expressions, 0) 859 860 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 861 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 862 if ( 863 array_kv_pair 864 and isinstance(array_kv_pair.this, exp.Array) 865 and isinstance(array_kv_pair.expression, exp.Array) 866 ): 867 keys = array_kv_pair.this.expressions 868 values = array_kv_pair.expression.expressions 869 870 json_object.set( 871 "expressions", 872 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 873 ) 874 875 return json_object 876 877 def _parse_bracket( 878 self, this: t.Optional[exp.Expression] = None 879 ) -> t.Optional[exp.Expression]: 880 bracket = super()._parse_bracket(this) 881 882 if this is bracket: 883 return bracket 884 885 if isinstance(bracket, exp.Bracket): 886 for expression in bracket.expressions: 887 name = expression.name.upper() 888 889 if name not in self.BRACKET_OFFSETS: 890 break 891 892 offset, safe = self.BRACKET_OFFSETS[name] 893 bracket.set("offset", offset) 894 bracket.set("safe", safe) 895 expression.replace(expression.expressions[0]) 896 897 return bracket 898 899 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 900 unnest = super()._parse_unnest(with_alias=with_alias) 901 902 if not unnest: 903 return None 904 905 unnest_expr = seq_get(unnest.expressions, 0) 906 if unnest_expr: 907 from sqlglot.optimizer.annotate_types import annotate_types 908 909 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 910 911 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 912 # in contrast to other dialects such as DuckDB which flattens only the array by default 913 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 914 array_elem.is_type(exp.DataType.Type.STRUCT) 915 for array_elem in unnest_expr._type.expressions 916 ): 917 unnest.set("explode_array", True) 918 919 return unnest 920 921 def _parse_make_interval(self) -> exp.MakeInterval: 922 expr = exp.MakeInterval() 923 924 for arg_key in expr.arg_types: 925 value = self._parse_lambda() 926 927 if not value: 928 break 929 930 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 931 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 932 if isinstance(value, exp.Kwarg): 933 arg_key = value.this.name 934 935 expr.set(arg_key, value) 936 937 self._match(TokenType.COMMA) 938 939 return expr 940 941 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 942 expr = self.expression( 943 exp.FeaturesAtTime, 944 this=(self._match(TokenType.TABLE) and self._parse_table()) 945 or self._parse_select(nested=True), 946 ) 947 948 while self._match(TokenType.COMMA): 949 arg = self._parse_lambda() 950 951 # Get the LHS of 
the Kwarg and set the arg to that value, e.g 952 # "num_rows => 1" sets the expr's `num_rows` arg 953 if arg: 954 expr.set(arg.this.name, arg) 955 956 return expr 957 958 def _parse_export_data(self) -> exp.Export: 959 self._match_text_seq("DATA") 960 961 return self.expression( 962 exp.Export, 963 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 964 options=self._parse_properties(), 965 this=self._match_text_seq("AS") and self._parse_select(), 966 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
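These arguments are usually forwarded through sqlglot's parsing helpers rather than by constructing a Parser directly; a small sketch:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # RAISE collects parse errors and raises at the end; max_errors caps how many are reported.
    sqlglot.parse_one("SELECT (1", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=3)
except ParseError as err:
    print(err)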
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
968 class Generator(generator.Generator): 969 INTERVAL_ALLOWS_PLURAL_FORM = False 970 JOIN_HINTS = False 971 QUERY_HINTS = False 972 TABLE_HINTS = False 973 LIMIT_FETCH = "LIMIT" 974 RENAME_TABLE_WITH_DB = False 975 NVL2_SUPPORTED = False 976 UNNEST_WITH_ORDINALITY = False 977 COLLATE_IS_FUNC = True 978 LIMIT_ONLY_LITERALS = True 979 SUPPORTS_TABLE_ALIAS_COLUMNS = False 980 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 981 JSON_KEY_VALUE_PAIR_SEP = "," 982 NULL_ORDERING_SUPPORTED = False 983 IGNORE_NULLS_IN_FUNC = True 984 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 985 CAN_IMPLEMENT_ARRAY_ANY = True 986 SUPPORTS_TO_NUMBER = False 987 NAMED_PLACEHOLDER_TOKEN = "@" 988 HEX_FUNC = "TO_HEX" 989 WITH_PROPERTIES_PREFIX = "OPTIONS" 990 SUPPORTS_EXPLODING_PROJECTIONS = False 991 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 992 SUPPORTS_UNIX_SECONDS = True 993 994 TRANSFORMS = { 995 **generator.Generator.TRANSFORMS, 996 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 997 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 998 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 999 exp.Array: inline_array_unless_query, 1000 exp.ArrayContains: _array_contains_sql, 1001 exp.ArrayFilter: filter_array_using_unnest, 1002 exp.ArrayRemove: filter_array_using_unnest, 1003 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1004 exp.CollateProperty: lambda self, e: ( 1005 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1006 if e.args.get("default") 1007 else f"COLLATE {self.sql(e, 'this')}" 1008 ), 1009 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1010 exp.CountIf: rename_func("COUNTIF"), 1011 exp.Create: _create_sql, 1012 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1013 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1014 exp.DateDiff: lambda self, e: self.func( 1015 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1016 ), 1017 exp.DateFromParts: rename_func("DATE"), 1018 exp.DateStrToDate: datestrtodate_sql, 1019 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1020 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1021 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1022 exp.FromTimeZone: lambda self, e: self.func( 1023 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1024 ), 1025 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1026 exp.GroupConcat: lambda self, e: groupconcat_sql( 1027 self, e, func_name="STRING_AGG", within_group=False 1028 ), 1029 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1030 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1031 exp.If: if_sql(false_value="NULL"), 1032 exp.ILike: no_ilike_sql, 1033 exp.IntDiv: rename_func("DIV"), 1034 exp.Int64: rename_func("INT64"), 1035 exp.JSONExtract: _json_extract_sql, 1036 exp.JSONExtractArray: _json_extract_sql, 1037 exp.JSONExtractScalar: _json_extract_sql, 1038 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1039 exp.Levenshtein: _levenshtein_sql, 1040 exp.Max: max_or_greatest, 1041 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1042 exp.MD5Digest: rename_func("MD5"), 1043 exp.Min: min_or_least, 1044 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1045 exp.RegexpExtract: lambda self, e: self.func( 1046 "REGEXP_EXTRACT", 1047 e.this, 1048 e.expression, 1049 e.args.get("position"), 1050 e.args.get("occurrence"), 1051 ), 1052 exp.RegexpExtractAll: lambda self, e: self.func( 1053 "REGEXP_EXTRACT_ALL", e.this, e.expression 1054 
), 1055 exp.RegexpReplace: regexp_replace_sql, 1056 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1057 exp.ReturnsProperty: _returnsproperty_sql, 1058 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1059 exp.Select: transforms.preprocess( 1060 [ 1061 transforms.explode_projection_to_unnest(), 1062 transforms.unqualify_unnest, 1063 transforms.eliminate_distinct_on, 1064 _alias_ordered_group, 1065 transforms.eliminate_semi_and_anti_joins, 1066 ] 1067 ), 1068 exp.SHA: rename_func("SHA1"), 1069 exp.SHA2: sha256_sql, 1070 exp.Space: space_sql, 1071 exp.StabilityProperty: lambda self, e: ( 1072 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1073 ), 1074 exp.String: rename_func("STRING"), 1075 exp.StrPosition: lambda self, e: ( 1076 strposition_sql( 1077 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1078 ) 1079 ), 1080 exp.StrToDate: _str_to_datetime_sql, 1081 exp.StrToTime: _str_to_datetime_sql, 1082 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1083 exp.TimeFromParts: rename_func("TIME"), 1084 exp.TimestampFromParts: rename_func("DATETIME"), 1085 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1086 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1087 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1088 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1089 exp.TimeStrToTime: timestrtotime_sql, 1090 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1091 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1092 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1093 exp.TsOrDsToTime: rename_func("TIME"), 1094 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1095 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1096 exp.Unhex: rename_func("FROM_HEX"), 1097 exp.UnixDate: rename_func("UNIX_DATE"), 1098 exp.UnixToTime: _unix_to_time_sql, 1099 exp.Uuid: lambda *_: "GENERATE_UUID()", 1100 exp.Values: _derived_table_values_to_unnest, 1101 exp.VariancePop: rename_func("VAR_POP"), 1102 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1103 } 1104 1105 SUPPORTED_JSON_PATH_PARTS = { 1106 exp.JSONPathKey, 1107 exp.JSONPathRoot, 1108 exp.JSONPathSubscript, 1109 } 1110 1111 TYPE_MAPPING = { 1112 **generator.Generator.TYPE_MAPPING, 1113 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1114 exp.DataType.Type.BIGINT: "INT64", 1115 exp.DataType.Type.BINARY: "BYTES", 1116 exp.DataType.Type.BLOB: "BYTES", 1117 exp.DataType.Type.BOOLEAN: "BOOL", 1118 exp.DataType.Type.CHAR: "STRING", 1119 exp.DataType.Type.DECIMAL: "NUMERIC", 1120 exp.DataType.Type.DOUBLE: "FLOAT64", 1121 exp.DataType.Type.FLOAT: "FLOAT64", 1122 exp.DataType.Type.INT: "INT64", 1123 exp.DataType.Type.NCHAR: "STRING", 1124 exp.DataType.Type.NVARCHAR: "STRING", 1125 exp.DataType.Type.SMALLINT: "INT64", 1126 exp.DataType.Type.TEXT: "STRING", 1127 exp.DataType.Type.TIMESTAMP: "DATETIME", 1128 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1129 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1130 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1131 exp.DataType.Type.TINYINT: "INT64", 1132 exp.DataType.Type.ROWVERSION: "BYTES", 1133 exp.DataType.Type.UUID: "STRING", 1134 exp.DataType.Type.VARBINARY: "BYTES", 1135 exp.DataType.Type.VARCHAR: "STRING", 1136 exp.DataType.Type.VARIANT: "ANY TYPE", 1137 } 1138 1139 PROPERTIES_LOCATION = { 1140 **generator.Generator.PROPERTIES_LOCATION, 1141 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1142 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1143 } 1144 1145 # WINDOW comes after QUALIFY 1146 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1147 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1148 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1149 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1150 } 1151 1152 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1153 RESERVED_KEYWORDS = { 1154 "all", 1155 "and", 1156 "any", 1157 "array", 1158 "as", 1159 "asc", 1160 "assert_rows_modified", 1161 "at", 1162 "between", 1163 "by", 1164 "case", 1165 "cast", 1166 "collate", 1167 "contains", 1168 "create", 1169 "cross", 1170 "cube", 1171 "current", 1172 "default", 1173 "define", 1174 "desc", 1175 "distinct", 1176 "else", 1177 "end", 1178 "enum", 1179 "escape", 1180 "except", 1181 "exclude", 1182 "exists", 1183 "extract", 1184 "false", 1185 "fetch", 1186 "following", 1187 "for", 1188 "from", 1189 "full", 1190 "group", 1191 "grouping", 1192 "groups", 1193 "hash", 1194 "having", 1195 "if", 1196 "ignore", 1197 "in", 1198 "inner", 1199 "intersect", 1200 "interval", 1201 "into", 1202 "is", 1203 "join", 1204 "lateral", 1205 "left", 1206 "like", 1207 "limit", 1208 "lookup", 1209 "merge", 1210 "natural", 1211 "new", 1212 "no", 1213 "not", 1214 "null", 1215 "nulls", 1216 "of", 1217 "on", 1218 "or", 1219 "order", 1220 "outer", 1221 "over", 1222 "partition", 1223 "preceding", 1224 "proto", 1225 "qualify", 1226 "range", 1227 "recursive", 1228 "respect", 1229 "right", 1230 "rollup", 1231 "rows", 1232 "select", 1233 "set", 1234 "some", 1235 "struct", 1236 "tablesample", 1237 "then", 1238 "to", 1239 "treat", 1240 "true", 1241 "unbounded", 1242 "union", 1243 "unnest", 1244 "using", 1245 "when", 1246 "where", 1247 "window", 1248 "with", 1249 "within", 1250 } 1251 1252 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1253 unit = expression.unit 1254 unit_sql = unit.name if unit.is_string else self.sql(unit) 1255 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1256 1257 def mod_sql(self, expression: exp.Mod) -> str: 1258 this = expression.this 1259 expr = expression.expression 1260 return self.func( 1261 "MOD", 1262 this.unnest() if isinstance(this, exp.Paren) else this, 1263 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1264 ) 1265 1266 def column_parts(self, expression: exp.Column) -> str: 1267 if expression.meta.get("quoted_column"): 1268 # If a column reference is of the form `dataset.table`.name, we need 1269 # to preserve the quoted table path, otherwise the reference breaks 1270 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1271 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1272 return f"{table_path}.{self.sql(expression, 'this')}" 1273 1274 return super().column_parts(expression) 1275 1276 def table_parts(self, expression: exp.Table) -> str: 1277 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1278 # we need to make sure the correct quoting is used in each case. 
1279 # 1280 # For example, if there is a CTE x that clashes with a schema name, then the former will 1281 # return the table y in that schema, whereas the latter will return the CTE's y column: 1282 # 1283 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1284 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1285 if expression.meta.get("quoted_table"): 1286 table_parts = ".".join(p.name for p in expression.parts) 1287 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1288 1289 return super().table_parts(expression) 1290 1291 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1292 this = expression.this 1293 if isinstance(this, exp.TsOrDsToDatetime): 1294 func_name = "FORMAT_DATETIME" 1295 elif isinstance(this, exp.TsOrDsToTimestamp): 1296 func_name = "FORMAT_TIMESTAMP" 1297 else: 1298 func_name = "FORMAT_DATE" 1299 1300 time_expr = ( 1301 this 1302 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1303 else expression 1304 ) 1305 return self.func( 1306 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1307 ) 1308 1309 def eq_sql(self, expression: exp.EQ) -> str: 1310 # Operands of = cannot be NULL in BigQuery 1311 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1312 if not isinstance(expression.parent, exp.Update): 1313 return "NULL" 1314 1315 return self.binary(expression, "=") 1316 1317 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1318 parent = expression.parent 1319 1320 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1321 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1322 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1323 return self.func( 1324 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1325 ) 1326 1327 return super().attimezone_sql(expression) 1328 1329 def trycast_sql(self, expression: exp.TryCast) -> str: 1330 return self.cast_sql(expression, safe_prefix="SAFE_") 1331 1332 def bracket_sql(self, expression: exp.Bracket) -> str: 1333 this = expression.this 1334 expressions = expression.expressions 1335 1336 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1337 arg = expressions[0] 1338 if arg.type is None: 1339 from sqlglot.optimizer.annotate_types import annotate_types 1340 1341 arg = annotate_types(arg, dialect=self.dialect) 1342 1343 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1344 # BQ doesn't support bracket syntax with string values for structs 1345 return f"{self.sql(this)}.{arg.name}" 1346 1347 expressions_sql = self.expressions(expression, flat=True) 1348 offset = expression.args.get("offset") 1349 1350 if offset == 0: 1351 expressions_sql = f"OFFSET({expressions_sql})" 1352 elif offset == 1: 1353 expressions_sql = f"ORDINAL({expressions_sql})" 1354 elif offset is not None: 1355 self.unsupported(f"Unsupported array offset: {offset}") 1356 1357 if expression.args.get("safe"): 1358 expressions_sql = f"SAFE_{expressions_sql}" 1359 1360 return f"{self.sql(this)}[{expressions_sql}]" 1361 1362 def in_unnest_op(self, expression: exp.Unnest) -> str: 1363 return self.sql(expression) 1364 1365 def version_sql(self, expression: exp.Version) -> str: 1366 if expression.name == "TIMESTAMP": 1367 expression.set("this", "SYSTEM_TIME") 1368 return super().version_sql(expression) 1369 1370 def contains_sql(self, 
expression: exp.Contains) -> str: 1371 this = expression.this 1372 expr = expression.expression 1373 1374 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1375 this = this.this 1376 expr = expr.this 1377 1378 return self.func("CONTAINS_SUBSTR", this, expr) 1379 1380 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1381 this = expression.this 1382 1383 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1384 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1385 # because they aren't literals and so the above syntax is invalid BigQuery. 1386 if isinstance(this, exp.Array): 1387 elem = seq_get(this.expressions, 0) 1388 if not (elem and elem.find(exp.Query)): 1389 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1390 1391 return super().cast_sql(expression, safe_prefix=safe_prefix) 1392 1393 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1394 variables = self.expressions(expression, "this") 1395 default = self.sql(expression, "default") 1396 default = f" DEFAULT {default}" if default else "" 1397 kind = self.sql(expression, "kind") 1398 kind = f" {kind}" if kind else "" 1399 1400 return f"{variables}{kind}{default}"
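As a quick, hedged illustration of the mappings shown in the source above (TYPE_MAPPING and the SAFE_ prefix added by trycast_sql), a minimal sketch; the expected output strings are approximate, not taken from the source:

import sqlglot

# TIMESTAMP (no time zone) maps to DATETIME and TIMESTAMPTZ to TIMESTAMP, per TYPE_MAPPING
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP), CAST(x AS TIMESTAMPTZ)", write="bigquery")[0])
# roughly: SELECT CAST(x AS DATETIME), CAST(x AS TIMESTAMP)

# TRY_CAST is rendered with the SAFE_ prefix, per trycast_sql
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])
# roughly: SELECT SAFE_CAST(x AS INT64)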
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
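A minimal sketch of how a few of the options above play out with the BigQuery dialect; the formatted output is approximate:

import sqlglot

sql = "SELECT a, b FROM t WHERE a > 1"

# identify=True quotes every identifier, which BigQuery renders with backticks
print(sqlglot.transpile(sql, write="bigquery", identify=True)[0])
# roughly: SELECT `a`, `b` FROM `t` WHERE `a` > 1

# pretty=True formats the output; indent/pad (default 2) control the nesting
print(sqlglot.transpile(sql, write="bigquery", pretty=True)[0])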
    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)
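For example, a column qualified by a quoted multi-part table path should keep that quoting on a BigQuery round trip (a sketch; the project and dataset names are made up):

import sqlglot

sql = "SELECT `my-project.dataset.tbl`.col FROM `my-project.dataset.tbl`"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# the quoted table path `my-project.dataset.tbl` should be preserved in the column reference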
    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)
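The two queries from the comment above make a good sanity check: both forms should round-trip with their original quoting, so the cross join and the implicit unnest stay distinct:

import sqlglot

for sql in (
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
    "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
):
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])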
    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        # Pick FORMAT_DATE / FORMAT_DATETIME / FORMAT_TIMESTAMP based on the argument's type
        this = expression.this
        if isinstance(this, exp.TsOrDsToDatetime):
            func_name = "FORMAT_DATETIME"
        elif isinstance(this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_TIMESTAMP"
        else:
            func_name = "FORMAT_DATE"

        time_expr = (
            this
            if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
            else expression
        )
        return self.func(
            func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
        )
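Since the FORMAT_* variant is picked from the argument's type, each of the three functions should round-trip unchanged (a sketch, assuming they parse into exp.TimeToStr wrapped around the matching TsOrDsTo* node):

import sqlglot

for sql in (
    "SELECT FORMAT_DATE('%Y-%m-%d', d)",
    "SELECT FORMAT_DATETIME('%Y-%m-%d', dt)",
    "SELECT FORMAT_TIMESTAMP('%Y-%m-%d', ts)",
):
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])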
    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")
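A minimal sketch of the NULL folding; the output is approximate:

import sqlglot

# A literal NULL operand folds the whole comparison to NULL (outside of UPDATE ... SET)
print(sqlglot.transpile("SELECT col = NULL FROM t", write="bigquery")[0])
# roughly: SELECT NULL FROM t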
    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)
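For instance, transpiling an AT TIME ZONE expression from a dialect that supports it should produce the nested TIMESTAMP(DATETIME(...)) form (a sketch; output approximate):

import sqlglot

print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# roughly: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))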
    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"
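A hedged sketch of the bracket forms this covers; both the positional OFFSET/ORDINAL wrappers and their SAFE_ variants should round-trip:

import sqlglot

print(sqlglot.transpile("SELECT arr[OFFSET(0)], arr[SAFE_ORDINAL(2)] FROM t", read="bigquery", write="bigquery")[0])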
    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
        # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
        # because they aren't literals and so the above syntax is invalid BigQuery.
        if isinstance(this, exp.Array):
            elem = seq_get(this.expressions, 0)
            if not (elem and elem.find(exp.Query)):
                return f"{self.sql(expression, 'to')}{self.sql(this)}"

        return super().cast_sql(expression, safe_prefix=safe_prefix)
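A sketch of the round trip described in the comment above; the typed array literal should come back as-is rather than as a CAST:

import sqlglot

print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])
# roughly: SELECT ARRAY<INT64>[1, 2, 3]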
    def declareitem_sql(self, expression: exp.DeclareItem) -> str:
        # Renders DECLARE items as "<variables> [<type>] [DEFAULT <expr>]"; both parts are optional
        variables = self.expressions(expression, "this")
        default = self.sql(expression, "default")
        default = f" DEFAULT {default}" if default else ""
        kind = self.sql(expression, "kind")
        kind = f" {kind}" if kind else ""

        return f"{variables}{kind}{default}"
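A sketch of a scripting DECLARE, assuming the BigQuery dialect parses it into exp.Declare/exp.DeclareItem (if it instead falls back to a generic command, the statement still round-trips verbatim):

import sqlglot

print(sqlglot.transpile("DECLARE x INT64 DEFAULT 5", read="bigquery", write="bigquery")[0])
# roughly: DECLARE x INT64 DEFAULT 5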
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql