sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY y + 1
# ORDER BY z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression

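# Illustrative sketch (not part of the upstream module): the effect of the transform
# above, exercised via sqlglot's public API. The exact output is abridged/approximate.
#
#   import sqlglot
#   sqlglot.transpile(
#       "WITH x AS (SELECT 1 AS y) SELECT y + 1 AS z FROM x GROUP BY y + 1 ORDER BY z",
#       write="bigquery",
#   )
#   # -> the grouped expression is replaced by its select alias: ... GROUP BY z ORDER BY z
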
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)

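# Illustrative sketch: the scale-to-function mapping implemented by _unix_to_time_sql
# (SQL strings approximate):
#
#   exp.UnixToTime(this=ts)                               -> TIMESTAMP_SECONDS(ts)
#   exp.UnixToTime(this=ts, scale=exp.UnixToTime.MILLIS)  -> TIMESTAMP_MILLIS(ts)
#   exp.UnixToTime(this=ts, scale=exp.UnixToTime.MICROS)  -> TIMESTAMP_MICROS(ts)
#   # any other scale divides down to seconds:
#   #   TIMESTAMP_SECONDS(CAST(ts / POW(10, scale) AS INT64))
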
def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$', i.e. all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR, etc. follow this return type convention:

    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder

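# Illustrative sketch: BigQuery's FORMAT_DATE/FORMAT_DATETIME/FORMAT_TIMESTAMP/FORMAT_TIME
# take the format first and the value second. The builder above wraps the value in the
# given TsOrDs* node so timetostr_sql can later pick the matching FORMAT_* function back.
# Approximate:
#
#   FORMAT_DATE('%Y', d)  -> exp.TimeToStr(this=exp.TsOrDsToDate(this=d), format='%Y')
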
def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated


def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    array_args = expression.expressions

    # BigQuery behaves as follows:
    #
    # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t              -- foo, STRUCT<STRING>
    # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo'))   -- foo, ARRAY<STRING>
    if (
        len(array_args) == 1
        and isinstance(select := array_args[0].unnest(), exp.Select)
        and (query_type := select.meta.get("query_type")) is not None
        and query_type.is_type(exp.DataType.Type.STRUCT)
        and len(query_type.expressions) == 1
        and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
        and (projection_type := col_def.kind) is not None
        and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
    ):
        array_type = exp.DataType(
            this=exp.DataType.Type.ARRAY,
            expressions=[projection_type.copy()],
            nested=True,
        )
        return self._annotate_with_type(expression, array_type)

    return self._annotate_by_args(expression, "expressions", array=True)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Array: _annotate_array,
        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.CodePointsToString: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.Concat: _annotate_concat,
        exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
        exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
        ),
        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<VARCHAR>")
        ),
        exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.DATETIME
        ),
        exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
        exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

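    # Illustrative sketch of the heuristic above, exercised through the optimizer's
    # identifier normalization (behavior approximate):
    #
    #   from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
    #   e = sqlglot.parse_one("SELECT Col FROM dataset.Tbl", read="bigquery")
    #   normalize_identifiers(e, dialect="bigquery")
    #   # -> `Col` is lowercased (case-insensitive), while the qualified table name
    #   #    `dataset.Tbl` keeps its casing, since tables are case-sensitive in BigQuery
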
    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        VAR_TOKENS = {
            TokenType.DASH,
            TokenType.VAR,
        }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier
        ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
            TokenType.ASC,
            TokenType.DESC,
        }
        UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
        }

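        # Illustrative sketch: several of the builders above swap arguments, because
        # BigQuery passes the format string first while sqlglot's canonical nodes store
        # the value first. Approximate:
        #
        #   PARSE_TIMESTAMP('%Y-%m-%d', '2020-01-01')
        #   # -> exp.StrToTime(this='2020-01-01', format=..., zone=None)
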
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

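        # Illustrative sketch: OPTIONS(...) parses as a property list both at statement
        # level and as a column constraint, so e.g. the following round-trips (SQL
        # approximate):
        #
        #   CREATE TABLE t (x INT64 OPTIONS(description='col')) OPTIONS(description='tbl')
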
        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

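        # Illustrative sketch: _parse_table_part glues dash-separated tokens back into a
        # single identifier, so unquoted dashed project names parse as one table part.
        # Approximate:
        #
        #   sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery")
        #   # -> Table(catalog=my-project, db=mydataset, this=mytable)
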
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

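        # Illustrative sketch: dotted names inside quoted identifiers are re-split into
        # their parts and flagged, so the original quoting can be reproduced on
        # generation. Approximate:
        #
        #   sqlglot.parse_one("SELECT `proj.data.tbl`.col FROM `proj.data.tbl`", read="bigquery")
        #   # -> the Column/Table nodes carry meta {"quoted_column": True} / {"quoted_table": True}
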
        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e. array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order, e.g. MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g.
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )

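        # Illustrative sketch: EXPORT DATA statements parse into exp.Export (SQL shape
        # approximate):
        #
        #   EXPORT DATA WITH CONNECTION proj.us.conn OPTIONS(uri='gs://bucket/*.csv') AS SELECT 1
        #   # -> exp.Export(connection=Table(...), options=Properties(...), this=Select(...))
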
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")

        TS_OR_DS_TYPES = (
            exp.TsOrDsToDatetime,
            exp.TsOrDsToTimestamp,
            exp.TsOrDsToTime,
            exp.TsOrDsToDate,
        )

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
            exp.ParseDatetime: lambda self, e: self.func(
                "PARSE_DATETIME", self.format_time(e), e.this
            ),
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            unit = expression.unit
            unit_sql = unit.name if unit.is_string else self.sql(unit)
            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            elif isinstance(this, exp.TsOrDsToTime):
                func_name = "FORMAT_TIME"
            else:
                func_name = "FORMAT_DATE"

            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def declareitem_sql(self, expression: exp.DeclareItem) -> str:
            variables = self.expressions(expression, "this")
            default = self.sql(expression, "default")
            default = f" DEFAULT {default}" if default else ""
            kind = self.sql(expression, "kind")
            kind = f" {kind}" if kind else ""

            return f"{variables}{kind}{default}"

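# A minimal, self-contained usage sketch (not part of the dialect itself), exercising
# this module through sqlglot's public API; the printed output is illustrative.
if __name__ == "__main__":
    import sqlglot

    # Parse BigQuery SQL with this dialect and render it back
    ast = sqlglot.parse_one("SELECT TO_HEX(MD5('abc'))", read="bigquery")
    print(ast.sql(dialect="bigquery"))

    # Transpile a BigQuery-specific function to another dialect
    print(
        sqlglot.transpile(
            "SELECT TIMESTAMP_MILLIS(1700000000000)",
            read="bigquery",
            write="duckdb",
        )[0]
    )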
exp.Properties(expressions=self._parse_with_property()), 728 } 729 730 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 731 RANGE_PARSERS.pop(TokenType.OVERLAPS) 732 733 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 734 735 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 736 737 STATEMENT_PARSERS = { 738 **parser.Parser.STATEMENT_PARSERS, 739 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 740 TokenType.END: lambda self: self._parse_as_command(self._prev), 741 TokenType.FOR: lambda self: self._parse_for_in(), 742 TokenType.EXPORT: lambda self: self._parse_export_data(), 743 TokenType.DECLARE: lambda self: self._parse_declare(), 744 } 745 746 BRACKET_OFFSETS = { 747 "OFFSET": (0, False), 748 "ORDINAL": (1, False), 749 "SAFE_OFFSET": (0, True), 750 "SAFE_ORDINAL": (1, True), 751 } 752 753 def _parse_for_in(self) -> exp.ForIn: 754 this = self._parse_range() 755 self._match_text_seq("DO") 756 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 757 758 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 759 this = super()._parse_table_part(schema=schema) or self._parse_number() 760 761 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 762 if isinstance(this, exp.Identifier): 763 table_name = this.name 764 while self._match(TokenType.DASH, advance=False) and self._next: 765 start = self._curr 766 while self._is_connected() and not self._match_set( 767 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 768 ): 769 self._advance() 770 771 if start == self._curr: 772 break 773 774 table_name += self._find_sql(start, self._prev) 775 776 this = exp.Identifier( 777 this=table_name, quoted=this.args.get("quoted") 778 ).update_positions(this) 779 elif isinstance(this, exp.Literal): 780 table_name = this.name 781 782 if self._is_connected() and self._parse_var(any_token=True): 783 table_name += self._prev.text 784 785 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 786 787 return this 788 789 def _parse_table_parts( 790 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 791 ) -> exp.Table: 792 table = super()._parse_table_parts( 793 schema=schema, is_db_reference=is_db_reference, wildcard=True 794 ) 795 796 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 797 if not table.catalog: 798 if table.db: 799 previous_db = table.args["db"] 800 parts = table.db.split(".") 801 if len(parts) == 2 and not table.args["db"].quoted: 802 table.set( 803 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 804 ) 805 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 806 else: 807 previous_this = table.this 808 parts = table.name.split(".") 809 if len(parts) == 2 and not table.this.quoted: 810 table.set( 811 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 812 ) 813 table.set( 814 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 815 ) 816 817 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 818 alias = table.this 819 catalog, db, this, *rest = ( 820 exp.to_identifier(p, quoted=True) 821 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 822 ) 823 824 for part in (catalog, db, this): 825 if part: 826 part.update_positions(table.this) 827 828 if rest and this: 829 this = exp.Dot.build([this, *rest]) # type: ignore 830 831 table = exp.Table( 832 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 833 ) 834 table.meta["quoted_table"] = True 835 else: 836 alias = None 837 838 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 839 # dataset, so if the project identifier is omitted we need to fix the ast so that 840 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 841 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 842 # views, because it would seem like the "catalog" part is set, when it'd actually 843 # be the region/dataset. Merging the two identifiers into a single one is done to 844 # avoid producing a 4-part Table reference, which would cause issues in the schema 845 # module, when there are 3-part table names mixed with information schema views. 846 # 847 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 848 table_parts = table.parts 849 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 850 # We need to alias the table here to avoid breaking existing qualified columns. 851 # This is expected to be safe, because if there's an actual alias coming up in 852 # the token stream, it will overwrite this one. If there isn't one, we are only 853 # exposing the name that can be used to reference the view explicitly (a no-op). 854 exp.alias_( 855 table, 856 t.cast(exp.Identifier, alias or table_parts[-1]), 857 table=True, 858 copy=False, 859 ) 860 861 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 862 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 863 line=table_parts[-2].meta.get("line"), 864 col=table_parts[-1].meta.get("col"), 865 start=table_parts[-2].meta.get("start"), 866 end=table_parts[-1].meta.get("end"), 867 ) 868 table.set("this", new_this) 869 table.set("db", seq_get(table_parts, -3)) 870 table.set("catalog", seq_get(table_parts, -4)) 871 872 return table 873 874 def _parse_column(self) -> t.Optional[exp.Expression]: 875 column = super()._parse_column() 876 if isinstance(column, exp.Column): 877 parts = column.parts 878 if any("." in p.name for p in parts): 879 catalog, db, table, this, *rest = ( 880 exp.to_identifier(p, quoted=True) 881 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 882 ) 883 884 if rest and this: 885 this = exp.Dot.build([this, *rest]) # type: ignore 886 887 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 888 column.meta["quoted_column"] = True 889 890 return column 891 892 @t.overload 893 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 894 895 @t.overload 896 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
897 898 def _parse_json_object(self, agg=False): 899 json_object = super()._parse_json_object() 900 array_kv_pair = seq_get(json_object.expressions, 0) 901 902 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 903 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 904 if ( 905 array_kv_pair 906 and isinstance(array_kv_pair.this, exp.Array) 907 and isinstance(array_kv_pair.expression, exp.Array) 908 ): 909 keys = array_kv_pair.this.expressions 910 values = array_kv_pair.expression.expressions 911 912 json_object.set( 913 "expressions", 914 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 915 ) 916 917 return json_object 918 919 def _parse_bracket( 920 self, this: t.Optional[exp.Expression] = None 921 ) -> t.Optional[exp.Expression]: 922 bracket = super()._parse_bracket(this) 923 924 if this is bracket: 925 return bracket 926 927 if isinstance(bracket, exp.Bracket): 928 for expression in bracket.expressions: 929 name = expression.name.upper() 930 931 if name not in self.BRACKET_OFFSETS: 932 break 933 934 offset, safe = self.BRACKET_OFFSETS[name] 935 bracket.set("offset", offset) 936 bracket.set("safe", safe) 937 expression.replace(expression.expressions[0]) 938 939 return bracket 940 941 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 942 unnest = super()._parse_unnest(with_alias=with_alias) 943 944 if not unnest: 945 return None 946 947 unnest_expr = seq_get(unnest.expressions, 0) 948 if unnest_expr: 949 from sqlglot.optimizer.annotate_types import annotate_types 950 951 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 952 953 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 954 # in contrast to other dialects such as DuckDB which flattens only the array by default 955 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 956 array_elem.is_type(exp.DataType.Type.STRUCT) 957 for array_elem in unnest_expr._type.expressions 958 ): 959 unnest.set("explode_array", True) 960 961 return unnest 962 963 def _parse_make_interval(self) -> exp.MakeInterval: 964 expr = exp.MakeInterval() 965 966 for arg_key in expr.arg_types: 967 value = self._parse_lambda() 968 969 if not value: 970 break 971 972 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 973 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 974 if isinstance(value, exp.Kwarg): 975 arg_key = value.this.name 976 977 expr.set(arg_key, value) 978 979 self._match(TokenType.COMMA) 980 981 return expr 982 983 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 984 expr = self.expression( 985 exp.FeaturesAtTime, 986 this=(self._match(TokenType.TABLE) and self._parse_table()) 987 or self._parse_select(nested=True), 988 ) 989 990 while self._match(TokenType.COMMA): 991 arg = self._parse_lambda() 992 993 # Get the LHS of the Kwarg and set the arg to that value, e.g 994 # "num_rows => 1" sets the expr's `num_rows` arg 995 if arg: 996 expr.set(arg.this.name, arg) 997 998 return expr 999 1000 def _parse_export_data(self) -> exp.Export: 1001 self._match_text_seq("DATA") 1002 1003 return self.expression( 1004 exp.Export, 1005 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1006 options=self._parse_properties(), 1007 this=self._match_text_seq("AS") and self._parse_select(), 1008 ) 1009 1010 class Generator(generator.Generator): 1011 INTERVAL_ALLOWS_PLURAL_FORM = False 
1012 JOIN_HINTS = False 1013 QUERY_HINTS = False 1014 TABLE_HINTS = False 1015 LIMIT_FETCH = "LIMIT" 1016 RENAME_TABLE_WITH_DB = False 1017 NVL2_SUPPORTED = False 1018 UNNEST_WITH_ORDINALITY = False 1019 COLLATE_IS_FUNC = True 1020 LIMIT_ONLY_LITERALS = True 1021 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1022 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1023 JSON_KEY_VALUE_PAIR_SEP = "," 1024 NULL_ORDERING_SUPPORTED = False 1025 IGNORE_NULLS_IN_FUNC = True 1026 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1027 CAN_IMPLEMENT_ARRAY_ANY = True 1028 SUPPORTS_TO_NUMBER = False 1029 NAMED_PLACEHOLDER_TOKEN = "@" 1030 HEX_FUNC = "TO_HEX" 1031 WITH_PROPERTIES_PREFIX = "OPTIONS" 1032 SUPPORTS_EXPLODING_PROJECTIONS = False 1033 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1034 SUPPORTS_UNIX_SECONDS = True 1035 1036 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1037 1038 TS_OR_DS_TYPES = ( 1039 exp.TsOrDsToDatetime, 1040 exp.TsOrDsToTimestamp, 1041 exp.TsOrDsToTime, 1042 exp.TsOrDsToDate, 1043 ) 1044 1045 TRANSFORMS = { 1046 **generator.Generator.TRANSFORMS, 1047 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1048 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1049 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1050 exp.Array: inline_array_unless_query, 1051 exp.ArrayContains: _array_contains_sql, 1052 exp.ArrayFilter: filter_array_using_unnest, 1053 exp.ArrayRemove: filter_array_using_unnest, 1054 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1055 exp.CollateProperty: lambda self, e: ( 1056 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1057 if e.args.get("default") 1058 else f"COLLATE {self.sql(e, 'this')}" 1059 ), 1060 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1061 exp.CountIf: rename_func("COUNTIF"), 1062 exp.Create: _create_sql, 1063 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1064 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1065 exp.DateDiff: lambda self, e: self.func( 1066 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1067 ), 1068 exp.DateFromParts: rename_func("DATE"), 1069 exp.DateStrToDate: datestrtodate_sql, 1070 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1071 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1072 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1073 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1074 exp.FromTimeZone: lambda self, e: self.func( 1075 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1076 ), 1077 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1078 exp.GroupConcat: lambda self, e: groupconcat_sql( 1079 self, e, func_name="STRING_AGG", within_group=False 1080 ), 1081 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1082 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1083 exp.If: if_sql(false_value="NULL"), 1084 exp.ILike: no_ilike_sql, 1085 exp.IntDiv: rename_func("DIV"), 1086 exp.Int64: rename_func("INT64"), 1087 exp.JSONExtract: _json_extract_sql, 1088 exp.JSONExtractArray: _json_extract_sql, 1089 exp.JSONExtractScalar: _json_extract_sql, 1090 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1091 exp.Levenshtein: _levenshtein_sql, 1092 exp.Max: max_or_greatest, 1093 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1094 exp.MD5Digest: rename_func("MD5"), 1095 exp.Min: min_or_least, 1096 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1097 exp.RegexpExtract: lambda self, e: self.func( 1098 
"REGEXP_EXTRACT", 1099 e.this, 1100 e.expression, 1101 e.args.get("position"), 1102 e.args.get("occurrence"), 1103 ), 1104 exp.RegexpExtractAll: lambda self, e: self.func( 1105 "REGEXP_EXTRACT_ALL", e.this, e.expression 1106 ), 1107 exp.RegexpReplace: regexp_replace_sql, 1108 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1109 exp.ReturnsProperty: _returnsproperty_sql, 1110 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1111 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1112 exp.ParseDatetime: lambda self, e: self.func( 1113 "PARSE_DATETIME", self.format_time(e), e.this 1114 ), 1115 exp.Select: transforms.preprocess( 1116 [ 1117 transforms.explode_projection_to_unnest(), 1118 transforms.unqualify_unnest, 1119 transforms.eliminate_distinct_on, 1120 _alias_ordered_group, 1121 transforms.eliminate_semi_and_anti_joins, 1122 ] 1123 ), 1124 exp.SHA: rename_func("SHA1"), 1125 exp.SHA2: sha256_sql, 1126 exp.StabilityProperty: lambda self, e: ( 1127 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1128 ), 1129 exp.String: rename_func("STRING"), 1130 exp.StrPosition: lambda self, e: ( 1131 strposition_sql( 1132 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1133 ) 1134 ), 1135 exp.StrToDate: _str_to_datetime_sql, 1136 exp.StrToTime: _str_to_datetime_sql, 1137 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1138 exp.TimeFromParts: rename_func("TIME"), 1139 exp.TimestampFromParts: rename_func("DATETIME"), 1140 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1141 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1142 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1143 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1144 exp.TimeStrToTime: timestrtotime_sql, 1145 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1146 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1147 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1148 exp.TsOrDsToTime: rename_func("TIME"), 1149 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1150 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1151 exp.Unhex: rename_func("FROM_HEX"), 1152 exp.UnixDate: rename_func("UNIX_DATE"), 1153 exp.UnixToTime: _unix_to_time_sql, 1154 exp.Uuid: lambda *_: "GENERATE_UUID()", 1155 exp.Values: _derived_table_values_to_unnest, 1156 exp.VariancePop: rename_func("VAR_POP"), 1157 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1158 } 1159 1160 SUPPORTED_JSON_PATH_PARTS = { 1161 exp.JSONPathKey, 1162 exp.JSONPathRoot, 1163 exp.JSONPathSubscript, 1164 } 1165 1166 TYPE_MAPPING = { 1167 **generator.Generator.TYPE_MAPPING, 1168 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1169 exp.DataType.Type.BIGINT: "INT64", 1170 exp.DataType.Type.BINARY: "BYTES", 1171 exp.DataType.Type.BLOB: "BYTES", 1172 exp.DataType.Type.BOOLEAN: "BOOL", 1173 exp.DataType.Type.CHAR: "STRING", 1174 exp.DataType.Type.DECIMAL: "NUMERIC", 1175 exp.DataType.Type.DOUBLE: "FLOAT64", 1176 exp.DataType.Type.FLOAT: "FLOAT64", 1177 exp.DataType.Type.INT: "INT64", 1178 exp.DataType.Type.NCHAR: "STRING", 1179 exp.DataType.Type.NVARCHAR: "STRING", 1180 exp.DataType.Type.SMALLINT: "INT64", 1181 exp.DataType.Type.TEXT: "STRING", 1182 exp.DataType.Type.TIMESTAMP: "DATETIME", 1183 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1184 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1185 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1186 exp.DataType.Type.TINYINT: "INT64", 1187 exp.DataType.Type.ROWVERSION: "BYTES", 1188 exp.DataType.Type.UUID: "STRING", 1189 exp.DataType.Type.VARBINARY: "BYTES", 1190 
exp.DataType.Type.VARCHAR: "STRING", 1191 exp.DataType.Type.VARIANT: "ANY TYPE", 1192 } 1193 1194 PROPERTIES_LOCATION = { 1195 **generator.Generator.PROPERTIES_LOCATION, 1196 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1197 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1198 } 1199 1200 # WINDOW comes after QUALIFY 1201 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1202 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1203 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1204 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1205 } 1206 1207 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1208 RESERVED_KEYWORDS = { 1209 "all", 1210 "and", 1211 "any", 1212 "array", 1213 "as", 1214 "asc", 1215 "assert_rows_modified", 1216 "at", 1217 "between", 1218 "by", 1219 "case", 1220 "cast", 1221 "collate", 1222 "contains", 1223 "create", 1224 "cross", 1225 "cube", 1226 "current", 1227 "default", 1228 "define", 1229 "desc", 1230 "distinct", 1231 "else", 1232 "end", 1233 "enum", 1234 "escape", 1235 "except", 1236 "exclude", 1237 "exists", 1238 "extract", 1239 "false", 1240 "fetch", 1241 "following", 1242 "for", 1243 "from", 1244 "full", 1245 "group", 1246 "grouping", 1247 "groups", 1248 "hash", 1249 "having", 1250 "if", 1251 "ignore", 1252 "in", 1253 "inner", 1254 "intersect", 1255 "interval", 1256 "into", 1257 "is", 1258 "join", 1259 "lateral", 1260 "left", 1261 "like", 1262 "limit", 1263 "lookup", 1264 "merge", 1265 "natural", 1266 "new", 1267 "no", 1268 "not", 1269 "null", 1270 "nulls", 1271 "of", 1272 "on", 1273 "or", 1274 "order", 1275 "outer", 1276 "over", 1277 "partition", 1278 "preceding", 1279 "proto", 1280 "qualify", 1281 "range", 1282 "recursive", 1283 "respect", 1284 "right", 1285 "rollup", 1286 "rows", 1287 "select", 1288 "set", 1289 "some", 1290 "struct", 1291 "tablesample", 1292 "then", 1293 "to", 1294 "treat", 1295 "true", 1296 "unbounded", 1297 "union", 1298 "unnest", 1299 "using", 1300 "when", 1301 "where", 1302 "window", 1303 "with", 1304 "within", 1305 } 1306 1307 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1308 unit = expression.unit 1309 unit_sql = unit.name if unit.is_string else self.sql(unit) 1310 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1311 1312 def mod_sql(self, expression: exp.Mod) -> str: 1313 this = expression.this 1314 expr = expression.expression 1315 return self.func( 1316 "MOD", 1317 this.unnest() if isinstance(this, exp.Paren) else this, 1318 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1319 ) 1320 1321 def column_parts(self, expression: exp.Column) -> str: 1322 if expression.meta.get("quoted_column"): 1323 # If a column reference is of the form `dataset.table`.name, we need 1324 # to preserve the quoted table path, otherwise the reference breaks 1325 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1326 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1327 return f"{table_path}.{self.sql(expression, 'this')}" 1328 1329 return super().column_parts(expression) 1330 1331 def table_parts(self, expression: exp.Table) -> str: 1332 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1333 # we need to make sure the correct quoting is used in each case. 
1334 # 1335 # For example, if there is a CTE x that clashes with a schema name, then the former will 1336 # return the table y in that schema, whereas the latter will return the CTE's y column: 1337 # 1338 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1339 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1340 if expression.meta.get("quoted_table"): 1341 table_parts = ".".join(p.name for p in expression.parts) 1342 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1343 1344 return super().table_parts(expression) 1345 1346 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1347 this = expression.this 1348 if isinstance(this, exp.TsOrDsToDatetime): 1349 func_name = "FORMAT_DATETIME" 1350 elif isinstance(this, exp.TsOrDsToTimestamp): 1351 func_name = "FORMAT_TIMESTAMP" 1352 elif isinstance(this, exp.TsOrDsToTime): 1353 func_name = "FORMAT_TIME" 1354 else: 1355 func_name = "FORMAT_DATE" 1356 1357 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1358 return self.func( 1359 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1360 ) 1361 1362 def eq_sql(self, expression: exp.EQ) -> str: 1363 # Operands of = cannot be NULL in BigQuery 1364 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1365 if not isinstance(expression.parent, exp.Update): 1366 return "NULL" 1367 1368 return self.binary(expression, "=") 1369 1370 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1371 parent = expression.parent 1372 1373 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1374 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1375 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1376 return self.func( 1377 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1378 ) 1379 1380 return super().attimezone_sql(expression) 1381 1382 def trycast_sql(self, expression: exp.TryCast) -> str: 1383 return self.cast_sql(expression, safe_prefix="SAFE_") 1384 1385 def bracket_sql(self, expression: exp.Bracket) -> str: 1386 this = expression.this 1387 expressions = expression.expressions 1388 1389 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1390 arg = expressions[0] 1391 if arg.type is None: 1392 from sqlglot.optimizer.annotate_types import annotate_types 1393 1394 arg = annotate_types(arg, dialect=self.dialect) 1395 1396 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1397 # BQ doesn't support bracket syntax with string values for structs 1398 return f"{self.sql(this)}.{arg.name}" 1399 1400 expressions_sql = self.expressions(expression, flat=True) 1401 offset = expression.args.get("offset") 1402 1403 if offset == 0: 1404 expressions_sql = f"OFFSET({expressions_sql})" 1405 elif offset == 1: 1406 expressions_sql = f"ORDINAL({expressions_sql})" 1407 elif offset is not None: 1408 self.unsupported(f"Unsupported array offset: {offset}") 1409 1410 if expression.args.get("safe"): 1411 expressions_sql = f"SAFE_{expressions_sql}" 1412 1413 return f"{self.sql(this)}[{expressions_sql}]" 1414 1415 def in_unnest_op(self, expression: exp.Unnest) -> str: 1416 return self.sql(expression) 1417 1418 def version_sql(self, expression: exp.Version) -> str: 1419 if expression.name == "TIMESTAMP": 1420 expression.set("this", "SYSTEM_TIME") 1421 return super().version_sql(expression) 1422 1423 def contains_sql(self, expression: exp.Contains) -> str: 1424 this = expression.this 1425 expr = expression.expression 1426 1427 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1428 this = this.this 1429 expr = expr.this 1430 1431 return self.func("CONTAINS_SUBSTR", this, expr) 1432 1433 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1434 this = expression.this 1435 1436 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1437 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1438 # because they aren't literals and so the above syntax is invalid BigQuery. 1439 if isinstance(this, exp.Array): 1440 elem = seq_get(this.expressions, 0) 1441 if not (elem and elem.find(exp.Query)): 1442 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1443 1444 return super().cast_sql(expression, safe_prefix=safe_prefix) 1445 1446 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1447 variables = self.expressions(expression, "this") 1448 default = self.sql(expression, "default") 1449 default = f" DEFAULT {default}" if default else "" 1450 kind = self.sql(expression, "kind") 1451 kind = f" {kind}" if kind else "" 1452 1453 return f"{variables}{kind}{default}"
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
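For illustration, a minimal sketch of how this flag plays out when transpiling (the exact output below is an assumption based on current behavior; BigQuery takes the base second, while base-first dialects such as Postgres take it first):

import sqlglot

# BigQuery: LOG(value, base) -- LOG_BASE_FIRST = False.
# Postgres: LOG(base, value) -- base comes first, so the arguments are swapped.
print(sqlglot.transpile("SELECT LOG(64, 2)", read="bigquery", write="postgres")[0])
# Expected: SELECT LOG(2, 64)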
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
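A minimal sketch of how this surfaces through the optimizer's qualify pass (output shape and identifier quoting are version-dependent assumptions):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data "
    "WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)

# With FORCE_EARLY_ALIAS_REF_EXPANSION, the my_id references in GROUP BY and
# HAVING are expanded to the underlying column (id) before qualification.
print(qualify(parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))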
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
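A short sketch of the roundtrip this enables (the outputs below assume current behavior); both functions parse into the same expression node, and the preserved name lets the generator emit the spelling that was read:

import sqlglot

print(sqlglot.transpile("SELECT JSON_VALUE(j, '$.x')", read="bigquery", write="bigquery")[0])
# Expected: SELECT JSON_VALUE(j, '$.x')
print(sqlglot.transpile("SELECT JSON_EXTRACT_SCALAR(j, '$.x')", read="bigquery", write="bigquery")[0])
# Expected: SELECT JSON_EXTRACT_SCALAR(j, '$.x')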
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
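For example (a sketch; the node name reflects the current expression hierarchy), BigQuery's 0xCC literal parses as a hex string that this flag marks as integer-valued:

from sqlglot import parse_one

# 0xCC is an integer literal written in hex; it parses to an exp.HexString node,
# which this flag tells sqlglot to treat as an integer rather than a binary value.
node = parse_one("SELECT 0xCC", read="bigquery").expressions[0]
print(type(node).__name__)  # HexString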
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
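As a quick sketch of TIME_MAPPING in action (the exact target-side output is an assumption and may include extra casts or a different function name), format elements are rewritten when crossing dialects:

import sqlglot

# '%e' (space-padded day of month) maps to the strftime token '%-d' via
# TIME_MAPPING, so the format string is translated for the target dialect.
print(sqlglot.transpile("SELECT FORMAT_DATE('%e', d)", read="bigquery", write="duckdb")[0])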
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
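For example (assuming current transpilation behavior), a bare UNION read from another dialect is generated with an explicit DISTINCT for BigQuery:

import sqlglot

# BigQuery requires DISTINCT or ALL on set operations.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="bigquery")[0])
# Expected: SELECT 1 UNION DISTINCT SELECT 2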
def normalize_identifier(self, expression: E) -> E: ...
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
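A small sketch of the BigQuery behavior described above (the helpers used are part of the public sqlglot API; the exact result assumes current semantics):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# A bare identifier (e.g. a CTE or column name) is case-insensitive in BigQuery,
# so normalization lowercases it.
print(dialect.normalize_identifier(exp.to_identifier("MyCte")).name)  # mycte

Per the heuristic in the source above, identifiers attached to qualified tables or user-defined functions are treated as case-sensitive and left untouched.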
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
class JSONPathTokenizer(jsonpath.JSONPathTokenizer): ...
class Tokenizer(tokens.Tokenizer): ...
class Parser(parser.Parser): ...
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
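As a usage sketch (accessor names assume the current sqlglot API), the BigQuery parser also handles dashed, unquoted table parts such as project names:

from sqlglot import exp, parse_one

# The dash in my-project is stitched back into a single identifier instead of
# being parsed as a minus between tokens.
table = parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery").find(exp.Table)
print(table.catalog, table.db, table.name)
# Expected: my-project mydataset mytable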
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"
    HEX_FUNC = "TO_HEX"
    WITH_PROPERTIES_PREFIX = "OPTIONS"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_UNIX_SECONDS = True

    SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")

    TS_OR_DS_TYPES = (
        exp.TsOrDsToDatetime,
        exp.TsOrDsToTimestamp,
        exp.TsOrDsToTime,
        exp.TsOrDsToDate,
    )

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArrayRemove: filter_array_using_unnest,
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: lambda self, e: groupconcat_sql(
            self, e, func_name="STRING_AGG", within_group=False
        ),
        exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
        exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.Int64: rename_func("INT64"),
        exp.JSONExtract: _json_extract_sql,
        exp.JSONExtractArray: _json_extract_sql,
        exp.JSONExtractScalar: _json_extract_sql,
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Levenshtein: _levenshtein_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpExtractAll: lambda self, e: self.func(
            "REGEXP_EXTRACT_ALL", e.this, e.expression
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
        exp.ParseDatetime: lambda self, e: self.func(
            "PARSE_DATETIME", self.format_time(e), e.this
        ),
        exp.Select: transforms.preprocess(
            [
                transforms.explode_projection_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.String: rename_func("STRING"),
        exp.StrPosition: lambda self, e: (
            strposition_sql(
                self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
            )
        ),
        exp.StrToDate: _str_to_datetime_sql,
        exp.StrToTime: _str_to_datetime_sql,
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimestampFromParts: rename_func("DATETIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.TsOrDsToDatetime: rename_func("DATETIME"),
        exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Uuid: lambda *_: "GENERATE_UUID()",
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
        exp.SafeDivide: rename_func("SAFE_DIVIDE"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BLOB: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.UUID: "STRING",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # WINDOW comes after QUALIFY
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
    AFTER_HAVING_MODIFIER_TRANSFORMS = {
        "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
        "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        unit = expression.unit
        unit_sql = unit.name if unit.is_string else self.sql(unit)
        return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

    def mod_sql(self, expression: exp.Mod) -> str:
        this = expression.this
        expr = expression.expression
        return self.func(
            "MOD",
            this.unnest() if isinstance(this, exp.Paren) else this,
            expr.unnest() if isinstance(expr, exp.Paren) else expr,
        )

    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if isinstance(this, exp.TsOrDsToDatetime):
            func_name = "FORMAT_DATETIME"
        elif isinstance(this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_TIMESTAMP"
        elif isinstance(this, exp.TsOrDsToTime):
            func_name = "FORMAT_TIME"
        else:
            func_name = "FORMAT_DATE"

        time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
        return self.func(
            func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
        )

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)

    def contains_sql(self, expression: exp.Contains) -> str:
        this = expression.this
        expr = expression.expression

        if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
            this = this.this
            expr = expr.this

        return self.func("CONTAINS_SUBSTR", this, expr)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
        # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
        # because they aren't literals and so the above syntax is invalid BigQuery.
        if isinstance(this, exp.Array):
            elem = seq_get(this.expressions, 0)
            if not (elem and elem.find(exp.Query)):
                return f"{self.sql(expression, 'to')}{self.sql(this)}"

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def declareitem_sql(self, expression: exp.DeclareItem) -> str:
        variables = self.expressions(expression, "this")
        default = self.sql(expression, "default")
        default = f" DEFAULT {default}" if default else ""
        kind = self.sql(expression, "kind")
        kind = f" {kind}" if kind else ""

        return f"{variables}{kind}{default}"
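A rough sketch of how TYPE_MAPPING and TRANSFORMS drive generation; the output shown in the comments is indicative and may vary slightly across sqlglot versions:

    import sqlglot

    # TYPE_MAPPING: TEXT -> STRING, DOUBLE -> FLOAT64
    print(sqlglot.transpile("CREATE TABLE t (a TEXT, b DOUBLE)", write="bigquery")[0])
    # e.g. CREATE TABLE t (a STRING, b FLOAT64)

    # TRANSFORMS: BigQuery has no ILIKE, so exp.ILike is lowered via no_ilike_sql
    print(sqlglot.transpile("SELECT x ILIKE '%y%'", read="postgres", write="bigquery")[0])
    # e.g. SELECT LOWER(x) LIKE '%y%'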
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - 'upper' or True (default): Convert names to uppercase.
  - 'lower': Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
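As a small, hedged example, these options are typically passed through the top-level transpile() call, which forwards them to the dialect's Generator:

    import sqlglot

    sql = "SELECT a AS x, b FROM t WHERE b > 0"
    print(sqlglot.transpile(sql, write="bigquery", pretty=True, identify=True)[0])
    # Roughly:
    # SELECT
    #   `a` AS `x`,
    #   `b`
    # FROM `t`
    # WHERE
    #   `b` > 0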
    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)
    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)
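The difference between the two quoting styles (here and in column_parts above) can be seen by round-tripping both spellings; a sketch, assuming a recent sqlglot release:

    import sqlglot

    # `x.y` stays a single quoted identifier; `x`.`y` stays two separate parts
    for sql in (
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
    ):
        print(sqlglot.parse_one(sql, dialect="bigquery").sql(dialect="bigquery"))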
    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if isinstance(this, exp.TsOrDsToDatetime):
            func_name = "FORMAT_DATETIME"
        elif isinstance(this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_TIMESTAMP"
        elif isinstance(this, exp.TsOrDsToTime):
            func_name = "FORMAT_TIME"
        else:
            func_name = "FORMAT_DATE"

        time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
        return self.func(
            func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
        )
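For instance, a MySQL DATE_FORMAT call lands on the FORMAT_DATE branch, since its argument is not wrapped in one of the TS_OR_DS_TYPES nodes (a sketch; the format specifiers happen to coincide between the two dialects here):

    import sqlglot

    print(sqlglot.transpile("SELECT DATE_FORMAT(d, '%Y-%m')", read="mysql", write="bigquery")[0])
    # e.g. SELECT FORMAT_DATE('%Y-%m', d)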
    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")
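In effect, a literal NULL on either side of = collapses the whole comparison outside of UPDATE statements; a minimal sketch:

    import sqlglot

    print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
    # SELECT NULL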
    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)
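Outside of the CAST ... FORMAT construct, AT TIME ZONE is therefore rewritten into nested conversion calls; a sketch:

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery"
        )[0]
    )
    # e.g. SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))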
    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"
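The OFFSET/ORDINAL/SAFE_ wrapping can be observed by round-tripping BigQuery array subscripts; a sketch:

    import sqlglot

    for sql in (
        "SELECT arr[OFFSET(0)] FROM t",
        "SELECT arr[ORDINAL(1)] FROM t",
        "SELECT arr[SAFE_OFFSET(9)] FROM t",
    ):
        print(sqlglot.parse_one(sql, dialect="bigquery").sql(dialect="bigquery"))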
    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
        # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
        # because they aren't literals and so the above syntax is invalid BigQuery.
        if isinstance(this, exp.Array):
            elem = seq_get(this.expressions, 0)
            if not (elem and elem.find(exp.Query)):
                return f"{self.sql(expression, 'to')}{self.sql(this)}"

        return super().cast_sql(expression, safe_prefix=safe_prefix)
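So a typed array literal keeps its inline annotation on round-trip instead of being rewritten as a CAST; a sketch:

    import sqlglot

    sql = "SELECT ARRAY<INT64>[1, 2, 3]"
    print(sqlglot.parse_one(sql, dialect="bigquery").sql(dialect="bigquery"))
    # SELECT ARRAY<INT64>[1, 2, 3]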
    def declareitem_sql(self, expression: exp.DeclareItem) -> str:
        variables = self.expressions(expression, "this")
        default = self.sql(expression, "default")
        default = f" DEFAULT {default}" if default else ""
        kind = self.sql(expression, "kind")
        kind = f" {kind}" if kind else ""

        return f"{variables}{kind}{default}"
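BigQuery scripting DECLARE statements exercise this method; a sketch, assuming recent sqlglot support for scripting statements:

    import sqlglot

    print(sqlglot.parse_one("DECLARE x INT64 DEFAULT 0", dialect="bigquery").sql(dialect="bigquery"))
    # DECLARE x INT64 DEFAULT 0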
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql