Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/bigframes/bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ def isin_join(
new_column = (
(left_table[conditions[0]])
.isin((right_table[conditions[1]]))
.fillna(False)
.name(indicator_col)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,13 @@ def isin_join(
)
)
else:
new_column = sge.In(
this=conditions[0].expr,
expressions=[right._as_subquery()],
new_column = sge.func(
"COALESCE",
sge.In(
this=conditions[0].expr,
expressions=[right._as_subquery()],
),
sql.literal(False, dtypes.BOOL_DTYPE),
)

new_column = sge.Alias(
Expand Down
10 changes: 6 additions & 4 deletions packages/bigframes/bigframes/session/_io/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,19 +516,21 @@ def to_query(
) -> str:
"""Compile query_or_table with conditions(filters, wildcards) to query."""
if is_query(query_or_table):
sub_query = f"({query_or_table})"
from_item = f"({query_or_table})"
else:
# Table ID can have 1, 2, 3, or 4 parts. Quoting all parts to be safe.
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
parts = query_or_table.split(".")
sub_query = ".".join(f"`{part}`" for part in parts)
from_item = ".".join(f"`{part}`" for part in parts)

# TODO(b/338111344): Generate an index based on DefaultIndexKind if we
# don't have index columns specified.
if columns:
# We only reduce the selection if columns is set, but we always
# want to make sure index_cols is also included.
select_clause = "SELECT " + ", ".join(f"`{column}`" for column in columns)
select_clause = "SELECT " + ", ".join(
f"`_bf_source`.`{column}`" for column in columns
)
else:
select_clause = "SELECT *"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To maintain consistency with the qualified column selection in the if block and to further prevent ambiguity when this query is used as a subquery, consider qualifying the wildcard selector as well.

Suggested change
select_clause = "SELECT *"
select_clause = "SELECT _bf_source.*"


Expand All @@ -545,7 +547,7 @@ def to_query(

return (
f"{select_clause} "
f"FROM {sub_query}"
f"FROM {from_item} AS _bf_source"
f"{time_travel_clause}{where_clause}{limit_clause}"
)

Expand Down
2 changes: 1 addition & 1 deletion packages/bigframes/bigframes/session/polars_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode):
return False
for expr in node._node_expressions:
if isinstance(expr, agg_expressions.Aggregation):
if not type(expr.op) in _COMPATIBLE_AGG_OPS:
if type(expr.op) not in _COMPATIBLE_AGG_OPS:
return False
if isinstance(expr, expression.Expression):
if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ WITH `bfcte_0` AS (
), `bfcte_4` AS (
SELECT
*,
`bfcol_4` IN ((
COALESCE(`bfcol_4` IN ((
SELECT
*
FROM `bfcte_3`
)) AS `bfcol_5`
)), FALSE) AS `bfcol_5`
FROM `bfcte_1`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ WITH `bfcte_0` AS (
), `bfcte_6` AS (
SELECT
*,
`bfcol_58` IN ((
COALESCE(`bfcol_58` IN ((
SELECT
*
FROM `bfcte_5`
)) AS `bfcol_59`
)), FALSE) AS `bfcol_59`
FROM `bfcte_4`
), `bfcte_7` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ WITH `bfcte_0` AS (
), `bfcte_7` AS (
SELECT
*,
`bfcol_4` IN ((
COALESCE(`bfcol_4` IN ((
SELECT
*
FROM `bfcte_6`
)) AS `bfcol_14`
)), FALSE) AS `bfcol_14`
FROM `bfcte_2`
), `bfcte_8` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ WITH `bfcte_0` AS (
), `bfcte_10` AS (
SELECT
*,
`bfcol_2` IN ((
COALESCE(`bfcol_2` IN ((
SELECT
*
FROM `bfcte_8`
)) AS `bfcol_37`
)), FALSE) AS `bfcol_37`
FROM `bfcte_1`
), `bfcte_11` AS (
SELECT
Expand Down Expand Up @@ -127,11 +127,11 @@ WITH `bfcte_0` AS (
), `bfcte_15` AS (
SELECT
*,
`bfcol_41` IN ((
COALESCE(`bfcol_41` IN ((
SELECT
*
FROM `bfcte_14`
)) AS `bfcol_62`
)), FALSE) AS `bfcol_62`
FROM `bfcte_7`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ WITH `bfcte_0` AS (
), `bfcte_12` AS (
SELECT
*,
`bfcol_61` IN ((
COALESCE(`bfcol_61` IN ((
SELECT
*
FROM `bfcte_6`
)) AS `bfcol_64`
)), FALSE) AS `bfcol_64`
FROM `bfcte_11`
), `bfcte_13` AS (
SELECT
Expand Down
14 changes: 7 additions & 7 deletions packages/bigframes/tests/unit/session/test_io_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
),
(
"SELECT `row_index`, `string_col` FROM `test_table` "
"SELECT `_bf_source`.`row_index`, `_bf_source`.`string_col` FROM `test_table` AS _bf_source "
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
"WHERE `rowindex` NOT IN (0, 6) OR `string_col` IN ('Hello, World!', "
"'こんにちは') LIMIT 123"
Expand All @@ -369,11 +369,11 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
),
(
"""SELECT `rowindex`, `string_col` FROM (SELECT
"""SELECT `_bf_source`.`rowindex`, `_bf_source`.`string_col` FROM (SELECT
rowindex,
string_col,
FROM `test_table` AS t
) """
) AS _bf_source """
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
"WHERE `rowindex` < 4 AND `string_col` = 'Hello, World!' "
"LIMIT 123"
Expand All @@ -386,7 +386,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[],
None, # max_results
None,  # time_travel_timestamp
"SELECT `col_a`, `col_b` FROM `test_table`",
"SELECT `_bf_source`.`col_a`, `_bf_source`.`col_b` FROM `test_table` AS _bf_source",
id="table-columns",
),
pytest.param(
Expand All @@ -395,7 +395,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[("date_col", ">", "2022-10-20")],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table` WHERE `date_col` > '2022-10-20'",
"SELECT * FROM `test_table` AS _bf_source WHERE `date_col` > '2022-10-20'",
id="table-filter",
),
pytest.param(
Expand All @@ -404,7 +404,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table*`",
"SELECT * FROM `test_table*` AS _bf_source",
id="wildcard-no_params",
),
pytest.param(
Expand All @@ -413,7 +413,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[("_TABLE_SUFFIX", ">", "2022-10-20")],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table*` WHERE `_TABLE_SUFFIX` > '2022-10-20'",
"SELECT * FROM `test_table*` AS _bf_source WHERE `_TABLE_SUFFIX` > '2022-10-20'",
id="wildcard-filter",
),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,11 +290,11 @@ class Parser(metaclass=_Parser):
"RIGHTPAD": lambda args: build_pad(args, is_left=False),
"RPAD": lambda args: build_pad(args, is_left=False),
"RTRIM": lambda args: build_trim(args, is_left=False),
"SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(
expression=seq_get(args, 0)
)
if len(args) != 2
else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
"SCOPE_RESOLUTION": lambda args: (
exp.ScopeResolution(expression=seq_get(args, 0))
if len(args) != 2
else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1))
),
"STRPOS": exp.StrPosition.from_arg_list,
"CHARINDEX": lambda args: build_locate_strposition(args),
"INSTR": exp.StrPosition.from_arg_list,
Expand Down Expand Up @@ -943,7 +943,9 @@ class Parser(metaclass=_Parser):
}

UNARY_PARSERS = {
TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op
TokenType.PLUS: lambda self: (
self._parse_unary()
), # Unary + is handled as a no-op
TokenType.NOT: lambda self: self.expression(
exp.Not, this=self._parse_equality()
),
Expand Down Expand Up @@ -1246,12 +1248,14 @@ class Parser(metaclass=_Parser):
exp.NotNullColumnConstraint, allow_null=True
),
"ON": lambda self: (
self._match(TokenType.UPDATE)
and self.expression(
exp.OnUpdateColumnConstraint, this=self._parse_function()
(
self._match(TokenType.UPDATE)
and self.expression(
exp.OnUpdateColumnConstraint, this=self._parse_function()
)
)
)
or self.expression(exp.OnProperty, this=self._parse_id_var()),
or self.expression(exp.OnProperty, this=self._parse_id_var())
),
"PATH": lambda self: self.expression(
exp.PathColumnConstraint, this=self._parse_string()
),
Expand Down Expand Up @@ -3885,8 +3889,9 @@ def _parse_hint_body(self) -> t.Optional[exp.Hint]:
try:
for hint in iter(
lambda: self._parse_csv(
lambda: self._parse_hint_function_call()
or self._parse_var(upper=True),
lambda: (
self._parse_hint_function_call() or self._parse_var(upper=True)
),
),
[],
):
Expand Down Expand Up @@ -4305,8 +4310,9 @@ def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
self.expression(
exp.WithTableHint,
expressions=self._parse_csv(
lambda: self._parse_function()
or self._parse_var(any_token=True)
lambda: (
self._parse_function() or self._parse_var(any_token=True)
)
),
)
)
Expand Down Expand Up @@ -4469,6 +4475,14 @@ def _parse_table(
if schema:
return self._parse_schema(this=this)

# see: https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from_clause
# from_item, then alias, then time travel, then sample.
alias = self._parse_table_alias(
alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
)
if alias:
this.set("alias", alias)
Comment on lines +4480 to +4484
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Moving the alias parsing to the top of _parse_table correctly addresses BigQuery's syntax requirements (where the alias precedes the FOR SYSTEM_TIME AS OF clause). However, this change unconditionally parses the alias before the table sample, which may break dialects that set ALIAS_POST_TABLESAMPLE = True. Since this is a vendored parser, if it's intended to support multiple dialects, consider making this move conditional on the dialect's settings or ensuring that the alias can still be parsed after the sample if needed.


version = self._parse_version()

if version:
Expand All @@ -4477,12 +4491,6 @@ def _parse_table(
if self.dialect.ALIAS_POST_TABLESAMPLE:
this.set("sample", self._parse_table_sample())

alias = self._parse_table_alias(
alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
)
if alias:
this.set("alias", alias)

if self._match(TokenType.INDEXED_BY):
this.set("indexed", self._parse_table_parts())
elif self._match_text_seq("NOT", "INDEXED"):
Expand Down Expand Up @@ -4935,11 +4943,13 @@ def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Grou

elements["expressions"].extend(
self._parse_csv(
lambda: None
if self._match_set(
(TokenType.CUBE, TokenType.ROLLUP), advance=False
lambda: (
None
if self._match_set(
(TokenType.CUBE, TokenType.ROLLUP), advance=False
)
else self._parse_disjunction()
)
else self._parse_disjunction()
)
)

Expand Down Expand Up @@ -6225,9 +6235,9 @@ def _parse_column_ops(
# https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
if isinstance(field, (exp.Func, exp.Window)) and this:
this = this.transform(
lambda n: n.to_dot(include_dots=False)
if isinstance(n, exp.Column)
else n
lambda n: (
n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
)
)

if op:
Expand Down
Loading