Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/bigframes/bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ def isin_join(
new_column = (
(left_table[conditions[0]])
.isin((right_table[conditions[1]]))
.fillna(False)
.name(indicator_col)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,13 @@ def isin_join(
)
)
else:
new_column = sge.In(
this=conditions[0].expr,
expressions=[right._as_subquery()],
new_column = sge.func(
"COALESCE",
sge.In(
this=conditions[0].expr,
expressions=[right._as_subquery()],
),
sql.literal(False, dtypes.BOOL_DTYPE),
)

new_column = sge.Alias(
Expand Down
10 changes: 6 additions & 4 deletions packages/bigframes/bigframes/session/_io/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,19 +516,21 @@ def to_query(
) -> str:
"""Compile query_or_table with conditions(filters, wildcards) to query."""
if is_query(query_or_table):
sub_query = f"({query_or_table})"
from_item = f"({query_or_table})"
else:
# Table ID can have 1, 2, 3, or 4 parts. Quoting all parts to be safe.
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
parts = query_or_table.split(".")
sub_query = ".".join(f"`{part}`" for part in parts)
from_item = ".".join(f"`{part}`" for part in parts)

# TODO(b/338111344): Generate an index based on DefaultIndexKind if we
# don't have index columns specified.
if columns:
# We only reduce the selection if columns is set, but we always
# want to make sure index_cols is also included.
select_clause = "SELECT " + ", ".join(f"`{column}`" for column in columns)
select_clause = "SELECT " + ", ".join(
f"`_bf_source`.`{column}`" for column in columns
)
else:
select_clause = "SELECT *"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To maintain consistency with the qualified column selection in the if block and to further prevent ambiguity when this query is used as a subquery, consider qualifying the wildcard selector as well.

Suggested change
select_clause = "SELECT *"
select_clause = "SELECT _bf_source.*"


Expand All @@ -545,7 +547,7 @@ def to_query(

return (
f"{select_clause} "
f"FROM {sub_query}"
f"FROM {from_item} AS _bf_source"
f"{time_travel_clause}{where_clause}{limit_clause}"
)

Expand Down
2 changes: 1 addition & 1 deletion packages/bigframes/bigframes/session/polars_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode):
return False
for expr in node._node_expressions:
if isinstance(expr, agg_expressions.Aggregation):
if not type(expr.op) in _COMPATIBLE_AGG_OPS:
if type(expr.op) not in _COMPATIBLE_AGG_OPS:
return False
if isinstance(expr, expression.Expression):
if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ WITH `bfcte_0` AS (
), `bfcte_4` AS (
SELECT
*,
`bfcol_4` IN ((
COALESCE(`bfcol_4` IN ((
SELECT
*
FROM `bfcte_3`
)) AS `bfcol_5`
)), FALSE) AS `bfcol_5`
FROM `bfcte_1`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ WITH `bfcte_0` AS (
), `bfcte_6` AS (
SELECT
*,
`bfcol_58` IN ((
COALESCE(`bfcol_58` IN ((
SELECT
*
FROM `bfcte_5`
)) AS `bfcol_59`
)), FALSE) AS `bfcol_59`
FROM `bfcte_4`
), `bfcte_7` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ WITH `bfcte_0` AS (
), `bfcte_7` AS (
SELECT
*,
`bfcol_4` IN ((
COALESCE(`bfcol_4` IN ((
SELECT
*
FROM `bfcte_6`
)) AS `bfcol_14`
)), FALSE) AS `bfcol_14`
FROM `bfcte_2`
), `bfcte_8` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ WITH `bfcte_0` AS (
), `bfcte_10` AS (
SELECT
*,
`bfcol_2` IN ((
COALESCE(`bfcol_2` IN ((
SELECT
*
FROM `bfcte_8`
)) AS `bfcol_37`
)), FALSE) AS `bfcol_37`
FROM `bfcte_1`
), `bfcte_11` AS (
SELECT
Expand Down Expand Up @@ -127,11 +127,11 @@ WITH `bfcte_0` AS (
), `bfcte_15` AS (
SELECT
*,
`bfcol_41` IN ((
COALESCE(`bfcol_41` IN ((
SELECT
*
FROM `bfcte_14`
)) AS `bfcol_62`
)), FALSE) AS `bfcol_62`
FROM `bfcte_7`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ WITH `bfcte_0` AS (
), `bfcte_12` AS (
SELECT
*,
`bfcol_61` IN ((
COALESCE(`bfcol_61` IN ((
SELECT
*
FROM `bfcte_6`
)) AS `bfcol_64`
)), FALSE) AS `bfcol_64`
FROM `bfcte_11`
), `bfcte_13` AS (
SELECT
Expand Down
14 changes: 7 additions & 7 deletions packages/bigframes/tests/unit/session/test_io_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
),
(
"SELECT `row_index`, `string_col` FROM `test_table` "
"SELECT `_bf_source`.`row_index`, `_bf_source`.`string_col` FROM `test_table` AS _bf_source "
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
"WHERE `rowindex` NOT IN (0, 6) OR `string_col` IN ('Hello, World!', "
"'こんにちは') LIMIT 123"
Expand All @@ -369,11 +369,11 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
2024, 5, 14, 12, 42, 36, 125125, tzinfo=datetime.timezone.utc
),
(
"""SELECT `rowindex`, `string_col` FROM (SELECT
"""SELECT `_bf_source`.`rowindex`, `_bf_source`.`string_col` FROM (SELECT
rowindex,
string_col,
FROM `test_table` AS t
) """
) AS _bf_source """
"FOR SYSTEM_TIME AS OF CAST('2024-05-14T12:42:36.125125+00:00' AS TIMESTAMP) "
"WHERE `rowindex` < 4 AND `string_col` = 'Hello, World!' "
"LIMIT 123"
Expand All @@ -386,7 +386,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[],
None, # max_results
None,  # time_travel_timestamp
"SELECT `col_a`, `col_b` FROM `test_table`",
"SELECT `_bf_source`.`col_a`, `_bf_source`.`col_b` FROM `test_table` AS _bf_source",
id="table-columns",
),
pytest.param(
Expand All @@ -395,7 +395,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[("date_col", ">", "2022-10-20")],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table` WHERE `date_col` > '2022-10-20'",
"SELECT * FROM `test_table` AS _bf_source WHERE `date_col` > '2022-10-20'",
id="table-filter",
),
pytest.param(
Expand All @@ -404,7 +404,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table*`",
"SELECT * FROM `test_table*` AS _bf_source",
id="wildcard-no_params",
),
pytest.param(
Expand All @@ -413,7 +413,7 @@ def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str)
[("_TABLE_SUFFIX", ">", "2022-10-20")],
None, # max_results
None,  # time_travel_timestamp
"SELECT * FROM `test_table*` WHERE `_TABLE_SUFFIX` > '2022-10-20'",
"SELECT * FROM `test_table*` AS _bf_source WHERE `_TABLE_SUFFIX` > '2022-10-20'",
id="wildcard-filter",
),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,11 +290,11 @@ class Parser(metaclass=_Parser):
"RIGHTPAD": lambda args: build_pad(args, is_left=False),
"RPAD": lambda args: build_pad(args, is_left=False),
"RTRIM": lambda args: build_trim(args, is_left=False),
"SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(
expression=seq_get(args, 0)
)
if len(args) != 2
else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
"SCOPE_RESOLUTION": lambda args: (
exp.ScopeResolution(expression=seq_get(args, 0))
if len(args) != 2
else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1))
),
"STRPOS": exp.StrPosition.from_arg_list,
"CHARINDEX": lambda args: build_locate_strposition(args),
"INSTR": exp.StrPosition.from_arg_list,
Expand Down Expand Up @@ -943,7 +943,9 @@ class Parser(metaclass=_Parser):
}

UNARY_PARSERS = {
TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op
TokenType.PLUS: lambda self: (
self._parse_unary()
), # Unary + is handled as a no-op
TokenType.NOT: lambda self: self.expression(
exp.Not, this=self._parse_equality()
),
Expand Down Expand Up @@ -1246,12 +1248,14 @@ class Parser(metaclass=_Parser):
exp.NotNullColumnConstraint, allow_null=True
),
"ON": lambda self: (
self._match(TokenType.UPDATE)
and self.expression(
exp.OnUpdateColumnConstraint, this=self._parse_function()
(
self._match(TokenType.UPDATE)
and self.expression(
exp.OnUpdateColumnConstraint, this=self._parse_function()
)
)
)
or self.expression(exp.OnProperty, this=self._parse_id_var()),
or self.expression(exp.OnProperty, this=self._parse_id_var())
),
"PATH": lambda self: self.expression(
exp.PathColumnConstraint, this=self._parse_string()
),
Expand Down Expand Up @@ -3885,8 +3889,9 @@ def _parse_hint_body(self) -> t.Optional[exp.Hint]:
try:
for hint in iter(
lambda: self._parse_csv(
lambda: self._parse_hint_function_call()
or self._parse_var(upper=True),
lambda: (
self._parse_hint_function_call() or self._parse_var(upper=True)
),
),
[],
):
Expand Down Expand Up @@ -4305,8 +4310,9 @@ def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
self.expression(
exp.WithTableHint,
expressions=self._parse_csv(
lambda: self._parse_function()
or self._parse_var(any_token=True)
lambda: (
self._parse_function() or self._parse_var(any_token=True)
)
),
)
)
Expand Down Expand Up @@ -4469,6 +4475,14 @@ def _parse_table(
if schema:
return self._parse_schema(this=this)

# see: https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from_clause
# from_item, then alias, then time travel, then sample.
alias = self._parse_table_alias(
alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
)
if alias:
this.set("alias", alias)
Comment on lines +4480 to +4484
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Moving the alias parsing to the top of _parse_table correctly addresses BigQuery's syntax requirements (where the alias precedes the FOR SYSTEM_TIME AS OF clause). However, this change unconditionally parses the alias before the table sample, which may break dialects that set ALIAS_POST_TABLESAMPLE = True. Since this is a vendored parser, if it's intended to support multiple dialects, consider making this move conditional on the dialect's settings or ensuring that the alias can still be parsed after the sample if needed.


version = self._parse_version()

if version:
Expand All @@ -4477,12 +4491,6 @@ def _parse_table(
if self.dialect.ALIAS_POST_TABLESAMPLE:
this.set("sample", self._parse_table_sample())

alias = self._parse_table_alias(
alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
)
if alias:
this.set("alias", alias)

if self._match(TokenType.INDEXED_BY):
this.set("indexed", self._parse_table_parts())
elif self._match_text_seq("NOT", "INDEXED"):
Expand Down Expand Up @@ -4935,11 +4943,13 @@ def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Grou

elements["expressions"].extend(
self._parse_csv(
lambda: None
if self._match_set(
(TokenType.CUBE, TokenType.ROLLUP), advance=False
lambda: (
None
if self._match_set(
(TokenType.CUBE, TokenType.ROLLUP), advance=False
)
else self._parse_disjunction()
)
else self._parse_disjunction()
)
)

Expand Down Expand Up @@ -6225,9 +6235,9 @@ def _parse_column_ops(
# https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
if isinstance(field, (exp.Func, exp.Window)) and this:
this = this.transform(
lambda n: n.to_dot(include_dots=False)
if isinstance(n, exp.Column)
else n
lambda n: (
n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
)
)

if op:
Expand Down
Loading