From b3a6b5a68f73299d10cf040a5b6521fa467f38bd Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 29 May 2026 23:38:10 +0000 Subject: [PATCH 1/8] refactor(bigframes): Add basic disassembly to IR compiler --- packages/bigframes/bigframes/core/bytecode.py | 232 ++++++++++++ .../bigframes/core/py_expressions.py | 356 ++++++++++++++++++ .../tests/unit/core/test_bytecode.py | 77 ++++ 3 files changed, 665 insertions(+) create mode 100644 packages/bigframes/bigframes/core/bytecode.py create mode 100644 packages/bigframes/bigframes/core/py_expressions.py create mode 100644 packages/bigframes/tests/unit/core/test_bytecode.py diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py new file mode 100644 index 000000000000..e981172203e5 --- /dev/null +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -0,0 +1,232 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import dis +import operator +from types import ModuleType +from typing import Callable, Optional + +import bigframes.core.py_expressions as py_exprs +from bigframes.core import expression + + +class NullMarker: + pass + + +_BINARY_OP_MAP = { + "+": operator.add, + "-": operator.sub, + "*": operator.mul, + "/": operator.truediv, + "//": operator.floordiv, + "%": operator.mod, + "**": operator.pow, +} + +_COMPARE_OP_MAP = { + "==": operator.eq, + "!=": operator.ne, + "<": operator.lt, + "<=": operator.le, + ">": operator.gt, + ">=": operator.ge, +} + + +def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expression]: + try: + instructions = list(dis.get_instructions(func)) + except Exception: + return None + + stack = [] + globals_dict = func.__globals__ + import builtins + builtins_dict = builtins.__dict__ + + closure_dict = {} + if func.__closure__: + free_vars = func.__code__.co_freevars + for var, cell in zip(free_vars, func.__closure__): + try: + closure_dict[var] = cell.cell_contents + except ValueError: + pass + + for inst in instructions: + opname = inst.opname + + if opname == "RESUME": + continue + + elif opname in ("LOAD_FAST", "LOAD_FAST_CHECK"): + stack.append(expression.UnboundVariableExpression(inst.argval)) + + elif opname == "LOAD_FAST_LOAD_FAST": + var1, var2 = inst.argval + stack.append(expression.UnboundVariableExpression(var1)) + stack.append(expression.UnboundVariableExpression(var2)) + + elif opname == "LOAD_CONST": + stack.append(py_exprs.PyObject(inst.argval)) + + elif opname == "LOAD_GLOBAL": + name = inst.argval + val = None + if name in closure_dict: + val = closure_dict[name] + elif name in globals_dict: + val = globals_dict[name] + elif name in builtins_dict: + val = builtins_dict[name] + + if isinstance(val, ModuleType): + stack.append(py_exprs.Module(val)) + elif val is not None: + stack.append(py_exprs.PyObject(val)) + else: + stack.append(expression.UnboundVariableExpression(name)) + + elif opname == 
"LOAD_ATTR": + if not stack: + return None + target = stack.pop() + stack.append(py_exprs.GetAttr(target, inst.argval)) + + elif opname == "PUSH_NULL": + stack.append(NullMarker) + + elif opname == "BINARY_OP": + if len(stack) < 2: + return None + right = stack.pop() + left = stack.pop() + op_symbol = inst.argrepr + if not op_symbol and isinstance(inst.argval, str): + op_symbol = inst.argval + if op_symbol.endswith("="): + op_symbol = op_symbol[:-1] + + if op_symbol not in _BINARY_OP_MAP: + return None + stack.append(py_exprs.Call(py_exprs.PyObject(_BINARY_OP_MAP[op_symbol]), (left, right))) + + # Support older Python versions compatibility + elif opname in ("BINARY_ADD", "INPLACE_ADD"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.add), (left, right))) + elif opname in ("BINARY_SUBTRACT", "INPLACE_SUBTRACT"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.sub), (left, right))) + elif opname in ("BINARY_MULTIPLY", "INPLACE_MULTIPLY"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.mul), (left, right))) + elif opname in ("BINARY_TRUE_DIVIDE", "INPLACE_TRUE_DIVIDE"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.truediv), (left, right))) + elif opname in ("BINARY_FLOOR_DIVIDE", "INPLACE_FLOOR_DIVIDE"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.floordiv), (left, right))) + elif opname in ("BINARY_MODULO", "INPLACE_MODULO"): + if len(stack) < 2: return None + right = stack.pop(); left = stack.pop() + stack.append(py_exprs.Call(py_exprs.PyObject(operator.mod), (left, right))) + elif opname in ("BINARY_POWER", "INPLACE_POWER"): + if len(stack) < 2: 
return None +            right = stack.pop(); left = stack.pop() +            stack.append(py_exprs.Call(py_exprs.PyObject(operator.pow), (left, right))) + +        elif opname == "COMPARE_OP": +            if len(stack) < 2: +                return None +            right = stack.pop() +            left = stack.pop() +            op_symbol = inst.argval +            if op_symbol not in _COMPARE_OP_MAP: +                return None +            stack.append(py_exprs.Call(py_exprs.PyObject(_COMPARE_OP_MAP[op_symbol]), (left, right))) + +        elif opname in ("UNARY_NEGATIVE", "UNARY_INVERT"): +            if not stack: +                return None +            target = stack.pop() +            stack.append(py_exprs.Call(py_exprs.PyObject(operator.neg if opname == "UNARY_NEGATIVE" else operator.invert), (target,))) + +        elif opname == "UNARY_POSITIVE": +            if not stack: +                return None +            target = stack.pop() +            stack.append(py_exprs.Call(py_exprs.PyObject(operator.pos), (target,))) + +        elif opname == "CALL_INTRINSIC_1": +            if inst.argrepr == "INTRINSIC_UNARY_POSITIVE": +                if not stack: +                    return None +                target = stack.pop() +                stack.append(py_exprs.Call(py_exprs.PyObject(operator.pos), (target,))) +            else: +                return None + +        elif opname in ("CALL", "CALL_FUNCTION"): +            num_args = inst.arg +            if len(stack) < num_args: +                return None +            args = [stack.pop() for _ in range(num_args)][::-1] +            if stack and stack[-1] is NullMarker: +                stack.pop() +            if not stack: +                return None +            callable_expr = stack.pop() +            stack.append(py_exprs.Call(callable_expr, tuple(args))) + +        elif opname == "RETURN_VALUE": +            if not stack: +                return None +            return stack[-1] + +        elif opname in ("STORE_FAST", "POP_TOP"): +            if stack: +                stack.pop() + +        else: +            return None + +    return None + + +def dis_to_expr( +    func: Callable, unpack_mode: bool = False +) -> Optional[expression.Expression]: +    """ +    Try to convert a python function to a BigQuery expression. + +    If unpack_mode is False, SQL columns are addressed as attributes of a single +    python argument (e.g. row.col1); if True, as separate arguments (e.g. col1). 
+ + This is "best effort" - if the function contains operations that cannot + be converted to BigQuery expressions, it will return None. + """ + try: + py_expr = _compile_bytecode_to_py_expr(func) + if py_expr is None: + return None + return py_exprs.resolve_py_exprs(py_expr, unpack_mode=unpack_mode) + except Exception: + return None \ No newline at end of file diff --git a/packages/bigframes/bigframes/core/py_expressions.py b/packages/bigframes/bigframes/core/py_expressions.py new file mode 100644 index 000000000000..71f25f782ccc --- /dev/null +++ b/packages/bigframes/bigframes/core/py_expressions.py @@ -0,0 +1,356 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import dataclasses +import itertools +from types import ModuleType +from typing import Callable, Hashable, Mapping, Tuple + +from bigframes import dtypes +from bigframes.core import identifiers +from bigframes.core.expression import ( + const, + Expression, + OpExpression, + UnboundVariableExpression, +) +from bigframes.operations import generic_ops, numeric_ops, NUMPY_TO_BINOP, NUMPY_TO_OP +import bigframes.operations.python_op_maps as python_op_maps + +_CALLABLE_TO_OP = { + **NUMPY_TO_OP, + **NUMPY_TO_BINOP, +} + +_BUILTIN_CALLABLES = { + str: generic_ops.AsTypeOp(dtypes.STRING_DTYPE), + abs: numeric_ops.abs_op, +} + + +@dataclasses.dataclass(frozen=True) +class GetAttr(Expression): + input: Expression + attr: str + + @property + def column_references( + self, + ) -> Tuple[identifiers.ColumnId, ...]: + return self.input.column_references + + @property + def free_variables(self) -> set[str]: + return self.input.free_variables + + @property + def is_const(self) -> bool: + return False + + @property + def children(self): + return (self.input,) + + @property + def nullable(self) -> bool: + return True + + @property + def is_resolved(self) -> bool: + return False + + @property + def output_type(self) -> dtypes.ExpressionType: + raise ValueError(f"Type of expression {self} has not been fixed.") + + @property + def is_bijective(self) -> bool: + # TODO: Mark individual functions as bijective? 
+        return False + +    @property +    def deterministic(self) -> bool: +        return True + +    def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: +        new_input = t(self.input) +        if new_input != self.input: +            return dataclasses.replace(self, input=new_input) +        return self + +    def bind_variables( +        self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False +    ) -> GetAttr: +        return GetAttr( +            self.input.bind_variables( +                bindings, allow_partial_bindings=allow_partial_bindings +            ), +            self.attr, +        ) + +    def bind_refs( +        self, +        bindings: Mapping[identifiers.ColumnId, Expression], +        allow_partial_bindings: bool = False, +    ) -> GetAttr: +        return GetAttr( +            self.input.bind_refs( +                bindings, allow_partial_bindings=allow_partial_bindings +            ), +            self.attr, +        ) + + +@dataclasses.dataclass(frozen=True) +class Module(Expression): +    """An expression representing a module reference.""" + +    module: ModuleType + +    @property +    def is_const(self) -> bool: +        return True + +    @property +    def column_references(self) -> Tuple[identifiers.ColumnId, ...]: +        return () + +    @property +    def nullable(self) -> bool: +        return True  # type: ignore + +    @property +    def is_resolved(self) -> bool: +        return False + +    @property +    def output_type(self) -> dtypes.ExpressionType: +        raise ValueError("Module expresion has not type") + +    def bind_variables( +        self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False +    ) -> Expression: +        return self + +    def bind_refs( +        self, +        bindings: Mapping[identifiers.ColumnId, Expression], +        allow_partial_bindings: bool = False, +    ) -> Module: +        return self + +    @property +    def is_bijective(self) -> bool: +        # () <-> value +        return True + +    def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: +        return self + + +@dataclasses.dataclass(frozen=True) +class PyObject(Expression): +    """An expression representing an arbitrary hashable Python object.""" + +    value: Hashable + +    @property +    def is_const(self) -> 
bool: +        return True + +    @property +    def column_references(self) -> Tuple[identifiers.ColumnId, ...]: +        return () + +    @property +    def nullable(self) -> bool: +        return True  # type: ignore + +    @property +    def is_resolved(self) -> bool: +        return False + +    @property +    def output_type(self) -> dtypes.ExpressionType: +        raise ValueError("Module expresion has not type") + +    def bind_variables( +        self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False +    ) -> Expression: +        return self + +    def bind_refs( +        self, +        bindings: Mapping[identifiers.ColumnId, Expression], +        allow_partial_bindings: bool = False, +    ) -> PyObject: +        return self + +    @property +    def is_bijective(self) -> bool: +        # () <-> value +        return True + +    def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: +        return self + + +@dataclasses.dataclass(frozen=True) +class Call(Expression): +    """An expression representing a call of a callable expression on inputs.""" + +    # TODO: Further constrain? +    callable: Expression +    inputs: Tuple[Expression, ...] + +    @property +    def column_references( +        self, +    ) -> Tuple[identifiers.ColumnId, ...]: +        return tuple( +            itertools.chain.from_iterable( +                map(lambda x: x.column_references, self.children) +            ) +        ) + +    @property +    def free_variables(self) -> set[str]: +        return set( +            itertools.chain.from_iterable( +                map(lambda x: x.free_variables, self.children) +            ) +        ) + +    @property +    def is_const(self) -> bool: +        return False + +    @property +    def children(self): +        return (self.callable, *self.inputs) + +    @property +    def nullable(self) -> bool: +        return True + +    @property +    def is_resolved(self) -> bool: +        return False + +    @property +    def output_type(self) -> dtypes.ExpressionType: +        raise ValueError(f"Type of expression {self} has not been fixed.") + +    @property +    def is_bijective(self) -> bool: +        # TODO: Mark individual functions as bijective? 
+ return False + + @property + def deterministic(self) -> bool: + return True + + def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: + return dataclasses.replace( + self, + callable=t(self.callable), + inputs=tuple(t(input) for input in self.inputs), + ) + + def bind_variables( + self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False + ) -> Call: + return Call( + callable=self.callable.bind_variables( + bindings, allow_partial_bindings=allow_partial_bindings + ), + inputs=tuple( + input.bind_variables( + bindings, allow_partial_bindings=allow_partial_bindings + ) + for input in self.inputs + ), + ) + + def bind_refs( + self, + bindings: Mapping[identifiers.ColumnId, Expression], + allow_partial_bindings: bool = False, + ) -> Call: + return Call( + callable=self.callable.bind_refs( + bindings, allow_partial_bindings=allow_partial_bindings + ), + inputs=tuple( + input.bind_refs(bindings, allow_partial_bindings=allow_partial_bindings) + for input in self.inputs + ), + ) + + +# TODO: Mode that resolves free variable attrs as columns +def resolve_py_exprs(expression: Expression, unpack_mode: bool = False) -> Expression: + """Replace all PyObject, attribute, call expressions. 
Bottom-up.""" + + def resolve_expr_if_call(expression: Expression) -> Expression: + if isinstance(expression, Call): + return resolve_call(expression) + return expression + + # this function assumes attrs that become callables have been resolved + # also, we don't yet handle resolving attrs that are column accesses + def resolve_attrs(expression: Expression) -> Expression: + if isinstance(expression, GetAttr): + if isinstance(expression.input, Module): + # resolves things like Math.pi + return PyObject(getattr(expression.input.module, expression.attr)) + if not unpack_mode and isinstance(expression.input, UnboundVariableExpression): + return UnboundVariableExpression(expression.attr) + return expression + + def resolve_pyobjs(expression: Expression) -> Expression: + if isinstance(expression, PyObject): + return const(expression.value) + return expression + + wo_calls = expression.bottom_up(resolve_expr_if_call) + wo_attrs = wo_calls.bottom_up(resolve_attrs) + wo_pyobjs = wo_attrs.bottom_up(resolve_pyobjs) + return wo_pyobjs + + +def resolve_call(call: Call) -> Expression: + callable = call.callable + if isinstance(callable, GetAttr): + attr = callable.attr + if isinstance(callable.input, Module): + fn = getattr(callable.input.module, attr) + if fn in python_op_maps.PYTHON_TO_BIGFRAMES: + op = python_op_maps.PYTHON_TO_BIGFRAMES[fn] + return OpExpression(op, call.inputs) + if fn in _CALLABLE_TO_OP: + op = _CALLABLE_TO_OP[fn] + return OpExpression(op, call.inputs) + elif isinstance(callable, PyObject): + if callable.value in python_op_maps.PYTHON_TO_BIGFRAMES: + op = python_op_maps.PYTHON_TO_BIGFRAMES[callable.value] + return OpExpression(op, call.inputs) + if callable.value in _BUILTIN_CALLABLES: + return OpExpression(_BUILTIN_CALLABLES[callable.value], call.inputs) + + raise NotImplementedError( + f"No implementation available for call expression: {call}" + ) diff --git a/packages/bigframes/tests/unit/core/test_bytecode.py 
b/packages/bigframes/tests/unit/core/test_bytecode.py new file mode 100644 index 000000000000..a368d2a89a96 --- /dev/null +++ b/packages/bigframes/tests/unit/core/test_bytecode.py @@ -0,0 +1,77 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import pytest + +from bigframes.core.bytecode import dis_to_expr +import bigframes.core.expression as ex +import bigframes.operations as ops + + +def test_dis_to_expr_simple_arithmetic(): + func = lambda row: row.x + 1 + expr = dis_to_expr(func, unpack_mode=False) + assert expr is not None + + expected = ops.add_op.as_expr(ex.free_var("x"), ex.const(1)) + assert expr == expected + + +def test_dis_to_expr_unpack_mode(): + func = lambda col1, col2: col1 * col2 + expr = dis_to_expr(func, unpack_mode=True) + assert expr is not None + + expected = ops.mul_op.as_expr(ex.free_var("col1"), ex.free_var("col2")) + assert expr == expected + + +def test_dis_to_expr_math_function(): + func = lambda row: math.sin(row.x) + expr = dis_to_expr(func, unpack_mode=False) + assert expr is not None + + expected = ops.numeric_ops.sin_op.as_expr(ex.free_var("x")) + assert expr == expected + + +def test_dis_to_expr_negation(): + func = lambda row: -row.x + expr = dis_to_expr(func, unpack_mode=False) + assert expr is not None + + expected = ops.numeric_ops.neg_op.as_expr(ex.free_var("x")) + assert expr == expected + + +def test_dis_to_expr_comparison(): + func = lambda row: row.x == row.y + expr = 
dis_to_expr(func, unpack_mode=False) + assert expr is not None + + expected = ops.comparison_ops.eq_op.as_expr(ex.free_var("x"), ex.free_var("y")) + assert expr == expected + + +def test_dis_to_expr_unsupported(): + # Control flow or unsupported structures should return None + def func_with_loop(row): + res = 0 + for val in range(int(row.x)): + res += val + return res + + expr = dis_to_expr(func_with_loop, unpack_mode=False) + assert expr is None From d3e3441fed4776c35a014e3241f20dfb385019c7 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 29 May 2026 23:46:03 +0000 Subject: [PATCH 2/8] improvements --- packages/bigframes/bigframes/core/bytecode.py | 118 +++++++++++------- .../bigframes/core/py_expressions.py | 14 ++- .../tests/unit/core/test_bytecode.py | 26 ++-- 3 files changed, 102 insertions(+), 56 deletions(-) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index e981172203e5..ba5071604da2 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -44,6 +44,23 @@ class NullMarker: ">=": operator.ge, } +_OLD_BINARY_OP_MAP = { + "BINARY_ADD": operator.add, + "INPLACE_ADD": operator.add, + "BINARY_SUBTRACT": operator.sub, + "INPLACE_SUBTRACT": operator.sub, + "BINARY_MULTIPLY": operator.mul, + "INPLACE_MULTIPLY": operator.mul, + "BINARY_TRUE_DIVIDE": operator.truediv, + "INPLACE_TRUE_DIVIDE": operator.truediv, + "BINARY_FLOOR_DIVIDE": operator.floordiv, + "INPLACE_FLOOR_DIVIDE": operator.floordiv, + "BINARY_MODULO": operator.mod, + "INPLACE_MODULO": operator.mod, + "BINARY_POWER": operator.pow, + "INPLACE_POWER": operator.pow, +} + def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expression]: try: @@ -54,6 +71,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi stack = [] globals_dict = func.__globals__ import builtins + builtins_dict = builtins.__dict__ closure_dict = {} @@ -71,39 
+89,49 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi if opname == "RESUME": continue - elif opname in ("LOAD_FAST", "LOAD_FAST_CHECK"): - stack.append(expression.UnboundVariableExpression(inst.argval)) - elif opname == "LOAD_FAST_LOAD_FAST": var1, var2 = inst.argval stack.append(expression.UnboundVariableExpression(var1)) stack.append(expression.UnboundVariableExpression(var2)) - elif opname == "LOAD_CONST": + elif opname.startswith("LOAD_FAST"): + stack.append(expression.UnboundVariableExpression(inst.argval)) + + elif opname in ("LOAD_CONST", "LOAD_SMALL_INT"): stack.append(py_exprs.PyObject(inst.argval)) elif opname == "LOAD_GLOBAL": name = inst.argval + found = False val = None if name in closure_dict: val = closure_dict[name] + found = True elif name in globals_dict: val = globals_dict[name] + found = True elif name in builtins_dict: val = builtins_dict[name] + found = True - if isinstance(val, ModuleType): - stack.append(py_exprs.Module(val)) - elif val is not None: - stack.append(py_exprs.PyObject(val)) + if found: + if isinstance(val, ModuleType): + stack.append(py_exprs.Module(val)) + else: + stack.append(py_exprs.PyObject(val)) else: stack.append(expression.UnboundVariableExpression(name)) - elif opname == "LOAD_ATTR": + elif opname in ("LOAD_ATTR", "LOAD_METHOD"): if not stack: return None target = stack.pop() stack.append(py_exprs.GetAttr(target, inst.argval)) + if opname == "LOAD_METHOD": + if isinstance(target, py_exprs.Module): + stack.append(NullMarker) + else: + stack.append(target) elif opname == "PUSH_NULL": stack.append(NullMarker) @@ -116,42 +144,28 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi op_symbol = inst.argrepr if not op_symbol and isinstance(inst.argval, str): op_symbol = inst.argval - if op_symbol.endswith("="): + if op_symbol and op_symbol.endswith("="): op_symbol = op_symbol[:-1] if op_symbol not in _BINARY_OP_MAP: return None - 
stack.append(py_exprs.Call(py_exprs.PyObject(_BINARY_OP_MAP[op_symbol]), (left, right))) + stack.append( + py_exprs.Call( + py_exprs.PyObject(_BINARY_OP_MAP[op_symbol]), (left, right) + ) + ) # Support older Python versions compatibility - elif opname in ("BINARY_ADD", "INPLACE_ADD"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.add), (left, right))) - elif opname in ("BINARY_SUBTRACT", "INPLACE_SUBTRACT"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.sub), (left, right))) - elif opname in ("BINARY_MULTIPLY", "INPLACE_MULTIPLY"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.mul), (left, right))) - elif opname in ("BINARY_TRUE_DIVIDE", "INPLACE_TRUE_DIVIDE"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.truediv), (left, right))) - elif opname in ("BINARY_FLOOR_DIVIDE", "INPLACE_FLOOR_DIVIDE"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.floordiv), (left, right))) - elif opname in ("BINARY_MODULO", "INPLACE_MODULO"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.mod), (left, right))) - elif opname in ("BINARY_POWER", "INPLACE_POWER"): - if len(stack) < 2: return None - right = stack.pop(); left = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.pow), (left, right))) + elif opname in _OLD_BINARY_OP_MAP: + if len(stack) < 2: + return None + right = stack.pop() + left = stack.pop() + stack.append( + py_exprs.Call( + py_exprs.PyObject(_OLD_BINARY_OP_MAP[opname]), (left, right) + ) + ) elif opname == "COMPARE_OP": if len(stack) < 2: @@ -161,13 
+175,24 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi op_symbol = inst.argval if op_symbol not in _COMPARE_OP_MAP: return None - stack.append(py_exprs.Call(py_exprs.PyObject(_COMPARE_OP_MAP[op_symbol]), (left, right))) + stack.append( + py_exprs.Call( + py_exprs.PyObject(_COMPARE_OP_MAP[op_symbol]), (left, right) + ) + ) elif opname in ("UNARY_NEGATIVE", "UNARY_INVERT"): if not stack: return None target = stack.pop() - stack.append(py_exprs.Call(py_exprs.PyObject(operator.neg if opname == "UNARY_NEGATIVE" else operator.invert), (target,))) + stack.append( + py_exprs.Call( + py_exprs.PyObject( + operator.neg if opname == "UNARY_NEGATIVE" else operator.invert + ), + (target,), + ) + ) elif opname == "UNARY_POSITIVE": if not stack: @@ -184,13 +209,20 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi else: return None - elif opname in ("CALL", "CALL_FUNCTION"): + elif opname in ("CALL", "CALL_FUNCTION", "CALL_METHOD"): num_args = inst.arg if len(stack) < num_args: return None args = [stack.pop() for _ in range(num_args)][::-1] if stack and stack[-1] is NullMarker: stack.pop() + elif ( + stack + and stack[-1] is not NullMarker + and isinstance(stack[-1], expression.Expression) + ): + self_arg = stack.pop() + args = [self_arg] + args if not stack: return None callable_expr = stack.pop() @@ -229,4 +261,4 @@ def dis_to_expr( return None return py_exprs.resolve_py_exprs(py_expr, unpack_mode=unpack_mode) except Exception: - return None \ No newline at end of file + return None diff --git a/packages/bigframes/bigframes/core/py_expressions.py b/packages/bigframes/bigframes/core/py_expressions.py index 71f25f782ccc..be26255534a9 100644 --- a/packages/bigframes/bigframes/core/py_expressions.py +++ b/packages/bigframes/bigframes/core/py_expressions.py @@ -19,16 +19,16 @@ from types import ModuleType from typing import Callable, Hashable, Mapping, Tuple +import bigframes.operations.python_op_maps as 
python_op_maps from bigframes import dtypes from bigframes.core import identifiers from bigframes.core.expression import ( - const, Expression, OpExpression, UnboundVariableExpression, + const, ) -from bigframes.operations import generic_ops, numeric_ops, NUMPY_TO_BINOP, NUMPY_TO_OP -import bigframes.operations.python_op_maps as python_op_maps +from bigframes.operations import NUMPY_TO_BINOP, NUMPY_TO_OP, generic_ops, numeric_ops _CALLABLE_TO_OP = { **NUMPY_TO_OP, @@ -138,7 +138,7 @@ def is_resolved(self) -> bool: @property def output_type(self) -> dtypes.ExpressionType: - raise ValueError("Module expresion has not type") + raise ValueError("Module expression does not have a type.") def bind_variables( self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False @@ -185,7 +185,7 @@ def is_resolved(self) -> bool: @property def output_type(self) -> dtypes.ExpressionType: - raise ValueError("Module expresion has not type") + raise ValueError("PyObject expression does not have a type.") def bind_variables( self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False @@ -317,7 +317,9 @@ def resolve_attrs(expression: Expression) -> Expression: if isinstance(expression.input, Module): # resolves things like Math.pi return PyObject(getattr(expression.input.module, expression.attr)) - if not unpack_mode and isinstance(expression.input, UnboundVariableExpression): + if not unpack_mode and isinstance( + expression.input, UnboundVariableExpression + ): return UnboundVariableExpression(expression.attr) return expression diff --git a/packages/bigframes/tests/unit/core/test_bytecode.py b/packages/bigframes/tests/unit/core/test_bytecode.py index a368d2a89a96..08903c8b2ba8 100644 --- a/packages/bigframes/tests/unit/core/test_bytecode.py +++ b/packages/bigframes/tests/unit/core/test_bytecode.py @@ -13,18 +13,17 @@ # limitations under the License. 
import math -import pytest -from bigframes.core.bytecode import dis_to_expr import bigframes.core.expression as ex import bigframes.operations as ops +from bigframes.core.bytecode import dis_to_expr def test_dis_to_expr_simple_arithmetic(): func = lambda row: row.x + 1 expr = dis_to_expr(func, unpack_mode=False) assert expr is not None - + expected = ops.add_op.as_expr(ex.free_var("x"), ex.const(1)) assert expr == expected @@ -33,7 +32,7 @@ def test_dis_to_expr_unpack_mode(): func = lambda col1, col2: col1 * col2 expr = dis_to_expr(func, unpack_mode=True) assert expr is not None - + expected = ops.mul_op.as_expr(ex.free_var("col1"), ex.free_var("col2")) assert expr == expected @@ -42,7 +41,7 @@ def test_dis_to_expr_math_function(): func = lambda row: math.sin(row.x) expr = dis_to_expr(func, unpack_mode=False) assert expr is not None - + expected = ops.numeric_ops.sin_op.as_expr(ex.free_var("x")) assert expr == expected @@ -51,7 +50,7 @@ def test_dis_to_expr_negation(): func = lambda row: -row.x expr = dis_to_expr(func, unpack_mode=False) assert expr is not None - + expected = ops.numeric_ops.neg_op.as_expr(ex.free_var("x")) assert expr == expected @@ -60,7 +59,7 @@ def test_dis_to_expr_comparison(): func = lambda row: row.x == row.y expr = dis_to_expr(func, unpack_mode=False) assert expr is not None - + expected = ops.comparison_ops.eq_op.as_expr(ex.free_var("x"), ex.free_var("y")) assert expr == expected @@ -75,3 +74,16 @@ def func_with_loop(row): expr = dis_to_expr(func_with_loop, unpack_mode=False) assert expr is None + + +global_none_val = None + + +def test_dis_to_expr_global_none(): + # Test resolving a global variable explicitly set to None + func = lambda row: row.x == global_none_val + expr = dis_to_expr(func, unpack_mode=False) + assert expr is not None + + expected = ops.comparison_ops.eq_op.as_expr(ex.free_var("x"), ex.const(None)) + assert expr == expected From 178d4e8aeb107b7bca325da358e27a816aa1d025 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron 
Date: Tue, 2 Jun 2026 00:12:04 +0000 Subject: [PATCH 3/8] fix noxfile and handle PRECALL --- packages/bigframes/bigframes/core/bytecode.py | 2 +- packages/bigframes/noxfile.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index ba5071604da2..be008eb44597 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -86,7 +86,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi for inst in instructions: opname = inst.opname - if opname == "RESUME": + if opname in ("RESUME", "PRECALL"): continue elif opname == "LOAD_FAST_LOAD_FAST": diff --git a/packages/bigframes/noxfile.py b/packages/bigframes/noxfile.py index c8d08c6787d0..c6189e3b80ed 100644 --- a/packages/bigframes/noxfile.py +++ b/packages/bigframes/noxfile.py @@ -123,9 +123,7 @@ # TODO(tswast): Consider removing this when unit_noextras and cover is run # from GitHub actions. "unit_noextras", - "system-3.10", # No extras. "system-3.12", # No extras. - f"system-{DEFAULT_PYTHON_VERSION}", # All extras. 
"cover", # TODO(b/401609005): remove "cleanup", From a6dce93b328b20e762ef379d2ee3c1bb02d7e67a Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 2 Jun 2026 00:34:42 +0000 Subject: [PATCH 4/8] more changes --- packages/bigframes/bigframes/core/bytecode.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index be008eb44597..4803fc9e0fd4 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -101,6 +101,10 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi stack.append(py_exprs.PyObject(inst.argval)) elif opname == "LOAD_GLOBAL": + # In Python 3.11+, the lowest bit of inst.arg indicates that a NULL + # should be pushed before the global variable. + if inst.arg is not None and (inst.arg & 1): + stack.append(NullMarker) name = inst.argval found = False val = None @@ -214,6 +218,11 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi if len(stack) < num_args: return None args = [stack.pop() for _ in range(num_args)][::-1] + # In Python 3.11, LOAD_GLOBAL with NULL push puts NullMarker below the global. + # If NullMarker is below the callable on the stack, swap them to match + # the expected layout [callable, NullMarker]. 
+ if len(stack) >= 2 and stack[-2] is NullMarker: + stack[-1], stack[-2] = stack[-2], stack[-1] if stack and stack[-1] is NullMarker: stack.pop() elif ( From ceaebf5bd3d7cf8fbeac46f6d78ce7f12513ee4f Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 3 Jun 2026 22:39:40 +0000 Subject: [PATCH 5/8] expose exceptions --- packages/bigframes/bigframes/core/bytecode.py | 55 ++++++++----------- .../tests/unit/core/test_bytecode.py | 6 +- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index 4803fc9e0fd4..d8aaa39e7ca8 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -14,8 +14,9 @@ import dis import operator +import sys from types import ModuleType -from typing import Callable, Optional +from typing import Callable import bigframes.core.py_expressions as py_exprs from bigframes.core import expression @@ -62,11 +63,8 @@ class NullMarker: } -def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expression]: - try: - instructions = list(dis.get_instructions(func)) - except Exception: - return None +def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: + instructions = list(dis.get_instructions(func)) stack = [] globals_dict = func.__globals__ @@ -103,7 +101,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname == "LOAD_GLOBAL": # In Python 3.11+, the lowest bit of inst.arg indicates that a NULL # should be pushed before the global variable. 
- if inst.arg is not None and (inst.arg & 1): + if sys.version_info >= (3, 11) and inst.arg is not None and (inst.arg & 1): stack.append(NullMarker) name = inst.argval found = False @@ -128,7 +126,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname in ("LOAD_ATTR", "LOAD_METHOD"): if not stack: - return None + raise ValueError("Stack is empty") target = stack.pop() stack.append(py_exprs.GetAttr(target, inst.argval)) if opname == "LOAD_METHOD": @@ -142,7 +140,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname == "BINARY_OP": if len(stack) < 2: - return None + raise ValueError("Stack is empty") right = stack.pop() left = stack.pop() op_symbol = inst.argrepr @@ -152,7 +150,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi op_symbol = op_symbol[:-1] if op_symbol not in _BINARY_OP_MAP: - return None + raise ValueError(f"Unsupported binary operator: {op_symbol}") stack.append( py_exprs.Call( py_exprs.PyObject(_BINARY_OP_MAP[op_symbol]), (left, right) @@ -173,12 +171,12 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname == "COMPARE_OP": if len(stack) < 2: - return None + raise ValueError("Stack has < 2 elements") right = stack.pop() left = stack.pop() op_symbol = inst.argval if op_symbol not in _COMPARE_OP_MAP: - return None + raise ValueError(f"Unsupported compare operator: {op_symbol}") stack.append( py_exprs.Call( py_exprs.PyObject(_COMPARE_OP_MAP[op_symbol]), (left, right) @@ -187,7 +185,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname in ("UNARY_NEGATIVE", "UNARY_INVERT"): if not stack: - return None + raise ValueError("Stack is empty") target = stack.pop() stack.append( py_exprs.Call( @@ -200,23 +198,23 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi elif opname == "UNARY_POSITIVE": if not stack: - return None + 
raise ValueError("Stack is empty") target = stack.pop() stack.append(py_exprs.Call(py_exprs.PyObject(operator.pos), (target,))) elif opname == "CALL_INTRINSIC_1": if inst.argrepr == "INTRINSIC_UNARY_POSITIVE": if not stack: - return None + raise ValueError("Stack is empty") target = stack.pop() stack.append(py_exprs.Call(py_exprs.PyObject(operator.pos), (target,))) else: - return None + raise ValueError(f"Unsupported intrinsic: {inst.argrepr}") elif opname in ("CALL", "CALL_FUNCTION", "CALL_METHOD"): num_args = inst.arg if len(stack) < num_args: - return None + raise ValueError("Stack has < 2 elements") args = [stack.pop() for _ in range(num_args)][::-1] # In Python 3.11, LOAD_GLOBAL with NULL push puts NullMarker below the global. # If NullMarker is below the callable on the stack, swap them to match @@ -233,13 +231,13 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi self_arg = stack.pop() args = [self_arg] + args if not stack: - return None + raise ValueError("Stack is empty") callable_expr = stack.pop() stack.append(py_exprs.Call(callable_expr, tuple(args))) elif opname == "RETURN_VALUE": if not stack: - return None + raise ValueError("Stack is empty") return stack[-1] elif opname in ("STORE_FAST", "POP_TOP"): @@ -247,14 +245,12 @@ def _compile_bytecode_to_py_expr(func: Callable) -> Optional[expression.Expressi stack.pop() else: - return None + raise ValueError(f"Unsupported opcode: {opname}") - return None + raise ValueError("No return value found") -def dis_to_expr( - func: Callable, unpack_mode: bool = False -) -> Optional[expression.Expression]: +def dis_to_expr(func: Callable, unpack_mode: bool = False) -> expression.Expression: """ Try to convert a python function to a BigQuery expression. @@ -262,12 +258,7 @@ def dis_to_expr( python argument (e.g. row.col1), or as separate arguments (e.g. col1). 
This is "best effort" - if the function contains operations that cannot - be converted to BigQuery expressions, it will return None. + be converted to BigQuery expressions, it will raise an Exception. """ - try: - py_expr = _compile_bytecode_to_py_expr(func) - if py_expr is None: - return None - return py_exprs.resolve_py_exprs(py_expr, unpack_mode=unpack_mode) - except Exception: - return None + py_expr = _compile_bytecode_to_py_expr(func) + return py_exprs.resolve_py_exprs(py_expr, unpack_mode=unpack_mode) diff --git a/packages/bigframes/tests/unit/core/test_bytecode.py b/packages/bigframes/tests/unit/core/test_bytecode.py index 08903c8b2ba8..b3036e3a205c 100644 --- a/packages/bigframes/tests/unit/core/test_bytecode.py +++ b/packages/bigframes/tests/unit/core/test_bytecode.py @@ -14,6 +14,8 @@ import math +import pytest + import bigframes.core.expression as ex import bigframes.operations as ops from bigframes.core.bytecode import dis_to_expr @@ -72,8 +74,8 @@ def func_with_loop(row): res += val return res - expr = dis_to_expr(func_with_loop, unpack_mode=False) - assert expr is None + with pytest.raises(ValueError): + dis_to_expr(func_with_loop, unpack_mode=False) global_none_val = None From 2503d096816c2cd2a816488ab0ecf9621a20a4c7 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 3 Jun 2026 22:49:51 +0000 Subject: [PATCH 6/8] handle LOAD_FAST_BORROW_LOAD_FAST_BORROW --- packages/bigframes/bigframes/core/bytecode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index d8aaa39e7ca8..c03eafe79b2e 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -87,7 +87,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: if opname in ("RESUME", "PRECALL"): continue - elif opname == "LOAD_FAST_LOAD_FAST": + elif opname in ("LOAD_FAST_LOAD_FAST", 
"LOAD_FAST_BORROW_LOAD_FAST_BORROW"): var1, var2 = inst.argval stack.append(expression.UnboundVariableExpression(var1)) stack.append(expression.UnboundVariableExpression(var2)) From 77420e46c4f10005be949014197daf1f1f7367ae Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 3 Jun 2026 23:24:49 +0000 Subject: [PATCH 7/8] clean lint --- packages/bigframes/bigframes/core/bytecode.py | 31 +++++++++---------- .../bigframes/core/py_expressions.py | 22 ++++++++----- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/packages/bigframes/bigframes/core/bytecode.py b/packages/bigframes/bigframes/core/bytecode.py index c03eafe79b2e..ec7c3ad0538e 100644 --- a/packages/bigframes/bigframes/core/bytecode.py +++ b/packages/bigframes/bigframes/core/bytecode.py @@ -21,11 +21,6 @@ import bigframes.core.py_expressions as py_exprs from bigframes.core import expression - -class NullMarker: - pass - - _BINARY_OP_MAP = { "+": operator.add, "-": operator.sub, @@ -63,10 +58,13 @@ class NullMarker: } +_NULL = py_exprs.PyObject(None) + + def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: instructions = list(dis.get_instructions(func)) - stack = [] + stack: list[expression.Expression] = [] globals_dict = func.__globals__ import builtins @@ -102,7 +100,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: # In Python 3.11+, the lowest bit of inst.arg indicates that a NULL # should be pushed before the global variable. 
if sys.version_info >= (3, 11) and inst.arg is not None and (inst.arg & 1): - stack.append(NullMarker) + stack.append(_NULL) name = inst.argval found = False val = None @@ -131,12 +129,12 @@ def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: stack.append(py_exprs.GetAttr(target, inst.argval)) if opname == "LOAD_METHOD": if isinstance(target, py_exprs.Module): - stack.append(NullMarker) + stack.append(_NULL) else: stack.append(target) elif opname == "PUSH_NULL": - stack.append(NullMarker) + stack.append(_NULL) elif opname == "BINARY_OP": if len(stack) < 2: @@ -160,7 +158,7 @@ def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: # Support older Python versions compatibility elif opname in _OLD_BINARY_OP_MAP: if len(stack) < 2: - return None + raise ValueError("Stack has < 2 elements") right = stack.pop() left = stack.pop() stack.append( @@ -213,19 +211,20 @@ def _compile_bytecode_to_py_expr(func: Callable) -> expression.Expression: elif opname in ("CALL", "CALL_FUNCTION", "CALL_METHOD"): num_args = inst.arg + assert num_args is not None if len(stack) < num_args: raise ValueError("Stack has < 2 elements") args = [stack.pop() for _ in range(num_args)][::-1] - # In Python 3.11, LOAD_GLOBAL with NULL push puts NullMarker below the global. - # If NullMarker is below the callable on the stack, swap them to match - # the expected layout [callable, NullMarker]. - if len(stack) >= 2 and stack[-2] is NullMarker: + # In Python 3.11, LOAD_GLOBAL with NULL push puts NULL below the global. + # If NULL is below the callable on the stack, swap them to match + # the expected layout [callable, NULL]. 
+ if len(stack) >= 2 and stack[-2] == _NULL: stack[-1], stack[-2] = stack[-2], stack[-1] - if stack and stack[-1] is NullMarker: + if stack and stack[-1] == _NULL: stack.pop() elif ( stack - and stack[-1] is not NullMarker + and stack[-1] != _NULL and isinstance(stack[-1], expression.Expression) ): self_arg = stack.pop() diff --git a/packages/bigframes/bigframes/core/py_expressions.py b/packages/bigframes/bigframes/core/py_expressions.py index be26255534a9..023acaef247d 100644 --- a/packages/bigframes/bigframes/core/py_expressions.py +++ b/packages/bigframes/bigframes/core/py_expressions.py @@ -53,7 +53,7 @@ def column_references( return self.input.column_references @property - def free_variables(self) -> set[str]: + def free_variables(self) -> set[Hashable]: return self.input.free_variables @property @@ -92,7 +92,9 @@ def transform_children(self, t: Callable[[Expression], Expression]) -> Expressio return self def bind_variables( - self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, ) -> GetAttr: return GetAttr( self.input.bind_variables( @@ -141,7 +143,9 @@ def output_type(self) -> dtypes.ExpressionType: raise ValueError("Module expression does not have a type.") def bind_variables( - self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, ) -> Expression: return self @@ -188,7 +192,9 @@ def output_type(self) -> dtypes.ExpressionType: raise ValueError("PyObject expression does not have a type.") def bind_variables( - self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, ) -> Expression: return self @@ -227,7 +233,7 @@ def column_references( ) @property - def free_variables(self) -> set[str]: + def free_variables(self) -> 
set[Hashable]: return set( itertools.chain.from_iterable( map(lambda x: x.free_variables, self.children) @@ -271,7 +277,9 @@ def transform_children(self, t: Callable[[Expression], Expression]) -> Expressio ) def bind_variables( - self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False + self, + bindings: Mapping[Hashable, Expression], + allow_partial_bindings: bool = False, ) -> Call: return Call( callable=self.callable.bind_variables( @@ -348,7 +356,7 @@ def resolve_call(call: Call) -> Expression: return OpExpression(op, call.inputs) elif isinstance(callable, PyObject): if callable.value in python_op_maps.PYTHON_TO_BIGFRAMES: - op = python_op_maps.PYTHON_TO_BIGFRAMES[callable.value] + op = python_op_maps.PYTHON_TO_BIGFRAMES[callable.value] # type: ignore return OpExpression(op, call.inputs) if callable.value in _BUILTIN_CALLABLES: return OpExpression(_BUILTIN_CALLABLES[callable.value], call.inputs) From 97dbb4fef18166f6a7930033b425a98ce7a1d283 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 3 Jun 2026 23:49:19 +0000 Subject: [PATCH 8/8] fix free_variables on py_expressions --- packages/bigframes/bigframes/core/py_expressions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/bigframes/bigframes/core/py_expressions.py b/packages/bigframes/bigframes/core/py_expressions.py index 023acaef247d..a8d19c3d037c 100644 --- a/packages/bigframes/bigframes/core/py_expressions.py +++ b/packages/bigframes/bigframes/core/py_expressions.py @@ -53,7 +53,7 @@ def column_references( return self.input.column_references @property - def free_variables(self) -> set[Hashable]: + def free_variables(self) -> tuple[Hashable, ...]: return self.input.free_variables @property @@ -233,8 +233,8 @@ def column_references( ) @property - def free_variables(self) -> set[Hashable]: - return set( + def free_variables(self) -> tuple[Hashable, ...]: + return tuple( itertools.chain.from_iterable( map(lambda x: x.free_variables, 
self.children) )