From b5e8a06a5f22bf5abab4641bd6cc3939720f3a16 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 23:13:59 +0000 Subject: [PATCH 1/5] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL". Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/ml.py | 7 +++++-- packages/bigframes/bigframes/core/sql/ml.py | 11 +++++++++-- .../create_model_expression_option.sql | 3 +++ packages/bigframes/tests/unit/core/sql/test_ml.py | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 04d88c9b6a68..add2dac05fe6 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -25,6 +25,7 @@ import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session +import bigframes.core.expression as ex from bigframes.bigquery._operations import utils @@ -50,7 +51,9 @@ def create_model( input_schema: Optional[Mapping[str, str]] = None, output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, - options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None, + options: Optional[ + Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + ] = None, training_data: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, custom_holiday: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, session: Optional[bigframes.session.Session] = None, @@ -78,7 +81,7 @@ def create_model( The OUTPUT clause, which specifies the schema of the output data. connection_name (str, optional): The connection to use for the model. - options (Mapping[str, Union[str, int, float, bool, list]], optional): + options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.expression.Expression]], optional): The OPTIONS clause, which specifies the model options. training_data (Union[bigframes.pandas.DataFrame, str], optional): The query or DataFrame to use for training the model. diff --git a/packages/bigframes/bigframes/core/sql/ml.py b/packages/bigframes/bigframes/core/sql/ml.py index 9694e19ad30e..4803bcd15049 100644 --- a/packages/bigframes/bigframes/core/sql/ml.py +++ b/packages/bigframes/bigframes/core/sql/ml.py @@ -16,7 +16,9 @@ from typing import Any, Dict, List, Mapping, Optional, Union +import bigframes.core.expression as ex from bigframes.core.compile.sqlglot import sql as sg_sql +from bigframes.core.compile.sqlglot.expression_compiler import expression_compiler def create_model_ddl( @@ -28,7 +30,9 @@ def create_model_ddl( input_schema: Optional[Mapping[str, str]] = None, output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, - options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None, + options: Optional[ + Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + ] = None, training_data: Optional[str] = None, custom_holiday: Optional[str] = None, ) -> str: @@ -70,7 +74,10 @@ def create_model_ddl( if options: rendered_options = [] for option_name, option_value in options.items(): - if isinstance(option_value, (list, tuple)): + if isinstance(option_value, ex.Expression): + sg_expr = expression_compiler.compile_expression(option_value) + rendered_val = sg_sql.to_sql(sg_expr) + elif isinstance(option_value, (list, tuple)): # Handle list options like model_registry="vertex_ai" # wait, usually options are key=value. # if value is list, it is [val1, val2] diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql new file mode 100644 index 000000000000..eabfb8a5e962 --- /dev/null +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql @@ -0,0 +1,3 @@ +CREATE MODEL `my_model` +OPTIONS(l2_reg = 0.1, booster_type = 'gbtree') +AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index bb3b61a949cf..718e4b81ca3b 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -97,6 +97,20 @@ def test_create_model_list_option(snapshot): snapshot.assert_match(sql, "create_model_list_option.sql") +def test_create_model_expression_option(snapshot): + import bigframes.core.expression as ex + + sql = bigframes.core.sql.ml.create_model_ddl( + model_name="my_model", + options={ + "l2_reg": ex.ScalarConstantExpression(0.1, None), + "booster_type": "gbtree", + }, + training_data="SELECT * FROM t", + ) + snapshot.assert_match(sql, "create_model_expression_option.sql") + + def test_evaluate_model_basic(snapshot): sql = bigframes.core.sql.ml.evaluate( model_name="my_project.my_dataset.my_model", From 7ea8bf595ea68fbbfc578f38eb20856cdec6b48c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 03:44:10 +0000 Subject: [PATCH 2/5] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- .../create_model_expression_option.sql | 2 +- packages/bigframes/tests/unit/core/sql/test_ml.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql index eabfb8a5e962..e328aab5fbfd 100644 --- a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql @@ -1,3 +1,3 @@ CREATE MODEL `my_model` -OPTIONS(l2_reg = 0.1, booster_type = 'gbtree') +OPTIONS(l2_reg = 0.1 * 10, booster_type = 'gbtree') AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index 718e4b81ca3b..d46170b771c6 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -99,11 +99,19 @@ def test_create_model_list_option(snapshot): def test_create_model_expression_option(snapshot): import bigframes.core.expression as ex + import bigframes.operations.numeric_ops as numeric_ops + import bigframes.dtypes as dtypes + + # An expression that calls a function on a literal value + # e.g. 0.1 * 10 + literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) + multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) + math_expr = ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", options={ - "l2_reg": ex.ScalarConstantExpression(0.1, None), + "l2_reg": math_expr, "booster_type": "gbtree", }, training_data="SELECT * FROM t", From f92c089cdca3dc212c7d3d42afb7f716a3acf5cb Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:05:56 +0000 Subject: [PATCH 3/5] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/tests/unit/core/sql/test_ml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index d46170b771c6..fe7630224ddb 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -106,7 +106,9 @@ def test_create_model_expression_option(snapshot): # e.g. 0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) + math_expr = ex.OpExpression( + op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr) + ) sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", From 5d874d0f7b875b881a4bb08f6edd471af0112295 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:43:41 +0000 Subject: [PATCH 4/5] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). - Moved test imports to the top level to adhere to PEP 8. Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/tests/unit/core/sql/test_ml.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index fe7630224ddb..e7d9f628cc2e 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,7 +14,10 @@ import pytest +import bigframes.core.expression as ex import bigframes.core.sql.ml +import bigframes.dtypes as dtypes +import bigframes.operations.numeric_ops as numeric_ops pytest.importorskip("pytest_snapshot") @@ -98,10 +101,6 @@ def test_create_model_list_option(snapshot): def test_create_model_expression_option(snapshot): - import bigframes.core.expression as ex - import bigframes.operations.numeric_ops as numeric_ops - import bigframes.dtypes as dtypes - # An expression that calls a function on a literal value # e.g. 0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) From 58aa74e9d4ada8798850a245b4d2292c47bb223e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 21:10:35 +0000 Subject: [PATCH 5/5] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `col.Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.col.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). - Moved test imports to the top level to adhere to PEP 8 and ran `ruff format`. Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/bigframes/bigquery/_operations/ml.py | 6 +++--- packages/bigframes/bigframes/core/sql/ml.py | 8 ++++---- packages/bigframes/tests/unit/core/sql/test_ml.py | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index add2dac05fe6..412b49b888f5 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -25,7 +25,7 @@ import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.expression as ex +import bigframes.core.col as col from bigframes.bigquery._operations import utils @@ -52,7 +52,7 @@ def create_model( output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, options: Optional[ - Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + Mapping[str, Union[str, int, float, bool, list, "col.Expression"]] ] = None, training_data: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, custom_holiday: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, @@ -81,7 +81,7 @@ def create_model( The OUTPUT clause, which specifies the schema of the output data. connection_name (str, optional): The connection to use for the model. - options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.expression.Expression]], optional): + options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.col.Expression]], optional): The OPTIONS clause, which specifies the model options. training_data (Union[bigframes.pandas.DataFrame, str], optional): The query or DataFrame to use for training the model. diff --git a/packages/bigframes/bigframes/core/sql/ml.py b/packages/bigframes/bigframes/core/sql/ml.py index 4803bcd15049..8d971e6c3e8d 100644 --- a/packages/bigframes/bigframes/core/sql/ml.py +++ b/packages/bigframes/bigframes/core/sql/ml.py @@ -16,7 +16,7 @@ from typing import Any, Dict, List, Mapping, Optional, Union -import bigframes.core.expression as ex +import bigframes.core.col as col from bigframes.core.compile.sqlglot import sql as sg_sql from bigframes.core.compile.sqlglot.expression_compiler import expression_compiler @@ -31,7 +31,7 @@ def create_model_ddl( output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, options: Optional[ - Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + Mapping[str, Union[str, int, float, bool, list, "col.Expression"]] ] = None, training_data: Optional[str] = None, custom_holiday: Optional[str] = None, @@ -74,8 +74,8 @@ def create_model_ddl( if options: rendered_options = [] for option_name, option_value in options.items(): - if isinstance(option_value, ex.Expression): - sg_expr = expression_compiler.compile_expression(option_value) + if isinstance(option_value, col.Expression): + sg_expr = expression_compiler.compile_expression(option_value._value) rendered_val = sg_sql.to_sql(sg_expr) elif isinstance(option_value, (list, tuple)): # Handle list options like model_registry="vertex_ai" diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index e7d9f628cc2e..61296638eec2 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,6 +14,7 @@ import pytest +import bigframes.core.col as col import bigframes.core.expression as ex import bigframes.core.sql.ml import bigframes.dtypes as dtypes @@ -105,8 +106,8 @@ def test_create_model_expression_option(snapshot): # e.g. 0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = ex.OpExpression( - op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr) + math_expr = col.Expression( + ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) ) sql = bigframes.core.sql.ml.create_model_ddl(