From b1ae999155d18a5a8f7cd9b334850c6995724571 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 11 May 2026 19:21:44 +0000 Subject: [PATCH 01/12] huggingface model handler for yaml --- .../beam_PostCommit_Yaml_Xlang_Direct.json | 2 +- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 2 +- .../beam_PreCommit_Yaml_Xlang_Direct.yml | 2 +- ...erence.yaml => runinference_vertexai.yaml} | 0 sdks/python/apache_beam/yaml/yaml_ml.py | 49 +++++++++++++++++++ sdks/python/build.gradle | 28 ++++++++++- 6 files changed, 78 insertions(+), 5 deletions(-) rename sdks/python/apache_beam/yaml/tests/{runinference.yaml => runinference_vertexai.yaml} (100%) diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json index 541dc4ea8e87..8ed972c9f579 100644 --- a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json +++ b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 2 + "revision": 3 } diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index ea1c255f7cc9..afa437b64de1 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -80,7 +80,7 @@ jobs: - name: run PostCommit Yaml Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=${{ matrix.test_set }} -PbeamPythonExtra=ml_test,yaml + gradle-command: :sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=${{ matrix.test_set }} - name: Archive Python Test Results uses: actions/upload-artifact@v7 if: failure() diff --git a/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml index 34a1af741f74..bde75568101b 100644 --- a/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml @@ -91,7 +91,7 @@ jobs: - name: run PreCommit Yaml Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:yamlIntegrationTests -PbeamPythonExtra=ml_test,yaml + gradle-command: :sdks:python:yamlIntegrationTests - name: Archive Python Test Results uses: actions/upload-artifact@v7 if: failure() diff --git a/sdks/python/apache_beam/yaml/tests/runinference.yaml b/sdks/python/apache_beam/yaml/tests/runinference_vertexai.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/tests/runinference.yaml rename to sdks/python/apache_beam/yaml/tests/runinference_vertexai.yaml diff --git a/sdks/python/apache_beam/yaml/yaml_ml.py b/sdks/python/apache_beam/yaml/yaml_ml.py index 51f18c733046..05cbed3bd456 100644 --- a/sdks/python/apache_beam/yaml/yaml_ml.py +++ b/sdks/python/apache_beam/yaml/yaml_ml.py @@ -282,6 +282,55 @@ def inference_output_type(self): ('model_id', Optional[str])]) +@ModelHandlerProvider.register_handler_type('HuggingFacePipeline') +class HuggingFacePipelineProvider(ModelHandlerProvider): + def __init__( + self, + task: Optional[str] = None, + model: Optional[str] = None, + preprocess: Optional[dict[str, str]] = None, + postprocess: Optional[dict[str, str]] = None, + device: Optional[Any] = None, + inference_fn: Optional[dict[str, str]] = None, + load_pipeline_args: Optional[dict[str, Any]] = None, + **kwargs): + try: + from apache_beam.ml.inference.huggingface_inference import HuggingFacePipelineModelHandler + except ImportError: + raise ValueError( + 'Unable to import HuggingFacePipelineModelHandler. Please ' + 'install transformers dependencies.') + + kwargs = {k: v for k, v in kwargs.items() if not k.startswith('_')} + + inference_fn_obj = self.parse_processing_transform( + inference_fn, 'inference_fn') if inference_fn else None + + handler_kwargs = {} + if inference_fn_obj: + handler_kwargs['inference_fn'] = inference_fn_obj + + _handler = HuggingFacePipelineModelHandler( + task=task, + model=model, + device=device, + load_pipeline_args=load_pipeline_args, + **handler_kwargs, + **kwargs) + + super().__init__(_handler, preprocess, postprocess) + + @staticmethod + def validate(config): + if not config.get('task') and not config.get('model'): + raise ValueError( + "HuggingFacePipeline requires either 'task' or " + "'model' to be specified.") + + def inference_output_type(self): + return Any + + @beam.ptransform.ptransform_fn def run_inference( pcoll, diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 5f09dff57e8f..8450e32d593f 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -124,10 +124,34 @@ tasks.register("generateYamlDocs") { outputs.file "${buildDir}/yaml-examples.html" } +tasks.register("installYamlIntegrationTestDeps") { + dependsOn installGcpTest + doLast { + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && " + + "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + + "constraint_file=\"\" && " + + "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + + "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + + "fi && " + + "if [ -n \"\$constraint_file\" ]; then " + + " echo \"Installing with constraint file: \$constraint_file\" && " + + " pip install --pre --retries 10 -c \"\$constraint_file\" \${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]; " + + "else " + + " echo \"No constraint file found, installing without constraints\" && " + + " pip install --pre --retries 10 \${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]; " + + "fi" + } + } +} + tasks.register("yamlIntegrationTests") { description "Runs precommit integration tests for yaml pipelines." - dependsOn installGcpTest + dependsOn installYamlIntegrationTestDeps // Need to build all expansion services referenced in apache_beam/yaml/*.* // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" @@ -146,7 +170,7 @@ tasks.register("yamlIntegrationTests") { tasks.register("postCommitYamlIntegrationTests") { description "Runs postcommit integration tests for yaml pipelines - parameterized by yamlTestSet." - dependsOn installGcpTest + dependsOn installYamlIntegrationTestDeps // Need to build all expansion services referenced in apache_beam/yaml/*.* // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" From 78137c307ad558ae44965d80374daee6612b2227 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 11 May 2026 19:34:59 +0000 Subject: [PATCH 02/12] add yaml huggingface test file --- .../yaml/tests/runinference_huggingface.yaml | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 sdks/python/apache_beam/yaml/tests/runinference_huggingface.yaml diff --git a/sdks/python/apache_beam/yaml/tests/runinference_huggingface.yaml b/sdks/python/apache_beam/yaml/tests/runinference_huggingface.yaml new file mode 100644 index 000000000000..8728a6f544ad --- /dev/null +++ b/sdks/python/apache_beam/yaml/tests/runinference_huggingface.yaml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +pipelines: + - pipeline: + type: chain + transforms: + - type: Create + config: + elements: + - text: "I love Apache Beam!" + - text: "I hate this error." + - type: RunInference + config: + model_handler: + type: "HuggingFacePipeline" + config: + task: "text-classification" + inference_fn: + callable: | + def real_inference(batch, pipeline, inference_args): + predictions = pipeline(batch, **inference_args) + + # If it's a single dictionary (batch size of 1), wrap it in a list + if isinstance(predictions, dict): + predictions = [predictions] + + return { + 'label': [p['label'] for p in predictions], + 'score': [p['score'] for p in predictions] + } + preprocess: + callable: 'lambda x: x.text' + - type: MapToFields + config: + language: python + fields: + text: text + sentiment: + callable: 'lambda x: x.inference.inference["label"]' + - type: AssertEqual + config: + elements: + - text: "I love Apache Beam!" + sentiment: "POSITIVE" + - text: "I hate this error." + sentiment: "NEGATIVE" + + options: + yaml_experimental_features: ['ML'] From 33a05a37b0644dbc2e079eaaab536bcfd31ab822 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 12 May 2026 02:45:51 +0000 Subject: [PATCH 03/12] update dependency logic --- sdks/python/build.gradle | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 8450e32d593f..837631868b8a 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -131,19 +131,10 @@ tasks.register("installYamlIntegrationTestDeps") { executable 'sh' args '-c', ". ${envdir}/bin/activate && " + "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + - "constraint_file=\"\" && " + - "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + - "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + - "fi && " + - "if [ -n \"\$constraint_file\" ]; then " + - " echo \"Installing with constraint file: \$constraint_file\" && " + - " pip install --pre --retries 10 -c \"\$constraint_file\" \${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]; " + - "else " + - " echo \"No constraint file found, installing without constraints\" && " + - " pip install --pre --retries 10 \${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]; " + - "fi" + "ml_extra=\"ml_test\" && " + + "if [ \"\$py_ver\" -ge 313 ]; then ml_extra=\"p\${py_ver}_ml_test\"; fi && " + + "echo \"Installing dependencies...\" && " + + "pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]" } } } From 204261160bb8cb24742070886e34a4107fa0a381 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Sun, 24 May 2026 18:20:12 +0000 Subject: [PATCH 04/12] Fix installYamlIntegrationTestDeps to use constraint files and prevent pip backtracking hangs --- sdks/python/build.gradle | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 837631868b8a..0ae4aaa4148e 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -133,8 +133,19 @@ tasks.register("installYamlIntegrationTestDeps") { "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + "ml_extra=\"ml_test\" && " + "if [ \"\$py_ver\" -ge 313 ]; then ml_extra=\"p\${py_ver}_ml_test\"; fi && " + - "echo \"Installing dependencies...\" && " + - "pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]" + "constraint_file=\"\" && " + + "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + + "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + + "fi && " + + "if [ -n \"\$constraint_file\" ]; then " + + " echo \"Installing with constraint file: \$constraint_file\" && " + + " pip install --pre --retries 10 -c \"\$constraint_file\" \${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + "else " + + " echo \"No constraint file found, installing without constraints\" && " + + " pip install --pre --retries 10 \${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + "fi" } } } From 4d2c47a63e542bfc462192487f54f20eda528159 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 26 May 2026 14:02:22 +0000 Subject: [PATCH 05/12] fix gemini comments --- sdks/python/apache_beam/yaml/yaml_ml.py | 2 +- sdks/python/build.gradle | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/yaml/yaml_ml.py b/sdks/python/apache_beam/yaml/yaml_ml.py index 05cbed3bd456..188530180c46 100644 --- a/sdks/python/apache_beam/yaml/yaml_ml.py +++ b/sdks/python/apache_beam/yaml/yaml_ml.py @@ -322,7 +322,7 @@ def __init__( @staticmethod def validate(config): - if not config.get('task') and not config.get('model'): + if not config or (not config.get('task') and not config.get('model')): raise ValueError( "HuggingFacePipeline requires either 'task' or " "'model' to be specified.") diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 0ae4aaa4148e..4aea2979c304 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -141,10 +141,10 @@ tasks.register("installYamlIntegrationTestDeps") { "fi && " + "if [ -n \"\$constraint_file\" ]; then " + " echo \"Installing with constraint file: \$constraint_file\" && " + - " pip install --pre --retries 10 -c \"\$constraint_file\" \${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + "else " + " echo \"No constraint file found, installing without constraints\" && " + - " pip install --pre --retries 10 \${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + "fi" } } From 7fbe5fceffc5ae9dbf10a40e627ed23d6a3176ea Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 26 May 2026 14:41:51 +0000 Subject: [PATCH 06/12] try to fix dependency torch issue --- sdks/python/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 4aea2979c304..d9c5aff82b63 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -141,10 +141,10 @@ tasks.register("installYamlIntegrationTestDeps") { "fi && " + "if [ -n \"\$constraint_file\" ]; then " + " echo \"Installing with constraint file: \$constraint_file\" && " + - " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + "else " + " echo \"No constraint file found, installing without constraints\" && " + - " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers]; " + + " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + "fi" } } From e6a47326dc7dde3c68d5ac5b7df66513048bea22 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 26 May 2026 15:29:52 +0000 Subject: [PATCH 07/12] fix ml_test dependencies --- sdks/python/build.gradle | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index d9c5aff82b63..cfe4e87c9c47 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -131,8 +131,9 @@ tasks.register("installYamlIntegrationTestDeps") { executable 'sh' args '-c', ". ${envdir}/bin/activate && " + "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + - "ml_extra=\"ml_test\" && " + - "if [ \"\$py_ver\" -ge 313 ]; then ml_extra=\"p\${py_ver}_ml_test\"; fi && " + + "ml_extra=\"p310_ml_test\" && " + + "if [ \"\$py_ver\" -eq 312 ]; then ml_extra=\"p312_ml_test\"; " + + "elif [ \"\$py_ver\" -ge 313 ]; then ml_extra=\"p\${py_ver}_ml_test\"; fi && " + "constraint_file=\"\" && " + "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + From ebc69dbb37291041286b88feca3319b1c86227b5 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 26 May 2026 17:00:41 +0000 Subject: [PATCH 08/12] simplify condition again and try --- sdks/python/build.gradle | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index cfe4e87c9c47..718db05a3e03 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -130,23 +130,7 @@ tasks.register("installYamlIntegrationTestDeps") { exec { executable 'sh' args '-c', ". ${envdir}/bin/activate && " + - "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + - "ml_extra=\"p310_ml_test\" && " + - "if [ \"\$py_ver\" -eq 312 ]; then ml_extra=\"p312_ml_test\"; " + - "elif [ \"\$py_ver\" -ge 313 ]; then ml_extra=\"p\${py_ver}_ml_test\"; fi && " + - "constraint_file=\"\" && " + - "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + - "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + - "fi && " + - "if [ -n \"\$constraint_file\" ]; then " + - " echo \"Installing with constraint file: \$constraint_file\" && " + - " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + - "else " + - " echo \"No constraint file found, installing without constraints\" && " + - " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + - "fi" + "pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]" } } } From 5bdbdd73804192de7c5d348f52240c7b9365ddcd Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 27 May 2026 18:48:34 +0000 Subject: [PATCH 09/12] Fix pip backtracking resolution limit errors in installYamlIntegrationTestDeps by defining p311/p312 ml extras without datatable and correctly resolving constraints --- sdks/python/build.gradle | 16 +++++++++++++++- sdks/python/setup.py | 5 ++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 718db05a3e03..9a2728f2077f 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -130,7 +130,21 @@ tasks.register("installYamlIntegrationTestDeps") { exec { executable 'sh' args '-c', ". ${envdir}/bin/activate && " + - "pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[ml_test,yaml,transformers]" + "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + + "ml_extra=\"p\${py_ver}_ml_test\" && " + + "constraint_file=\"\" && " + + "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + + "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + + " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + + "fi && " + + "if [ -n \"\$constraint_file\" ]; then " + + " echo \"Installing with constraint file: \$constraint_file\" && " + + " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + + "else " + + " echo \"No constraint file found, installing without constraints\" && " + + " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + + "fi" } } } diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 4c1384c31517..96c794e10193 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -609,9 +609,8 @@ def get_portability_package_data(): 'p310_ml_test': [ 'datatable', ] + ml_base, - 'p312_ml_test': [ - 'datatable', - ] + ml_base, + 'p311_ml_test': ml_base, + 'p312_ml_test': ml_base, # maintainer: milvus tests only run with this extension. Make sure it # is covered by docker-in-docker test when changing py version 'p313_ml_test': ml_base + milvus_dependency, From cb5b8ba0ede74ce1782930d406a9f75f03500518 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 27 May 2026 19:14:38 +0000 Subject: [PATCH 10/12] Widen opentelemetry dependency version bounds in setup.py to be compatible with container base requirement constraints --- sdks/python/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 96c794e10193..44d74b1d9d1d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -211,9 +211,9 @@ def cythonize(*args, **kwargs): # proto-plus<1.24 caps protobuf<5; opentelemetry-proto (via ADK) needs # protobuf>=5. Scoped here so the main dependency list stays broader. 'proto-plus>=1.26.1,<2', - 'opentelemetry-api==1.37.0', - 'opentelemetry-sdk==1.37.0', - 'opentelemetry-exporter-otlp-proto-http==1.37.0', + 'opentelemetry-api>=1.37.0,<2', + 'opentelemetry-sdk>=1.37.0,<2', + 'opentelemetry-exporter-otlp-proto-http>=1.37.0,<2', # protobuf>=5 (ADK/OTel); tf2onnx 1.16.x pins protobuf~=3.20 only. 'tf2onnx>=1.17.0,<1.18', ] From 852df6575559fc4604d8a7415128bc0a203776a5 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Thu, 28 May 2026 13:24:57 +0000 Subject: [PATCH 11/12] adk as extra --- sdks/python/setup.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 44d74b1d9d1d..856958314791 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -181,6 +181,7 @@ def cythonize(*args, **kwargs): 'sentence-transformers>=2.2.2', 'skl2onnx', 'pyod>=0.7.6', # 0.7.5 crashes setuptools + 'contourpy<1.3.0; python_version < "3.11"', 'tensorflow', # tensorflow transitive dep, lower versions not compatible with Python3.10+ 'absl-py>=0.12.0', @@ -207,7 +208,9 @@ def cythonize(*args, **kwargs): ] ml_adk_dependency = [ - 'google-adk==1.28.1', + # google-adk is excluded here because it requires google-genai<2.0.0, + # which conflicts with google-genai>=2.0.0 (e.g. 2.6.0) pinned in containers. + # 'google-adk==1.28.1', # proto-plus<1.24 caps protobuf<5; opentelemetry-proto (via ADK) needs # protobuf>=5. Scoped here so the main dependency list stays broader. 'proto-plus>=1.26.1,<2', @@ -417,6 +420,7 @@ def get_portability_package_data(): ext_modules=extensions, install_requires=[ 'cryptography>=39.0.0,<48.0.0', + 'aiohttp<4.0.0', 'envoy-data-plane>=1.0.3,<2; python_version >= "3.11"', # Newer version only work on Python 3.11. Versions 0.3 <= ver < 1.x # conflict with other GCP dependencies. @@ -608,6 +612,8 @@ def get_portability_package_data(): ] + ml_base_core, 'p310_ml_test': [ 'datatable', + 'dill', + 'tensorflow_transform>=1.14.0,<1.15.0', ] + ml_base, 'p311_ml_test': ml_base, 'p312_ml_test': ml_base, @@ -684,7 +690,8 @@ def get_portability_package_data(): 'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'], 'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'], 'milvus': milvus_dependency, - 'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1'] + 'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1'], + 'adk': ['google-adk==1.28.1'] }, zip_safe=False, # PyPI package information. From 8cfac40736c95f06091062d015cf44c3b8b011b4 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Thu, 28 May 2026 13:53:56 +0000 Subject: [PATCH 12/12] revert integration test changes --- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 2 +- .../beam_PreCommit_Yaml_Xlang_Direct.yml | 2 +- sdks/python/build.gradle | 29 ++----------------- sdks/python/setup.py | 22 +++++--------- 4 files changed, 12 insertions(+), 43 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index afa437b64de1..57e1e30ec523 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -80,7 +80,7 @@ jobs: - name: run PostCommit Yaml Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=${{ matrix.test_set }} + gradle-command: :sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=${{ matrix.test_set }} -PbeamPythonExtra=ml_test,yaml,transformers - name: Archive Python Test Results uses: actions/upload-artifact@v7 if: failure() diff --git a/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml index bde75568101b..f6e5b1a84519 100644 --- a/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PreCommit_Yaml_Xlang_Direct.yml @@ -91,7 +91,7 @@ jobs: - name: run PreCommit Yaml Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:yamlIntegrationTests + gradle-command: :sdks:python:yamlIntegrationTests -PbeamPythonExtra=ml_test,yaml,transformers - name: Archive Python Test Results uses: actions/upload-artifact@v7 if: failure() diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 9a2728f2077f..5f09dff57e8f 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -124,35 +124,10 @@ tasks.register("generateYamlDocs") { outputs.file "${buildDir}/yaml-examples.html" } -tasks.register("installYamlIntegrationTestDeps") { - dependsOn installGcpTest - doLast { - exec { - executable 'sh' - args '-c', ". ${envdir}/bin/activate && " + - "py_ver=\$(python -c 'import sys; print(f\"{sys.version_info.major}{sys.version_info.minor}\")') && " + - "ml_extra=\"p\${py_ver}_ml_test\" && " + - "constraint_file=\"\" && " + - "if [ -f \"container/ml/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/ml/py\${py_ver}/base_image_requirements.txt\"; " + - "elif [ -f \"container/py\${py_ver}/base_image_requirements.txt\" ]; then " + - " constraint_file=\"container/py\${py_ver}/base_image_requirements.txt\"; " + - "fi && " + - "if [ -n \"\$constraint_file\" ]; then " + - " echo \"Installing with constraint file: \$constraint_file\" && " + - " pip install --pre --retries 10 -c \"\$constraint_file\" ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + - "else " + - " echo \"No constraint file found, installing without constraints\" && " + - " pip install --pre --retries 10 ${buildDir}/apache-beam.tar.gz[\$ml_extra,yaml,transformers] --extra-index-url https://download.pytorch.org/whl/cpu; " + - "fi" - } - } -} - tasks.register("yamlIntegrationTests") { description "Runs precommit integration tests for yaml pipelines." - dependsOn installYamlIntegrationTestDeps + dependsOn installGcpTest // Need to build all expansion services referenced in apache_beam/yaml/*.* // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" @@ -171,7 +146,7 @@ tasks.register("yamlIntegrationTests") { tasks.register("postCommitYamlIntegrationTests") { description "Runs postcommit integration tests for yaml pipelines - parameterized by yamlTestSet." - dependsOn installYamlIntegrationTestDeps + dependsOn installGcpTest // Need to build all expansion services referenced in apache_beam/yaml/*.* // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 856958314791..4c1384c31517 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -181,7 +181,6 @@ def cythonize(*args, **kwargs): 'sentence-transformers>=2.2.2', 'skl2onnx', 'pyod>=0.7.6', # 0.7.5 crashes setuptools - 'contourpy<1.3.0; python_version < "3.11"', 'tensorflow', # tensorflow transitive dep, lower versions not compatible with Python3.10+ 'absl-py>=0.12.0', @@ -208,15 +207,13 @@ def cythonize(*args, **kwargs): ] ml_adk_dependency = [ - # google-adk is excluded here because it requires google-genai<2.0.0, - # which conflicts with google-genai>=2.0.0 (e.g. 2.6.0) pinned in containers. - # 'google-adk==1.28.1', + 'google-adk==1.28.1', # proto-plus<1.24 caps protobuf<5; opentelemetry-proto (via ADK) needs # protobuf>=5. Scoped here so the main dependency list stays broader. 'proto-plus>=1.26.1,<2', - 'opentelemetry-api>=1.37.0,<2', - 'opentelemetry-sdk>=1.37.0,<2', - 'opentelemetry-exporter-otlp-proto-http>=1.37.0,<2', + 'opentelemetry-api==1.37.0', + 'opentelemetry-sdk==1.37.0', + 'opentelemetry-exporter-otlp-proto-http==1.37.0', # protobuf>=5 (ADK/OTel); tf2onnx 1.16.x pins protobuf~=3.20 only. 'tf2onnx>=1.17.0,<1.18', ] @@ -420,7 +417,6 @@ def get_portability_package_data(): ext_modules=extensions, install_requires=[ 'cryptography>=39.0.0,<48.0.0', - 'aiohttp<4.0.0', 'envoy-data-plane>=1.0.3,<2; python_version >= "3.11"', # Newer version only work on Python 3.11. Versions 0.3 <= ver < 1.x # conflict with other GCP dependencies. @@ -612,11 +608,10 @@ def get_portability_package_data(): ] + ml_base_core, 'p310_ml_test': [ 'datatable', - 'dill', - 'tensorflow_transform>=1.14.0,<1.15.0', ] + ml_base, - 'p311_ml_test': ml_base, - 'p312_ml_test': ml_base, + 'p312_ml_test': [ + 'datatable', + ] + ml_base, # maintainer: milvus tests only run with this extension. Make sure it # is covered by docker-in-docker test when changing py version 'p313_ml_test': ml_base + milvus_dependency, @@ -690,8 +685,7 @@ def get_portability_package_data(): 'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'], 'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'], 'milvus': milvus_dependency, - 'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1'], - 'adk': ['google-adk==1.28.1'] + 'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1'] }, zip_safe=False, # PyPI package information.