From 985131e7028019a6144c9fe40a94be8e9a405ee1 Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Thu, 21 May 2026 00:49:31 -0700 Subject: [PATCH] fix(interface): reject duplicate names within output_processors (#675) Signed-off-by: SAY-5 --- .../interface/composite_workflow.py | 9 ++++++++ .../interface/test_composite_workflow.py | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/packages/data-designer/src/data_designer/interface/composite_workflow.py b/packages/data-designer/src/data_designer/interface/composite_workflow.py index 98128efb6..e66ec1bff 100644 --- a/packages/data-designer/src/data_designer/interface/composite_workflow.py +++ b/packages/data-designer/src/data_designer/interface/composite_workflow.py @@ -558,6 +558,15 @@ def _validate_distinct_output_processors( config_builder: DataDesignerConfigBuilder, output_processors: list[ProcessorConfig], ) -> None: + seen: set[str] = set() + duplicate_within: set[str] = set() + for processor in output_processors: + if processor.name in seen: + duplicate_within.add(processor.name) + seen.add(processor.name) + if duplicate_within: + names = ", ".join(sorted(duplicate_within)) + raise DataDesignerWorkflowError(f"Output processor names must be distinct within output_processors: {names}.") stage_processor_names = {processor.name for processor in config_builder.get_processor_configs()} duplicate_names = stage_processor_names.intersection(processor.name for processor in output_processors) if duplicate_names: diff --git a/packages/data-designer/tests/interface/test_composite_workflow.py b/packages/data-designer/tests/interface/test_composite_workflow.py index aea4ec20f..2780e7a13 100644 --- a/packages/data-designer/tests/interface/test_composite_workflow.py +++ b/packages/data-designer/tests/interface/test_composite_workflow.py @@ -378,6 +378,27 @@ def test_composite_workflow_rejects_duplicate_output_processor_names( ) +def test_composite_workflow_rejects_duplicate_names_within_output_processors( + stub_artifact_path: Path, + stub_model_providers: list[ModelProvider], + stub_model_configs: list[ModelConfig], +) -> None: + stage = _category_builder(stub_model_configs) + workflow = _data_designer(stub_artifact_path, stub_model_providers).compose_workflow( + name="duplicate-within-output-processors" + ) + + with pytest.raises(DataDesignerWorkflowError, match="distinct within output_processors"): + workflow.add_stage( + "base", + stage, + output_processors=[ + DropColumnsProcessorConfig(name="drop_scratch", column_names=["scratch"]), + DropColumnsProcessorConfig(name="drop_scratch", column_names=["other_scratch"]), + ], + ) + + def test_composite_workflow_rejects_duplicate_stage_names( stub_artifact_path: Path, stub_model_providers: list[ModelProvider],