diff --git a/.github/workflows/README.md b/.github/workflows/README.md index de0a3dcab..2bacf4fbc 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -20,7 +20,9 @@ positional arguments: configurations for a runner type. For instance, to validate that all configs that specify an h200 runner successfully run across all h200 runner nodes. - test-config Generate full sweep for specific config keys. + test-config Generate the standard sweep for specific config keys by + default; pass --full for all intermediate concurrency + points. Supports wildcard patterns (* and ?) for matching multiple keys at once. @@ -129,7 +131,11 @@ This will only include runner nodes whose names contain "mi300x-amd" ## `test-config` Command -The `test-config` command generates the full sweep for one or more specific config keys. This is useful for testing individual configurations without filtering by model prefix, framework, etc. +The `test-config` command generates the standard sweep for one or more specific +config keys. By default this keeps only the two configured concurrency +endpoints per parallelism config. Pass `--full` to restore the full +intermediate concurrency sweep. Explicit `--conc` values remain authoritative +and are respected even without `--full`. ``` usage: generate_sweep_configs.py test-config @@ -137,6 +143,7 @@ usage: generate_sweep_configs.py test-config [--runner-config RUNNER_CONFIG] --config-keys CONFIG_KEYS [CONFIG_KEYS ...] [--conc CONC [CONC ...]] + [--full] ``` Config keys support **wildcard patterns** using `*` (matches any characters) and `?` (matches a single character). Patterns that match no keys will raise an error. @@ -173,7 +180,12 @@ test-config --config-keys dsr1* --config-files .github/configs/nvidia-master.yam test-config --config-keys dsr1-fp4-b200-sglang gptoss* --config-files .github/configs/nvidia-master.yaml ``` -**Override concurrency for targeted testing:** +**Run the full intermediate concurrency sweep for a specific config:** +``` +test-config --config-keys dsr1-fp4-b200-sglang --config-files .github/configs/nvidia-master.yaml --full +``` + +**Override concurrency for targeted testing (works independently of standard vs full sweep):** ``` test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvidia-master.yaml ``` diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index b5b474471..7c1e44517 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -98,7 +98,11 @@ jobs: **Subcommand reference:** - `full-sweep`: Use this subcommand with filter flags like `--model-prefix`, `--framework`, `--precision`, `--runner-type`, `--min-conc`, `--max-conc`, `--seq-lens`. This is the primary subcommand for running benchmarks. - - `test-config`: Use this subcommand ONLY when prompted to with 'test-config'. Uses the flags `--config-files` and `--config-keys`, does NOT accept any other arguments. + - `test-config`: Use this subcommand ONLY when prompted to with 'test-config'. + Requires `--config-files` and `--config-keys`. It optionally accepts + `--conc` for an explicit subset or `--full` to sweep all intermediate + concurrency points. Without either flag, `test-config` runs the + standard endpoint-only sweep. Examples: @@ -122,16 +126,25 @@ jobs: generate-cli-command: "full-sweep --config-files .github/configs/nvidia-master.yaml --single-node --model-prefix dsr1 --min-conc 4 --max-conc 4 --seq-lens 1k1k" ``` - **Test specific config keys (MUST USE `--conc`):** + **Test specific config keys with explicit concurrency:** ``` generate-cli-command: "test-config --config-files .github/configs/nvidia-master.yaml --config-keys dsr1-fp4-b200-sglang --conc 4" ``` + **Test specific config keys with full sweep:** + ``` + generate-cli-command: "test-config --config-files .github/configs/nvidia-master.yaml --config-keys dsr1-fp4-b200-sglang --full" + ``` + **IMPORTANT: Keep runs precise and efficient:** - Use `full-sweep` with filter flags to narrow down the benchmark scope - "full-sweep" does NOT mean running everything - When using `full-sweep`, you must use `--min-conc` and `--max-conc` together to specify a single concurrency value. Unless prompted otherwise, use `--min-conc 4 --max-conc 4` - When using `full-sweep`, you can use `--seq-lens` to specify sequence lengths (choices: 1k1k, 8k1k). Unless prompted otherwise, use `--seq-lens 1k1k` - - Use `test-config` ONLY when given specific config keys to test - Use `--config-files`, `--config-keys`, and `--conc` flags ONLY + - Use `test-config` ONLY when given specific config keys to test. + Prefer explicit `--conc` values for the narrowest run, or use + `--full` only when the request specifically needs the entire + intermediate concurrency sweep. + - When using `test-config`, explicit `--conc` values are authoritative even without `--full` - Always filter by specific models, frameworks, precision, conc, or config keys when possible ## Monitor workflow execution diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 44e335f49..ef7e34a0d 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -40,8 +40,18 @@ jobs: setup: runs-on: ubuntu-latest if: >- - (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) || - (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]')) + ( + github.event_name == 'pull_request' && + !github.event.pull_request.draft && + ( + contains(github.event.pull_request.labels.*.name, 'sweep-enabled') || + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + ) + ) || + ( + github.event_name != 'pull_request' && + !contains(github.event.head_commit.message, '[skip-sweep]') + ) outputs: search-space-config: ${{ steps.setup.outputs.search-space-config }} steps: @@ -51,6 +61,17 @@ jobs: fetch-depth: 0 - id: setup + env: + IS_PUSH_MAIN: >- + ${{ + github.event_name == 'push' && + github.ref == 'refs/heads/main' + }} + HAS_FULL_SWEEP_LABEL: >- + ${{ + github.event_name == 'pull_request' && + contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') + }} run: | pip install pydantic @@ -62,12 +83,21 @@ jobs: HEAD_REF="${{ github.event.after }}" fi - CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \ - --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \ - --base-ref "$BASE_REF" \ - --head-ref "$HEAD_REF") + PROCESS_CHANGELOG_CMD=( + python3 + "${GITHUB_WORKSPACE}/utils/process_changelog.py" + --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml" + --base-ref "$BASE_REF" + --head-ref "$HEAD_REF" + ) - echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT + if [ "$IS_PUSH_MAIN" = "true" ] || [ "$HAS_FULL_SWEEP_LABEL" = "true" ]; then + PROCESS_CHANGELOG_CMD+=(--full) + fi + + CONFIG_JSON=$("${PROCESS_CHANGELOG_CMD[@]}") + + echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT" sweep-multi-node-1k1k: needs: setup diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py index bc4562415..4cc6963e0 100644 --- a/utils/matrix_logic/generate_sweep_configs.py +++ b/utils/matrix_logic/generate_sweep_configs.py @@ -1,4 +1,3 @@ -from ast import For import fnmatch import json import argparse @@ -536,10 +535,11 @@ def get_lowest_conc(search_space_entry): def generate_test_config_sweep(args, all_config_data): - """Generate full sweep for specific config keys. + """Generate sweep for specific config keys. Validates that all specified config keys exist before generating. - Expands all configs fully without any filtering. + Standard sweep keeps only concurrency endpoints unless ``--full`` or + explicit ``--conc`` values are provided. """ resolved_keys = expand_config_keys(args.config_keys, all_config_data.keys()) @@ -585,12 +585,16 @@ def generate_test_config_sweep(args, all_config_data): if conc > conc_end: conc = conc_end - # Apply --conc filter if provided (only for test-config) + # Respect explicit --conc requests regardless of sweep tier if getattr(args, 'conc', None): conc_values = [c for c in conc_values if c in args.conc] if not conc_values: # No intersection with requested conc values; skip continue + # Standard sweep: only the two configured endpoints + # Pass --full to sweep all intermediate points + elif len(conc_values) > 1 and not getattr(args, 'full', False): + conc_values = [conc_values[0], conc_values[-1]] entry = { Fields.IMAGE.value: image, @@ -634,12 +638,16 @@ def generate_test_config_sweep(args, all_config_data): if conc > conc_end: conc = conc_end - # Apply --conc filter if provided (only for test-config) + # Respect explicit --conc requests regardless of sweep tier if getattr(args, 'conc', None): conc_values = [c for c in conc_values if c in args.conc] if not conc_values: # No intersection with requested conc values; skip continue + # Standard sweep: only the two configured endpoints + # Pass --full to sweep all intermediate points + elif len(conc_values) > 1 and not getattr(args, 'full', False): + conc_values = [conc_values[0], conc_values[-1]] for conc in conc_values: entry = { @@ -890,7 +898,7 @@ def main(): 'test-config', parents=[parent_parser], add_help=False, - help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.' + help='Generate sweep for specific config keys. Standard sweep by default; pass --full for all intermediate concurrency points.' ) test_config_keys_parser.add_argument( '--config-keys', @@ -903,7 +911,16 @@ def main(): nargs='+', type=int, required=False, - help='Only include these concurrency values. Values must exist in the config conc-range/list.' + help='Only include these concurrency values. Explicit --conc values are respected even without --full.' + ) + test_config_keys_parser.add_argument( + '--full', + action='store_true', + help=( + 'Sweep all intermediate concurrency points (full sweep). ' + 'Without this flag, only the two configured concurrency ' + 'endpoints are swept (standard sweep).' + ) ) test_config_keys_parser.add_argument( '-h', '--help', diff --git a/utils/matrix_logic/test_generate_sweep_configs.py b/utils/matrix_logic/test_generate_sweep_configs.py index d05299472..0457534a9 100644 --- a/utils/matrix_logic/test_generate_sweep_configs.py +++ b/utils/matrix_logic/test_generate_sweep_configs.py @@ -7,6 +7,7 @@ seq_len_to_str, generate_full_sweep, generate_runner_model_sweep_config, + generate_test_config_sweep, apply_node_type_defaults, expand_config_keys, mark_eval_entries, @@ -149,6 +150,121 @@ def full_sweep_args_multi_node(): return args +@pytest.fixture +def test_config_args(): + """Args namespace for test-config subcommand (standard sweep). + + Tests that use multi-node configs must override config_keys: + args.config_keys = ["test-multi-node"] + Tests that use 8k1k config must override: + args.config_keys = ["test-single-node-8k1k"] + + NOTE: generate_test_config_sweep() accesses args.config_keys directly and + args.conc / args.full via getattr. The --no-evals / --evals-only flags are + handled in main() after the function returns. + """ + args = argparse.Namespace() + args.config_keys = ["test-single-node"] + args.conc = None + args.full = False + return args + + +@pytest.fixture +def single_node_test_config(): + """Minimal single-node config for generate_test_config_sweep.""" + return { + "test-single-node": { + "image": "test-image:latest", + "model": "test-model", + "model-prefix": "test", + "precision": "fp8", + "framework": "sglang", + "runner": "h200", + "seq-len-configs": [ + { + "isl": seq_len_stoi["1k1k"][0], + "osl": seq_len_stoi["1k1k"][1], + "search-space": [ + { + "tp": 1, + "conc-start": 4, + "conc-end": 64, + } + ], + } + ], + } + } + + +@pytest.fixture +def single_node_8k1k_test_config(): + """Single-node 8k1k config for eval marking tests.""" + return { + "test-single-node-8k1k": { + "image": "test-image:latest", + "model": "test-model", + "model-prefix": "test", + "precision": "fp8", + "framework": "sglang", + "runner": "h200", + "seq-len-configs": [ + { + "isl": seq_len_stoi["8k1k"][0], + "osl": seq_len_stoi["8k1k"][1], + "search-space": [ + { + "tp": 1, + "conc-start": 4, + "conc-end": 64, + } + ], + } + ], + } + } + + +@pytest.fixture +def multinode_test_config(): + """Minimal multi-node config for generate_test_config_sweep.""" + return { + "test-multi-node": { + "image": "test-image:latest", + "model": "test-model", + "model-prefix": "test", + "precision": "fp4", + "framework": "dynamo-trt", + "runner": "gb200", + "multinode": True, + "seq-len-configs": [ + { + "isl": seq_len_stoi["1k1k"][0], + "osl": seq_len_stoi["1k1k"][1], + "search-space": [ + { + "prefill": { + "num-worker": 2, + "tp": 4, + "ep": 4, + "dp-attn": True, + }, + "decode": { + "num-worker": 1, + "tp": 8, + "ep": 8, + "dp-attn": True, + }, + "conc-list": [500, 1000, 2000, 4000], + } + ], + } + ], + } + } + + # ============================================================================= # Test seq_len mappings # ============================================================================= @@ -1181,7 +1297,6 @@ class TestArgumentDefaults: def test_runner_config_default_value(self): """Verify --runner-config defaults to .github/configs/runners.yaml.""" import sys - from generate_sweep_configs import main # Save original sys.argv original_argv = sys.argv @@ -1198,7 +1313,6 @@ def test_runner_config_default_value(self): # Parse args using the ArgumentParser from main # We need to access the parser directly import argparse - from generate_sweep_configs import main # Create the same parent parser as in main() parent_parser = argparse.ArgumentParser(add_help=False) @@ -1743,3 +1857,368 @@ def test_never_marks_all_entries(self): non_prefill = [x for x in result if 'prefill' not in x] assert not all(x['run-eval'] for x in non_prefill), \ "mark_eval_entries must not mark all entries — would break e2e splitting" + + +class TestGenerateTestConfigSweep: + """Tests for generate_test_config_sweep baseline behavior.""" + + def test_single_node_standard_entry_count_and_conc_values( + self, test_config_args, single_node_test_config + ): + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 2 + assert {entry["conc"] for entry in result} == {4, 64} + + def test_single_node_entry_fields_and_values( + self, test_config_args, single_node_test_config + ): + entry = generate_test_config_sweep( + test_config_args, single_node_test_config + )[0] + + assert set(entry) == { + "image", + "model", + "model-prefix", + "precision", + "framework", + "runner", + "isl", + "osl", + "tp", + "conc", + "max-model-len", + "ep", + "dp-attn", + "spec-decoding", + "exp-name", + "disagg", + "run-eval", + } + assert entry["image"] == "test-image:latest" + assert entry["model"] == "test-model" + assert entry["model-prefix"] == "test" + assert entry["precision"] == "fp8" + assert entry["framework"] == "sglang" + assert entry["runner"] == "h200" + assert entry["isl"] == 1024 + assert entry["osl"] == 1024 + assert entry["tp"] == 1 + assert entry["ep"] == 1 + assert entry["dp-attn"] is False + assert entry["spec-decoding"] == "none" + assert entry["max-model-len"] == 2248 + assert entry["exp-name"] == "test_1k1k" + assert entry["disagg"] is False + assert entry["run-eval"] is False + + def test_single_node_multiple_tp_values( + self, test_config_args, single_node_test_config + ): + config = single_node_test_config["test-single-node"]["seq-len-configs"][0] + config["search-space"].append({"tp": 2, "conc-start": 8, "conc-end": 32}) + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 4 + assert {(entry["tp"], entry["conc"]) for entry in result} == { + (1, 4), + (1, 64), + (2, 8), + (2, 32), + } + + def test_single_node_multiple_seq_len_configs( + self, test_config_args, single_node_test_config + ): + single_node_test_config["test-single-node"]["seq-len-configs"].append( + { + "isl": seq_len_stoi["8k1k"][0], + "osl": seq_len_stoi["8k1k"][1], + "search-space": [{"tp": 1, "conc-start": 4, "conc-end": 64}], + } + ) + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 4 + assert {entry["isl"] for entry in result} == {1024, 8192} + + def test_multi_node_standard_conc_is_stored_as_endpoint_list( + self, test_config_args, multinode_test_config + ): + test_config_args.config_keys = ["test-multi-node"] + + result = generate_test_config_sweep(test_config_args, multinode_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == [500, 4000] + + def test_multi_node_entry_fields( + self, test_config_args, multinode_test_config + ): + test_config_args.config_keys = ["test-multi-node"] + entry = generate_test_config_sweep( + test_config_args, multinode_test_config + )[0] + + assert "prefill" in entry + assert "decode" in entry + assert isinstance(entry["conc"], list) + assert "tp" not in entry + assert entry["spec-decoding"] == "none" + assert entry["disagg"] is False + assert entry["max-model-len"] == 2248 + assert entry["exp-name"] == "test_1k1k" + + def test_config_key_resolution_ignores_other_configs( + self, test_config_args, single_node_test_config + ): + all_config_data = { + **single_node_test_config, + "other-config": { + "image": "other-image:latest", + "model": "other-model", + "model-prefix": "other", + "precision": "fp4", + "framework": "vllm", + "runner": "b200", + "seq-len-configs": [ + { + "isl": seq_len_stoi["1k1k"][0], + "osl": seq_len_stoi["1k1k"][1], + "search-space": [{"tp": 2, "conc-start": 2, "conc-end": 8}], + } + ], + }, + } + + result = generate_test_config_sweep(test_config_args, all_config_data) + + assert len(result) == 2 + assert all(entry["model-prefix"] == "test" for entry in result) + + def test_invalid_config_key_raises(self, test_config_args, single_node_test_config): + test_config_args.config_keys = ["nonexistent-key"] + + with pytest.raises(ValueError, match="Config key\\(s\\) not found"): + generate_test_config_sweep(test_config_args, single_node_test_config) + + +class TestTestConfigParser: + """Regression tests for test-config parser wiring.""" + + def test_test_config_parser_accepts_full_and_conc(self): + parent_parser = argparse.ArgumentParser(add_help=False) + parent_parser.add_argument("--config-files", nargs="+", required=True) + parent_parser.add_argument( + "--runner-config", + default=".github/configs/runners.yaml", + ) + + parser = argparse.ArgumentParser( + description="Generate benchmark configurations from YAML config files" + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + test_config_parser = subparsers.add_parser( + "test-config", + parents=[parent_parser], + add_help=False, + ) + test_config_parser.add_argument("--config-keys", nargs="+", required=True) + test_config_parser.add_argument("--conc", nargs="+", type=int, required=False) + test_config_parser.add_argument("--full", action="store_true") + + args = parser.parse_args( + [ + "test-config", + "--config-files", + "dummy.yaml", + "--config-keys", + "test-single-node", + "--conc", + "4", + "8", + "--full", + ] + ) + + assert args.command == "test-config" + assert args.config_keys == ["test-single-node"] + assert args.conc == [4, 8] + assert args.full is True + + +class TestFullSweepFlag: + """Tests for the test-config --full flag and endpoint reduction behavior.""" + + def test_single_node_standard_produces_two_points( + self, test_config_args, single_node_test_config + ): + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 2 + assert {entry["conc"] for entry in result} == {4, 64} + + def test_single_node_full_produces_all_points( + self, test_config_args, single_node_test_config + ): + test_config_args.full = True + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 5 + assert {entry["conc"] for entry in result} == {4, 8, 16, 32, 64} + + def test_single_node_degenerate_range( + self, test_config_args, single_node_test_config + ): + search_space = single_node_test_config["test-single-node"]["seq-len-configs"][0][ + "search-space" + ][0] + search_space["conc-start"] = 32 + search_space["conc-end"] = 32 + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == 32 + + def test_single_node_conc_list_preserves_ascending_endpoints( + self, test_config_args, single_node_test_config + ): + search_space = single_node_test_config["test-single-node"]["seq-len-configs"][0][ + "search-space" + ][0] + search_space.pop("conc-start") + search_space.pop("conc-end") + search_space["conc-list"] = [2, 4, 8, 16, 32] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 2 + assert {entry["conc"] for entry in result} == {2, 32} + + def test_single_node_conc_list_preserves_descending_order( + self, test_config_args, single_node_test_config + ): + search_space = single_node_test_config["test-single-node"]["seq-len-configs"][0][ + "search-space" + ][0] + search_space.pop("conc-start") + search_space.pop("conc-end") + search_space["conc-list"] = [32, 16, 8, 4, 2] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 2 + assert [entry["conc"] for entry in result] == [32, 2] + + def test_single_node_single_element_conc_list( + self, test_config_args, single_node_test_config + ): + search_space = single_node_test_config["test-single-node"]["seq-len-configs"][0][ + "search-space" + ][0] + search_space.pop("conc-start") + search_space.pop("conc-end") + search_space["conc-list"] = [128] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == 128 + + def test_multi_node_standard_trims_to_endpoints( + self, test_config_args, multinode_test_config + ): + test_config_args.config_keys = ["test-multi-node"] + + result = generate_test_config_sweep(test_config_args, multinode_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == [500, 4000] + + def test_multi_node_preserves_descending_endpoint_order( + self, test_config_args, multinode_test_config + ): + test_config_args.config_keys = ["test-multi-node"] + search_space = multinode_test_config["test-multi-node"]["seq-len-configs"][0][ + "search-space" + ][0] + search_space["conc-list"] = [4000, 2000, 1000, 500] + + result = generate_test_config_sweep(test_config_args, multinode_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == [4000, 500] + + def test_multi_node_full_preserves_all_points( + self, test_config_args, multinode_test_config + ): + test_config_args.config_keys = ["test-multi-node"] + test_config_args.full = True + + result = generate_test_config_sweep(test_config_args, multinode_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == [500, 1000, 2000, 4000] + + def test_full_with_explicit_conc_filter( + self, test_config_args, single_node_test_config + ): + test_config_args.full = True + test_config_args.conc = [8, 32] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 2 + assert {entry["conc"] for entry in result} == {8, 32} + + def test_standard_with_explicit_endpoint_conc_filter( + self, test_config_args, single_node_test_config + ): + test_config_args.conc = [64] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == 64 + + def test_standard_with_explicit_intermediate_conc_filter( + self, test_config_args, single_node_test_config + ): + test_config_args.conc = [16] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert len(result) == 1 + assert result[0]["conc"] == 16 + + def test_explicit_conc_without_match_returns_empty( + self, test_config_args, single_node_test_config + ): + test_config_args.conc = [999] + + result = generate_test_config_sweep(test_config_args, single_node_test_config) + + assert result == [] + + def test_eval_marking_uses_full_sweep_range( + self, test_config_args, single_node_8k1k_test_config + ): + test_config_args.config_keys = ["test-single-node-8k1k"] + test_config_args.full = True + + result = generate_test_config_sweep( + test_config_args, single_node_8k1k_test_config + ) + marked = mark_eval_entries(result) + + eval_concs = {entry["conc"] for entry in marked if entry["run-eval"]} + non_eval_concs = {entry["conc"] for entry in marked if not entry["run-eval"]} + + assert eval_concs == {16, 64} + assert non_eval_concs == {4, 8, 32} diff --git a/utils/process_changelog.py b/utils/process_changelog.py index 7da19d030..5302494b3 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -66,6 +66,11 @@ def main(): parser.add_argument("--base-ref", type=str, required=True) parser.add_argument("--head-ref", type=str, required=True) parser.add_argument("--changelog-file", type=str, required=True) + parser.add_argument( + "--full", + action="store_true", + help="Pass --full to generate_sweep_configs.py (full-sweep-enabled mode: all conc points)", + ) args = parser.parse_args() added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file) @@ -118,6 +123,8 @@ def main(): *MASTER_CONFIGS, "--no-evals", ] + if args.full: + base_cmd.append("--full") try: result = subprocess.run( base_cmd, @@ -143,6 +150,7 @@ def main(): "--config-files", *MASTER_CONFIGS, "--evals-only", + "--full", ] try: eval_result = subprocess.run(