SemiAnalysisAI · n0madsky · Apr 16, 2026
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
@@ -20,7 +20,9 @@ positional arguments:
                         configurations for a runner type. For instance, to
                         validate that all configs that specify an h200 runner
                         successfully run across all h200 runner nodes.
-    test-config         Generate full sweep for specific config keys.
+    test-config         Generate the standard sweep for specific config keys by
+                        default; pass --full for all intermediate concurrency
+                        points.
                         Supports wildcard patterns (* and ?) for matching
                         multiple keys at once.
 
@@ -129,14 +131,19 @@ This will only include runner nodes whose names contain "mi300x-amd"
 
 ## `test-config` Command
 
-The `test-config` command generates the full sweep for one or more specific config keys. This is useful for testing individual configurations without filtering by model prefix, framework, etc.
+The `test-config` command generates a sweep for one or more specific config
+keys. By default (the standard sweep) it keeps only the first and last
+configured concurrency values per parallelism config; pass `--full` to sweep
+every intermediate point. Explicit `--conc` values take precedence over both
+modes: only requested values that appear in the config's full sweep are run.
 
 ```
 usage: generate_sweep_configs.py test-config
     --config-files CONFIG_FILES [CONFIG_FILES ...]
     [--runner-config RUNNER_CONFIG]
     --config-keys CONFIG_KEYS [CONFIG_KEYS ...]
     [--conc CONC [CONC ...]]
+    [--full]
 ```
 
 Config keys support **wildcard patterns** using `*` (matches any characters) and `?` (matches a single character). Patterns that match no keys will raise an error.
@@ -173,7 +180,12 @@ test-config --config-keys dsr1* --config-files .github/configs/nvidia-master.yam
 test-config --config-keys dsr1-fp4-b200-sglang gptoss* --config-files .github/configs/nvidia-master.yaml
 ```
 
-**Override concurrency for targeted testing:**
+**Run the full intermediate concurrency sweep for a specific config:**
+```
+test-config --config-keys dsr1-fp4-b200-sglang --config-files .github/configs/nvidia-master.yaml --full
+```
+
+**Override concurrency for targeted testing (works independently of standard vs full sweep):**
 ```
 test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvidia-master.yaml
 ```

diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
@@ -98,7 +98,11 @@ jobs:
 
             **Subcommand reference:**
             - `full-sweep`: Use this subcommand with filter flags like `--model-prefix`, `--framework`, `--precision`, `--runner-type`, `--min-conc`, `--max-conc`, `--seq-lens`. This is the primary subcommand for running benchmarks.
-            - `test-config`: Use this subcommand ONLY when prompted to with 'test-config'. Uses the flags `--config-files` and `--config-keys`, does NOT accept any other arguments.
+            - `test-config`: Use this subcommand ONLY when the prompt explicitly asks for 'test-config'.
+              Requires `--config-files` and `--config-keys`. It optionally accepts
+              `--conc` for an explicit subset or `--full` to sweep all intermediate
+              concurrency points. Without either flag, `test-config` runs the
+              standard endpoint-only sweep.
 
             Examples:
 
@@ -122,16 +126,25 @@ jobs:
             generate-cli-command: "full-sweep --config-files .github/configs/nvidia-master.yaml --single-node --model-prefix dsr1 --min-conc 4 --max-conc 4 --seq-lens 1k1k"
             ```
 
-            **Test specific config keys (MUST USE `--conc`):**
+            **Test specific config keys with explicit concurrency:**
             ```
             generate-cli-command: "test-config --config-files .github/configs/nvidia-master.yaml --config-keys dsr1-fp4-b200-sglang --conc 4"
             ```
 
+            **Test specific config keys with full sweep:**
+            ```
+            generate-cli-command: "test-config --config-files .github/configs/nvidia-master.yaml --config-keys dsr1-fp4-b200-sglang --full"
+            ```
+
             **IMPORTANT: Keep runs precise and efficient:**
             - Use `full-sweep` with filter flags to narrow down the benchmark scope - "full-sweep" does NOT mean running everything
             - When using `full-sweep`, you must use `--min-conc` and `--max-conc` together to specify a single concurrency value. Unless prompted otherwise, use `--min-conc 4 --max-conc 4`
             - When using `full-sweep`, you can use `--seq-lens` to specify sequence lengths (choices: 1k1k, 8k1k). Unless prompted otherwise, use `--seq-lens 1k1k`
-            - Use `test-config` ONLY when given specific config keys to test - Use `--config-files`, `--config-keys`, and `--conc` flags ONLY
+            - Use `test-config` ONLY when given specific config keys to test.
+              Prefer explicit `--conc` values for the narrowest run, or use
+              `--full` only when the request specifically needs the entire
+              intermediate concurrency sweep.
+            - When using `test-config`, explicit `--conc` values take precedence: they apply without `--full`, and `--full` has no effect when `--conc` is given
             - Always filter by specific models, frameworks, precision, conc, or config keys when possible
 
             ## Monitor workflow execution

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
@@ -40,8 +40,18 @@ jobs:
     setup:
         runs-on: ubuntu-latest
         if: >-
-            (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) ||
-            (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]'))
+            (
+              github.event_name == 'pull_request' &&
+              !github.event.pull_request.draft &&
+              (
+                contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
+                contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+              )
+            ) ||
+            (
+              github.event_name != 'pull_request' &&
+              !contains(github.event.head_commit.message, '[skip-sweep]')
+            )
         outputs:
             search-space-config: ${{ steps.setup.outputs.search-space-config }}
         steps:
@@ -51,6 +61,17 @@ jobs:
                   fetch-depth: 0
 
             - id: setup
+              env:
+                  IS_PUSH_MAIN: >-
+                      ${{
+                        github.event_name == 'push' &&
+                        github.ref == 'refs/heads/main'
+                      }}
+                  HAS_FULL_SWEEP_LABEL: >-
+                      ${{
+                        github.event_name == 'pull_request' &&
+                        contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
+                      }}
               run: |
                   pip install pydantic
 
@@ -62,12 +83,21 @@ jobs:
                       HEAD_REF="${{ github.event.after }}"
                   fi
 
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \
-                      --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \
-                      --base-ref "$BASE_REF" \
-                      --head-ref "$HEAD_REF")
+                  PROCESS_CHANGELOG_CMD=(
+                      python3
+                      "${GITHUB_WORKSPACE}/utils/process_changelog.py"
+                      --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml"
+                      --base-ref "$BASE_REF"
+                      --head-ref "$HEAD_REF"
+                  )
 
-                  echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
+                  if [ "$IS_PUSH_MAIN" = "true" ] || [ "$HAS_FULL_SWEEP_LABEL" = "true" ]; then
+                      PROCESS_CHANGELOG_CMD+=(--full)
+                  fi
+
+                  CONFIG_JSON=$("${PROCESS_CHANGELOG_CMD[@]}")
+
+                  echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
 
     sweep-multi-node-1k1k:
         needs: setup

diff --git a/utils/matrix_logic/generate_sweep_configs.py b/utils/matrix_logic/generate_sweep_configs.py
@@ -1,4 +1,3 @@
-from ast import For
 import fnmatch
 import json
 import argparse
@@ -536,10 +535,11 @@ def get_lowest_conc(search_space_entry):
 
 
 def generate_test_config_sweep(args, all_config_data):
-    """Generate full sweep for specific config keys.
+    """Generate sweep for specific config keys.
 
     Validates that all specified config keys exist before generating.
-    Expands all configs fully without any filtering.
+    Standard sweep keeps only concurrency endpoints unless ``--full`` or
+    explicit ``--conc`` values are provided.
     """
     resolved_keys = expand_config_keys(args.config_keys, all_config_data.keys())
 
@@ -585,12 +585,16 @@ def generate_test_config_sweep(args, all_config_data):
                             if conc > conc_end:
                                 conc = conc_end
 
-                    # Apply --conc filter if provided (only for test-config)
+                    # Respect explicit --conc requests regardless of sweep tier
                     if getattr(args, 'conc', None):
                         conc_values = [c for c in conc_values if c in args.conc]
                         if not conc_values:
                             # No intersection with requested conc values; skip
                             continue
+                    # Standard sweep: only the two configured endpoints
+                    # Pass --full to sweep all intermediate points
+                    elif len(conc_values) > 1 and not getattr(args, 'full', False):
+                        conc_values = [conc_values[0], conc_values[-1]]
 
                     entry = {
                         Fields.IMAGE.value: image,
@@ -634,12 +638,16 @@ def generate_test_config_sweep(args, all_config_data):
                             if conc > conc_end:
                                 conc = conc_end
 
-                    # Apply --conc filter if provided (only for test-config)
+                    # Respect explicit --conc requests regardless of sweep tier
                     if getattr(args, 'conc', None):
                         conc_values = [c for c in conc_values if c in args.conc]
                         if not conc_values:
                             # No intersection with requested conc values; skip
                             continue
+                    # Standard sweep: only the two configured endpoints
+                    # Pass --full to sweep all intermediate points
+                    elif len(conc_values) > 1 and not getattr(args, 'full', False):
+                        conc_values = [conc_values[0], conc_values[-1]]
 
                     for conc in conc_values:
                         entry = {
@@ -890,7 +898,7 @@ def main():
         'test-config',
         parents=[parent_parser],
         add_help=False,
-        help='Generate full sweep for specific config keys. Validates that all specified keys exist before generating.'
+        help='Generate sweep for specific config keys. Standard sweep by default; pass --full for all intermediate concurrency points.'
     )
     test_config_keys_parser.add_argument(
         '--config-keys',
@@ -903,7 +911,16 @@ def main():
         nargs='+',
         type=int,
         required=False,
-        help='Only include these concurrency values. Values must exist in the config conc-range/list.'
+        help='Only include these concurrency values. Explicit --conc values are respected even without --full.'
+    )
+    test_config_keys_parser.add_argument(
+        '--full',
+        action='store_true',
+        help=(
+            'Sweep all intermediate concurrency points (full sweep). '
+            'Without this flag, only the two configured concurrency '
+            'endpoints are swept (standard sweep).'
+        )
     )
     test_config_keys_parser.add_argument(
         '-h', '--help',