From f9df800b366bc6a01c2697404fdd88ac03416120 Mon Sep 17 00:00:00 2001
From: functionstackx <47992694+functionstackx@users.noreply.github.com>
Date: Sat, 13 Jun 2026 14:49:39 -0400
Subject: [PATCH 1/2] minimaxm3 H200/H100 MTP: start TP-only latency rows at
 conc 1

Lower the conc-start of the TP-only (latency) search-space rows from 4
to 1 for minimaxm3-fp8-h200-vllm-mtp and minimaxm3-fp8-h100-vllm-mtp,
matching the conc-1 start used on the non-MTP day-zero recipes so the
sweeps capture the single-request latency point. TEP/DEP rows keep
their higher concurrency starts (128/256). Follow-up to #1739
(6e8ebf56).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .github/configs/nvidia-master.yaml | 12 ++++++------
 perf-changelog.yaml                |  7 +++++++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 6229cf455..2b2848714 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -11934,16 +11934,16 @@ minimaxm3-fp8-h200-vllm-mtp:
     - isl: 1024
       osl: 1024
       search-space:
-      - { tp: 4, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+      - { tp: 4, conc-start: 1, conc-end: 64, spec-decoding: mtp }
       - { tp: 4, ep: 4, conc-start: 128, conc-end: 256, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp }
+      - { tp: 8, conc-start: 1, conc-end: 128, spec-decoding: mtp }
       - { tp: 8, ep: 8, conc-start: 256, conc-end: 256, spec-decoding: mtp }
       - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 512, spec-decoding: mtp }
     - isl: 8192
       osl: 1024
       search-space:
-      - { tp: 4, conc-start: 4, conc-end: 32, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp }
+      - { tp: 4, conc-start: 1, conc-end: 32, spec-decoding: mtp }
+      - { tp: 8, conc-start: 1, conc-end: 128, spec-decoding: mtp }
       - { tp: 8, ep: 8, conc-start: 256, conc-end: 256, spec-decoding: mtp }
       - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp }
 
@@ -11966,10 +11966,10 @@ minimaxm3-fp8-h100-vllm-mtp:
     - isl: 1024
       osl: 1024
       search-space:
-      - { tp: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp }
+      - { tp: 8, conc-start: 1, conc-end: 128, spec-decoding: mtp }
       - { tp: 8, ep: 8, conc-start: 256, conc-end: 256, spec-decoding: mtp }
     - isl: 8192
       osl: 1024
       search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+      - { tp: 8, conc-start: 1, conc-end: 64, spec-decoding: mtp }
       - { tp: 8, ep: 8, conc-start: 128, conc-end: 256, spec-decoding: mtp }
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 155da5483..55f34b749 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3717,3 +3717,10 @@
     - "B300-parity layouts and concurrency ranges: TP8, TP8+EP8, TP4, TP4+EP4, TP2+EP2, and TP8+EP8 dp-attn (DEP) across 1k1k and 8k1k"
     - "launch_mi355x-amds.sh routes M3 weights to NFS /it-share/hf-hub-cache instead of node-local /var/lib NVMe"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1725
+
+- config-keys:
+    - minimaxm3-fp8-h200-vllm-mtp
+    - minimaxm3-fp8-h100-vllm-mtp
+  description:
+    - "Start the TP-only latency rows of the MiniMax-M3 EAGLE3 MTP sweeps (H200, H100) at concurrency 1 instead of 4, matching the conc-1 start used on the non-MTP day-zero recipes — captures the single-request latency point. TEP/DEP rows keep their higher concurrency starts."
+  pr-link: TBD

From c8615e80608aafccdcbdc9c463425de247a2f501 Mon Sep 17 00:00:00 2001
From: functionstackx <47992694+functionstackx@users.noreply.github.com>
Date: Sat, 13 Jun 2026 14:49:53 -0400
Subject: [PATCH 2/2] perf-changelog: fill in PR link for minimaxm3 H200/H100
 MTP conc-1

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 55f34b749..c6b069a68 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3723,4 +3723,4 @@
     - minimaxm3-fp8-h100-vllm-mtp
   description:
     - "Start the TP-only latency rows of the MiniMax-M3 EAGLE3 MTP sweeps (H200, H100) at concurrency 1 instead of 4, matching the conc-1 start used on the non-MTP day-zero recipes — captures the single-request latency point. TEP/DEP rows keep their higher concurrency starts."
-  pr-link: TBD
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1743