From ff5b29385bc6c8f6652ce771e62692d645f86057 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:10:17 -0700 Subject: [PATCH 1/3] [AMD] perf-changelog: duplicate dsv4-fp4-mi355x-sglang TP4 fixed-seq-len entry --- perf-changelog.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 43f06f4f1..934f2cb98 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3837,7 +3837,14 @@ - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 - + +- config-keys: + - dsv4-fp4-mi355x-sglang + description: + - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" + - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 + - config-keys: - dsv4-fp4-gb300-dynamo-trt - dsv4-fp4-gb300-dynamo-trt-mtp From efdfbdca459d96fe51220461bbb334f8a0bddb57 Mon Sep 17 00:00:00 2001 From: Bryan Shan <58582368+Oseltamivir@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:11:28 -0700 Subject: [PATCH 2/3] Update perf-changelog.yaml --- perf-changelog.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 934f2cb98..80c6ba88b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3838,13 +3838,6 @@ - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 -- config-keys: - - dsv4-fp4-mi355x-sglang - description: - - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" - - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 - - config-keys: - dsv4-fp4-gb300-dynamo-trt - dsv4-fp4-gb300-dynamo-trt-mtp @@ -3870,3 +3863,10 @@ - "Align MiniMax-M3 B200 vLLM fixed-sequence serving with MiniMax-M2.5 FP8 B200 settings by setting VLLM_FLOAT32_MATMUL_PRECISION=high and restoring max cudagraph capture size 2048." - "Add TP4+EP4 coverage for MiniMax-M3 B200: DP-attention rows for 1k1k/8k1k and the missing non-DP-attention row for 8k1k." pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1779 + +- config-keys: + - dsv4-fp4-mi355x-sglang + description: + - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" + - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 From 5927254002e2327eaef8a6f77f678f38f6ceaa8f Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:14:59 -0700 Subject: [PATCH 3/3] perf-changelog: restore trailing whitespace on line 3840 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 80c6ba88b..90c9cfb26 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3837,7 +3837,7 @@ - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 - + - config-keys: - dsv4-fp4-gb300-dynamo-trt - dsv4-fp4-gb300-dynamo-trt-mtp