diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 43f06f4f1..90c9cfb26 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3863,3 +3863,10 @@ - "Align MiniMax-M3 B200 vLLM fixed-sequence serving with MiniMax-M2.5 FP8 B200 settings by setting VLLM_FLOAT32_MATMUL_PRECISION=high and restoring max cudagraph capture size 2048." - "Add TP4+EP4 coverage for MiniMax-M3 B200: DP-attention rows for 1k1k/8k1k and the missing non-DP-attention row for 8k1k." pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1779 + +- config-keys: + - dsv4-fp4-mi355x-sglang + description: + - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios." + - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)." + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762