SemiAnalysisAI · Oseltamivir · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
@@ -2847,10 +2847,11 @@ minimaxm3-fp8-mi355x-vllm-mtp:
       - { tp: 4, conc-start: 1, conc-end: 64, spec-decoding: mtp }
       - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256, spec-decoding: mtp }
 
-# MiniMax-M3 MXFP8 MI300X day-zero recipe. Reuse the dedicated ROCm image and
-# MI355X serving shape, but retain the default BF16 KV cache because this
-# checkpoint lacks calibrated ROCm FP8 attention scales. Use the TP8-only H100
-# search space: TP8 for latency and TP8+EP8 (TEP) at high concurrency.
+# MiniMax-M3 MXFP8 MI300X recipe. Apply the checked-in hybrid gfx94x MXFP8 MoE
+# patch to the dedicated ROCm image: BF16 for small TP batches and EP, native
+# compressed MXFP8 for larger TP batches and long context. Retain the default
+# BF16 KV cache because this checkpoint lacks calibrated ROCm FP8 attention
+# scales. Use TP8 for latency and TP8+EP8 at high concurrency.
 minimaxm3-fp8-mi300x-vllm:
   image: vllm/vllm-openai-rocm:minimax-m3
   model: MiniMaxAI/MiniMax-M3-MXFP8

diff --git a/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_mi300x.sh b/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_mi300x.sh
@@ -1,10 +1,12 @@
 #!/usr/bin/env bash
 
 # MiniMax-M3 MXFP8 MI300X (gfx942) single-node vLLM recipe.
-# Reuses the dedicated ROCm image and the MI355X serving shape. Block size 128
-# is mandatory for MSA sparse attention. Keep the default BF16 KV cache on
-# gfx942: the checkpoint has no calibrated q/prob scales for ROCm FP8
-# attention, and vLLM's fallback scale of 1.0 corrupts model accuracy.
+# Reuses the dedicated ROCm image and applies the checked-in hybrid gfx94x
+# MXFP8 MoE patch before starting vLLM. Block size 128 is mandatory for MSA
+# sparse attention. Keep the default BF16 KV cache on gfx942: the checkpoint
+# has no calibrated q/prob scales for ROCm FP8 attention, and vLLM's fallback
+# scale of 1.0 corrupts model accuracy.
+# Target image vLLM revision: 4a560dd8db67c270f5e2afb614558271b76f2294.
 
 source "$(dirname "$0")/../../benchmark_lib.sh"
 
@@ -24,6 +26,28 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
   echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
 fi
 
+VLLM_PACKAGE_ROOT="$(
+    python - <<'PY'
+from pathlib import Path
+
+import vllm
+
+print(Path(vllm.__file__).resolve().parent.parent)
+PY
+)"
+MXFP8_PATCH="$(dirname "$0")/minimaxm3_mi300x_mxfp8.patch"
+MXFP8_ORACLE="$VLLM_PACKAGE_ROOT/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py"
+if ! grep -q "Using fused CDNA3 (gfx94x)" "$MXFP8_ORACLE"; then
+    if ! patch --batch --forward -d "$VLLM_PACKAGE_ROOT" -p1 < "$MXFP8_PATCH"; then
+        echo "Failed to apply the MI300X MXFP8 patch" >&2
+        exit 1
+    fi
+fi
+if ! grep -q "Using fused CDNA3 (gfx94x)" "$MXFP8_ORACLE"; then
+    echo "MI300X MXFP8 backend marker is missing after patching" >&2
+    exit 1
+fi
+
 if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
 
 if [ -n "$ROCR_VISIBLE_DEVICES" ]; then