Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_mi300x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

# MiniMax-M3 MXFP8 MI300X (gfx942) single-node vLLM recipe.
# Reuses the dedicated ROCm image and applies the checked-in hybrid gfx94x
# MXFP8 MoE patch before starting vLLM. Block size 128 is mandatory for MSA
# sparse attention. Keep the default BF16 KV cache on gfx942: the checkpoint
# has no calibrated q/prob scales for ROCm FP8 attention, and vLLM's fallback
# scale of 1.0 corrupts model accuracy.
# MXFP8 MoE patch. Short-context EP8 uses the measured native/BF16 policy;
# long-context EP8 uses sparse local-route BF16 GEMMs with fused SwiGLU.
# Block size 128 is mandatory for MSA sparse attention. Keep the default BF16
# KV cache on gfx942: the checkpoint has no calibrated q/prob scales for ROCm
# FP8 attention, and vLLM's fallback scale of 1.0 corrupts model accuracy.
# Target image vLLM revision: 4a560dd8db67c270f5e2afb614558271b76f2294.

source "$(dirname "$0")/../../benchmark_lib.sh"
Expand Down Expand Up @@ -36,6 +37,7 @@ print(Path(vllm.__file__).resolve().parent.parent)
PY
)"
MXFP8_PATCH="$(dirname "$0")/minimaxm3_mi300x_mxfp8.patch"
MXFP8_EP_PATCH="$(dirname "$0")/minimaxm3_mi300x_ep_mxfp8.patch"
MXFP8_ORACLE="$VLLM_PACKAGE_ROOT/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py"
if ! grep -q "Using fused CDNA3 (gfx94x)" "$MXFP8_ORACLE"; then
if ! patch --batch --forward -d "$VLLM_PACKAGE_ROOT" -p1 < "$MXFP8_PATCH"; then
Expand All @@ -47,6 +49,16 @@ if ! grep -q "Using fused CDNA3 (gfx94x)" "$MXFP8_ORACLE"; then
echo "MI300X MXFP8 backend marker is missing after patching" >&2
exit 1
fi
if ! grep -q "profiled gfx94x MiniMax-M3 EP8" "$MXFP8_ORACLE"; then
if ! patch --batch --forward -d "$VLLM_PACKAGE_ROOT" -p1 < "$MXFP8_EP_PATCH"; then
echo "Failed to apply the MI300X EP8 MXFP8 optimization patch" >&2
exit 1
fi
fi
if ! grep -q "profiled gfx94x MiniMax-M3 EP8" "$MXFP8_ORACLE"; then
echo "MI300X EP8 MXFP8 optimization marker is missing after patching" >&2
exit 1
fi

if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi

Expand Down
Loading