From c8541ac81ebf69fcaa5c943cfc4995139d5a801e Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 11:35:18 +0900 Subject: [PATCH 1/8] [AMD] update dsv4-fp4-mi355x-sglang fixed-seq-len search space in master yaml Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 1e32830dd..42c801d45 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2146,16 +2146,16 @@ dsv4-fp4-mi355x-sglang: multinode: false scenarios: fixed-seq-len: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } - - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 32 } + #- isl: 1024 + # osl: 1024 + # search-space: + # - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } + # - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 32 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } - - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 32 } + - { tp: 4, dp-attn: true, conc-list: [32, 64, 1024] } + #- { tp: 8, dp-attn: false, conc-start: 1, conc-end: 32 } # MTP variant of dsv4-fp4-mi355x-sglang. 
Mirrors the base search space and adds # spec-decoding: mtp, which routes to dsv4_fp4_mi355x_sglang_mtp.sh (EAGLE From a1f0502df09dad5fd470c1c7cfaac59069f76796 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 12:57:49 +0900 Subject: [PATCH 2/8] [AMD] update dsv4-fp4-mi355x-sglang fixed-seq-len search space: re-enable isl=1024 and expand isl=8192 Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 42c801d45..5768f9810 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2146,16 +2146,16 @@ dsv4-fp4-mi355x-sglang: multinode: false scenarios: fixed-seq-len: - #- isl: 1024 - # osl: 1024 - # search-space: - # - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } - # - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 32 } + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } + - { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, dp-attn: true, conc-list: [32, 64, 1024] } - #- { tp: 8, dp-attn: false, conc-start: 1, conc-end: 32 } + - { tp: 4, dp-attn: true, conc-list: 32, conc-end: 2048 } + - { tp: 4, dp-attn: false, conc-start: 1, conc-end: 32 } # MTP variant of dsv4-fp4-mi355x-sglang. 
Mirrors the base search space and adds # spec-decoding: mtp, which routes to dsv4_fp4_mi355x_sglang_mtp.sh (EAGLE From 9628d116a43ab12187a446b23f9c96389f78e6e2 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 12:58:56 +0900 Subject: [PATCH 3/8] [AMD] perf-changelog: dsv4-fp4-mi355x-sglang TP4 fixed-seq-len search space update Co-Authored-By: Claude Sonnet 4.6 --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 2cb456f9d..9628f98fd 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3822,3 +3822,10 @@ description: - "Extend MiniMax-M3 MXFP8 H100/H200 non-MTP sweeps to concurrency 1 on the latency rows (H100: TP8; H200: TP4 and TP8) and add full TEP coverage from conc 1 to 256 (H100: TP8+EP8; H200: TP4+EP4 and TP8+EP8, incl. a new TP4+EP4 row for 8k1k). H200 TP8+EP8 upper bound moves 512->256 (high concurrency stays covered by the TP8+EP8 dp-attn DEP rows). DEP rows unchanged" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1761 + +- config-keys: + - dsv4-fp4-mi355x-sglang + description: + - "Add a TP4 dp-attn sweep (conc 16–128) alongside the existing TP8 dp-attn sweep (conc 64–2048) for both isl=1024 and isl=8192 fixed-seq-len scenarios" + - "Switch the TP-only (non-dp-attn) sweep (conc 1–32) from TP8 to TP4 for both isl=1024 and isl=8192 scenarios" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762 From 50b4a947026c48f998a0bafc035dd5e9a6ed4dd3 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 13:22:29 +0900 Subject: [PATCH 4/8] [AMD] narrow dsv4-fp4-mi355x-sglang fixed-seq-len to single isl=8192 dp-attn conc=512 point Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 5768f9810..3d1927625 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2146,16 +2146,16 @@ dsv4-fp4-mi355x-sglang: 
multinode: false scenarios: fixed-seq-len: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } - - { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } + #- isl: 1024 + # osl: 1024 + # search-space: + # - { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } + # - { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, dp-attn: true, conc-list: 32, conc-end: 2048 } - - { tp: 4, dp-attn: false, conc-start: 1, conc-end: 32 } + - { tp: 4, dp-attn: true, conc-start: 512, conc-end: 512 } + # - { tp: 4, dp-attn: false, conc-start: 1, conc-end: 32 } # MTP variant of dsv4-fp4-mi355x-sglang. Mirrors the base search space and adds # spec-decoding: mtp, which routes to dsv4_fp4_mi355x_sglang_mtp.sh (EAGLE From d7b88629a15a45bb4ef457a5c84b439d625052ee Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 13:34:22 +0900 Subject: [PATCH 5/8] [AMD] add memory budget comments and set isl=1024 dp-attn conc=2048 point for dsv4-fp4-mi355x-sglang Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 3d1927625..ef7b9d311 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2135,7 +2135,12 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" +#TP4: 288*0.9 - (745+49)/4 - margin = 50 GB +#ISL+OSL 2048: total = 14,082,048 B ≈ 13.43 MiB +# batch: 3723 +#ISL+OSL 9216: total = 49,377,280 B ≈ 47.1 MiB +# batch: 1061 dsv4-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: deepseek-ai/DeepSeek-V4-Pro @@ -2146,11 +2151,12 @@ dsv4-fp4-mi355x-sglang: multinode: false scenarios: fixed-seq-len: - #- isl: 1024 - # osl: 1024 - # search-space: - # - { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } - # - { tp: 4, 
dp-attn: false, conc-start: 1 , conc-end: 32 } + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, dp-attn: true, conc-start: 2048, conc-end: 2048 } + #- { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } + #- { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } - isl: 8192 osl: 1024 search-space: From 4327aaba0f7bc9e0d6dab204f93f92e60706e44e Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 14:05:21 +0900 Subject: [PATCH 6/8] [AMD] clarify memory budget comments for dsv4-fp4-mi355x-sglang Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index ef7b9d311..f5b3fd1db 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2135,11 +2135,11 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" -#TP4: 288*0.9 - (745+49)/4 - margin = 50 GB -#ISL+OSL 2048: total = 14,082,048 B ≈ 13.43 MiB +#TP4: 288*0.9 - (745+49)/4 - 10 = 50 GB +#ISL+OSL 2048: fp8 kv total = 14,082,048 B ≈ 13.43 MiB # batch: 3723 -#ISL+OSL 9216: total = 49,377,280 B ≈ 47.1 MiB +#ISL+OSL 9216: fp8 kv total = 49,377,280 B ≈ 47.1 MiB # batch: 1061 dsv4-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 From 24bde2a5b6a3761140386f6d837763ca9b041847 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 15:37:51 +0900 Subject: [PATCH 7/8] [AMD] expand dsv4-fp4-mi355x-sglang fixed-seq-len: add TP8/TP4 dp-attn and TP4 TP-only rows for isl=1024 and isl=8192 Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index f5b3fd1db..265fd5aa3 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2154,14 +2154,16 @@ dsv4-fp4-mi355x-sglang: - isl: 1024 
osl: 1024 search-space: - - { tp: 4, dp-attn: true, conc-start: 2048, conc-end: 2048 } - #- { tp: 4, dp-attn: true, conc-start: 64, conc-end: 2048 } - #- { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } + - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } + - { tp: 4, dp-attn: true, conc-start: 16, conc-end: 128 } + - { tp: 4, dp-attn: false, conc-start: 1 , conc-end: 32 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, dp-attn: true, conc-start: 512, conc-end: 512 } - # - { tp: 4, dp-attn: false, conc-start: 1, conc-end: 32 } + - { tp: 8, dp-attn: true, conc-start: 64, conc-end: 2048 } + - { tp: 4, dp-attn: true, conc-start: 16, conc-end: 128 } + - { tp: 4, dp-attn: false, conc-start: 1, conc-end: 32 } + # MTP variant of dsv4-fp4-mi355x-sglang. Mirrors the base search space and adds # spec-decoding: mtp, which routes to dsv4_fp4_mi355x_sglang_mtp.sh (EAGLE From 64213b9810975df453e703222dcc2bd482365f86 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Mon, 15 Jun 2026 15:40:43 +0900 Subject: [PATCH 8/8] [AMD] remove stale memory budget comments from amd-master.yaml Co-Authored-By: Claude Sonnet 4.6 --- .github/configs/amd-master.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 0cc65385a..57e903738 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2135,12 +2135,6 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" -#TP4: 288*0.9 - (745+49)/4 - 10 = 50 GB -#ISL+OSL 2048: fp8 kv total = 14,082,048 B ≈ 13.43 MiB -# batch: 3723 - -#ISL+OSL 9216: fp8 kv total = 49,377,280 B ≈ 47.1 MiB -# batch: 1061 dsv4-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.13-rocm720-mi35x-20260612 model: deepseek-ai/DeepSeek-V4-Pro