From ec7b457f8fadca09a0079bc4ede2cd8ea490477b Mon Sep 17 00:00:00 2001
From: chang-wenbin <1286094601@qq.com>
Date: Sun, 24 May 2026 16:58:43 +0800
Subject: [PATCH] fix_moe_learnable-score1

---
 .../layers/moe/fused_moe_cutlass_backend.py         | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
index 7d1bba33774..cc8fc0711ed 100644
--- a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
@@ -446,7 +446,7 @@ def apply_tp(
                 gate_out = gate_out.cast("float32")
             if fc1_latent_proj is not None:
                 x = fc1_latent_proj(x)
-            gate_out, topk_weights, topk_idx = get_moe_scores(
+            gate_out, _, __ = get_moe_scores(
                 gate_out,
                 layer.n_group,
                 layer.topk_group,
@@ -458,11 +458,6 @@ def apply_tp(
                 use_fused_cast=use_fused,
             )
 
-            if layer.routed_scaling_factor_learnable:
-                safe_topk_indices = paddle.clip(topk_idx, min=0)
-                gathered_scales = F.embedding(safe_topk_indices, layer.per_expert_scale.unsqueeze(1)).squeeze(-1)
-                topk_weights = topk_weights * gathered_scales
-
             (
                 permute_input,
                 token_nums_per_expert,
@@ -484,6 +479,12 @@ def apply_tp(
                 self.moe_quant_type,
                 topk_only_mode=True,
             )
+
+            if layer.routed_scaling_factor_learnable:
+                safe_topk_indices = paddle.clip(topk_idx, min=0)
+                gathered_scales = F.embedding(safe_topk_indices, layer.per_expert_scale.unsqueeze(1)).squeeze(-1)
+                topk_weights = topk_weights * gathered_scales
+
         else:
             gate_out = gate_out.cast("float32")
             if fc1_latent_proj is not None: