intel · yiliu30 · Apr 28, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/neural_compressor/torch/algorithms/fp8_quant/_core/common.py b/neural_compressor/torch/algorithms/fp8_quant/_core/common.py
@@ -235,12 +235,12 @@ def convert_scales_to_tensors_dict(scales_obj, scales_file_format, hp_dtype, dev
     "VllmMixtureOfExpertsOpFP8": ModuleInfo("dynamic_moe", PatchedVllmMixtureOfExpertsOpFP8),
     # FIXME (Yi) revert change
     "FusedMoE": ModuleInfo("linear", PatchedMixtralMoE, False),
-    # "GaudiMixtralSparseMoeBlock": ModuleInfo("dynamic_moe", PatchedGaudiMixtralSparseMoeBlock),
-    # "VllmMixtureOfExpertsOp": (
-    #     ModuleInfo("dynamic_moe", PatchedVllmMixtureOfExpertsOpV2)
-    #     if os.getenv("LOW_CPU_MEM", "0") == "1"
-    #     else ModuleInfo("dynamic_moe", PatchedVllmMixtureOfExpertsOpV1)
-    # ),
+    "GaudiMixtralSparseMoeBlock": ModuleInfo("dynamic_moe", PatchedGaudiMixtralSparseMoeBlock),
+    "VllmMixtureOfExpertsOp": (
+        ModuleInfo("dynamic_moe", PatchedVllmMixtureOfExpertsOpV2)
+        if os.getenv("LOW_CPU_MEM", "0") == "1"
+        else ModuleInfo("dynamic_moe", PatchedVllmMixtureOfExpertsOpV1)
+    ),
 }
 
 

diff --git a/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py b/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
@@ -781,6 +781,7 @@ def forward_quant(self,
                       hidden_states,
                       expert_routing_table,
                       router_weights,
+                      layer=None,
                       permuted_weights=True,
                       activation="silu"):
         experts_range = range(self.num_experts)
@@ -810,6 +811,7 @@ def forward_measure(self,
                         hidden_states,
                         expert_routing_table,
                         router_weights,
+                        layer=None,
                         permuted_weights=True,
                         activation="silu"):
         experts_range = range(self.num_experts)