From 58fe35f96ac6371397b2d734b39d7fad6ab6df61 Mon Sep 17 00:00:00 2001
From: Manuel Candales
Date: Thu, 16 Apr 2026 18:04:18 -0400
Subject: [PATCH] Update

[ghstack-poisoned]
---
 examples/models/qwen3_5_moe/export.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/models/qwen3_5_moe/export.py b/examples/models/qwen3_5_moe/export.py
index 1a5c4dd9edf..ba08e0222b0 100644
--- a/examples/models/qwen3_5_moe/export.py
+++ b/examples/models/qwen3_5_moe/export.py
@@ -642,10 +642,14 @@ def _export_metal(model, config, args):
     print("Decode export successful!")

     # --- Prefill method (T>=2, dynamic shape) ---
+    # Use max-sized example so the serialized numel_bound_ is large enough
+    # for any runtime input (Metal/AOTI pattern: alloc_graph_input=False
+    # means numel_bound_ comes from the export example size).
     print("Exporting prefill method...")
-    prefill_tokens = torch.tensor([[0, 1]], dtype=torch.long)
-    prefill_pos = torch.tensor([0, 1], dtype=torch.long)
-    seq_dim = Dim("seq_len", min=2, max=config.max_seq_len - 1)
+    max_prefill = config.max_seq_len - 1
+    prefill_tokens = torch.zeros((1, max_prefill), dtype=torch.long)
+    prefill_pos = torch.arange(max_prefill, dtype=torch.long)
+    seq_dim = Dim("seq_len", min=2, max=max_prefill)
     prefill_dynamic_shapes = ({1: seq_dim}, {0: seq_dim})
     with torch.no_grad():
         prefill_ep = export(