From 58fe35f96ac6371397b2d734b39d7fad6ab6df61 Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@meta.com>
Date: Thu, 16 Apr 2026 18:04:18 -0400
Subject: [PATCH] Update

[ghstack-poisoned]
---
 examples/models/qwen3_5_moe/export.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/models/qwen3_5_moe/export.py b/examples/models/qwen3_5_moe/export.py
index 1a5c4dd9edf..ba08e0222b0 100644
--- a/examples/models/qwen3_5_moe/export.py
+++ b/examples/models/qwen3_5_moe/export.py
@@ -642,10 +642,14 @@ def _export_metal(model, config, args):
     print("Decode export successful!")
 
     # --- Prefill method (T>=2, dynamic shape) ---
+    # Export with a max-length example so the serialized numel_bound_ is large
+    # enough for any runtime input. (Metal/AOTI pattern: with
+    # alloc_graph_input=False, numel_bound_ is taken from the example's size.)
     print("Exporting prefill method...")
-    prefill_tokens = torch.tensor([[0, 1]], dtype=torch.long)
-    prefill_pos = torch.tensor([0, 1], dtype=torch.long)
-    seq_dim = Dim("seq_len", min=2, max=config.max_seq_len - 1)
+    max_prefill = config.max_seq_len - 1
+    prefill_tokens = torch.zeros((1, max_prefill), dtype=torch.long)
+    prefill_pos = torch.arange(max_prefill, dtype=torch.long)
+    seq_dim = Dim("seq_len", min=2, max=max_prefill)
     prefill_dynamic_shapes = ({1: seq_dim}, {0: seq_dim})
     with torch.no_grad():
         prefill_ep = export(