Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions extension/llm/export/config/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,15 @@ class MethodConfig:
torch.export tracing. Controls which graph path is captured, e.g.
prefill vs decode, or for YOCO, where all layers run for decode
but not prefill. When unset, uses the model's default input length.
phase: Optional inference phase tag ("prefill" or "decode"). When set,
the method name is recorded in llm_methods metadata so the runtime
can pick the correct method for each inference phase.
"""

method_name: str
lora_config: Optional[LoraConfig] = None
export_seq_len: Optional[int] = None
phase: Optional[str] = None


@dataclass
Expand Down
4 changes: 4 additions & 0 deletions extension/llm/runner/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ inline constexpr auto kVocabSize = "get_vocab_size";
// KV-cache related names (presumably model metadata keys — confirm against
// the runner that reads them).
inline constexpr const char* kUseKVCache = "use_kv_cache";
inline constexpr const char* kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

// Phase-specific metadata for multimethod LLMs (e.g. YOCO), one entry for
// the prefill phase and one for the decode phase.
inline constexpr const char* kLlmMethodsPrefill = "llm_methods_prefill";
inline constexpr const char* kLlmMethodsDecode = "llm_methods_decode";

// Naming conventions for the methods of multimodal models.
inline constexpr const char* kVisionEncoderMethod = "vision_encoder";
inline constexpr const char* kAudioEncoderMethod = "audio_encoder";
Expand Down
Loading