diff --git a/packages/react-native-executorch/src/controllers/LLMController.ts b/packages/react-native-executorch/src/controllers/LLMController.ts index 7ce921f6f5..8829f37106 100644 --- a/packages/react-native-executorch/src/controllers/LLMController.ts +++ b/packages/react-native-executorch/src/controllers/LLMController.ts @@ -254,7 +254,7 @@ export class LLMController { imagePaths && imagePaths.length > 0 ? await this.nativeModule.generateMultimodal( input, - imagePaths, + imagePaths.map(normalizeImagePath), this.getImageToken(), this.onToken ) @@ -456,3 +456,15 @@ export class LLMController { return result; } } + +/** + * The native multimodal pipeline expects image paths to be `file://` URIs. + * `ResourceFetcher.fetch` and most platform file APIs return raw filesystem + * paths without that prefix, so callers routinely pass either form. Accept + * both and normalize to the prefixed form here. + * @param path - Local image path, either with or without the `file://` prefix. + * @returns The same path with a `file://` prefix. + */ +function normalizeImagePath(path: string): string { + return path.startsWith('file://') ? path : `file://${path}`; +} diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/LLMModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/LLMModule.ts index f7648a7b95..bdb5ada699 100644 --- a/packages/react-native-executorch/src/modules/natural_language_processing/LLMModule.ts +++ b/packages/react-native-executorch/src/modules/natural_language_processing/LLMModule.ts @@ -139,7 +139,7 @@ export class LLMModule { * It doesn't manage conversation context. It is intended for users that need access to the model itself without any wrapper. * If you want a simple chat with model the consider using `sendMessage` * @param input - Raw input string containing the prompt and conversation history. - * @param imagePaths - Optional array of local image paths for multimodal inference. 
+ * @param imagePaths - Optional array of local image paths for multimodal inference. Each entry may be either `file:///absolute/path` or `/absolute/path` — the controller normalizes the path before passing it to native code. * @returns The generated response as a string. */ async forward(input: string, imagePaths?: string[]): Promise<string> { diff --git a/packages/react-native-executorch/src/types/llm.ts b/packages/react-native-executorch/src/types/llm.ts index 83f9630c10..3dbf54bb94 100644 --- a/packages/react-native-executorch/src/types/llm.ts +++ b/packages/react-native-executorch/src/types/llm.ts @@ -270,6 +270,8 @@ export interface Message { /** * Optional local file path to media (image, audio, etc.). * Only valid on `user` messages. + * Either `file:///absolute/path` or `/absolute/path` is accepted; the + * controller normalizes the path before passing it to native code. */ mediaPath?: string; }