Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
a29c4e2
feat(models): add preprocessing helpers to VisionModel
benITo47 Apr 7, 2026
db8cb09
refactor(models): migrate 6 vision models to use VisionModel helpers
benITo47 Apr 7, 2026
db5a2a1
feat(models): add multi-method support helpers to BaseModel
benITo47 Apr 7, 2026
251a756
feat(utils): add detection helper utilities to computer_vision/Proces…
benITo47 Apr 7, 2026
a17443d
refactor(ocr): use BaseModel getModelInputSize helper in Detector
benITo47 Apr 7, 2026
afc81b1
refactor(models): add validateAndGetInputShape helper to BaseModel
benITo47 Apr 7, 2026
91d5123
feat: Deduplicate cpp
benITo47 Apr 10, 2026
9766dd8
docs(vision): document normalization usage in VisionModel
benITo47 Apr 10, 2026
1127fd9
feat(utils): add TensorHelpers for type-safe span conversions
benITo47 Apr 10, 2026
570e52f
feat(utils): add extractDetectionData to computer_vision
benITo47 Apr 10, 2026
07a0d02
feat(utils): add inverseRotateBboxes batch helper
benITo47 Apr 10, 2026
2062f57
feat(models): add forwardOrThrow helpers to BaseModel
benITo47 Apr 10, 2026
604d6bf
refactor(classification): use tensor and error utilities
benITo47 Apr 10, 2026
bff7197
refactor(semantic-seg): use tensor and error utilities
benITo47 Apr 10, 2026
cf0567c
refactor(object-detection): use tensor, error, and rotation utilities
benITo47 Apr 10, 2026
02a95b4
refactor(instance-seg): use detection and error utilities
benITo47 Apr 10, 2026
356764e
refactor(style-transfer): use error utilities
benITo47 Apr 10, 2026
4cd6d70
refactor(image-embeddings): use error utilities
benITo47 Apr 10, 2026
10e089c
test: add unit tests for new utility functions
benITo47 Apr 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,87 @@ std::size_t BaseModel::getMemoryLowerBound() const noexcept {

void BaseModel::unload() noexcept { module_.reset(nullptr); }

/// Loads methodName into the module, unloading any previously loaded method
/// first. Intended for multi-method models (e.g. "forward_384", "forward_640").
///
/// @param methodName Name of the exported method to load; must be non-empty.
/// @throws RnExecutorchError with InvalidUserInput when methodName is empty,
///         ModuleNotLoaded when the module has not been loaded, or the
///         runtime error code when load_method itself fails.
void BaseModel::ensureMethodLoaded(const std::string &methodName) {
  if (methodName.empty()) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                            "methodName cannot be empty");
  }
  if (currentlyLoadedMethod_ == methodName) {
    return; // Requested method is already active; nothing to do.
  }
  if (!module_) {
    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
                            "Model module is not loaded");
  }
  if (!currentlyLoadedMethod_.empty()) {
    module_->unload_method(currentlyLoadedMethod_);
    // Clear immediately: if load_method below fails we must not be left
    // with a stale name pointing at a method that was just unloaded.
    currentlyLoadedMethod_.clear();
  }
  auto loadResult = module_->load_method(methodName);
  if (loadResult != executorch::runtime::Error::Ok) {
    throw RnExecutorchError(
        loadResult, "Failed to load method '" + methodName +
                        "'. Ensure the method exists in the exported model.");
  }
  currentlyLoadedMethod_ = methodName;
}

/// Returns the shape of the model's first input tensor for methodName,
/// validating that the model has at least one input and that the shape has
/// at least minDimensions dimensions.
///
/// @throws RnExecutorchError with UnexpectedNumInputs when the model takes
///         no inputs, or WrongDimensions when the first input is too small.
std::vector<int32_t>
BaseModel::validateAndGetInputShape(const std::string &methodName,
                                    size_t minDimensions) const {
  const auto shapes = getAllInputShapes(methodName);

  if (shapes.empty()) {
    throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
                            "Model seems to not take any input tensors.");
  }

  const auto &firstShape = shapes.front();
  if (firstShape.size() < minDimensions) {
    std::string message = "Unexpected model input size, expected at least ";
    message += std::to_string(minDimensions);
    message += " dimensions but got: ";
    message += std::to_string(firstShape.size());
    message += ".";
    throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, message);
  }

  return firstShape;
}

std::vector<EValue>
BaseModel::forwardOrThrow(const EValue &input,
const std::string &contextMessage) const {
auto result = forward(input);
if (!result.ok()) {
throw RnExecutorchError(result.error(), contextMessage);
}
return std::move(result.get());
}

std::vector<EValue>
BaseModel::forwardOrThrow(const std::vector<EValue> &inputs,
const std::string &contextMessage) const {
auto result = forward(inputs);
if (!result.ok()) {
throw RnExecutorchError(result.error(), contextMessage);
}
return std::move(result.get());
}

std::vector<EValue>
BaseModel::executeOrThrow(const std::string &methodName,
const std::vector<EValue> &inputs,
const std::string &contextMessage) const {
auto result = execute(methodName, inputs);
if (!result.ok()) {
std::string message =
contextMessage.empty()
? "Model " + methodName + " method failed. Ensure input is correct."
: contextMessage;
throw RnExecutorchError(result.error(), message);
}
return std::move(result.get());
}

std::vector<int32_t>
BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const {
auto sizes = tensor.sizes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,32 @@ class BaseModel {

std::size_t memorySizeLowerBound{0};

/// Loads methodName, unloading any previously loaded method first.
/// Useful for multi-method models (e.g., "forward_384", "forward_640").
void ensureMethodLoaded(const std::string &methodName);

std::vector<int32_t>
validateAndGetInputShape(const std::string &methodName = "forward",
size_t minDimensions = 2) const;

std::vector<EValue>
forwardOrThrow(const EValue &input,
const std::string &contextMessage =
"Model forward failed. Ensure input is correct.") const;

std::vector<EValue>
forwardOrThrow(const std::vector<EValue> &inputs,
const std::string &contextMessage =
"Model forward failed. Ensure input is correct.") const;

std::vector<EValue>
executeOrThrow(const std::string &methodName,
const std::vector<EValue> &inputs,
const std::string &contextMessage = "") const;

/// Name of the currently loaded method (for multi-method models).
std::string currentlyLoadedMethod_;

private:
std::vector<int32_t>
getTensorShape(const executorch::aten::Tensor &tensor) const;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "VisionModel.h"
#include <rnexecutorch/Error.h>
#include <rnexecutorch/ErrorCodes.h>
#include <rnexecutorch/Log.h>
#include <rnexecutorch/data_processing/ImageProcessing.h>
#include <rnexecutorch/utils/FrameProcessor.h>
#include <rnexecutorch/utils/FrameTransform.h>

Expand All @@ -25,6 +27,25 @@ cv::Size VisionModel::modelInputSize() const {
modelInputShape_[modelInputShape_.size() - 2]);
}

/// Returns the spatial input size (width, height) for a specific method,
/// taken from the last two dimensions of its first input shape. Falls back
/// to currentlyLoadedMethod_ when methodName is empty.
///
/// @throws RnExecutorchError with InvalidUserInput when no method can be
///         resolved, UnexpectedNumInputs when the method has no inputs, or
///         WrongDimensions when its first input has fewer than 2 dims.
cv::Size VisionModel::getModelInputSize(const std::string &methodName) const {
  std::string method = methodName.empty() ? currentlyLoadedMethod_ : methodName;
  if (method.empty()) {
    throw RnExecutorchError(
        RnExecutorchErrorCode::InvalidUserInput,
        "No method specified and no method currently loaded");
  }

  auto inputShapes = getAllInputShapes(method);
  if (inputShapes.empty()) {
    throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
                            "Could not determine input shape for method: " +
                                method);
  }

  const auto &shape = inputShapes[0];
  // A spatial size needs at least {..., H, W}; report a dimension problem
  // with WrongDimensions, consistent with validateAndGetInputShape.
  if (shape.size() < 2) {
    throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions,
                            "Could not determine input shape for method: " +
                                method);
  }

  // cv::Size is (width, height); shapes end in ..., H, W.
  return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]);
}

cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
const jsi::Value &frameData) const {
cv::Mat frame = ::rnexecutorch::utils::frameToMat(runtime, frameData);
Expand All @@ -51,4 +72,46 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
return ::rnexecutorch::utils::pixelsToMat(tensorView);
}

void VisionModel::initNormalization(const std::vector<float> &normMean,
const std::vector<float> &normStd) {
if (normMean.size() == 3) {
normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]);
} else if (!normMean.empty()) {
log(LOG_LEVEL::Warn,
"normMean must have 3 elements — ignoring provided value.");
}

if (normStd.size() == 3) {
normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]);
} else if (!normStd.empty()) {
log(LOG_LEVEL::Warn,
"normStd must have 3 elements — ignoring provided value.");
}
}

/// Builds the model input tensor from a preprocessed image.
/// Applies per-channel normalization only when both normMean_ and normStd_
/// were set via initNormalization(); otherwise forwards raw pixel values.
TensorPtr VisionModel::createInputTensor(const cv::Mat &preprocessed) const {
  if (normMean_.has_value() && normStd_.has_value()) {
    return image_processing::getTensorFromMatrix(modelInputShape_, preprocessed,
                                                 *normMean_, *normStd_);
  }
  return image_processing::getTensorFromMatrix(modelInputShape_, preprocessed);
}

/// Reads an image from imageSource and converts it to RGB channel order.
/// OpenCV decodes images as BGR, so a BGR→RGB swap is required before the
/// pixels are fed to a model.
cv::Mat VisionModel::loadImageToRGB(const std::string &imageSource) const {
  cv::Mat decoded = image_processing::readImage(imageSource);
  cv::Mat rgb;
  cv::cvtColor(decoded, rgb, cv::COLOR_BGR2RGB);
  return rgb;
}

/// Extracts a camera frame, rotates it into model orientation, and returns
/// {rotated frame, orientation, size of the frame before rotation}.
/// The pre-rotation size lets callers map model outputs back onto the
/// original frame coordinates.
std::tuple<cv::Mat, utils::FrameOrientation, cv::Size>
VisionModel::loadFrameRotated(jsi::Runtime &runtime,
                              const jsi::Value &frameData) const {
  // Read orientation metadata first, then pull the pixel data.
  const auto orientation = utils::readFrameOrientation(runtime, frameData);
  cv::Mat rawFrame = extractFromFrame(runtime, frameData);
  const cv::Size preRotationSize = rawFrame.size();
  cv::Mat modelReady = utils::rotateFrameForModel(rawFrame, orientation);
  return {modelReady, orientation, preRotationSize};
}

} // namespace rnexecutorch::models
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <executorch/extension/tensor/tensor_ptr.h>
#include <jsi/jsi.h>
#include <mutex>
#include <opencv2/opencv.hpp>
Expand All @@ -10,6 +11,8 @@
namespace rnexecutorch {
namespace models {

using executorch::extension::TensorPtr;

/**
* @brief Base class for computer vision models that support real-time camera
* input
Expand All @@ -21,16 +24,35 @@ namespace models {
* Thread Safety:
* - All inference operations are protected by a mutex via scoped_lock
*
* Normalization:
* Subclasses should call initNormalization() with ImageNet mean/std when the
* model expects ImageNet-normalized inputs (e.g., Classification, Detection,
* Segmentation). Skip initNormalization() when the model:
* - Has built-in normalization layers (e.g., some embeddings models)
* - Expects raw pixel values [0, 255] (e.g., StyleTransfer)
* - Uses non-ImageNet normalization (handle in custom preprocess())
*
* The createInputTensor() method safely handles both cases via std::optional.
*
* Usage:
* Subclasses should:
* 1. Inherit from VisionModel instead of BaseModel
* 2. Optionally override preprocess() for model-specific preprocessing
* 3. Implement runInference() which acquires the lock internally
* 2. Call initNormalization() if model expects normalized inputs
* 3. Optionally override preprocess() for model-specific preprocessing
* 4. Implement runInference() which acquires the lock internally
*
* Example:
* @code
* class Classification : public VisionModel {
* public:
* Classification(const std::string& modelSource,
* std::shared_ptr<react::CallInvoker> callInvoker,
* const std::vector<float>& normMean,
* const std::vector<float>& normStd)
* : VisionModel(modelSource, callInvoker) {
* initNormalization(normMean, normStd); // ImageNet normalization
* }
*
* std::unordered_map<std::string_view, float>
* generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) {
* auto frameObject = frameValue.asObject(runtime);
Expand Down Expand Up @@ -63,6 +85,13 @@ class VisionModel : public BaseModel {
/// Set once by each subclass constructor to avoid per-frame metadata lookups.
std::vector<int32_t> modelInputShape_;

/// Per-channel normalization mean (RGB). nullopt = no normalization applied.
std::optional<cv::Scalar> normMean_;

/// Per-channel normalization std-dev (RGB). nullopt = no normalization
/// applied.
std::optional<cv::Scalar> normStd_;

/**
* @brief Mutex to ensure thread-safe inference
*
Expand Down Expand Up @@ -99,6 +128,35 @@ class VisionModel : public BaseModel {
* sizes.
*/
virtual cv::Size modelInputSize() const;

/**
* @brief Get input size for a specific method (last two shape dims).
*
* Useful for multi-method models with different input sizes per method.
* Falls back to currentlyLoadedMethod_ when methodName is empty.
*/
cv::Size getModelInputSize(const std::string &methodName = "") const;

/**
* @brief Set normMean_/normStd_ from float vectors.
*
* Expects size == 3. Logs a warning and ignores if non-empty but wrong size.
*/
void initNormalization(const std::vector<float> &normMean,
const std::vector<float> &normStd);

/// Builds input tensor from a preprocessed image.
/// Applies normalization if normMean_/normStd_ are set, skips it otherwise.
TensorPtr createInputTensor(const cv::Mat &preprocessed) const;

/// Reads image from path and converts BGR → RGB.
cv::Mat loadImageToRGB(const std::string &imageSource) const;

/// Extracts a camera frame, applies rotation, and returns
/// {rotated frame, orientation, original size}.
std::tuple<cv::Mat, utils::FrameOrientation, cv::Size>
loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const;

/**
* @brief Extract an RGB cv::Mat from a VisionCamera frame
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <rnexecutorch/data_processing/ImageProcessing.h>
#include <rnexecutorch/data_processing/Numerical.h>
#include <rnexecutorch/utils/TensorHelpers.h>

namespace rnexecutorch::models::classification {

Expand All @@ -16,65 +17,26 @@ Classification::Classification(const std::string &modelSource,
std::shared_ptr<react::CallInvoker> callInvoker)
: VisionModel(modelSource, callInvoker),
labelNames_(std::move(labelNames)) {
if (normMean.size() == 3) {
normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]);
} else if (!normMean.empty()) {
log(LOG_LEVEL::Warn,
"normMean must have 3 elements — ignoring provided value.");
}
if (normStd.size() == 3) {
normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]);
} else if (!normStd.empty()) {
log(LOG_LEVEL::Warn,
"normStd must have 3 elements — ignoring provided value.");
}

auto inputShapes = getAllInputShapes();
if (inputShapes.size() == 0) {
throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
"Model seems to not take any input tensors.");
}
modelInputShape_ = inputShapes[0];
if (modelInputShape_.size() < 2) {
char errorMessage[100];
std::snprintf(errorMessage, sizeof(errorMessage),
"Unexpected model input size, expected at least 2 dimensions "
"but got: %zu.",
modelInputShape_.size());
throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions,
errorMessage);
}
initNormalization(normMean, normStd);
modelInputShape_ = validateAndGetInputShape();
}

std::unordered_map<std::string_view, float>
Classification::runInference(cv::Mat image) {
std::scoped_lock lock(inference_mutex_);

cv::Mat preprocessed = preprocess(image);
auto inputTensor = createInputTensor(preprocessed);

auto inputTensor =
(normMean_ && normStd_)
? image_processing::getTensorFromMatrix(
modelInputShape_, preprocessed, *normMean_, *normStd_)
: image_processing::getTensorFromMatrix(modelInputShape_,
preprocessed);

auto forwardResult = BaseModel::forward(inputTensor);
if (!forwardResult.ok()) {
throw RnExecutorchError(forwardResult.error(),
"The model's forward function did not succeed. "
"Ensure the model input is correct.");
}
return postprocess(forwardResult->at(0).toTensor());
auto outputs = forwardOrThrow(inputTensor,
"The model's forward function did not succeed. "
"Ensure the model input is correct.");
return postprocess(outputs.at(0).toTensor());
}

std::unordered_map<std::string_view, float>
Classification::generateFromString(std::string imageSource) {
cv::Mat imageBGR = image_processing::readImage(imageSource);

cv::Mat imageRGB;
cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB);

cv::Mat imageRGB = loadImageToRGB(imageSource);
return runInference(imageRGB);
}

Expand All @@ -94,8 +56,7 @@ Classification::generateFromPixels(JSTensorViewIn pixelData) {

std::unordered_map<std::string_view, float>
Classification::postprocess(const Tensor &tensor) {
std::span<const float> resultData(
static_cast<const float *>(tensor.const_data_ptr()), tensor.numel());
auto resultData = utils::tensor::toSpan<float>(tensor);
std::vector<float> resultVec(resultData.begin(), resultData.end());

if (resultVec.size() != labelNames_.size()) {
Expand Down
Loading
Loading