From a29c4e28f7697a905d631a885c5ae65dcbc11262 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 7 Apr 2026 13:54:54 +0200 Subject: [PATCH 01/19] feat(models): add preprocessing helpers to VisionModel Add initNormalization, createInputTensor, loadImageToRGB, loadFrameRotated, and loadFrameRotatedWithSize helpers to eliminate duplication across vision models. --- .../rnexecutorch/models/VisionModel.cpp | 54 ++++++++++++++++ .../common/rnexecutorch/models/VisionModel.h | 62 +++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index cc9c862b32..0d9ee03e34 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -1,6 +1,8 @@ #include "VisionModel.h" #include #include +#include +#include #include #include @@ -51,4 +53,56 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { return ::rnexecutorch::utils::pixelsToMat(tensorView); } +void VisionModel::initNormalization(const std::vector &normMean, + const std::vector &normStd) { + if (normMean.size() == 3) { + normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); + } else if (!normMean.empty()) { + log(LOG_LEVEL::Warn, + "normMean must have 3 elements — ignoring provided value."); + } + + if (normStd.size() == 3) { + normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); + } else if (!normStd.empty()) { + log(LOG_LEVEL::Warn, + "normStd must have 3 elements — ignoring provided value."); + } +} + +TensorPtr VisionModel::createInputTensor(const cv::Mat &preprocessed) const { + return (normMean_ && normStd_) + ? 
image_processing::getTensorFromMatrix(modelInputShape_, + preprocessed, *normMean_, + *normStd_) + : image_processing::getTensorFromMatrix(modelInputShape_, + preprocessed); +} + +cv::Mat VisionModel::loadImageToRGB(const std::string &imageSource) const { + cv::Mat imageBGR = image_processing::readImage(imageSource); + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + return imageRGB; +} + +std::pair +VisionModel::loadFrameRotated(jsi::Runtime &runtime, + const jsi::Value &frameData) const { + auto orient = utils::readFrameOrientation(runtime, frameData); + cv::Mat frame = extractFromFrame(runtime, frameData); + cv::Mat rotated = utils::rotateFrameForModel(frame, orient); + return {rotated, orient}; +} + +std::tuple +VisionModel::loadFrameRotatedWithSize(jsi::Runtime &runtime, + const jsi::Value &frameData) const { + auto orient = utils::readFrameOrientation(runtime, frameData); + cv::Mat frame = extractFromFrame(runtime, frameData); + cv::Size originalSize = frame.size(); + cv::Mat rotated = utils::rotateFrameForModel(frame, orient); + return {rotated, orient, originalSize}; +} + } // namespace rnexecutorch::models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index cf003948af..6c058eab54 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -63,6 +63,12 @@ class VisionModel : public BaseModel { /// Set once by each subclass constructor to avoid per-frame metadata lookups. std::vector modelInputShape_; + /// Normalization mean values (RGB channels) + std::optional normMean_; + + /// Normalization standard deviation values (RGB channels) + std::optional normStd_; + /** * @brief Mutex to ensure thread-safe inference * @@ -99,6 +105,62 @@ class VisionModel : public BaseModel { * sizes. 
*/ virtual cv::Size modelInputSize() const; + + /** + * @brief Initialize normalization parameters from vectors + * + * Validates size == 3 and converts to cv::Scalar. + * Logs warning if invalid but non-empty. Sets nullopt if empty/invalid. + * + * @param normMean Mean values for RGB channels (expected size: 3) + * @param normStd Standard deviation values for RGB channels (expected size: 3) + */ + void initNormalization(const std::vector &normMean, + const std::vector &normStd); + + /** + * @brief Create input tensor from preprocessed image + * + * Applies normalization if normMean_ and normStd_ are set. + * + * @param preprocessed Preprocessed image (resized, RGB format) + * @return TensorPtr ready for model input + */ + TensorPtr createInputTensor(const cv::Mat &preprocessed) const; + + /** + * @brief Load and convert image from path to RGB format + * + * Common preprocessing: readImage (BGR) → convert to RGB + * + * @param imageSource Path to the image file + * @return cv::Mat in RGB format + */ + cv::Mat loadImageToRGB(const std::string &imageSource) const; + + /** + * @brief Process camera frame with rotation support + * + * @param runtime JSI runtime + * @param frameData JSI value containing frame data from VisionCamera + * @return Pair of {rotated RGB frame, orientation info} + */ + std::pair + loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const; + + /** + * @brief Process camera frame with rotation, also returning original size + * + * For models that need original frame size (e.g., semantic segmentation) + * + * @param runtime JSI runtime + * @param frameData JSI value containing frame data from VisionCamera + * @return Tuple of {rotated RGB frame, orientation info, original size} + */ + std::tuple + loadFrameRotatedWithSize(jsi::Runtime &runtime, + const jsi::Value &frameData) const; + /** * @brief Extract an RGB cv::Mat from a VisionCamera frame * From db8cb09219a3a62f861b046a87218e433f412905 Mon Sep 17 00:00:00 2001 From: 
benITo47 Date: Tue, 7 Apr 2026 13:55:12 +0200 Subject: [PATCH 02/19] refactor(models): migrate 6 vision models to use VisionModel helpers Remove duplicated preprocessing code from ImageEmbeddings, Classification, StyleTransfer, ObjectDetection, BaseInstanceSegmentation, and BaseSemanticSegmentation (~105 lines removed). --- .../models/classification/Classification.cpp | 27 +---- .../models/classification/Classification.h | 2 - .../embeddings/image/ImageEmbeddings.cpp | 10 +- .../BaseInstanceSegmentation.cpp | 102 +++--------------- .../BaseInstanceSegmentation.h | 11 -- .../object_detection/ObjectDetection.cpp | 90 ++-------------- .../models/object_detection/ObjectDetection.h | 28 ----- .../BaseSemanticSegmentation.cpp | 33 ++---- .../BaseSemanticSegmentation.h | 2 - .../models/style_transfer/StyleTransfer.cpp | 15 +-- 10 files changed, 37 insertions(+), 283 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index e34f68fe64..488cb03e6f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -16,18 +16,7 @@ Classification::Classification(const std::string &modelSource, std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), labelNames_(std::move(labelNames)) { - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } + initNormalization(normMean, normStd); auto 
inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { @@ -51,13 +40,7 @@ Classification::runInference(cv::Mat image) { std::scoped_lock lock(inference_mutex_); cv::Mat preprocessed = preprocess(image); - - auto inputTensor = - (normMean_ && normStd_) - ? image_processing::getTensorFromMatrix( - modelInputShape_, preprocessed, *normMean_, *normStd_) - : image_processing::getTensorFromMatrix(modelInputShape_, - preprocessed); + auto inputTensor = createInputTensor(preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -70,11 +53,7 @@ Classification::runInference(cv::Mat image) { std::unordered_map Classification::generateFromString(std::string imageSource) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - + cv::Mat imageRGB = loadImageToRGB(imageSource); return runInference(imageRGB); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h index 2ea0e17bbb..784c2bb60a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h @@ -40,8 +40,6 @@ class Classification : public VisionModel { std::unordered_map postprocess(const Tensor &tensor); std::vector labelNames_; - std::optional normMean_; - std::optional normStd_; }; } // namespace models::classification diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index d2914469af..1709ad8b65 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -32,9 +32,7 @@ ImageEmbeddings::runInference(cv::Mat image) { std::scoped_lock lock(inference_mutex_); cv::Mat preprocessed = preprocess(image); - - auto inputTensor = - image_processing::getTensorFromMatrix(modelInputShape_, preprocessed); + auto inputTensor = createInputTensor(preprocessed); auto forwardResult = BaseModel::forward(inputTensor); @@ -52,11 +50,7 @@ ImageEmbeddings::runInference(cv::Mat image) { std::shared_ptr ImageEmbeddings::generateFromString(std::string imageSource) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - + cv::Mat imageRGB = loadImageToRGB(imageSource); return runInference(imageRGB); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 3d2f9d1715..180a08278d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -17,41 +17,23 @@ BaseInstanceSegmentation::BaseInstanceSegmentation( std::vector normStd, bool applyNMS, std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), applyNMS_(applyNMS) { - - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } + initNormalization(normMean, 
normStd); } cv::Size BaseInstanceSegmentation::modelInputSize() const { if (currentlyLoadedMethod_.empty()) { return VisionModel::modelInputSize(); } - auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); - if (inputShapes.empty() || inputShapes[0].size() < 2) { + try { + return getModelInputSize(currentlyLoadedMethod_); + } catch (...) { return VisionModel::modelInputSize(); } - const auto &shape = inputShapes[0]; - return {shape[shape.size() - 2], shape[shape.size() - 1]}; } TensorPtr BaseInstanceSegmentation::buildInputTensor(const cv::Mat &image) { cv::Mat preprocessed = preprocess(image); - return (normMean_.has_value() && normStd_.has_value()) - ? image_processing::getTensorFromMatrix( - modelInputShape_, preprocessed, normMean_.value(), - normStd_.value()) - : image_processing::getTensorFromMatrix(modelInputShape_, - preprocessed); + return createInputTensor(preprocessed); } std::vector BaseInstanceSegmentation::runInference( @@ -75,7 +57,9 @@ std::vector BaseInstanceSegmentation::runInference( cv::Size modelInputSize(shape[shape.size() - 2], shape[shape.size() - 1]); cv::Size originalSize(image.cols, image.rows); - validateThresholds(confidenceThreshold, iouThreshold); + utils::computer_vision::validateThreshold(confidenceThreshold, + "confidenceThreshold"); + utils::computer_vision::validateThreshold(iouThreshold, "iouThreshold"); auto forwardResult = BaseModel::execute(methodName, {buildInputTensor(image)}); @@ -100,10 +84,7 @@ std::vector BaseInstanceSegmentation::generateFromString( int32_t maxInstances, std::vector classIndices, bool returnMaskAtOriginalResolution, std::string methodName) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - + cv::Mat imageRGB = loadImageToRGB(imageSource); return runInference(imageRGB, confidenceThreshold, iouThreshold, maxInstances, classIndices, returnMaskAtOriginalResolution, methodName); } @@ -114,9 +95,7 @@ 
std::vector BaseInstanceSegmentation::generateFromFrame( std::vector classIndices, bool returnMaskAtOriginalResolution, std::string methodName) { - auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); - cv::Mat frame = extractFromFrame(runtime, frameData); - cv::Mat rotated = utils::rotateFrameForModel(frame, orient); + auto [rotated, orient] = loadFrameRotated(runtime, frameData); auto instances = runInference(rotated, confidenceThreshold, iouThreshold, maxInstances, classIndices, returnMaskAtOriginalResolution, methodName); @@ -232,22 +211,6 @@ cv::Mat BaseInstanceSegmentation::processMaskFromLogits( return thresholdToBinary(probMat); } -void BaseInstanceSegmentation::validateThresholds(double confidenceThreshold, - double iouThreshold) const { - if (confidenceThreshold < 0 || confidenceThreshold > 1) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidConfig, - "Confidence threshold must be greater or equal to 0 " - "and less than or equal to 1."); - } - - if (iouThreshold < 0 || iouThreshold > 1) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, - "IoU threshold must be greater or equal to 0 " - "and less than or equal to 1."); - } -} - void BaseInstanceSegmentation::validateOutputTensors( const std::vector &tensors) const { if (tensors.size() != 3) { @@ -258,48 +221,6 @@ void BaseInstanceSegmentation::validateOutputTensors( } } -std::set BaseInstanceSegmentation::prepareAllowedClasses( - const std::vector &classIndices) const { - std::set allowedClasses; - if (!classIndices.empty()) { - allowedClasses.insert(classIndices.begin(), classIndices.end()); - } - return allowedClasses; -} - -void BaseInstanceSegmentation::ensureMethodLoaded( - const std::string &methodName) { - if (methodName.empty()) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidConfig, - "Method name cannot be empty. 
Use 'forward' for single-method models " - "or 'forward_{inputSize}' for multi-method models."); - } - - if (currentlyLoadedMethod_ == methodName) { - return; - } - - if (!module_) { - throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, - "Model not loaded. Cannot load method '" + - methodName + "'."); - } - - if (!currentlyLoadedMethod_.empty()) { - module_->unload_method(currentlyLoadedMethod_); - } - - auto loadResult = module_->load_method(methodName); - if (loadResult != executorch::runtime::Error::Ok) { - throw RnExecutorchError( - loadResult, "Failed to load method '" + methodName + - "'. Ensure the method exists in the exported model."); - } - - currentlyLoadedMethod_ = methodName; -} - std::vector BaseInstanceSegmentation::finalizeInstances( std::vector instances, double iouThreshold, int32_t maxInstances) const { @@ -326,7 +247,8 @@ std::vector BaseInstanceSegmentation::collectInstances( static_cast(originalSize.width) / modelInputSize.width; float heightRatio = static_cast(originalSize.height) / modelInputSize.height; - auto allowedClasses = prepareAllowedClasses(classIndices); + auto allowedClasses = + utils::computer_vision::prepareAllowedClasses(classIndices); // CONTRACT auto bboxTensor = tensors[0].toTensor(); // [1, N, 4] diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h index 341d0f2235..f16cb4b14d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h @@ -61,16 +61,8 @@ class BaseInstanceSegmentation : public VisionModel { const std::vector &classIndices, bool returnMaskAtOriginalResolution); - void validateThresholds(double confidenceThreshold, - double iouThreshold) const; 
void validateOutputTensors(const std::vector &tensors) const; - std::set - prepareAllowedClasses(const std::vector &classIndices) const; - - // Model loading and input helpers - void ensureMethodLoaded(const std::string &methodName); - std::tuple extractDetectionData(const float *bboxData, const float *scoresData, int32_t index); @@ -96,10 +88,7 @@ class BaseInstanceSegmentation : public VisionModel { const utils::computer_vision::BBox &bboxOriginal, cv::Size modelInputSize, cv::Size originalSize, bool warpToOriginal); - std::optional normMean_; - std::optional normStd_; bool applyNMS_; - std::string currentlyLoadedMethod_; }; } // namespace models::instance_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 24c4e1083a..17d738761a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -1,8 +1,6 @@ #include "ObjectDetection.h" #include "Constants.h" -#include - #include #include #include @@ -20,66 +18,14 @@ ObjectDetection::ObjectDetection( std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), labelNames_(std::move(labelNames)) { - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } + initNormalization(normMean, normStd); } cv::Size ObjectDetection::modelInputSize() const { if (currentlyLoadedMethod_.empty()) { return VisionModel::modelInputSize(); 
} - auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); - if (inputShapes.empty() || inputShapes[0].size() < 2) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Could not determine input shape for method: " + - currentlyLoadedMethod_); - } - const auto &shape = inputShapes[0]; - return {static_cast(shape[shape.size() - 2]), - static_cast(shape[shape.size() - 1])}; -} - -void ObjectDetection::ensureMethodLoaded(const std::string &methodName) { - if (methodName.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "methodName cannot be empty"); - } - if (currentlyLoadedMethod_ == methodName) { - return; - } - if (!module_) { - throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, - "Model module is not loaded"); - } - if (!currentlyLoadedMethod_.empty()) { - module_->unload_method(currentlyLoadedMethod_); - } - auto loadResult = module_->load_method(methodName); - if (loadResult != executorch::runtime::Error::Ok) { - throw RnExecutorchError( - loadResult, "Failed to load method '" + methodName + - "'. 
Ensure the method exists in the exported model."); - } - currentlyLoadedMethod_ = methodName; -} - -std::set ObjectDetection::prepareAllowedClasses( - const std::vector &classIndices) const { - std::set allowedClasses; - if (!classIndices.empty()) { - allowedClasses.insert(classIndices.begin(), classIndices.end()); - } - return allowedClasses; + return getModelInputSize(currentlyLoadedMethod_); } std::vector @@ -93,7 +39,8 @@ ObjectDetection::postprocess(const std::vector &tensors, static_cast(originalSize.height) / inputSize.height; // Prepare allowed classes set for filtering - auto allowedClasses = prepareAllowedClasses(classIndices); + auto allowedClasses = + utils::computer_vision::prepareAllowedClasses(classIndices); std::vector detections; auto bboxTensor = tensors.at(0).toTensor(); @@ -146,14 +93,9 @@ ObjectDetection::postprocess(const std::vector &tensors, std::vector ObjectDetection::runInference( cv::Mat image, double detectionThreshold, double iouThreshold, const std::vector &classIndices, const std::string &methodName) { - if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "detectionThreshold must be in range [0, 1]"); - } - if (iouThreshold < 0.0 || iouThreshold > 1.0) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "iouThreshold must be in range [0, 1]"); - } + utils::computer_vision::validateThreshold(detectionThreshold, + "detectionThreshold"); + utils::computer_vision::validateThreshold(iouThreshold, "iouThreshold"); std::scoped_lock lock(inference_mutex_); @@ -172,13 +114,7 @@ std::vector ObjectDetection::runInference( modelInputShape_ = inputShapes[0]; cv::Mat preprocessed = preprocess(image); - - auto inputTensor = - (normMean_ && normStd_) - ? 
image_processing::getTensorFromMatrix( - modelInputShape_, preprocessed, *normMean_, *normStd_) - : image_processing::getTensorFromMatrix(modelInputShape_, - preprocessed); + auto inputTensor = createInputTensor(preprocessed); auto executeResult = execute(methodName, {inputTensor}); if (!executeResult.ok()) { @@ -195,11 +131,7 @@ std::vector ObjectDetection::runInference( std::vector ObjectDetection::generateFromString( std::string imageSource, double detectionThreshold, double iouThreshold, std::vector classIndices, std::string methodName) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - + cv::Mat imageRGB = loadImageToRGB(imageSource); return runInference(imageRGB, detectionThreshold, iouThreshold, classIndices, methodName); } @@ -208,9 +140,7 @@ std::vector ObjectDetection::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold, double iouThreshold, std::vector classIndices, std::string methodName) { - auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); - cv::Mat frame = extractFromFrame(runtime, frameData); - cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient); + auto [rotated, orient] = loadFrameRotated(runtime, frameData); auto detections = runInference(rotated, detectionThreshold, iouThreshold, classIndices, methodName); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index 6e3c01356e..6484d5d213 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -125,36 +125,8 @@ class ObjectDetection : public VisionModel { double detectionThreshold, double iouThreshold, const 
std::vector &classIndices); - /** - * @brief Ensures the specified method is loaded, unloading any previous - * method if necessary. - * - * @param methodName Name of the method to load (e.g., "forward", - * "forward_384"). - * @throws RnExecutorchError if the method cannot be loaded. - */ - void ensureMethodLoaded(const std::string &methodName); - - /** - * @brief Prepares a set of allowed class indices for filtering detections. - * - * @param classIndices Vector of class indices to allow. - * @return A set containing the allowed class indices. - */ - std::set - prepareAllowedClasses(const std::vector &classIndices) const; - - /// Optional per-channel mean for input normalisation (set in constructor). - std::optional normMean_; - - /// Optional per-channel standard deviation for input normalisation. - std::optional normStd_; - /// Ordered label strings mapping class indices to human-readable names. std::vector labelNames_; - - /// Name of the currently loaded method (for multi-method models). 
- std::string currentlyLoadedMethod_; }; } // namespace models::object_detection diff --git a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp index 66458cb569..3f2918aca0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp @@ -18,18 +18,7 @@ BaseSemanticSegmentation::BaseSemanticSegmentation( : VisionModel(modelSource, callInvoker), allClasses_(std::move(allClasses)) { initModelImageSize(); - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } + initNormalization(normMean, normStd); } void BaseSemanticSegmentation::initModelImageSize() { @@ -55,12 +44,7 @@ BaseSemanticSegmentation::runInference( std::scoped_lock lock(inference_mutex_); cv::Mat preprocessed = VisionModel::preprocess(image); - auto inputTensor = - (normMean_ && normStd_) - ? 
image_processing::getTensorFromMatrix( - modelInputShape_, preprocessed, *normMean_, *normStd_) - : image_processing::getTensorFromMatrix(modelInputShape_, - preprocessed); + auto inputTensor = createInputTensor(preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -77,11 +61,8 @@ semantic_segmentation::SegmentationResult BaseSemanticSegmentation::generateFromString( std::string imageSource, std::set> classesOfInterest, bool resize) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - cv::Size originalSize = imageBGR.size(); - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - + cv::Mat imageRGB = loadImageToRGB(imageSource); + cv::Size originalSize = imageRGB.size(); return runInference(imageRGB, originalSize, classesOfInterest, resize); } @@ -97,15 +78,13 @@ semantic_segmentation::SegmentationResult BaseSemanticSegmentation::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, std::set> classesOfInterest, bool resize) { - auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); - cv::Mat frame = extractFromFrame(runtime, frameData); - cv::Mat rotated = utils::rotateFrameForModel(frame, orient); + auto [rotated, orient, originalSize] = loadFrameRotatedWithSize(runtime, frameData); // Always run inference without resize — rotate first, then resize. auto result = runInference(rotated, rotated.size(), classesOfInterest, false); const cv::Size outputSize = modelInputSize(); // JS reads maskW=frame.height, maskH=frame.width (sensor-native swap). 
- const cv::Size frameSize = frame.size(); + const cv::Size frameSize = originalSize; auto inverseAndResize = [&orient, &frameSize, &outputSize, resize](std::shared_ptr &buf, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.h index a30ae375bf..ba207d919b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.h @@ -47,8 +47,6 @@ class BaseSemanticSegmentation : public VisionModel { std::set> &classesOfInterest, bool resize); std::size_t numModelPixels; - std::optional normMean_; - std::optional normStd_; std::vector allClasses_; private: diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index 70a6ec916d..2efd457ddd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -37,9 +37,7 @@ cv::Mat StyleTransfer::runInference(cv::Mat image, cv::Size outputSize) { std::scoped_lock lock(inference_mutex_); cv::Mat preprocessed = preprocess(image); - - auto inputTensor = - image_processing::getTensorFromMatrix(modelInputShape_, preprocessed); + auto inputTensor = createInputTensor(preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -68,11 +66,8 @@ PixelDataResult toPixelDataResult(const cv::Mat &bgrMat) { StyleTransferResult StyleTransfer::generateFromString(std::string imageSource, bool saveToFile) { - cv::Mat imageBGR = image_processing::readImage(imageSource); - cv::Size 
originalSize = imageBGR.size(); - - cv::Mat imageRGB; - cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + cv::Mat imageRGB = loadImageToRGB(imageSource); + cv::Size originalSize = imageRGB.size(); cv::Mat result = runInference(imageRGB, originalSize); if (saveToFile) { @@ -83,9 +78,7 @@ StyleTransferResult StyleTransfer::generateFromString(std::string imageSource, PixelDataResult StyleTransfer::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { - auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); - cv::Mat frame = extractFromFrame(runtime, frameData); - cv::Mat rotated = utils::rotateFrameForModel(frame, orient); + auto [rotated, orient] = loadFrameRotated(runtime, frameData); cv::Mat output = runInference(rotated, modelInputSize()); cv::Mat oriented = utils::inverseRotateMat(output, orient); return toPixelDataResult(oriented); From db5a2a12487a87dd36324b8d5bf1f1d52c161cf9 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 7 Apr 2026 13:57:37 +0200 Subject: [PATCH 03/19] feat(models): add multi-method support helpers to BaseModel Add ensureMethodLoaded, getModelInputSize, and currentlyLoadedMethod_ to support models with multiple methods (e.g., forward_384, forward_512). 
--- .../common/rnexecutorch/models/BaseModel.cpp | 42 +++++++++++++++++++ .../common/rnexecutorch/models/BaseModel.h | 30 +++++++++++++ 2 files changed, 72 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index 2ecc3d84c9..1c36e486a8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -181,6 +181,48 @@ std::size_t BaseModel::getMemoryLowerBound() const noexcept { void BaseModel::unload() noexcept { module_.reset(nullptr); } +void BaseModel::ensureMethodLoaded(const std::string &methodName) { + if (methodName.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "methodName cannot be empty"); + } + if (currentlyLoadedMethod_ == methodName) { + return; + } + if (!module_) { + throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, + "Model module is not loaded"); + } + if (!currentlyLoadedMethod_.empty()) { + module_->unload_method(currentlyLoadedMethod_); + } + auto loadResult = module_->load_method(methodName); + if (loadResult != executorch::runtime::Error::Ok) { + throw RnExecutorchError( + loadResult, "Failed to load method '" + methodName + + "'. Ensure the method exists in the exported model."); + } + currentlyLoadedMethod_ = methodName; +} + +cv::Size BaseModel::getModelInputSize(const std::string &methodName) const { + std::string method = methodName.empty() ? 
currentlyLoadedMethod_ : methodName; + if (method.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "No method specified and no method currently loaded"); + } + + auto inputShapes = getAllInputShapes(method); + if (inputShapes.empty() || inputShapes[0].size() < 2) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Could not determine input shape for method: " + + method); + } + + const auto &shape = inputShapes[0]; + return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]); +} + std::vector BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const { auto sizes = tensor.sizes(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index 6d44976b90..b60066c4e7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,35 @@ class BaseModel { std::size_t memorySizeLowerBound{0}; + /** + * @brief Ensures the specified method is loaded, unloading any previous + * method if necessary. + * + * This helper is useful for models that support multiple methods with + * different input sizes (e.g., "forward_384", "forward_512", "forward_640"). + * + * @param methodName Name of the method to load (e.g., "forward", + * "forward_384"). + * @throws RnExecutorchError if the method cannot be loaded or if methodName + * is empty. + */ + void ensureMethodLoaded(const std::string &methodName); + + /** + * @brief Get model input spatial dimensions for a specific method. + * + * Useful for multi-method models with different input sizes per method. + * Returns the last two dimensions of the input shape as cv::Size. 
+ * + * @param methodName Method to query (uses currentlyLoadedMethod_ if empty) + * @return Size (width, height) of the model input for the specified method + * @throws RnExecutorchError if method metadata cannot be retrieved + */ + cv::Size getModelInputSize(const std::string &methodName = "") const; + + /// Name of the currently loaded method (for multi-method models). + std::string currentlyLoadedMethod_; + private: std::vector getTensorShape(const executorch::aten::Tensor &tensor) const; From 251a756467e4b5d279d8c0a48b5c60d935f6027f Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 7 Apr 2026 13:57:52 +0200 Subject: [PATCH 04/19] feat(utils): add detection helper utilities to computer_vision/Processing Add prepareAllowedClasses and validateThreshold to Processing.{h,cpp} for reuse across detection models. --- .../utils/computer_vision/Processing.cpp | 18 ++++++++++++++++++ .../utils/computer_vision/Processing.h | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp index 108fd6ff8a..39d01a33ab 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp @@ -1,6 +1,8 @@ #include "Processing.h" #include #include +#include +#include namespace rnexecutorch::utils::computer_vision { @@ -18,4 +20,20 @@ float computeIoU(const BBox &a, const BBox &b) { return (unionArea > 0.0f) ? 
(intersectionArea / unionArea) : 0.0f; } +std::set +prepareAllowedClasses(const std::vector &classIndices) { + std::set allowedClasses; + if (!classIndices.empty()) { + allowedClasses.insert(classIndices.begin(), classIndices.end()); + } + return allowedClasses; +} + +void validateThreshold(double value, const std::string &name) { + if (value < 0.0 || value > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + name + " must be in range [0, 1]"); + } +} + } // namespace rnexecutorch::utils::computer_vision diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h index 3bd3022d4a..825950f151 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h @@ -2,12 +2,30 @@ #include "Types.h" #include +#include +#include #include namespace rnexecutorch::utils::computer_vision { float computeIoU(const BBox &a, const BBox &b); +/** + * @brief Convert class indices vector to a set for O(1) lookup + * @param classIndices Vector of allowed class indices + * @return Set of allowed class indices (empty set = allow all) + */ +std::set +prepareAllowedClasses(const std::vector &classIndices); + +/** + * @brief Validate that a threshold is in [0, 1] range + * @param value Threshold value to validate + * @param name Name of the threshold (for error messages) + * @throws RnExecutorchError if value is out of range + */ +void validateThreshold(double value, const std::string &name); + template std::vector nonMaxSuppression(std::vector items, double iouThreshold) { if (items.empty()) { From a17443dfe555d3637e25ee2f73978e6146e179b3 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 7 Apr 2026 14:16:33 +0200 Subject: [PATCH 05/19] refactor(ocr): use BaseModel getModelInputSize helper in Detector 
Simplify calculateModelImageSize to use BaseModel's getModelInputSize helper (~7 lines removed). --- .../common/rnexecutorch/models/ocr/Detector.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp index e838a7a0f8..8df3acf05a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp @@ -61,17 +61,10 @@ std::vector Detector::generate(const cv::Mat &inputImage, } cv::Size Detector::calculateModelImageSize(int32_t methodInputWidth) { - utils::validateInputWidth(methodInputWidth, constants::kDetectorInputWidths, "Detector"); std::string methodName = "forward_" + std::to_string(methodInputWidth); - - auto inputShapes = getAllInputShapes(methodName); - std::vector modelInputShape = inputShapes[0]; - cv::Size modelInputSize = - cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); - return modelInputSize; + return getModelInputSize(methodName); } std::vector From afc81b1c61b0bd67995b386596050b9dee2aa9f0 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 7 Apr 2026 15:20:38 +0200 Subject: [PATCH 06/19] refactor(models): add validateAndGetInputShape helper to BaseModel Centralize input shape validation logic across all models. Replaces duplicated validation code in 7 models (~84 lines removed). 
--- .../common/rnexecutorch/models/BaseModel.cpp | 22 +++++++++++++++++++ .../common/rnexecutorch/models/BaseModel.h | 16 ++++++++++++++ .../models/classification/Classification.cpp | 17 +------------- .../embeddings/image/ImageEmbeddings.cpp | 16 +------------- .../BaseInstanceSegmentation.cpp | 9 +------- .../object_detection/ObjectDetection.cpp | 10 ++------- .../rnexecutorch/models/ocr/Detector.cpp | 12 ++-------- .../BaseSemanticSegmentation.cpp | 13 +---------- .../models/style_transfer/StyleTransfer.cpp | 16 +------------- 9 files changed, 47 insertions(+), 84 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index 1c36e486a8..10c1880d95 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -223,6 +223,28 @@ cv::Size BaseModel::getModelInputSize(const std::string &methodName) const { return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]); } +std::vector +BaseModel::validateAndGetInputShape(const std::string &methodName, + size_t minDimensions) const { + auto inputShapes = getAllInputShapes(methodName); + + if (inputShapes.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Model seems to not take any input tensors."); + } + + const auto &shape = inputShapes[0]; + if (shape.size() < minDimensions) { + throw RnExecutorchError( + RnExecutorchErrorCode::WrongDimensions, + "Unexpected model input size, expected at least " + + std::to_string(minDimensions) + " dimensions but got: " + + std::to_string(shape.size()) + "."); + } + + return shape; +} + std::vector BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const { auto sizes = tensor.sizes(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h 
b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index b60066c4e7..a015a2fb1b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -80,6 +80,22 @@ class BaseModel { */ cv::Size getModelInputSize(const std::string &methodName = "") const; + /** + * @brief Validate and get input shape for model + * + * Validates that the model has at least one input tensor and that the first + * input has the minimum required dimensions. + * + * @param methodName Method to get shapes for (default: "forward") + * @param minDimensions Minimum expected dimensions (default: 2) + * @throws RnExecutorchError if validation fails (no inputs or insufficient + * dimensions) + * @return The first input shape vector + */ + std::vector + validateAndGetInputShape(const std::string &methodName = "forward", + size_t minDimensions = 2) const; + /// Name of the currently loaded method (for multi-method models). 
std::string currentlyLoadedMethod_; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 488cb03e6f..e4cc931d51 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -17,22 +17,7 @@ Classification::Classification(const std::string &modelSource, : VisionModel(modelSource, callInvoker), labelNames_(std::move(labelNames)) { initNormalization(normMean, normStd); - - auto inputShapes = getAllInputShapes(); - if (inputShapes.size() == 0) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - modelInputShape_ = inputShapes[0]; - if (modelInputShape_.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimensions " - "but got: %zu.", - modelInputShape_.size()); - throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, - errorMessage); - } + modelInputShape_ = validateAndGetInputShape(); } std::unordered_map diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index 1709ad8b65..8f822bd6c8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -10,21 +10,7 @@ ImageEmbeddings::ImageEmbeddings( const std::string &modelSource, std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker) { - auto inputTensors = getAllInputShapes(); - if (inputTensors.size() == 0) { - throw 
RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - modelInputShape_ = inputTensors[0]; - if (modelInputShape_.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimensions " - "but got: %zu.", - modelInputShape_.size()); - throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, - errorMessage); - } + modelInputShape_ = validateAndGetInputShape(); } std::shared_ptr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 180a08278d..4913d581c9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -45,14 +45,7 @@ std::vector BaseInstanceSegmentation::runInference( ensureMethodLoaded(methodName); - auto inputShapes = getAllInputShapes(methodName); - if (inputShapes.empty() || inputShapes[0].empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Method '" + methodName + - "' has invalid input tensor shape."); - } - - modelInputShape_ = inputShapes[0]; + modelInputShape_ = validateAndGetInputShape(methodName, 2); const auto &shape = modelInputShape_; cv::Size modelInputSize(shape[shape.size() - 2], shape[shape.size() - 1]); cv::Size originalSize(image.cols, image.rows); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 17d738761a..ff8c79adbe 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -104,14 +104,8 @@ std::vector ObjectDetection::runInference( cv::Size originalSize = image.size(); - // Query input shapes for the currently loaded method - auto inputShapes = getAllInputShapes(methodName); - if (inputShapes.empty() || inputShapes[0].size() < 2) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Could not determine input shape for method: " + - methodName); - } - modelInputShape_ = inputShapes[0]; + // Query and validate input shapes for the currently loaded method + modelInputShape_ = validateAndGetInputShape(methodName, 2); cv::Mat preprocessed = preprocess(image); auto inputTensor = createInputTensor(preprocessed); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp index 8df3acf05a..41307a1f80 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp @@ -11,18 +11,10 @@ namespace rnexecutorch::models::ocr { Detector::Detector(const std::string &modelSource, std::shared_ptr callInvoker) : BaseModel(modelSource, callInvoker) { - + // Validate all supported input widths for (auto input_size : constants::kDetectorInputWidths) { std::string methodName = "forward_" + std::to_string(input_size); - auto inputShapes = getAllInputShapes(methodName); - if (inputShapes[0].size() < 2) { - std::string errorMessage = - "Unexpected detector model input size for method: " + methodName + - "expected at least 2 dimensions but got: ." 
+ - std::to_string(inputShapes[0].size()); - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - errorMessage); - } + validateAndGetInputShape(methodName, 2); } } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp index 3f2918aca0..d5fdba48f6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp @@ -22,18 +22,7 @@ BaseSemanticSegmentation::BaseSemanticSegmentation( } void BaseSemanticSegmentation::initModelImageSize() { - auto inputShapes = getAllInputShapes(); - if (inputShapes.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - modelInputShape_ = inputShapes[0]; - if (modelInputShape_.size() < 2) { - throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, - "Unexpected model input size, expected at least 2 " - "dimensions but got: " + - std::to_string(modelInputShape_.size()) + "."); - } + modelInputShape_ = validateAndGetInputShape(); numModelPixels = modelInputSize().area(); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index 2efd457ddd..cb34eb6b42 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -16,21 +16,7 @@ using executorch::extension::TensorPtr; StyleTransfer::StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker) : VisionModel(modelSource, 
callInvoker) { - auto inputShapes = getAllInputShapes(); - if (inputShapes.size() == 0) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors"); - } - modelInputShape_ = inputShapes[0]; - if (modelInputShape_.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimensions " - "but got: %zu.", - modelInputShape_.size()); - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - errorMessage); - } + modelInputShape_ = validateAndGetInputShape(); } cv::Mat StyleTransfer::runInference(cv::Mat image, cv::Size outputSize) { From 91d51232c8ecfae48ae1017d275d8b6a59f4685a Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 12:21:04 +0200 Subject: [PATCH 07/19] feat: Deduplicate cpp --- .../common/rnexecutorch/models/BaseModel.cpp | 24 ++---------- .../common/rnexecutorch/models/BaseModel.h | 15 +------ .../rnexecutorch/models/VisionModel.cpp | 39 ++++++++++++------- .../common/rnexecutorch/models/VisionModel.h | 35 +++++++++-------- .../BaseInstanceSegmentation.cpp | 14 +++---- .../object_detection/ObjectDetection.cpp | 6 +-- .../rnexecutorch/models/ocr/Detector.cpp | 2 +- .../common/rnexecutorch/models/ocr/Detector.h | 10 ++--- .../BaseSemanticSegmentation.cpp | 2 +- .../models/style_transfer/StyleTransfer.cpp | 2 +- .../common/rnexecutorch/tests/CMakeLists.txt | 2 + .../utils/computer_vision/Processing.cpp | 9 ----- .../utils/computer_vision/Processing.h | 8 ---- 13 files changed, 65 insertions(+), 103 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index 10c1880d95..9bab6831ed 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -205,27 +205,9 @@ void 
BaseModel::ensureMethodLoaded(const std::string &methodName) { currentlyLoadedMethod_ = methodName; } -cv::Size BaseModel::getModelInputSize(const std::string &methodName) const { - std::string method = methodName.empty() ? currentlyLoadedMethod_ : methodName; - if (method.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "No method specified and no method currently loaded"); - } - - auto inputShapes = getAllInputShapes(method); - if (inputShapes.empty() || inputShapes[0].size() < 2) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Could not determine input shape for method: " + - method); - } - - const auto &shape = inputShapes[0]; - return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]); -} - std::vector BaseModel::validateAndGetInputShape(const std::string &methodName, - size_t minDimensions) const { + size_t minDimensions) const { auto inputShapes = getAllInputShapes(methodName); if (inputShapes.empty()) { @@ -238,8 +220,8 @@ BaseModel::validateAndGetInputShape(const std::string &methodName, throw RnExecutorchError( RnExecutorchErrorCode::WrongDimensions, "Unexpected model input size, expected at least " + - std::to_string(minDimensions) + " dimensions but got: " + - std::to_string(shape.size()) + "."); + std::to_string(minDimensions) + + " dimensions but got: " + std::to_string(shape.size()) + "."); } return shape; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index a015a2fb1b..f17e7615ee 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -68,18 +67,6 @@ class BaseModel { */ void ensureMethodLoaded(const std::string &methodName); - /** - * @brief Get model input spatial dimensions for a 
specific method. - * - * Useful for multi-method models with different input sizes per method. - * Returns the last two dimensions of the input shape as cv::Size. - * - * @param methodName Method to query (uses currentlyLoadedMethod_ if empty) - * @return Size (width, height) of the model input for the specified method - * @throws RnExecutorchError if method metadata cannot be retrieved - */ - cv::Size getModelInputSize(const std::string &methodName = "") const; - /** * @brief Validate and get input shape for model * @@ -94,7 +81,7 @@ class BaseModel { */ std::vector validateAndGetInputShape(const std::string &methodName = "forward", - size_t minDimensions = 2) const; + size_t minDimensions = 2) const; /// Name of the currently loaded method (for multi-method models). std::string currentlyLoadedMethod_; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 0d9ee03e34..6078621761 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -1,7 +1,7 @@ #include "VisionModel.h" #include #include -#include +#include #include #include #include @@ -27,6 +27,25 @@ cv::Size VisionModel::modelInputSize() const { modelInputShape_[modelInputShape_.size() - 2]); } +cv::Size VisionModel::getModelInputSize(const std::string &methodName) const { + std::string method = methodName.empty() ? 
currentlyLoadedMethod_ : methodName; + if (method.empty()) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "No method specified and no method currently loaded"); + } + + auto inputShapes = getAllInputShapes(method); + if (inputShapes.empty() || inputShapes[0].size() < 2) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Could not determine input shape for method: " + + method); + } + + const auto &shape = inputShapes[0]; + return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]); +} + cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { cv::Mat frame = ::rnexecutorch::utils::frameToMat(runtime, frameData); @@ -54,7 +73,7 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { } void VisionModel::initNormalization(const std::vector &normMean, - const std::vector &normStd) { + const std::vector &normStd) { if (normMean.size() == 3) { normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); } else if (!normMean.empty()) { @@ -72,9 +91,8 @@ void VisionModel::initNormalization(const std::vector &normMean, TensorPtr VisionModel::createInputTensor(const cv::Mat &preprocessed) const { return (normMean_ && normStd_) - ? image_processing::getTensorFromMatrix(modelInputShape_, - preprocessed, *normMean_, - *normStd_) + ? 
image_processing::getTensorFromMatrix( + modelInputShape_, preprocessed, *normMean_, *normStd_) : image_processing::getTensorFromMatrix(modelInputShape_, preprocessed); } @@ -86,20 +104,11 @@ cv::Mat VisionModel::loadImageToRGB(const std::string &imageSource) const { return imageRGB; } -std::pair +std::tuple VisionModel::loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto orient = utils::readFrameOrientation(runtime, frameData); cv::Mat frame = extractFromFrame(runtime, frameData); - cv::Mat rotated = utils::rotateFrameForModel(frame, orient); - return {rotated, orient}; -} - -std::tuple -VisionModel::loadFrameRotatedWithSize(jsi::Runtime &runtime, - const jsi::Value &frameData) const { - auto orient = utils::readFrameOrientation(runtime, frameData); - cv::Mat frame = extractFromFrame(runtime, frameData); cv::Size originalSize = frame.size(); cv::Mat rotated = utils::rotateFrameForModel(frame, orient); return {rotated, orient, originalSize}; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 6c058eab54..df36be5b6e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,6 +11,8 @@ namespace rnexecutorch { namespace models { +using executorch::extension::TensorPtr; + /** * @brief Base class for computer vision models that support real-time camera * input @@ -106,6 +109,18 @@ class VisionModel : public BaseModel { */ virtual cv::Size modelInputSize() const; + /** + * @brief Get model input spatial dimensions for a specific method. + * + * Useful for multi-method models with different input sizes per method. + * Returns the last two dimensions of the input shape as cv::Size. 
+ * + * @param methodName Method to query (uses currentlyLoadedMethod_ if empty) + * @return Size (width, height) of the model input for the specified method + * @throws RnExecutorchError if method metadata cannot be retrieved + */ + cv::Size getModelInputSize(const std::string &methodName = "") const; + /** * @brief Initialize normalization parameters from vectors * @@ -113,10 +128,11 @@ class VisionModel : public BaseModel { * Logs warning if invalid but non-empty. Sets nullopt if empty/invalid. * * @param normMean Mean values for RGB channels (expected size: 3) - * @param normStd Standard deviation values for RGB channels (expected size: 3) + * @param normStd Standard deviation values for RGB channels (expected size: + * 3) */ void initNormalization(const std::vector &normMean, - const std::vector &normStd); + const std::vector &normStd); /** * @brief Create input tensor from preprocessed image @@ -143,23 +159,10 @@ class VisionModel : public BaseModel { * * @param runtime JSI runtime * @param frameData JSI value containing frame data from VisionCamera - * @return Pair of {rotated RGB frame, orientation info} - */ - std::pair - loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const; - - /** - * @brief Process camera frame with rotation, also returning original size - * - * For models that need original frame size (e.g., semantic segmentation) - * - * @param runtime JSI runtime - * @param frameData JSI value containing frame data from VisionCamera * @return Tuple of {rotated RGB frame, orientation info, original size} */ std::tuple - loadFrameRotatedWithSize(jsi::Runtime &runtime, - const jsi::Value &frameData) const; + loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const; /** * @brief Extract an RGB cv::Mat from a VisionCamera frame diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp 
b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 4913d581c9..33511c3a92 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace rnexecutorch::models::instance_segmentation { @@ -21,14 +22,10 @@ BaseInstanceSegmentation::BaseInstanceSegmentation( } cv::Size BaseInstanceSegmentation::modelInputSize() const { - if (currentlyLoadedMethod_.empty()) { - return VisionModel::modelInputSize(); - } - try { + if (!currentlyLoadedMethod_.empty()) { return getModelInputSize(currentlyLoadedMethod_); - } catch (...) { - return VisionModel::modelInputSize(); } + return VisionModel::modelInputSize(); } TensorPtr BaseInstanceSegmentation::buildInputTensor(const cv::Mat &image) { @@ -88,7 +85,7 @@ std::vector BaseInstanceSegmentation::generateFromFrame( std::vector classIndices, bool returnMaskAtOriginalResolution, std::string methodName) { - auto [rotated, orient] = loadFrameRotated(runtime, frameData); + auto [rotated, orient, _] = loadFrameRotated(runtime, frameData); auto instances = runInference(rotated, confidenceThreshold, iouThreshold, maxInstances, classIndices, returnMaskAtOriginalResolution, methodName); @@ -240,8 +237,7 @@ std::vector BaseInstanceSegmentation::collectInstances( static_cast(originalSize.width) / modelInputSize.width; float heightRatio = static_cast(originalSize.height) / modelInputSize.height; - auto allowedClasses = - utils::computer_vision::prepareAllowedClasses(classIndices); + std::set allowedClasses(classIndices.begin(), classIndices.end()); // CONTRACT auto bboxTensor = tensors[0].toTensor(); // [1, N, 4] diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp 
b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index ff8c79adbe..d0da9fdb4e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace rnexecutorch::models::object_detection { @@ -39,8 +40,7 @@ ObjectDetection::postprocess(const std::vector &tensors, static_cast(originalSize.height) / inputSize.height; // Prepare allowed classes set for filtering - auto allowedClasses = - utils::computer_vision::prepareAllowedClasses(classIndices); + std::set allowedClasses(classIndices.begin(), classIndices.end()); std::vector detections; auto bboxTensor = tensors.at(0).toTensor(); @@ -134,7 +134,7 @@ std::vector ObjectDetection::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold, double iouThreshold, std::vector classIndices, std::string methodName) { - auto [rotated, orient] = loadFrameRotated(runtime, frameData); + auto [rotated, orient, _] = loadFrameRotated(runtime, frameData); auto detections = runInference(rotated, detectionThreshold, iouThreshold, classIndices, methodName); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp index 41307a1f80..62dfa0cc6e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp @@ -10,7 +10,7 @@ namespace rnexecutorch::models::ocr { Detector::Detector(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { // Validate all supported input widths for (auto input_size : constants::kDetectorInputWidths) { std::string 
methodName = "forward_" + std::to_string(input_size); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h index dc17aa0742..b77a379fd5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace rnexecutorch::models::ocr { @@ -17,13 +17,13 @@ namespace rnexecutorch::models::ocr { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class Detector : public BaseModel { +class Detector : public models::VisionModel { public: explicit Detector(const std::string &modelSource, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] - virtual std::vector - generate(const cv::Mat &inputImage, int32_t inputWidth); + [[nodiscard("Registered non-void function")]] + virtual std::vector generate(const cv::Mat &inputImage, + int32_t inputWidth); cv::Size calculateModelImageSize(int32_t methodInputWidth); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp index d5fdba48f6..eda9b2b73f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp @@ -67,7 +67,7 @@ semantic_segmentation::SegmentationResult BaseSemanticSegmentation::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, std::set> classesOfInterest, bool resize) { - auto [rotated, orient, originalSize] = loadFrameRotatedWithSize(runtime, frameData); + auto [rotated, orient, originalSize] = 
loadFrameRotated(runtime, frameData); // Always run inference without resize — rotate first, then resize. auto result = runInference(rotated, rotated.size(), classesOfInterest, false); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index cb34eb6b42..f045a13bde 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -64,7 +64,7 @@ StyleTransferResult StyleTransfer::generateFromString(std::string imageSource, PixelDataResult StyleTransfer::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { - auto [rotated, orient] = loadFrameRotated(runtime, frameData); + auto [rotated, orient, _] = loadFrameRotated(runtime, frameData); cv::Mat output = runInference(rotated, modelInputSize()); cv::Mat oriented = utils::inverseRotateMat(output, orient); return toPixelDataResult(oriented); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index d68ab33509..cf5289bf92 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -333,6 +333,7 @@ add_rn_test(OCRTests integration/OCRTest.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp @@ -350,6 +351,7 @@ add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp 
${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp index 39d01a33ab..63b342395d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp @@ -20,15 +20,6 @@ float computeIoU(const BBox &a, const BBox &b) { return (unionArea > 0.0f) ? (intersectionArea / unionArea) : 0.0f; } -std::set -prepareAllowedClasses(const std::vector &classIndices) { - std::set allowedClasses; - if (!classIndices.empty()) { - allowedClasses.insert(classIndices.begin(), classIndices.end()); - } - return allowedClasses; -} - void validateThreshold(double value, const std::string &name) { if (value < 0.0 || value > 1.0) { throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h index 825950f151..89545248a6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h @@ -10,14 +10,6 @@ namespace rnexecutorch::utils::computer_vision { float computeIoU(const BBox &a, const BBox &b); -/** - * @brief Convert class indices vector to a set for O(1) lookup - * @param classIndices Vector of allowed class indices - * @return Set of allowed class 
indices (empty set = allow all) - */ -std::set -prepareAllowedClasses(const std::vector &classIndices); - /** * @brief Validate that a threshold is in [0, 1] range * @param value Threshold value to validate From 9766dd83e559d7747ae2c6291b81697780126419 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 17:58:29 +0200 Subject: [PATCH 08/19] docs(vision): document normalization usage in VisionModel Add documentation explaining when subclasses should: - Call initNormalization() for models expecting ImageNet preprocessing - Skip it for models with built-in normalization or raw input - Note that createInputTensor() safely handles both cases via std::optional --- .../common/rnexecutorch/models/VisionModel.h | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index df36be5b6e..2bd1f5859c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -24,16 +24,35 @@ using executorch::extension::TensorPtr; * Thread Safety: * - All inference operations are protected by a mutex via scoped_lock * + * Normalization: + * Subclasses should call initNormalization() with ImageNet mean/std when the + * model expects ImageNet-normalized inputs (e.g., Classification, Detection, + * Segmentation). Skip initNormalization() when the model: + * - Has built-in normalization layers (e.g., some embeddings models) + * - Expects raw pixel values [0, 255] (e.g., StyleTransfer) + * - Uses non-ImageNet normalization (handle in custom preprocess()) + * + * The createInputTensor() method safely handles both cases via std::optional. + * * Usage: * Subclasses should: * 1. Inherit from VisionModel instead of BaseModel - * 2. Optionally override preprocess() for model-specific preprocessing - * 3. 
Implement runInference() which acquires the lock internally + * 2. Call initNormalization() if model expects normalized inputs + * 3. Optionally override preprocess() for model-specific preprocessing + * 4. Implement runInference() which acquires the lock internally * * Example: * @code * class Classification : public VisionModel { * public: + * Classification(const std::string& modelSource, + * std::shared_ptr callInvoker, + * const std::vector& normMean, + * const std::vector& normStd) + * : VisionModel(modelSource, callInvoker) { + * initNormalization(normMean, normStd); // ImageNet normalization + * } + * * std::unordered_map * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { * auto frameObject = frameValue.asObject(runtime); @@ -66,10 +85,16 @@ class VisionModel : public BaseModel { /// Set once by each subclass constructor to avoid per-frame metadata lookups. std::vector modelInputShape_; - /// Normalization mean values (RGB channels) + /// Normalization mean values (RGB channels). + /// Optional: set via initNormalization() for models expecting normalized + /// inputs (e.g., ImageNet preprocessing). Leave as nullopt for models with + /// built-in normalization or raw pixel input expectations. std::optional normMean_; - /// Normalization standard deviation values (RGB channels) + /// Normalization standard deviation values (RGB channels). + /// Optional: set via initNormalization() for models expecting normalized + /// inputs (e.g., ImageNet preprocessing). Leave as nullopt for models with + /// built-in normalization or raw pixel input expectations. std::optional normStd_; /** From 1127fd96383eadbbc7362d2b7a1c791e4e770c01 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 17:59:50 +0200 Subject: [PATCH 09/19] feat(utils): add TensorHelpers for type-safe span conversions Create header-only utility for converting tensors to std::span. 
Provides: - toSpan(Tensor&): Convert tensor to typed span - toSpan(EValue&): Extract tensor from EValue then convert Eliminates manual pointer arithmetic and improves type safety. Replaces 8+ manual span constructions across vision models. --- .../common/rnexecutorch/utils/TensorHelpers.h | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h b/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h new file mode 100644 index 0000000000..23e6c89bd1 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + +namespace rnexecutorch { +namespace utils { +namespace tensor { + +/** + * @brief Convert a tensor to a typed span for safe data access + * + * Provides type-safe access to tensor data via std::span, eliminating + * manual pointer arithmetic and size calculations. + * + * @tparam T The element type (e.g., float, int32_t) + * @param tensor The tensor to convert + * @return std::span<const T> A read-only view of the tensor data + * + * @note The returned span is valid only as long as the tensor exists + * + * Example: + * @code + * auto tensor = getTensor(); + * auto data = tensor::toSpan<float>(tensor); + * for (float value : data) { + * // Process value... + * } + * @endcode + */ +template <typename T> +std::span<const T> toSpan(const executorch::aten::Tensor &tensor) { + return std::span<const T>(static_cast<const T *>(tensor.const_data_ptr()), + tensor.numel()); +} + +/** + * @brief Convert an EValue containing a tensor to a typed span + * + * Convenience overload for extracting tensor data from EValue results. 
+ * + * @tparam T The element type (e.g., float, int32_t) + * @param evalue The EValue containing a tensor + * @return std::span<const T> A read-only view of the tensor data + * + * @note Assumes evalue.isTensor() == true. Behavior is undefined otherwise. + * + * Example: + * @code + * auto result = model.forward(input); + * auto outputs = tensor::toSpan<float>(result.get()[0]); + * @endcode + */ +template <typename T> +std::span<const T> toSpan(const executorch::runtime::EValue &evalue) { + return toSpan<T>(evalue.toTensor()); +} + +} // namespace tensor +} // namespace utils +} // namespace rnexecutorch From 570e52f2fed425788dc2b33af7e6eb937ad1dee4 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:00:24 +0200 Subject: [PATCH 10/19] feat(utils): add extractDetectionData to computer_vision Add utility function to extract bbox, score, and label from detection model tensor outputs. Replaces private extractDetectionData() method in BaseInstanceSegmentation. Provides reusable data extraction for detection models. --- .../utils/computer_vision/Processing.cpp | 10 ++++++++ .../utils/computer_vision/Processing.h | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp index 63b342395d..feb52209e6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp @@ -27,4 +27,14 @@ void validateThreshold(double value, const std::string &name) { } } +std::tuple<BBox, float, int32_t> extractDetectionData(const float *bboxData, + const float *scoresData, + int32_t index) { + BBox bbox{bboxData[index * 4], bboxData[index * 4 + 1], + bboxData[index * 4 + 2], bboxData[index * 4 + 3]}; + float score = scoresData[index * 2]; + int32_t label = static_cast<int32_t>(scoresData[index * 2 + 1]); + return {bbox, score, 
label}; +} + } // namespace rnexecutorch::utils::computer_vision diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h index 89545248a6..a296fe6e25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h @@ -2,14 +2,38 @@ #include "Types.h" #include +#include #include #include +#include #include namespace rnexecutorch::utils::computer_vision { float computeIoU(const BBox &a, const BBox &b); +/** + * @brief Extract detection data at a specific index from raw tensor buffers + * + * Parses bounding box coordinates, confidence score, and class label from + * typical object detection model outputs. + * + * @param bboxData Pointer to bounding box data (format: [x1, y1, x2, y2] per + * detection) + * @param scoresData Pointer to scores data (format: [score, label] per + * detection) + * @param index Index of the detection to extract + * @return Tuple of {bbox, score, label} + * + * Example: + * @code + * auto [bbox, score, label] = extractDetectionData(bboxData, scoresData, i); + * @endcode + */ +std::tuple extractDetectionData(const float *bboxData, + const float *scoresData, + int32_t index); + /** * @brief Validate that a threshold is in [0, 1] range * @param value Threshold value to validate From 07a0d02b784c3aaf255692fed6e98537824c0e38 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:00:55 +0200 Subject: [PATCH 11/19] feat(utils): add inverseRotateBboxes batch helper Add template function to apply inverse rotation to bboxes in containers. Provides convenience helper for batch operations on detection/segmentation results, eliminating manual loops in ObjectDetection and InstanceSegmentation. 
--- .../rnexecutorch/utils/FrameTransform.h | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h index ed3fb124f4..10326944ea 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h @@ -112,4 +112,33 @@ void inverseRotatePoints(std::array &points, #endif } +/** + * @brief Apply inverse rotation to bounding boxes in a batch + * + * Convenience helper to inverse-rotate bboxes for all items in a container. + * Each item must have a .bbox member of type computer_vision::BBox. + * + * @tparam Container Type that supports iteration and has items with .bbox + * member + * @param items Container of detection/segmentation results + * @param orient Frame orientation info (from loadFrameRotated) + * @param rotatedSize Size of the rotated frame (rotated.size()) + * + * Example: + * @code + * std::vector detections = runDetection(frame); + * inverseRotateBboxes(detections, orient, rotated.size()); + * @endcode + */ +template <typename Container> + requires requires(Container c) { + { c.begin()->bbox } -> std::convertible_to<computer_vision::BBox>; + } +void inverseRotateBboxes(Container &items, const FrameOrientation &orient, + cv::Size rotatedSize) { + for (auto &item : items) { + inverseRotateBbox(item.bbox, orient, rotatedSize); + } +} + } // namespace rnexecutorch::utils From 2062f5708a827842a8e59168f5d6bb21d5ca115c Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:01:40 +0200 Subject: [PATCH 12/19] feat(models): add forwardOrThrow helpers to BaseModel Add convenience methods to reduce error-checking boilerplate: - forwardOrThrow(EValue): Execute forward with single input - forwardOrThrow(vector): Execute forward with multiple inputs - executeOrThrow(string, vector): Execute named method All methods throw 
RnExecutorchError on failure with customizable messages. Replaces 4+ manual error-checking patterns across vision models. --- .../common/rnexecutorch/models/BaseModel.cpp | 35 ++++++++++++ .../common/rnexecutorch/models/BaseModel.h | 53 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp index 9bab6831ed..12707ecad0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp @@ -227,6 +227,41 @@ BaseModel::validateAndGetInputShape(const std::string &methodName, return shape; } +std::vector +BaseModel::forwardOrThrow(const EValue &input, + const std::string &contextMessage) const { + auto result = forward(input); + if (!result.ok()) { + throw RnExecutorchError(result.error(), contextMessage); + } + return std::move(result.get()); +} + +std::vector +BaseModel::forwardOrThrow(const std::vector &inputs, + const std::string &contextMessage) const { + auto result = forward(inputs); + if (!result.ok()) { + throw RnExecutorchError(result.error(), contextMessage); + } + return std::move(result.get()); +} + +std::vector +BaseModel::executeOrThrow(const std::string &methodName, + const std::vector &inputs, + const std::string &contextMessage) const { + auto result = execute(methodName, inputs); + if (!result.ok()) { + std::string message = + contextMessage.empty() + ? "Model " + methodName + " method failed. Ensure input is correct." 
+ : contextMessage; + throw RnExecutorchError(result.error(), message); + } + return std::move(result.get()); +} + std::vector BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const { auto sizes = tensor.sizes(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index f17e7615ee..a567b0c111 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -83,6 +83,59 @@ class BaseModel { validateAndGetInputShape(const std::string &methodName = "forward", size_t minDimensions = 2) const; + /** + * @brief Execute forward and throw on error + * + * Convenience helper that calls forward() and throws RnExecutorchError if + * the result is not ok. Reduces error-checking boilerplate in model + * implementations. + * + * @param input Single input value for the forward method + * @param contextMessage Custom error message (default: generic message) + * @return std::vector The successful forward result + * @throws RnExecutorchError if forward fails + */ + std::vector + forwardOrThrow(const EValue &input, + const std::string &contextMessage = + "Model forward failed. Ensure input is correct.") const; + + /** + * @brief Execute forward with multiple inputs and throw on error + * + * Convenience helper that calls forward() and throws RnExecutorchError if + * the result is not ok. Reduces error-checking boilerplate in model + * implementations. + * + * @param inputs Vector of input values for the forward method + * @param contextMessage Custom error message (default: generic message) + * @return std::vector The successful forward result + * @throws RnExecutorchError if forward fails + */ + std::vector + forwardOrThrow(const std::vector &inputs, + const std::string &contextMessage = + "Model forward failed. 
Ensure input is correct.") const; + + /** + * @brief Execute named method and throw on error + * + * Convenience helper that calls execute() and throws RnExecutorchError if + * the result is not ok. Reduces error-checking boilerplate in model + * implementations. + * + * @param methodName Name of the method to execute + * @param inputs Vector of input values for the method + * @param contextMessage Custom error message (default: auto-generated from + * method name) + * @return std::vector The successful execution result + * @throws RnExecutorchError if execution fails + */ + std::vector + executeOrThrow(const std::string &methodName, + const std::vector &inputs, + const std::string &contextMessage = "") const; + /// Name of the currently loaded method (for multi-method models). std::string currentlyLoadedMethod_; From 604d6bf01f245e8e66bd686bacee1941ef971082 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:02:14 +0200 Subject: [PATCH 13/19] refactor(classification): use tensor and error utilities Replace manual tensor-to-span conversion with utils::tensor::toSpan. Replace forward error checking with forwardOrThrow helper. Simplifies code and improves consistency with new utility patterns. 
--- .../models/classification/Classification.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index e4cc931d51..4589d58c61 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace rnexecutorch::models::classification { @@ -27,13 +28,10 @@ Classification::runInference(cv::Mat image) { cv::Mat preprocessed = preprocess(image); auto inputTensor = createInputTensor(preprocessed); - auto forwardResult = BaseModel::forward(inputTensor); - if (!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); - } - return postprocess(forwardResult->at(0).toTensor()); + auto outputs = forwardOrThrow(inputTensor, + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + return postprocess(outputs.at(0).toTensor()); } std::unordered_map @@ -58,8 +56,7 @@ Classification::generateFromPixels(JSTensorViewIn pixelData) { std::unordered_map Classification::postprocess(const Tensor &tensor) { - std::span resultData( - static_cast(tensor.const_data_ptr()), tensor.numel()); + auto resultData = utils::tensor::toSpan(tensor); std::vector resultVec(resultData.begin(), resultData.end()); if (resultVec.size() != labelNames_.size()) { From bff7197ecf5329d10b696c4623786a0468935076 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:02:53 +0200 Subject: [PATCH 14/19] refactor(semantic-seg): use tensor and error utilities Replace manual tensor-to-span conversion with utils::tensor::toSpan. 
Replace forward error checking with forwardOrThrow helper. Simplifies code and improves consistency with new utility patterns. --- .../BaseSemanticSegmentation.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp index eda9b2b73f..aecc6f625e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/semantic_segmentation/BaseSemanticSegmentation.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace rnexecutorch::models::semantic_segmentation { @@ -35,15 +36,12 @@ BaseSemanticSegmentation::runInference( cv::Mat preprocessed = VisionModel::preprocess(image); auto inputTensor = createInputTensor(preprocessed); - auto forwardResult = BaseModel::forward(inputTensor); - if (!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); - } + auto outputs = forwardOrThrow(inputTensor, + "The model's forward function did not succeed. 
" + "Ensure the model input is correct."); - return computeResult(forwardResult->at(0).toTensor(), originalSize, - allClasses_, classesOfInterest, resize); + return computeResult(outputs.at(0).toTensor(), originalSize, allClasses_, + classesOfInterest, resize); } semantic_segmentation::SegmentationResult @@ -107,8 +105,7 @@ BaseSemanticSegmentation::computeResult( std::vector &allClasses, std::set> &classesOfInterest, bool resize) { - const auto *dataPtr = tensor.const_data_ptr(); - auto resultData = std::span(dataPtr, tensor.numel()); + auto resultData = utils::tensor::toSpan(tensor); // Read output dimensions directly from tensor shape std::size_t numChannels = From cf0567c838e425829673b2d92941cdec452165a8 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:03:41 +0200 Subject: [PATCH 15/19] refactor(object-detection): use tensor, error, and rotation utilities Replace 3 manual tensor-to-span conversions with utils::tensor::toSpan. Replace execute error checking with executeOrThrow helper. Replace rotation loop with utils::inverseRotateBboxes batch helper. Simplifies code and improves consistency with new utility patterns. 
--- .../object_detection/ObjectDetection.cpp | 37 ++++++------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index d0da9fdb4e..c71a3716e4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -43,20 +44,9 @@ ObjectDetection::postprocess(const std::vector &tensors, std::set allowedClasses(classIndices.begin(), classIndices.end()); std::vector detections; - auto bboxTensor = tensors.at(0).toTensor(); - std::span bboxes( - static_cast(bboxTensor.const_data_ptr()), - bboxTensor.numel()); - - auto scoreTensor = tensors.at(1).toTensor(); - std::span scores( - static_cast(scoreTensor.const_data_ptr()), - scoreTensor.numel()); - - auto labelTensor = tensors.at(2).toTensor(); - std::span labels( - static_cast(labelTensor.const_data_ptr()), - labelTensor.numel()); + auto bboxes = utils::tensor::toSpan(tensors.at(0)); + auto scores = utils::tensor::toSpan(tensors.at(1)); + auto labels = utils::tensor::toSpan(tensors.at(2)); for (std::size_t i = 0; i < scores.size(); ++i) { if (scores[i] < detectionThreshold) { @@ -110,16 +100,13 @@ std::vector ObjectDetection::runInference( cv::Mat preprocessed = preprocess(image); auto inputTensor = createInputTensor(preprocessed); - auto executeResult = execute(methodName, {inputTensor}); - if (!executeResult.ok()) { - throw RnExecutorchError(executeResult.error(), - "The model's " + methodName + - " method did not succeed. " - "Ensure the model input is correct."); - } + auto outputs = executeOrThrow(methodName, {inputTensor}, + "The model's " + methodName + + " method did not succeed. 
" + "Ensure the model input is correct."); - return postprocess(executeResult.get(), originalSize, detectionThreshold, - iouThreshold, classIndices); + return postprocess(outputs, originalSize, detectionThreshold, iouThreshold, + classIndices); } std::vector ObjectDetection::generateFromString( @@ -138,9 +125,7 @@ std::vector ObjectDetection::generateFromFrame( auto detections = runInference(rotated, detectionThreshold, iouThreshold, classIndices, methodName); - for (auto &det : detections) { - ::rnexecutorch::utils::inverseRotateBbox(det.bbox, orient, rotated.size()); - } + utils::inverseRotateBboxes(detections, orient, rotated.size()); return detections; } From 02a95b49c748c5af5590a9b40741dc4aeb5582d1 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:04:52 +0200 Subject: [PATCH 16/19] refactor(instance-seg): use detection and error utilities Replace execute error checking with executeOrThrow helper. Replace private extractDetectionData with utils::computer_vision version. Delete duplicate extractDetectionData method (now in shared utils). Replace bbox rotation loop with utils::inverseRotateBboxes batch helper. Mask rotation remains inline as it's instance-specific logic. 
--- .../BaseInstanceSegmentation.cpp | 44 +++++++------------ .../BaseInstanceSegmentation.h | 4 -- 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 33511c3a92..9b4e554cdd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -51,21 +51,17 @@ std::vector BaseInstanceSegmentation::runInference( "confidenceThreshold"); utils::computer_vision::validateThreshold(iouThreshold, "iouThreshold"); - auto forwardResult = - BaseModel::execute(methodName, {buildInputTensor(image)}); - if (!forwardResult.ok()) { - throw RnExecutorchError( - forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct and method name '" + - methodName + "' is valid."); - } + auto outputs = + executeOrThrow(methodName, {buildInputTensor(image)}, + "The model's forward function did not succeed. 
" + "Ensure the model input is correct and method name '" + + methodName + "' is valid."); - validateOutputTensors(forwardResult.get()); + validateOutputTensors(outputs); - auto instances = collectInstances( - forwardResult.get(), originalSize, modelInputSize, confidenceThreshold, - classIndices, returnMaskAtOriginalResolution); + auto instances = collectInstances(outputs, originalSize, modelInputSize, + confidenceThreshold, classIndices, + returnMaskAtOriginalResolution); return finalizeInstances(std::move(instances), iouThreshold, maxInstances); } @@ -89,9 +85,12 @@ std::vector BaseInstanceSegmentation::generateFromFrame( auto instances = runInference(rotated, confidenceThreshold, iouThreshold, maxInstances, classIndices, returnMaskAtOriginalResolution, methodName); + + // Inverse-rotate bboxes for all instances + utils::inverseRotateBboxes(instances, orient, rotated.size()); + + // Inverse-rotate masks (instance-specific logic) for (auto &inst : instances) { - utils::inverseRotateBbox(inst.bbox, orient, rotated.size()); - // Inverse-rotate the mask to match the screen orientation cv::Mat maskMat(inst.maskHeight, inst.maskWidth, CV_8UC1, inst.mask->data()); cv::Mat invMask = utils::inverseRotateMat(maskMat, orient); @@ -113,19 +112,6 @@ std::vector BaseInstanceSegmentation::generateFromPixels( classIndices, returnMaskAtOriginalResolution, methodName); } -std::tuple -BaseInstanceSegmentation::extractDetectionData(const float *bboxData, - const float *scoresData, - int32_t index) { - utils::computer_vision::BBox bbox{ - bboxData[index * 4], bboxData[index * 4 + 1], bboxData[index * 4 + 2], - bboxData[index * 4 + 3]}; - float score = scoresData[index * 2]; - int32_t label = static_cast(scoresData[index * 2 + 1]); - - return {bbox, score, label}; -} - cv::Rect BaseInstanceSegmentation::computeMaskCropRect( const utils::computer_vision::BBox &bboxModel, cv::Size modelInputSize, cv::Size maskSize) { @@ -262,7 +248,7 @@ std::vector 
BaseInstanceSegmentation::collectInstances( for (int32_t i = 0; i < numInstances; ++i) { auto [bboxModel, score, labelIdx] = - extractDetectionData(bboxData, scoresData, i); + utils::computer_vision::extractDetectionData(bboxData, scoresData, i); if (!isValidDetection(score, labelIdx)) { continue; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h index f16cb4b14d..541ef8b683 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h @@ -63,10 +63,6 @@ class BaseInstanceSegmentation : public VisionModel { void validateOutputTensors(const std::vector &tensors) const; - std::tuple - extractDetectionData(const float *bboxData, const float *scoresData, - int32_t index); - cv::Rect computeMaskCropRect(const utils::computer_vision::BBox &bboxModel, cv::Size modelInputSize, cv::Size maskSize); From 356764e590f0e20c791fa77b091b8860b03330d0 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:05:10 +0200 Subject: [PATCH 17/19] refactor(style-transfer): use error utilities Replace forward error checking with forwardOrThrow helper. Simplifies code and improves consistency with new utility patterns. 
--- .../models/style_transfer/StyleTransfer.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index f045a13bde..a5a6d63b00 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -25,15 +25,12 @@ cv::Mat StyleTransfer::runInference(cv::Mat image, cv::Size outputSize) { cv::Mat preprocessed = preprocess(image); auto inputTensor = createInputTensor(preprocessed); - auto forwardResult = BaseModel::forward(inputTensor); - if (!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); - } + auto outputs = forwardOrThrow(inputTensor, + "The model's forward function did not succeed. " + "Ensure the model input is correct."); - cv::Mat mat = image_processing::getMatrixFromTensor( - modelInputSize(), forwardResult->at(0).toTensor()); + cv::Mat mat = image_processing::getMatrixFromTensor(modelInputSize(), + outputs.at(0).toTensor()); if (mat.size() != outputSize) { cv::resize(mat, mat, outputSize); } From 4cd6d70159b77062924f2109c4fbbd3489406ac8 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 10 Apr 2026 18:05:29 +0200 Subject: [PATCH 18/19] refactor(image-embeddings): use error utilities Replace forward error checking with forwardOrThrow helper. Simplifies code and improves consistency with new utility patterns. 
--- .../models/embeddings/image/ImageEmbeddings.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index 8f822bd6c8..0385fa320f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -20,16 +20,12 @@ ImageEmbeddings::runInference(cv::Mat image) { cv::Mat preprocessed = preprocess(image); auto inputTensor = createInputTensor(preprocessed); - auto forwardResult = BaseModel::forward(inputTensor); + auto outputs = forwardOrThrow( + inputTensor, + "The model's forward function did not succeed. Ensure the model input " + "is correct."); - if (!forwardResult.ok()) { - throw RnExecutorchError( - forwardResult.error(), - "The model's forward function did not succeed. 
Ensure the model input " - "is correct."); - } - - auto forwardResultTensor = forwardResult->at(0).toTensor(); + auto forwardResultTensor = outputs.at(0).toTensor(); return std::make_shared( forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes()); } From 10e089c0be5addea3790975c5f3cceee83a7a6a0 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Tue, 14 Apr 2026 10:39:55 +0200 Subject: [PATCH 19/19] test: add unit tests for new utility functions Add comprehensive unit tests for refactoring utilities: - TensorHelpersTest: Test toSpan for Tensor and EValue conversions * Float and int32 tensors * Multidimensional tensors * Empty tensors * Type safety and const correctness - ComputerVisionProcessingTest: Test extractDetectionData * Single and multiple detections * Various indices and label formats * Edge cases (negative coords, fractional values) - FrameTransformTest: Test inverseRotateBboxes batch helper * Batch rotation of multiple detections * Empty containers and single detection * Preservation of non-bbox fields Updated CMakeLists.txt to register new test executables. 
--- .../common/rnexecutorch/models/BaseModel.h | 64 +------ .../common/rnexecutorch/models/VisionModel.h | 56 ++---- .../common/rnexecutorch/tests/CMakeLists.txt | 7 + .../unit/ComputerVisionProcessingTest.cpp | 163 ++++++++++++++++++ .../tests/unit/FrameTransformTest.cpp | 65 +++++++ .../tests/unit/TensorHelpersTest.cpp | 121 +++++++++++++ .../rnexecutorch/utils/FrameTransform.h | 17 +- .../common/rnexecutorch/utils/TensorHelpers.h | 42 +---- .../utils/computer_vision/Processing.h | 27 +-- 9 files changed, 379 insertions(+), 183 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/tests/unit/ComputerVisionProcessingTest.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/tests/unit/TensorHelpersTest.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h index a567b0c111..5a0a7eec00 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/BaseModel.h @@ -53,84 +53,24 @@ class BaseModel { std::size_t memorySizeLowerBound{0}; - /** - * @brief Ensures the specified method is loaded, unloading any previous - * method if necessary. - * - * This helper is useful for models that support multiple methods with - * different input sizes (e.g., "forward_384", "forward_512", "forward_640"). - * - * @param methodName Name of the method to load (e.g., "forward", - * "forward_384"). - * @throws RnExecutorchError if the method cannot be loaded or if methodName - * is empty. - */ + /// Loads methodName, unloading any previously loaded method first. + /// Useful for multi-method models (e.g., "forward_384", "forward_640"). 
void ensureMethodLoaded(const std::string &methodName); - /** - * @brief Validate and get input shape for model - * - * Validates that the model has at least one input tensor and that the first - * input has the minimum required dimensions. - * - * @param methodName Method to get shapes for (default: "forward") - * @param minDimensions Minimum expected dimensions (default: 2) - * @throws RnExecutorchError if validation fails (no inputs or insufficient - * dimensions) - * @return The first input shape vector - */ std::vector validateAndGetInputShape(const std::string &methodName = "forward", size_t minDimensions = 2) const; - /** - * @brief Execute forward and throw on error - * - * Convenience helper that calls forward() and throws RnExecutorchError if - * the result is not ok. Reduces error-checking boilerplate in model - * implementations. - * - * @param input Single input value for the forward method - * @param contextMessage Custom error message (default: generic message) - * @return std::vector The successful forward result - * @throws RnExecutorchError if forward fails - */ std::vector forwardOrThrow(const EValue &input, const std::string &contextMessage = "Model forward failed. Ensure input is correct.") const; - /** - * @brief Execute forward with multiple inputs and throw on error - * - * Convenience helper that calls forward() and throws RnExecutorchError if - * the result is not ok. Reduces error-checking boilerplate in model - * implementations. - * - * @param inputs Vector of input values for the forward method - * @param contextMessage Custom error message (default: generic message) - * @return std::vector The successful forward result - * @throws RnExecutorchError if forward fails - */ std::vector forwardOrThrow(const std::vector &inputs, const std::string &contextMessage = "Model forward failed. 
Ensure input is correct.") const; - /** - * @brief Execute named method and throw on error - * - * Convenience helper that calls execute() and throws RnExecutorchError if - * the result is not ok. Reduces error-checking boilerplate in model - * implementations. - * - * @param methodName Name of the method to execute - * @param inputs Vector of input values for the method - * @param contextMessage Custom error message (default: auto-generated from - * method name) - * @return std::vector The successful execution result - * @throws RnExecutorchError if execution fails - */ std::vector executeOrThrow(const std::string &methodName, const std::vector &inputs, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 2bd1f5859c..60ba94e640 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -85,16 +85,11 @@ class VisionModel : public BaseModel { /// Set once by each subclass constructor to avoid per-frame metadata lookups. std::vector modelInputShape_; - /// Normalization mean values (RGB channels). - /// Optional: set via initNormalization() for models expecting normalized - /// inputs (e.g., ImageNet preprocessing). Leave as nullopt for models with - /// built-in normalization or raw pixel input expectations. + /// Per-channel normalization mean (RGB). nullopt = no normalization applied. std::optional normMean_; - /// Normalization standard deviation values (RGB channels). - /// Optional: set via initNormalization() for models expecting normalized - /// inputs (e.g., ImageNet preprocessing). Leave as nullopt for models with - /// built-in normalization or raw pixel input expectations. + /// Per-channel normalization std-dev (RGB). nullopt = no normalization + /// applied. 
std::optional normStd_; /** @@ -135,57 +130,30 @@ class VisionModel : public BaseModel { virtual cv::Size modelInputSize() const; /** - * @brief Get model input spatial dimensions for a specific method. + * @brief Get input size for a specific method (last two shape dims). * * Useful for multi-method models with different input sizes per method. - * Returns the last two dimensions of the input shape as cv::Size. - * - * @param methodName Method to query (uses currentlyLoadedMethod_ if empty) - * @return Size (width, height) of the model input for the specified method - * @throws RnExecutorchError if method metadata cannot be retrieved + * Falls back to currentlyLoadedMethod_ when methodName is empty. */ cv::Size getModelInputSize(const std::string &methodName = "") const; /** - * @brief Initialize normalization parameters from vectors - * - * Validates size == 3 and converts to cv::Scalar. - * Logs warning if invalid but non-empty. Sets nullopt if empty/invalid. + * @brief Set normMean_/normStd_ from float vectors. * - * @param normMean Mean values for RGB channels (expected size: 3) - * @param normStd Standard deviation values for RGB channels (expected size: - * 3) + * Expects size == 3. Logs a warning and ignores if non-empty but wrong size. */ void initNormalization(const std::vector &normMean, const std::vector &normStd); - /** - * @brief Create input tensor from preprocessed image - * - * Applies normalization if normMean_ and normStd_ are set. - * - * @param preprocessed Preprocessed image (resized, RGB format) - * @return TensorPtr ready for model input - */ + /// Builds input tensor from a preprocessed image. + /// Applies normalization if normMean_/normStd_ are set, skips it otherwise. 
TensorPtr createInputTensor(const cv::Mat &preprocessed) const; - /** - * @brief Load and convert image from path to RGB format - * - * Common preprocessing: readImage (BGR) → convert to RGB - * - * @param imageSource Path to the image file - * @return cv::Mat in RGB format - */ + /// Reads image from path and converts BGR → RGB. cv::Mat loadImageToRGB(const std::string &imageSource) const; - /** - * @brief Process camera frame with rotation support - * - * @param runtime JSI runtime - * @param frameData JSI value containing frame data from VisionCamera - * @return Tuple of {rotated RGB frame, orientation info, original size} - */ + /// Extracts a camera frame, applies rotation, and returns + /// {rotated frame, orientation, original size}. std::tuple loadFrameRotated(jsi::Runtime &runtime, const jsi::Value &frameData) const; diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index cf5289bf92..aa030737f1 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -171,6 +171,13 @@ add_rn_test(FrameTransformTests unit/FrameTransformTest.cpp LIBS opencv_deps ) +add_rn_test(TensorHelpersTest unit/TensorHelpersTest.cpp) + +add_rn_test(ComputerVisionProcessingTest unit/ComputerVisionProcessingTest.cpp + SOURCES + ${RNEXECUTORCH_DIR}/utils/computer_vision/Processing.cpp +) + add_rn_test(BaseModelTests integration/BaseModelTest.cpp) add_rn_test(VisionModelTests integration/VisionModelTest.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/unit/ComputerVisionProcessingTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/unit/ComputerVisionProcessingTest.cpp new file mode 100644 index 0000000000..57d71b7368 --- /dev/null +++ 
b/packages/react-native-executorch/common/rnexecutorch/tests/unit/ComputerVisionProcessingTest.cpp @@ -0,0 +1,163 @@ +#include +#include +#include +#include + +using namespace rnexecutorch::utils::computer_vision; + +// ============================================================================ +// extractDetectionData — Extract bbox, score, label from raw tensor data +// ============================================================================ + +TEST(ExtractDetectionData, SingleDetection) { + // Format: bboxData = [x1, y1, x2, y2] per detection + // scoresData = [score, label] per detection + std::vector bboxData = {10.0f, 20.0f, 100.0f, 200.0f}; + std::vector scoresData = {0.95f, 5.0f}; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_FLOAT_EQ(bbox.x1, 10.0f); + EXPECT_FLOAT_EQ(bbox.y1, 20.0f); + EXPECT_FLOAT_EQ(bbox.x2, 100.0f); + EXPECT_FLOAT_EQ(bbox.y2, 200.0f); + EXPECT_FLOAT_EQ(score, 0.95f); + EXPECT_EQ(label, 5); +} + +TEST(ExtractDetectionData, MultipleDetections_FirstIndex) { + std::vector bboxData = { + 10.0f, 20.0f, 100.0f, 200.0f, // Detection 0 + 150.0f, 50.0f, 250.0f, 150.0f, // Detection 1 + 300.0f, 100.0f, 400.0f, 300.0f // Detection 2 + }; + std::vector scoresData = { + 0.95f, 5.0f, // Detection 0: score, label + 0.85f, 3.0f, // Detection 1 + 0.75f, 12.0f // Detection 2 + }; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_FLOAT_EQ(bbox.x1, 10.0f); + EXPECT_FLOAT_EQ(bbox.y1, 20.0f); + EXPECT_FLOAT_EQ(bbox.x2, 100.0f); + EXPECT_FLOAT_EQ(bbox.y2, 200.0f); + EXPECT_FLOAT_EQ(score, 0.95f); + EXPECT_EQ(label, 5); +} + +TEST(ExtractDetectionData, MultipleDetections_SecondIndex) { + std::vector bboxData = { + 10.0f, 20.0f, 100.0f, 200.0f, // Detection 0 + 150.0f, 50.0f, 250.0f, 150.0f, // Detection 1 + 300.0f, 100.0f, 400.0f, 300.0f // Detection 2 + }; + std::vector scoresData = { + 0.95f, 5.0f, // Detection 0 + 0.85f, 3.0f, // 
Detection 1 + 0.75f, 12.0f // Detection 2 + }; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 1); + + EXPECT_FLOAT_EQ(bbox.x1, 150.0f); + EXPECT_FLOAT_EQ(bbox.y1, 50.0f); + EXPECT_FLOAT_EQ(bbox.x2, 250.0f); + EXPECT_FLOAT_EQ(bbox.y2, 150.0f); + EXPECT_FLOAT_EQ(score, 0.85f); + EXPECT_EQ(label, 3); +} + +TEST(ExtractDetectionData, MultipleDetections_ThirdIndex) { + std::vector bboxData = { + 10.0f, 20.0f, 100.0f, 200.0f, // Detection 0 + 150.0f, 50.0f, 250.0f, 150.0f, // Detection 1 + 300.0f, 100.0f, 400.0f, 300.0f // Detection 2 + }; + std::vector scoresData = { + 0.95f, 5.0f, // Detection 0 + 0.85f, 3.0f, // Detection 1 + 0.75f, 12.0f // Detection 2 + }; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 2); + + EXPECT_FLOAT_EQ(bbox.x1, 300.0f); + EXPECT_FLOAT_EQ(bbox.y1, 100.0f); + EXPECT_FLOAT_EQ(bbox.x2, 400.0f); + EXPECT_FLOAT_EQ(bbox.y2, 300.0f); + EXPECT_FLOAT_EQ(score, 0.75f); + EXPECT_EQ(label, 12); +} + +TEST(ExtractDetectionData, LowConfidenceDetection) { + std::vector bboxData = {50.0f, 60.0f, 150.0f, 160.0f}; + std::vector scoresData = {0.05f, 1.0f}; // Very low confidence + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_FLOAT_EQ(score, 0.05f); + EXPECT_EQ(label, 1); +} + +TEST(ExtractDetectionData, ZeroBasedLabelIndex) { + std::vector bboxData = {0.0f, 0.0f, 100.0f, 100.0f}; + std::vector scoresData = {0.9f, 0.0f}; // Label index 0 + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_EQ(label, 0); +} + +TEST(ExtractDetectionData, LargeLabelIndex) { + std::vector bboxData = {0.0f, 0.0f, 100.0f, 100.0f}; + std::vector scoresData = {0.9f, 999.0f}; // Large label index + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_EQ(label, 999); +} + +TEST(ExtractDetectionData, FloatToInt32Conversion) { + std::vector 
bboxData = {0.0f, 0.0f, 100.0f, 100.0f}; + std::vector scoresData = {0.9f, 42.7f}; // Float label gets truncated + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_EQ(label, 42); // Should truncate, not round +} + +TEST(ExtractDetectionData, NegativeCoordinates) { + std::vector bboxData = {-10.0f, -20.0f, 50.0f, 60.0f}; + std::vector scoresData = {0.8f, 2.0f}; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_FLOAT_EQ(bbox.x1, -10.0f); + EXPECT_FLOAT_EQ(bbox.y1, -20.0f); + EXPECT_FLOAT_EQ(bbox.x2, 50.0f); + EXPECT_FLOAT_EQ(bbox.y2, 60.0f); +} + +TEST(ExtractDetectionData, FractionalCoordinates) { + std::vector bboxData = {10.5f, 20.75f, 100.25f, 200.9f}; + std::vector scoresData = {0.88f, 7.0f}; + + auto [bbox, score, label] = + extractDetectionData(bboxData.data(), scoresData.data(), 0); + + EXPECT_FLOAT_EQ(bbox.x1, 10.5f); + EXPECT_FLOAT_EQ(bbox.y1, 20.75f); + EXPECT_FLOAT_EQ(bbox.x2, 100.25f); + EXPECT_FLOAT_EQ(bbox.y2, 200.9f); +} diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/unit/FrameTransformTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/unit/FrameTransformTest.cpp index b5c0993128..1ec6d52ce6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/unit/FrameTransformTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/unit/FrameTransformTest.cpp @@ -345,3 +345,68 @@ TEST(InverseRotatePoints, Down_CCW) { EXPECT_FLOAT_EQ(pts[3].x, 80); EXPECT_FLOAT_EQ(pts[3].y, 570); } + +// ============================================================================ +// inverseRotateBboxes — batch inverse rotation for containers with .bbox +// ============================================================================ + +struct Detection { + BBox bbox; + float score; +}; + +// Test batch rotation of multiple detections +TEST(InverseRotateBboxes, BatchRotation_Up) { + std::vector 
detections = {{{10, 20, 100, 200}, 0.9f}, + {{150, 50, 200, 150}, 0.8f}, + {{250, 100, 300, 250}, 0.7f}}; + + inverseRotateBboxes(detections, makeOrient("up", false), {640, 480}); + + // First detection: (10,20)-(100,200) → CW + EXPECT_FLOAT_EQ(detections[0].bbox.x1, 280); + EXPECT_FLOAT_EQ(detections[0].bbox.y1, 10); + EXPECT_FLOAT_EQ(detections[0].bbox.x2, 460); + EXPECT_FLOAT_EQ(detections[0].bbox.y2, 100); + + // Second detection: (150,50)-(200,150) → CW + EXPECT_FLOAT_EQ(detections[1].bbox.x1, 330); + EXPECT_FLOAT_EQ(detections[1].bbox.y1, 150); + EXPECT_FLOAT_EQ(detections[1].bbox.x2, 430); + EXPECT_FLOAT_EQ(detections[1].bbox.y2, 200); + + // Third detection: (250,100)-(300,250) → CW + EXPECT_FLOAT_EQ(detections[2].bbox.x1, 230); + EXPECT_FLOAT_EQ(detections[2].bbox.y1, 250); + EXPECT_FLOAT_EQ(detections[2].bbox.x2, 380); + EXPECT_FLOAT_EQ(detections[2].bbox.y2, 300); +} + +// Test with empty container +TEST(InverseRotateBboxes, EmptyContainer) { + std::vector detections; + inverseRotateBboxes(detections, makeOrient("up", false), {640, 480}); + EXPECT_EQ(detections.size(), 0); +} + +// Test with single detection +TEST(InverseRotateBboxes, SingleDetection) { + std::vector detections = {{{10, 20, 100, 200}, 0.9f}}; + inverseRotateBboxes(detections, makeOrient("left", false), {640, 480}); + + // "left" → no-op + EXPECT_FLOAT_EQ(detections[0].bbox.x1, 10); + EXPECT_FLOAT_EQ(detections[0].bbox.y1, 20); + EXPECT_FLOAT_EQ(detections[0].bbox.x2, 100); + EXPECT_FLOAT_EQ(detections[0].bbox.y2, 200); + EXPECT_FLOAT_EQ(detections[0].score, 0.9f); +} + +// Test that other fields are preserved +TEST(InverseRotateBboxes, PreservesOtherFields) { + std::vector detections = {{{10, 20, 100, 200}, 0.95f}}; + inverseRotateBboxes(detections, makeOrient("down", false), {640, 480}); + + // Score should be unchanged + EXPECT_FLOAT_EQ(detections[0].score, 0.95f); +} diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/unit/TensorHelpersTest.cpp 
b/packages/react-native-executorch/common/rnexecutorch/tests/unit/TensorHelpersTest.cpp new file mode 100644 index 0000000000..a1c5077e3a --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/tests/unit/TensorHelpersTest.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include + +using namespace rnexecutorch::utils::tensor; +using executorch::aten::ScalarType; +using executorch::extension::make_tensor_ptr; +using executorch::runtime::EValue; + +// ============================================================================ +// toSpan(Tensor) — Convert tensor to typed span +// ============================================================================ + +TEST(TensorHelpers, ToSpan_FloatTensor) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + auto tensor = make_tensor_ptr({5}, data.data(), ScalarType::Float); + + auto span = toSpan(*tensor); + + EXPECT_EQ(span.size(), 5); + EXPECT_FLOAT_EQ(span[0], 1.0f); + EXPECT_FLOAT_EQ(span[1], 2.0f); + EXPECT_FLOAT_EQ(span[2], 3.0f); + EXPECT_FLOAT_EQ(span[3], 4.0f); + EXPECT_FLOAT_EQ(span[4], 5.0f); +} + +TEST(TensorHelpers, ToSpan_Int32Tensor) { + std::vector data = {10, 20, 30, 40}; + auto tensor = make_tensor_ptr({4}, data.data(), ScalarType::Int); + + auto span = toSpan(*tensor); + + EXPECT_EQ(span.size(), 4); + EXPECT_EQ(span[0], 10); + EXPECT_EQ(span[1], 20); + EXPECT_EQ(span[2], 30); + EXPECT_EQ(span[3], 40); +} + +TEST(TensorHelpers, ToSpan_MultidimensionalTensor) { + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + auto tensor = make_tensor_ptr({2, 3}, data.data(), ScalarType::Float); + + auto span = toSpan(*tensor); + + // Should flatten to 1D span + EXPECT_EQ(span.size(), 6); + EXPECT_FLOAT_EQ(span[0], 1.0f); + EXPECT_FLOAT_EQ(span[5], 6.0f); +} + +TEST(TensorHelpers, ToSpan_EmptyTensor) { + std::vector data; + auto tensor = make_tensor_ptr({0}, data.data(), ScalarType::Float); + + auto span = toSpan(*tensor); + + EXPECT_EQ(span.size(), 0); +} + +// 
============================================================================ +// toSpan(EValue) — Extract tensor from EValue then convert to span +// ============================================================================ + +TEST(TensorHelpers, ToSpan_FromEValue) { + std::vector data = {1.5f, 2.5f, 3.5f}; + auto tensor = make_tensor_ptr({3}, data.data(), ScalarType::Float); + EValue evalue(*tensor); + + auto span = toSpan(evalue); + + EXPECT_EQ(span.size(), 3); + EXPECT_FLOAT_EQ(span[0], 1.5f); + EXPECT_FLOAT_EQ(span[1], 2.5f); + EXPECT_FLOAT_EQ(span[2], 3.5f); +} + +TEST(TensorHelpers, ToSpan_FromEValue_LargeTensor) { + std::vector data(100); + for (int i = 0; i < 100; ++i) { + data[i] = static_cast(i); + } + auto tensor = make_tensor_ptr({100}, data.data(), ScalarType::Float); + EValue evalue(*tensor); + + auto span = toSpan(evalue); + + EXPECT_EQ(span.size(), 100); + EXPECT_FLOAT_EQ(span[0], 0.0f); + EXPECT_FLOAT_EQ(span[50], 50.0f); + EXPECT_FLOAT_EQ(span[99], 99.0f); +} + +// ============================================================================ +// Type safety and const correctness +// ============================================================================ + +TEST(TensorHelpers, SpanIsConst) { + std::vector data = {1.0f, 2.0f, 3.0f}; + auto tensor = make_tensor_ptr({3}, data.data(), ScalarType::Float); + + auto span = toSpan(*tensor); + + // Verify span is const (compile-time check, but we can verify element type) + static_assert( + std::is_const_v>); +} + +TEST(TensorHelpers, CorrectDataPointer) { + std::vector data = {1.0f, 2.0f, 3.0f}; + auto tensor = make_tensor_ptr({3}, data.data(), ScalarType::Float); + + auto span = toSpan(*tensor); + + // Span should point to the same data as the original tensor + EXPECT_EQ(span.data(), tensor->const_data_ptr()); +} diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h index 
10326944ea..7eaf66009a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameTransform.h @@ -113,22 +113,9 @@ void inverseRotatePoints(std::array &points, } /** - * @brief Apply inverse rotation to bounding boxes in a batch + * @brief Inverse-rotate all bboxes in a container of detections/instances. * - * Convenience helper to inverse-rotate bboxes for all items in a container. - * Each item must have a .bbox member of type computer_vision::BBox. - * - * @tparam Container Type that supports iteration and has items with .bbox - * member - * @param items Container of detection/segmentation results - * @param orient Frame orientation info (from loadFrameRotated) - * @param rotatedSize Size of the rotated frame (rotated.size()) - * - * Example: - * @code - * std::vector detections = runDetection(frame); - * inverseRotateBboxes(detections, orient, rotated.size()); - * @endcode + * Items must expose a .bbox member of type computer_vision::BBox. */ template requires requires(Container c) { diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h b/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h index 23e6c89bd1..67e129584c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/TensorHelpers.h @@ -8,50 +8,16 @@ namespace rnexecutorch { namespace utils { namespace tensor { -/** - * @brief Convert a tensor to a typed span for safe data access - * - * Provides type-safe access to tensor data via std::span, eliminating - * manual pointer arithmetic and size calculations. 
- * - * @tparam T The element type (e.g., float, int32_t) - * @param tensor The tensor to convert - * @return std::span A read-only view of the tensor data - * - * @note The returned span is valid only as long as the tensor exists - * - * Example: - * @code - * auto tensor = getTensor(); - * auto data = tensor::toSpan(tensor); - * for (float value : data) { - * // Process value... - * } - * @endcode - */ +/// Returns a read-only span over the tensor's flat data buffer. +/// The span is valid only as long as the tensor exists. template std::span toSpan(const executorch::aten::Tensor &tensor) { return std::span(static_cast(tensor.const_data_ptr()), tensor.numel()); } -/** - * @brief Convert an EValue containing a tensor to a typed span - * - * Convenience overload for extracting tensor data from EValue results. - * - * @tparam T The element type (e.g., float, int32_t) - * @param evalue The EValue containing a tensor - * @return std::span A read-only view of the tensor data - * - * @note Assumes evalue.isTensor() == true. Behavior is undefined otherwise. - * - * Example: - * @code - * auto result = model.forward(input); - * auto outputs = tensor::toSpan(result.get()[0]); - * @endcode - */ +/// Convenience overload that extracts the tensor from an EValue first. +/// Assumes evalue.isTensor() == true. 
template std::span toSpan(const executorch::runtime::EValue &evalue) { return toSpan(evalue.toTensor()); diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h index a296fe6e25..20b69792f9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h @@ -12,34 +12,13 @@ namespace rnexecutorch::utils::computer_vision { float computeIoU(const BBox &a, const BBox &b); -/** - * @brief Extract detection data at a specific index from raw tensor buffers - * - * Parses bounding box coordinates, confidence score, and class label from - * typical object detection model outputs. - * - * @param bboxData Pointer to bounding box data (format: [x1, y1, x2, y2] per - * detection) - * @param scoresData Pointer to scores data (format: [score, label] per - * detection) - * @param index Index of the detection to extract - * @return Tuple of {bbox, score, label} - * - * Example: - * @code - * auto [bbox, score, label] = extractDetectionData(bboxData, scoresData, i); - * @endcode - */ +/// Extracts {bbox, score, label} at index from raw model output buffers. +/// bboxData layout: [x1, y1, x2, y2] per detection. +/// scoresData layout: [score, label] per detection. std::tuple extractDetectionData(const float *bboxData, const float *scoresData, int32_t index); -/** - * @brief Validate that a threshold is in [0, 1] range - * @param value Threshold value to validate - * @param name Name of the threshold (for error messages) - * @throws RnExecutorchError if value is out of range - */ void validateThreshold(double value, const std::string &name); template