From 309c2940a66d005ed47c6a7acdf528a224771ce3 Mon Sep 17 00:00:00 2001
From: James Aguilar
Date: Thu, 23 Apr 2026 22:46:57 -0600
Subject: [PATCH] Avoid reevaluating arguments.

_round, _constrain and _sign currently reevaluate their arguments, which
wastes CPU cycles. In torque/foc_current mode, this fix saves about
300ns/foc loop (about 2% of total CPU usage). In other modes, especially
estimated_current, it saves more.
---
 src/common/foc_utils.h | 53 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/src/common/foc_utils.h b/src/common/foc_utils.h
index 9e2ab456..651bb8ab 100644
--- a/src/common/foc_utils.h
+++ b/src/common/foc_utils.h
@@ -3,12 +3,55 @@
 
 #include "Arduino.h"
 
-// sign function
-#define _sign(a) ( ( (a) < 0 ) ? -1 : ( (a) > 0 ) )
+// sign function: returns -1, 0 or +1; the argument is evaluated exactly once
+template <typename T>
+constexpr inline int _sign(T val) {
+  return (val > 0) - (val < 0);
+}
+
+#ifndef __AVR__
+#include <type_traits>
+
 #ifndef _round
-#define _round(x) ((x)>=0?(long)((x)+0.5f):(long)((x)-0.5f))
+// Use enable_if to select the roundf function for single precision floats.
+// This improves performance when -ffast-math is not set.
+template <typename T>
+constexpr inline typename std::enable_if<std::is_same<T, float>::value, long>::type _round(T x) {
+  return __builtin_roundf(x);
+}
+template <typename T>
+constexpr inline typename std::enable_if<!std::is_same<T, float>::value, long>::type _round(T x) {
+  return __builtin_round(x);
+}
+#endif
+
+// Use enable_if to select the fastest implementation according to the amt type.
+// Using __builtin_fXf is measurably faster than using the ternary approach.
+template <typename T, typename L, typename H>
+constexpr inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, double>::value, T>::type _constrain(T amt, L low, H high) {
+  return (amt < low) ? low : (amt > high) ? high : amt;
+}
+template <typename T, typename L, typename H>
+constexpr inline typename std::enable_if<std::is_same<T, float>::value, T>::type _constrain(T amt, L low, H high) {
+  return __builtin_fmaxf(low, __builtin_fminf(high, amt));
+}
+template <typename T, typename L, typename H>
+constexpr inline typename std::enable_if<std::is_same<T, double>::value, T>::type _constrain(T amt, L low, H high) {
+  return __builtin_fmax(low, __builtin_fmin(high, amt));
+}
+#else // __AVR__
+// AVR compiler lacks type_traits, so we are forced to use the slower non-type inferenced
+// version. That's okay, right? If you wanted to go fast you would not be on AVR.
+template <typename T>
+constexpr long _round(T x) {
+  return __builtin_round(x);
+}
+template <typename T, typename L, typename H>
+constexpr T _constrain(T amt, L low, H high) {
+  return (amt < low) ? low : (amt > high) ? high : amt;
+}
 #endif
-#define _constrain(amt,low,high) ((amt)<(low)?(low):((amt)>(high)?(high):(amt)))
+
 #define _sqrt(a) (_sqrtApprox(a))
 #define _isset(a) ( (a) != (NOT_SET) )
 #define _UNUSED(v) (void) (v)
@@ -121,4 +164,4 @@ float _electricalAngle(float shaft_angle, int pole_pairs);
  */
 float _sqrtApprox(float value);
 
-#endif
\ No newline at end of file
+#endif