diff --git a/include/xsimd/config/xsimd_cpu_features_arm.hpp b/include/xsimd/config/xsimd_cpu_features_arm.hpp
index add0162c4..3cf62dd37 100644
--- a/include/xsimd/config/xsimd_cpu_features_arm.hpp
+++ b/include/xsimd/config/xsimd_cpu_features_arm.hpp
@@ -12,6 +12,9 @@
 #ifndef XSIMD_CPU_FEATURES_ARM_HPP
 #define XSIMD_CPU_FEATURES_ARM_HPP
 
+#include <cstddef>
+#include <cstdint>
+
 #include "./xsimd_config.hpp"
 #include "./xsimd_getauxval.hpp"
 
@@ -24,6 +27,24 @@
 
 namespace xsimd
 {
+
+    namespace detail
+    {
+        using arm_reg64_t = std::uint64_t;
+
+        /**
+         * Return the SVE vector length in bytes for the current thread.
+         *
+         * The SVE vector length can be restricted, so it is queried at runtime.
+         * Contrary to `svcntb`, this does not need to be compiled with SVE enabled,
+         * which should not be done in a dynamic dispatch jump function.
+         *
+         * Safety: It is the user's responsibility to first make sure that SVE is
+         * available.
+         */
+        inline arm_reg64_t arm_rdvl_unsafe() noexcept;
+    }
+
     /**
      * An opinionated CPU feature detection utility for ARM.
      *
@@ -40,6 +61,7 @@ namespace xsimd
         inline bool neon() const noexcept;
         inline bool neon64() const noexcept;
         inline bool sve() const noexcept;
+        inline std::size_t sve_size_bytes() const noexcept;
         inline bool i8mm() const noexcept;
     };
 
@@ -47,6 +69,20 @@ namespace xsimd
      * Implementation *
      ********************/
 
+    namespace detail
+    {
+#if XSIMD_TARGET_ARM64 && (defined(__GNUC__) || defined(__clang__))
+        __attribute__((target("arch=armv8-a+sve"))) inline arm_reg64_t arm_rdvl_unsafe() noexcept
+        {
+            arm_reg64_t vl = 0;
+            __asm__ volatile("rdvl %0, #1" : "=r"(vl)); // multiplier #1: plain length in bytes
+            return vl;
+        }
+#else
+        inline arm_reg64_t arm_rdvl_unsafe() noexcept { return 0; }
+#endif
+    }
+
     inline bool arm_cpu_features::neon() const noexcept
     {
 #if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_HAVE_LINUX_GETAUXVAL
@@ -70,6 +106,16 @@ namespace xsimd
 #endif
     }
 
+    // SVE vector length in bytes as reported by `rdvl`, or 0 when `sve()` is false.
+    inline std::size_t arm_cpu_features::sve_size_bytes() const noexcept
+    {
+        if (sve())
+        {
+            return static_cast<std::size_t>(detail::arm_rdvl_unsafe());
+        }
+        return 0;
+    }
+
     inline bool arm_cpu_features::i8mm() const noexcept
     {
 #if XSIMD_TARGET_ARM64 && XSIMD_HAVE_LINUX_GETAUXVAL
diff --git a/include/xsimd/config/xsimd_cpu_features_riscv.hpp b/include/xsimd/config/xsimd_cpu_features_riscv.hpp
index 62e900d3b..45475d6ca 100644
--- a/include/xsimd/config/xsimd_cpu_features_riscv.hpp
+++ b/include/xsimd/config/xsimd_cpu_features_riscv.hpp
@@ -12,6 +12,9 @@
 #ifndef XSIMD_CPU_FEATURES_RISCV_HPP
 #define XSIMD_CPU_FEATURES_RISCV_HPP
 
+#include <cstddef>
+#include <cstdint>
+
 #include "./xsimd_config.hpp"
 #include "./xsimd_getauxval.hpp"
 
@@ -24,16 +27,47 @@
 
 namespace xsimd
 {
+    namespace detail
+    {
+        using riscv_reg64_t = std::uint64_t;
+
+        /**
+         * Return the RVV vector length in bytes.
+         *
+         * This does not need to be compiled with RVV enabled, which should not
+         * be done in a dynamic dispatch jump function.
+         *
+         * Safety: It is the user's responsibility to first make sure that RVV is
+         * available.
+         */
+        inline riscv_reg64_t riscv_csrr_unsafe() noexcept;
+    }
+
     class riscv_cpu_features : private linux_hwcap_backend_default
     {
     public:
         inline bool rvv() const noexcept;
+        inline std::size_t rvv_size_bytes() const noexcept;
     };
 
     /********************
      * Implementation *
      ********************/
 
+    namespace detail
+    {
+#if XSIMD_TARGET_RISCV && (defined(__GNUC__) || defined(__clang__))
+        __attribute__((target("arch=+v"))) inline riscv_reg64_t riscv_csrr_unsafe() noexcept
+        {
+            unsigned long vlenb = 0; // XLEN-wide, so "=r" binds a single register on RV32 and RV64
+            __asm__ volatile("csrr %0, vlenb" : "=r"(vlenb));
+            return vlenb;
+        }
+#else
+        inline riscv_reg64_t riscv_csrr_unsafe() noexcept { return 0; }
+#endif
+    }
+
     inline bool riscv_cpu_features::rvv() const noexcept
     {
 #if XSIMD_TARGET_RISCV && XSIMD_HAVE_LINUX_GETAUXVAL
@@ -47,6 +81,16 @@ namespace xsimd
         return false;
 #endif
     }
+
+    // RVV vector length in bytes as read from the `vlenb` CSR, or 0 when `rvv()` is false.
+    inline std::size_t riscv_cpu_features::rvv_size_bytes() const noexcept
+    {
+        if (rvv())
+        {
+            return static_cast<std::size_t>(detail::riscv_csrr_unsafe());
+        }
+        return 0;
+    }
 }
 
 #endif
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
index 5ac798437..d58897f66 100644
--- a/include/xsimd/config/xsimd_cpuid.hpp
+++ b/include/xsimd/config/xsimd_cpuid.hpp
@@ -63,12 +63,12 @@ namespace xsimd
             ARCH_FIELD(neon)
             ARCH_FIELD(neon64)
             ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
-            ARCH_FIELD_EX(detail::sve<512>, sve)
-            ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
-            ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
-            ARCH_FIELD_EX(detail::rvv<512>, rvv)
-            ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
-            ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
+            ARCH_FIELD_EX(detail::sve<512>, sve512)
+            ARCH_FIELD_EX(detail::sve<256>, sve256)
+            ARCH_FIELD_EX(detail::sve<128>, sve128)
+            ARCH_FIELD_EX(detail::rvv<512>, rvv512)
+            ARCH_FIELD_EX(detail::rvv<256>, rvv256)
+            ARCH_FIELD_EX(detail::rvv<128>, rvv128)
             ARCH_FIELD(wasm)
             ARCH_FIELD(vsx)
 
@@ -87,7 +87,10 @@ namespace xsimd
 
                 // Safe on all platforms, it will be all false if non risc-v.
                 const auto riscv_cpu = xsimd::riscv_cpu_features();
-                rvv = riscv_cpu.rvv();
+                const auto rvv_bytes = riscv_cpu.rvv_size_bytes(); // read once; 0 when RVV is unavailable
+                rvv128 = rvv_bytes >= (128 / 8);
+                rvv256 = rvv_bytes >= (256 / 8);
+                rvv512 = rvv_bytes >= (512 / 8);
 
                 // Safe on all platforms, it will be all false if non arm.
                 const auto arm_cpu = xsimd::arm_cpu_features();
@@ -95,7 +98,10 @@ namespace xsimd
                 neon = arm_cpu.neon();
                 neon64 = arm_cpu.neon64();
                 i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm();
-                sve = arm_cpu.sve();
+                const auto sve_bytes = arm_cpu.sve_size_bytes(); // read once; 0 when SVE is unavailable
+                sve128 = sve_bytes >= (128 / 8);
+                sve256 = sve_bytes >= (256 / 8);
+                sve512 = sve_bytes >= (512 / 8);
 
                 // Safe on all platforms, it will be all false if non x86.
                 const auto x86_cpu = xsimd::x86_cpu_features();
diff --git a/test/test_cpu_features.cpp b/test/test_cpu_features.cpp
index 9461210f5..fdc15d745 100644
--- a/test/test_cpu_features.cpp
+++ b/test/test_cpu_features.cpp
@@ -142,6 +142,8 @@ TEST_CASE("[cpu_features] arm implication chains")
 
     CHECK_IMPLICATION(cpu.neon64(), cpu.neon());
     CHECK_IMPLICATION(cpu.sve(), cpu.neon64());
+    CHECK_IMPLICATION(cpu.sve(), cpu.sve_size_bytes() >= (128 / 8));
+    CHECK_IMPLICATION(!cpu.sve(), cpu.sve_size_bytes() == 0u);
     CHECK_IMPLICATION(cpu.i8mm(), cpu.neon64());
 }
 
@@ -155,6 +157,14 @@ TEST_CASE("[cpu_features] arm features from environment")
     CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_I8MM", cpu.i8mm());
 }
 
+TEST_CASE("[cpu_features] risc-v implication chains")
+{
+    xsimd::riscv_cpu_features cpu;
+
+    CHECK_IMPLICATION(cpu.rvv(), cpu.rvv_size_bytes() >= (128 / 8));
+    CHECK_IMPLICATION(!cpu.rvv(), cpu.rvv_size_bytes() == 0u);
+}
+
 TEST_CASE("[cpu_features] risc-v features from environment")
 {
     xsimd::riscv_cpu_features cpu;