diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 470db4f8b9d..e8a7d552397 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -47,6 +47,6 @@ rapidjson re2 snappy thrift-cpp>=0.11.0 -xsimd>=14.0 +xsimd>=14.2 zlib zstd diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index c08d0c4292b..fd1752928ed 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2858,7 +2858,7 @@ if(ARROW_USE_XSIMD) IS_RUNTIME_DEPENDENCY FALSE REQUIRED_VERSION - "14.0.0") + "14.2.0") if(xsimd_SOURCE STREQUAL "BUNDLED") set(ARROW_XSIMD arrow::xsimd) diff --git a/cpp/src/arrow/util/bpacking_simd_kernel_internal.h b/cpp/src/arrow/util/bpacking_simd_kernel_internal.h index fe879bb5b0f..83a969cca64 100644 --- a/cpp/src/arrow/util/bpacking_simd_kernel_internal.h +++ b/cpp/src/arrow/util/bpacking_simd_kernel_internal.h @@ -151,73 +151,6 @@ constexpr bool IsSse2 = std::is_base_of_v; template constexpr bool IsAvx2 = std::is_base_of_v; -/// Whether we are compiling for the Neon or above in the arm64 family. -template -constexpr bool IsNeon = std::is_base_of_v; - -/// Wrapper around ``xsimd::bitwise_lshift`` with optimizations for non implemented sizes. -/// -/// We replace the variable left shift by a variable multiply with a power of two. -/// -/// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of -/// integers per second through vectorization, Software Practice & Experience 45 (1), -/// 2015. http://arxiv.org/abs/1209.2137 -/// -/// TODO(xsimd) Tracking in https://github.com/xtensor-stack/xsimd/pull/1220 -/// When migrating, be sure to use batch_constant overload, and not the batch one. -template -ARROW_FORCE_INLINE auto left_shift(const xsimd::batch& batch, - xsimd::batch_constant shifts) - -> xsimd::batch { - constexpr bool kIsSse2 = IsSse2; - constexpr bool kIsAvx2 = IsAvx2; - static_assert( - !(kIsSse2 && kIsAvx2), - "In xsimd, an x86 arch is either part of the SSE family or of the AVX family," - "not both. If this check fails, it means the assumptions made here to detect SSE " - "and AVX are out of date."); - - constexpr auto kMults = xsimd::make_batch_constant() << shifts; - - constexpr auto IntSize = sizeof(Int); - - // Sizes and architecture for which there is no variable left shift and there is a - // multiplication - if constexpr ( // - (kIsSse2 && (IntSize == sizeof(uint16_t) || IntSize == sizeof(uint32_t))) // - || (kIsAvx2 && (IntSize == sizeof(uint16_t))) // - ) { - return batch * kMults; - } - - // Architecture for which there is no variable left shift on uint8_t but a fallback - // exists for uint16_t. - if constexpr ((kIsSse2 || kIsAvx2) && (IntSize == sizeof(uint8_t))) { - const auto batch16 = xsimd::bitwise_cast(batch); - - constexpr auto kShifts0 = select_stride(shifts); - const auto shifted0 = left_shift(batch16, kShifts0) & 0x00FF; - - constexpr auto kShifts1 = select_stride(shifts); - const auto shifted1 = left_shift(batch16 & 0xFF00, kShifts1); - - return xsimd::bitwise_cast(shifted0 | shifted1); - } - - // TODO(xsimd) bug fixed in xsimd 14.1.0 - // https://github.com/xtensor-stack/xsimd/pull/1266 -#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0) - if constexpr (IsNeon) { - using SInt = std::make_signed_t; - constexpr auto signed_shifts = - xsimd::batch_constant(kShifts)...>(); - return xsimd::kernel::bitwise_lshift(batch, signed_shifts.as_batch(), Arch{}); - } -#endif - - return batch << shifts; -} - /// Fallback for variable shift right. /// /// When we know that the relevant bits will not overflow, we can instead shift left all @@ -243,9 +176,8 @@ ARROW_FORCE_INLINE auto right_shift_by_excess( constexpr auto IntSize = sizeof(Int); - // Architecture for which there is no variable right shift but a larger fallback exists. - // TODO(xsimd) Tracking for Avx2 in https://github.com/xtensor-stack/xsimd/pull/1220 - // When migrating, be sure to use batch_constant overload, and not the batch one. + // Architectures for which there is no variable right shift but a larger fallback + // exists. if constexpr (kIsAvx2 && (IntSize == sizeof(uint8_t) || IntSize == sizeof(uint16_t))) { using twice_uint = SizedUint<2 * IntSize>; @@ -262,27 +194,17 @@ ARROW_FORCE_INLINE auto right_shift_by_excess( return xsimd::bitwise_cast(shifted0 | shifted1); } - // These conditions are the ones matched in `left_shift`, i.e. the ones where variable - // shift right will not be available but a left shift (fallback) exists. + // Architectures for which there is no variable right shift but a left shift exists + // (possibly using the multiply trick inside of xsimd). + // We use a variable left shift and fixed right shift. if constexpr (kIsSse2 && (IntSize != sizeof(uint64_t))) { constexpr Int kMaxRShift = max_value(std::array{kShifts...}); constexpr auto kLShifts = xsimd::make_batch_constant() - shifts; - return xsimd::bitwise_rshift(left_shift(batch, kLShifts)); - } - - // TODO(xsimd) bug fixed in xsimd 14.1.0 - // https://github.com/xtensor-stack/xsimd/pull/1266 -#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0) - if constexpr (IsNeon) { - using SInt = std::make_signed_t; - constexpr auto signed_shifts = - xsimd::batch_constant(kShifts)...>(); - return xsimd::kernel::bitwise_rshift(batch, signed_shifts.as_batch(), Arch{}); + return xsimd::bitwise_rshift(batch << kLShifts); } -#endif return batch >> shifts; } @@ -1040,7 +962,7 @@ struct LargeKernel { const auto high_swizzled = xsimd::swizzle(bytes, kHighSwizzles); const auto high_words = xsimd::bitwise_cast(high_swizzled); - const auto high_shifted = left_shift(high_words, kHighLShifts); + const auto high_shifted = high_words << kHighLShifts; // We can have a single mask and apply it after OR because the shifts will ensure that // there are zeros where the high/low values are incomplete. diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2895247e9fc..c6f4b01a717 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -115,8 +115,8 @@ ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=6f4f1b639daa6dca9f80bc5db1233e9cbaa31a67790 # WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport ARROW_WIL_BUILD_VERSION=v1.0.250325.1 ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786 -ARROW_XSIMD_BUILD_VERSION=14.0.0 -ARROW_XSIMD_BUILD_SHA256_CHECKSUM=17de0236954955c10c09d6938d4c5f3a3b92d31be5dadd1d5d09fc1b15490dce +ARROW_XSIMD_BUILD_VERSION=14.2.0 +ARROW_XSIMD_BUILD_SHA256_CHECKSUM=21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee ARROW_ZLIB_BUILD_VERSION=1.3.1 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 ARROW_ZSTD_BUILD_VERSION=1.5.7