From d06fe497aac0b65d1108a43b6f942cc0eac95c89 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 7 Jun 2026 11:01:16 +0100 Subject: [PATCH 1/2] Implement tick() for aarch64 on non-Apple platforms The aarch64 tick() implementation was previously gated on __APPLE__, leaving Linux (and other non-Apple aarch64 platforms) with no cycle counter. On those platforms the generic AAL fell through to __builtin_readcyclecounter(), which on aarch64 emits `mrs PMCCNTR_EL0`; that register is not accessible from EL0 on Linux and would SIGILL. CNTVCT_EL0 is readable from EL0 on Linux, FreeBSD, and other mainstream aarch64 OSes (the kernel sets CNTKCTL_EL1.EL0VCTEN), so the same implementation Apple uses works everywhere on aarch64. - aal_arm.h: extend tick() to all SNMALLOC_VA_BITS_64 targets and add an MSVC path using _ReadStatusReg(ARM64_CNTVCT). Drop NoCpuCycleCounters for 64-bit ARM (it remains for 32-bit ARM). - aal.h: exclude aarch64 from the __builtin_readcyclecounter() path so it always routes through Arch::tick(). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/snmalloc/aal/aal.h | 1 + src/snmalloc/aal/aal_arm.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/aal/aal.h b/src/snmalloc/aal/aal.h index f0d12072d..a584809f1 100644 --- a/src/snmalloc/aal/aal.h +++ b/src/snmalloc/aal/aal.h @@ -173,6 +173,7 @@ namespace snmalloc else { #if __has_builtin(__builtin_readcyclecounter) && !defined(__APPLE__) && \ + !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) && \ !defined(SNMALLOC_NO_AAL_BUILTINS) return __builtin_readcyclecounter(); #else diff --git a/src/snmalloc/aal/aal_arm.h b/src/snmalloc/aal/aal_arm.h index 5a9fdc5eb..cf67fd062 100644 --- a/src/snmalloc/aal/aal_arm.h +++ b/src/snmalloc/aal/aal_arm.h @@ -32,7 +32,7 @@ namespace snmalloc * Bitmap of AalFeature flags */ static constexpr uint64_t aal_features = IntegerPointers -#if defined(SNMALLOC_VA_BITS_32) || !defined(__APPLE__) +#if defined(SNMALLOC_VA_BITS_32) | NoCpuCycleCounters #endif #if defined(SNMALLOC_COMPILER_SUPPORT_PACA_PACG) && \ @@ -71,12 +71,16 @@ namespace snmalloc #endif } -#if defined(SNMALLOC_VA_BITS_64) && defined(__APPLE__) +#if defined(SNMALLOC_VA_BITS_64) static inline uint64_t tick() noexcept { +# ifdef _MSC_VER + return static_cast<uint64_t>(_ReadStatusReg(ARM64_CNTVCT)); +# else uint64_t t; __asm__ volatile("mrs %0, cntvct_el0" : "=r"(t)); return t; +# endif } #endif From e3d866a2539e7df4e70f4ad1f6955c0063e9b900 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 7 Jun 2026 11:50:14 +0100 Subject: [PATCH 2/2] Fix MSVC build for aarch64 tick() ARM64_CNTVCT is not always available as a predefined macro. Use the explicit ARM64_SYSREG(3, 3, 14, 0, 2) encoding for CNTVCT_EL0 instead, and include <intrin.h> so _ReadStatusReg and ARM64_SYSREG are declared. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/snmalloc/aal/aal_arm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/snmalloc/aal/aal_arm.h b/src/snmalloc/aal/aal_arm.h index cf67fd062..cc6abab9a 100644 --- a/src/snmalloc/aal/aal_arm.h +++ b/src/snmalloc/aal/aal_arm.h @@ -4,6 +4,7 @@ # define SNMALLOC_VA_BITS_64 # ifdef _MSC_VER # include <arm64_neon.h> +# include <intrin.h> # endif #else # define SNMALLOC_VA_BITS_32 @@ -75,7 +76,9 @@ namespace snmalloc static inline uint64_t tick() noexcept { # ifdef _MSC_VER - return static_cast<uint64_t>(_ReadStatusReg(ARM64_CNTVCT)); + // ARM64_SYSREG(op0, op1, CRn, CRm, op2) encoding for CNTVCT_EL0. + return static_cast<uint64_t>( + _ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 2))); # else uint64_t t; __asm__ volatile("mrs %0, cntvct_el0" : "=r"(t));