From e488932c42e7c1abf2053ab579871e8fe17cca72 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Sat, 18 Apr 2026 20:08:53 +0200 Subject: [PATCH 1/2] gh-148729: use memchr in SRE prefix scan For single-byte characters, use `memchr` instead of the equivalent hand-written while loop. This ensures that `re.search` is typically vectorized through libc for regexes starting with a `LITERAL`. In the no-match case this means 16 or 32 bytes per iteration instead of a single byte (ok, it was unrolled, but not auto-vectorized). Signed-off-by: Harmen Stoppels --- ...2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst | 3 +++ Modules/_sre/sre_lib.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst new file mode 100644 index 00000000000000..365841095fd5a1 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst @@ -0,0 +1,3 @@ +Optimize prefix search for regular expressions starting with literals using +``memchr()``. For single-byte character strings, the internal scanning loop +now delegates to the C library, which is typically vectorized. 
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index df377905bfae0d..4f1269988b92e0 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1753,10 +1753,17 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) end = (SRE_CHAR *)state->end; state->must_advance = 0; while (ptr < end) { +#if SIZEOF_SRE_CHAR == 1 + ptr = (SRE_CHAR *)memchr(ptr, c, end - ptr); + if (!ptr) { + return 0; + } +#else while (*ptr != c) { if (++ptr >= end) return 0; } +#endif TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); state->start = ptr; state->ptr = ptr + prefix_skip; @@ -1786,10 +1793,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) #endif while (ptr < end) { SRE_CHAR c = (SRE_CHAR) prefix[0]; +#if SIZEOF_SRE_CHAR == 1 + ptr = (SRE_CHAR *)memchr(ptr, c, end - ptr); + if (!ptr) { + return 0; + } + ptr++; +#else while (*ptr++ != c) { if (ptr >= end) return 0; } +#endif if (ptr >= end) return 0; From e4554e6ae6ea7f81c0457757f44e4f375d138c23 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Sun, 19 Apr 2026 11:44:12 +0200 Subject: [PATCH 2/2] move news from core to library Signed-off-by: Harmen Stoppels --- .../2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core_and_Builtins => Library}/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst (100%) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst b/Misc/NEWS.d/next/Library/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst similarity index 100% rename from Misc/NEWS.d/next/Core_and_Builtins/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst rename to Misc/NEWS.d/next/Library/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst