diff --git a/Misc/NEWS.d/next/Library/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst b/Misc/NEWS.d/next/Library/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst new file mode 100644 index 00000000000000..365841095fd5a1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-19-00-03-25.gh-issue-148729.tjrIwN.rst @@ -0,0 +1,3 @@ +Optimize prefix search for regular expressions starting with literals using +``memchr()``. For single-byte character strings, the internal scanning loop +now delegates to the C library, which is typically vectorized. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index df377905bfae0d..4f1269988b92e0 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1753,10 +1753,17 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) end = (SRE_CHAR *)state->end; state->must_advance = 0; while (ptr < end) { +#if SIZEOF_SRE_CHAR == 1 + ptr = (SRE_CHAR *)memchr(ptr, c, end - ptr); + if (!ptr) { + return 0; + } +#else while (*ptr != c) { if (++ptr >= end) return 0; } +#endif TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); state->start = ptr; state->ptr = ptr + prefix_skip; @@ -1786,10 +1793,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) #endif while (ptr < end) { SRE_CHAR c = (SRE_CHAR) prefix[0]; +#if SIZEOF_SRE_CHAR == 1 + ptr = (SRE_CHAR *)memchr(ptr, c, end - ptr); + if (!ptr) { + return 0; + } + ptr++; +#else while (*ptr++ != c) { if (ptr >= end) return 0; } +#endif if (ptr >= end) return 0;