From e73e4f2d4cbbd741649572b840e3a9816c31bb17 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 1 Jun 2026 16:54:36 +0900 Subject: [PATCH 1/5] [ruby/strscan] [Feature #21943] Add `StringScanner#integer_at` (https://github.com/ruby/strscan/pull/205) See also: https://bugs.ruby-lang.org/issues/21943 This is semantically equivalent to `scanner[specifier]&.to_i(base)` but this is faster than `scanner[specifier]&.to_i(base)` because `integer_at` doesn't create a temporary String when possible. This PR also includes a benchmark for them: ```console $ ruby -v -S benchmark-driver benchmark/integer_at.yaml ruby 4.1.0dev (2026-05-01T19:25:51Z master https://github.com/ruby/strscan/commit/f2845eab29) +PRISM [x86_64-linux] Warming up -------------------------------------- [].to_i 24.272M i/s - 25.109M times in 1.034481s (41.20ns/i, 32clocks/i) integer_at 61.188M i/s - 62.491M times in 1.021289s (16.34ns/i, 62clocks/i) Calculating ------------------------------------- [].to_i 26.831M i/s - 72.816M times in 2.713883s (37.27ns/i, 169clocks/i) integer_at 81.331M i/s - 183.564M times in 2.256998s (12.30ns/i, 43clocks/i) Comparison: integer_at: 81331225.5 i/s [].to_i: 26831046.3 i/s - 3.03x slower ``` In this environment, `integer_at` is 3.03x faster than `[].to_i`. 
https://github.com/ruby/strscan/commit/8a60879b2d Co-authored-by: jinroq --- ext/strscan/extconf.rb | 1 + ext/strscan/lib/strscan/strscan.rb | 6 ++ ext/strscan/strscan.c | 130 ++++++++++++++++++++++++----- test/strscan/test_stringscanner.rb | 53 ++++++++++++ 4 files changed, 170 insertions(+), 20 deletions(-) diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index 2b4ec25be30909..4e8d851fdb5a54 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -5,6 +5,7 @@ have_func("onig_region_memsize(NULL)") have_func("rb_reg_onig_match", "ruby/re.h") have_func("rb_deprecate_constant") + have_func("rb_int_parse_cstr", "ruby.h") # RUBY_VERSION >= 2.5 have_func("rb_gc_location", "ruby.h") # RUBY_VERSION >= 2.7 have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 create_makefile 'strscan' diff --git a/ext/strscan/lib/strscan/strscan.rb b/ext/strscan/lib/strscan/strscan.rb index 07ed102d9a8cfe..5e262f4007b497 100644 --- a/ext/strscan/lib/strscan/strscan.rb +++ b/ext/strscan/lib/strscan/strscan.rb @@ -1,6 +1,12 @@ # frozen_string_literal: true class StringScanner + unless method_defined?(:integer_at) # For JRuby + def integer_at(specifier, *to_i_args) + self[specifier]&.to_i(*to_i_args) + end + end + # :markup: markdown # # call-seq: diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index d35df7e43b1a5f..dede57218bd173 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -1689,6 +1689,38 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name rb_long2int(name_end - name), name); } +/* + * Resolve capture group index from Integer, Symbol, or String. + * Returns the resolved register index, or -1 if unmatched/out of range. + * For Symbol/String specifiers, raises IndexError if the named group + * does not exist. + */ +static long +resolve_capture_index(struct strscanner *p, VALUE specifier) +{ + const char *name; + long i; + if (! 
MATCHED_P(p)) return -1; + switch (TYPE(specifier)) { + case T_SYMBOL: + specifier = rb_sym2str(specifier); + /* fall through */ + case T_STRING: + RSTRING_GETMEM(specifier, name, i); + i = name_to_backref_number(&(p->regs), p->regex, name, name + i, + rb_enc_get(specifier)); + break; + default: + i = NUM2LONG(specifier); + } + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return -1; + if (i >= p->regs.num_regs) return -1; + if (p->regs.beg[i] == -1) return -1; + return i; +} + /* * * :markup: markdown @@ -1763,36 +1795,93 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name static VALUE strscan_aref(VALUE self, VALUE idx) { - const char *name; struct strscanner *p; long i; GET_SCANNER(self, p); - if (! MATCHED_P(p)) return Qnil; - - switch (TYPE(idx)) { - case T_SYMBOL: - idx = rb_sym2str(idx); - /* fall through */ - case T_STRING: - RSTRING_GETMEM(idx, name, i); - i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx)); - break; - default: - i = NUM2LONG(idx); - } - - if (i < 0) - i += p->regs.num_regs; - if (i < 0) return Qnil; - if (i >= p->regs.num_regs) return Qnil; - if (p->regs.beg[i] == -1) return Qnil; + i = resolve_capture_index(p, idx); + if (i < 0) return Qnil; return extract_range(p, adjust_register_position(p, p->regs.beg[i]), adjust_register_position(p, p->regs.end[i])); } +/* + * :markup: markdown + * + * call-seq: + * integer_at(specifier, base=10) -> integer or nil + * + * Returns the captured substring at the given `specifier` as an Integer, + * following the behavior of `String#to_i(base)`. + * + * `specifier` can be an Integer (positive, negative, or zero), a Symbol, + * or a String for named capture groups. 
+ * + * Returns `nil` if: + * - No match has been performed or the last match failed + * - The `specifier` is an Integer and is out of range + * - The group at `specifier` did not participate in the match + * + * Raises IndexError if `specifier` is a Symbol or String that does not + * correspond to a named capture group, consistent with + * `StringScanner#[]`. + * + * This is semantically equivalent to `self[specifier]&.to_i(base)` + * but avoids the allocation of a temporary String when possible. + * + * ```rb + * scanner = StringScanner.new("2024-06-15") + * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/) + * scanner.integer_at(1) # => 2024 + * scanner.integer_at(1, 16) # => 8228 + * ``` + */ +static VALUE +strscan_integer_at(int argc, VALUE *argv, VALUE self) +{ + struct strscanner *p; + long i; + long beg, end, len; + const char *ptr; + VALUE rb_specifier; + VALUE rb_base; + int base = 10; + + GET_SCANNER(self, p); + rb_scan_args(argc, argv, "11", &rb_specifier, &rb_base); + if (argc > 1) + base = NUM2INT(rb_base); + i = resolve_capture_index(p, rb_specifier); + if (i < 0) + return Qnil; + + beg = adjust_register_position(p, p->regs.beg[i]); + end = adjust_register_position(p, p->regs.end[i]); + len = end - beg; + ptr = S_PBEG(p) + beg; +#ifdef HAVE_RB_INT_PARSE_CSTR + { + /* + * Ruby 2.5 or later export the rb_int_parse_cstr() symbol but + * prototype definition isn't provided. Ruby 4.1 or later + * provide prototype definition. 
+ */ +# ifndef RB_INT_PARSE_DEFAULT + VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp, + size_t *ndigits, int base, int flags); +# define RB_INT_PARSE_DEFAULT 0x07 +# endif + char *endp; + return rb_int_parse_cstr(ptr, len, &endp, NULL, base, + RB_INT_PARSE_DEFAULT); + } +#else + return rb_str_to_inum(rb_str_new(ptr, len), base, 0); +#endif +} + /* * :markup: markdown * :include: strscan/link_refs.txt @@ -2353,6 +2442,7 @@ Init_strscan(void) rb_define_method(StringScanner, "matched", strscan_matched, 0); rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); rb_define_method(StringScanner, "[]", strscan_aref, 1); + rb_define_method(StringScanner, "integer_at", strscan_integer_at, -1); rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); rb_define_method(StringScanner, "post_match", strscan_post_match, 0); rb_define_method(StringScanner, "size", strscan_size, 0); diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index 3b6223709cf6f7..96a1badb1f1087 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -525,6 +525,59 @@ def test_AREF end end + def assert_integer_at(s, specifier, *to_i_args) + assert_equal(s[specifier]&.to_i(*to_i_args), + s.integer_at(specifier, *to_i_args)) + end + + def test_integer_at + s = create_string_scanner("before 20260514 after") + s.skip_until(" ") + assert_equal("20260514", s.scan(/(\d{4})(\d{2})(\d{2})/)) + assert_integer_at(s, 0) # 20260514 + assert_integer_at(s, 1) # 2026 + assert_integer_at(s, 2) # 5 + assert_integer_at(s, 3) # 14 + assert_integer_at(s, 4) # nil + assert_integer_at(s, -1) # 14 + assert_integer_at(s, -2) # 5 + assert_integer_at(s, -3) # 2026 + assert_integer_at(s, -4) # 20260514 + assert_integer_at(s, -5) # nil + end + + def test_integer_at_name_string + s = create_string_scanner("before 20260514 after") + s.skip_until(" ") + assert_equal("20260514", s.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/)) + 
assert_integer_at(s, "y") + assert_integer_at(s, "m") + assert_integer_at(s, "d") + end + + def test_integer_at_name_symbol + s = create_string_scanner("before 20260514 after") + s.skip_until(" ") + assert_equal("20260514", s.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/)) + assert_integer_at(s, :y) + assert_integer_at(s, :m) + assert_integer_at(s, :d) + end + + def test_integer_at_base + s = create_string_scanner("before 111 after") + s.skip_until(" ") + assert_equal("111", s.scan(/\d+/)) + assert_integer_at(s, 0, 2) + end + + def test_integer_at_base_auto + s = create_string_scanner("before 0xa_f after") + s.skip_until(" ") + assert_equal("0xa_f", s.scan(/0x[\h_]+/)) + assert_integer_at(s, 0, 0) # 0xaf + end + def test_pre_match s = create_string_scanner('a b c d e') s.scan(/\w/) From 807159283eacc613fa4ceee2a9c986711d619327 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 1 Jun 2026 10:20:56 +0900 Subject: [PATCH 2/5] [ruby/rubygems] Vendor compact_index during install_test_deps The earlier `rake vendor:compact_index` hook into `dev:deps` and the hard-copy step in ruby-core.yml fell apart in ruby/ruby's test-bundler runner, which sets TMPDIR per process and does not invoke our rake tasks. Pull the fetch logic into `Spec::Rubygems.install_vendored_compact_index` and call it from `install_test_deps` so every test setup path - local `bin/rspec`, `bin/parallel_rspec`, GHA bundler.yml, and ruby/ruby's test-bundler - lands the files at `Path.tmp_root.join("compact_index")` exactly where the artifice already looks. The standalone rake task and its workflow hop are no longer needed. 
https://github.com/ruby/rubygems/commit/d8536e115e Co-Authored-By: Claude Opus 4.7 --- spec/bundler/support/rubygems_ext.rb | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index cf639a660a04fd..7d1ca550ba9543 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -73,6 +73,33 @@ def install_test_deps require_relative "helpers" Helpers.install_dev_bundler + + install_vendored_compact_index + end + + # Vendor `rubygems/rubygems.org#lib/compact_index/` under `tmp/compact_index/` + # so the artifice can serve compact-index responses without a runtime gem + # dependency. Pinned to a reviewed commit; override with COMPACT_INDEX_REF. + def install_vendored_compact_index + target_root = Path.tmp_root.join("compact_index") + return if File.exist?(target_root.join("lib/compact_index.rb")) + + require "open-uri" + require "fileutils" + + ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c" + %w[ + lib/compact_index.rb + lib/compact_index/dependency.rb + lib/compact_index/gem.rb + lib/compact_index/gem_version.rb + lib/compact_index/versions_file.rb + ].each do |path| + url = "https://raw.githubusercontent.com/rubygems/rubygems.org/#{ref}/#{path}" + target = target_root.join(path) + FileUtils.mkdir_p(File.dirname(target)) + File.write(target, URI.parse(url).open(&:read)) + end end def check_source_control_changes(success_message:, error_message:) From 4f04e6ab8aea92bd4433cf4da01a8282c8d5ec5d Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 1 Jun 2026 10:25:38 +0900 Subject: [PATCH 3/5] [ruby/rubygems] Refresh vendored compact_index when COMPACT_INDEX_REF is set Previously the install_vendored_compact_index short-circuit only checked whether `tmp/compact_index/lib/compact_index.rb` existed, so once any ref was vendored a subsequent `COMPACT_INDEX_REF=<ref> bin/rspec ...` kept serving the stale copy. 
Drop the vendor tree first when the env var is explicitly set so an override always re-fetches against the requested ref. https://github.com/ruby/rubygems/commit/db5d06953f Co-Authored-By: Claude Opus 4.7 --- spec/bundler/support/rubygems_ext.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index 7d1ca550ba9543..9ff0138f2ccd03 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -79,14 +79,19 @@ def install_test_deps # Vendor `rubygems/rubygems.org#lib/compact_index/` under `tmp/compact_index/` # so the artifice can serve compact-index responses without a runtime gem - # dependency. Pinned to a reviewed commit; override with COMPACT_INDEX_REF. + # dependency. Pinned to a reviewed commit; override with COMPACT_INDEX_REF + # to refresh against another ref (the existing vendor copy is discarded). def install_vendored_compact_index target_root = Path.tmp_root.join("compact_index") - return if File.exist?(target_root.join("lib/compact_index.rb")) - - require "open-uri" require "fileutils" + if ENV["COMPACT_INDEX_REF"] + FileUtils.rm_rf(target_root) + elsif File.exist?(target_root.join("lib/compact_index.rb")) + return + end + + require "open-uri" ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c" %w[ lib/compact_index.rb From e431c98e31876042a689396a0603f63802914018 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 1 Jun 2026 19:56:08 +0900 Subject: [PATCH 4/5] [ruby/rubygems] Lock compact_index vendoring against parallel races The vendored compact_index install ran without any coordination, so two test setups starting at once could both write into tmp/compact_index/ at the same time. The skip guard also checked a single file, which meant an interrupted download leaving only lib/compact_index.rb behind would be treated as a complete vendor tree on the next run. 
Take an exclusive file lock around the install and only skip the download once every expected file is present, removing the tree under the same lock when COMPACT_INDEX_REF forces a refresh. https://github.com/ruby/rubygems/commit/0451700769 Co-Authored-By: Claude Opus 4.8 (1M context) --- spec/bundler/support/rubygems_ext.rb | 36 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index 9ff0138f2ccd03..8e3d84212d31fd 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -84,26 +84,34 @@ def install_test_deps def install_vendored_compact_index target_root = Path.tmp_root.join("compact_index") require "fileutils" + FileUtils.mkdir_p(Path.tmp_root) - if ENV["COMPACT_INDEX_REF"] - FileUtils.rm_rf(target_root) - elsif File.exist?(target_root.join("lib/compact_index.rb")) - return - end - - require "open-uri" - ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c" - %w[ + files = %w[ lib/compact_index.rb lib/compact_index/dependency.rb lib/compact_index/gem.rb lib/compact_index/gem_version.rb lib/compact_index/versions_file.rb - ].each do |path| - url = "https://raw.githubusercontent.com/rubygems/rubygems.org/#{ref}/#{path}" - target = target_root.join(path) - FileUtils.mkdir_p(File.dirname(target)) - File.write(target, URI.parse(url).open(&:read)) + ] + + # Serialize installs so parallel test setups don't race on the same + # vendor tree, and only skip the download when every file is present so + # an interrupted run can't leave a partial copy behind. + File.open(Path.tmp_root.join("compact_index.lock"), File::CREAT | File::RDWR) do |lock| + lock.flock(File::LOCK_EX) + + FileUtils.rm_rf(target_root) if ENV["COMPACT_INDEX_REF"] + + next if files.all? 
{|path| File.exist?(target_root.join(path)) } + + require "open-uri" + ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c" + files.each do |path| + url = "https://raw.githubusercontent.com/rubygems/rubygems.org/#{ref}/#{path}" + target = target_root.join(path) + FileUtils.mkdir_p(File.dirname(target)) + File.write(target, URI.parse(url).open(&:read)) + end end end From 344b5eaa67f90671ce0d18c01f0b83c65a3f97d5 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 1 Jun 2026 15:15:09 +0900 Subject: [PATCH 5/5] Wrap functions in USE_MODULAR_GC rb_gc_modular_gc_loaded_p and rb_gc_active_gc_name are only used when compiling with modular GC enabled. --- gc.c | 6 ++---- internal/gc.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gc.c b/gc.c index cde1b44d05b115..f0ec0f79efe692 100644 --- a/gc.c +++ b/gc.c @@ -3650,14 +3650,11 @@ rb_gc_copy_attributes(VALUE dest, VALUE obj) rb_gc_impl_copy_attributes(rb_gc_get_objspace(), dest, obj); } +#if USE_MODULAR_GC int rb_gc_modular_gc_loaded_p(void) { -#if USE_MODULAR_GC return rb_gc_functions.modular_gc_loaded_p; -#else - return false; -#endif } const char * @@ -3673,6 +3670,7 @@ rb_gc_active_gc_name(void) return gc_name; } +#endif struct rb_gc_object_metadata_entry * rb_gc_object_metadata(VALUE obj) diff --git a/internal/gc.h b/internal/gc.h index 41675810c722c4..ee2a0c28050a8a 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -257,8 +257,10 @@ void rb_gc_update_values(long n, VALUE *values); void rb_gc_mark_set_no_pin(st_table *); void rb_gc_update_set_refs(st_table *); +#if USE_MODULAR_GC const char *rb_gc_active_gc_name(void); int rb_gc_modular_gc_loaded_p(void); +#endif RUBY_SYMBOL_EXPORT_END