diff --git a/benchmark/string_codepoints.yml b/benchmark/string_codepoints.yml
new file mode 100644
index 00000000000000..6a07db7ce1fc82
--- /dev/null
+++ b/benchmark/string_codepoints.yml
@@ -0,0 +1,9 @@
+prelude: |
+ mixed_ascii64 = ("a" * 63 + "\u{100}") * 2048
+ mixed_ascii256 = ("a" * 255 + "\u{100}") * 512
+ utf8_2byte = "\u{100}" * 65536
+
+benchmark:
+ codepoints_mixed_ascii64: mixed_ascii64.codepoints
+ codepoints_mixed_ascii256: mixed_ascii256.codepoints
+ codepoints_utf8_2byte: utf8_2byte.codepoints
diff --git a/hash.c b/hash.c
index fbad278bee8170..7fae2f284def65 100644
--- a/hash.c
+++ b/hash.c
@@ -5109,6 +5109,14 @@ rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
}
}
+/*
+ * Create a new hash with a size hint of argc / 2 and fill it from the argc
+ * values at argv through a single rb_hash_bulk_insert() call.
+ * NOTE(review): argv is presumably laid out as alternating key, value items
+ * (hence the argc / 2 size hint) -- confirm against rb_hash_bulk_insert().
+ */
+VALUE
+rb_hash_new_with_bulk_insert(long argc, const VALUE *argv)
+{
+    const st_index_t capa = argc / 2;
+    VALUE hash = rb_hash_new_with_size(capa);
+
+    rb_hash_bulk_insert(argc, argv, hash);
+    return hash;
+}
+
static char **origenviron;
#ifdef _WIN32
#define GET_ENVIRON(e) ((e) = rb_w32_get_environ())
diff --git a/insns.def b/insns.def
index e32caef2dc8c66..26a44299db8064 100644
--- a/insns.def
+++ b/insns.def
@@ -429,9 +429,7 @@ toregexp
// attr bool leaf = false;
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)cnt;
{
- const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt));
- val = rb_reg_new_ary(ary, (int)opt);
- rb_ary_clear(ary);
+ val = rb_reg_new_from_values(cnt, STACK_ADDR_FROM_TOP(cnt), (int)opt);
}
/* intern str to Symbol and push it. */
@@ -591,8 +589,7 @@ newhash
RUBY_DTRACE_CREATE_HOOK(HASH, num);
if (num) {
- val = rb_hash_new_with_size(num / 2);
- rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
+ val = rb_hash_new_with_bulk_insert(num, STACK_ADDR_FROM_TOP(num));
}
else {
val = rb_hash_new();
diff --git a/internal/hash.h b/internal/hash.h
index 6671cd496d173f..baf5af9abd09e3 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -112,6 +112,7 @@ int rb_hash_stlike_foreach(VALUE hash, st_foreach_callback_func *func, st_data_t
RUBY_SYMBOL_EXPORT_END
VALUE rb_hash_new_with_size(st_index_t size);
+VALUE rb_hash_new_with_bulk_insert(long argc, const VALUE *argv);
VALUE rb_hash_resurrect(VALUE hash);
int rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval);
VALUE rb_hash_keys(VALUE hash);
diff --git a/internal/re.h b/internal/re.h
index da165e4756969d..3ad364a1a69a1f 100644
--- a/internal/re.h
+++ b/internal/re.h
@@ -69,6 +69,7 @@ VALUE rb_backref_set_string(VALUE string, long pos, long len);
void rb_match_unbusy(VALUE);
int rb_match_count(VALUE match);
VALUE rb_reg_new_ary(VALUE ary, int options);
+VALUE rb_reg_new_from_values(long cnt, const VALUE *elements, int opt);
VALUE rb_reg_last_defined(VALUE match);
#define ARG_REG_OPTION_MASK \
diff --git a/pathname_builtin.rb b/pathname_builtin.rb
index 63426812204082..ac436d49469273 100644
--- a/pathname_builtin.rb
+++ b/pathname_builtin.rb
@@ -1463,9 +1463,31 @@ def blockdev?() FileTest.blockdev?(@path) end
# The returned value is OS-dependent; on Windows, almost always `false`.
def chardev?() FileTest.chardev?(@path) end
- # Tests the file is empty.
+ # :markup: markdown
+ #
+ # call-seq:
+ # empty? -> true or false
+ #
+ # Returns whether the entry represented by `self` exists and is empty:
+ #
+ # ```ruby
+ # dir_pn = Pathname('example_dir')
+ # dir_pn.empty? # => false # Dir does not exist.
+ # dir_pn.mkdir
+ # dir_pn.empty? # => true # Dir exists and is empty.
+ #
+ # file_pn = Pathname('example_dir/example.txt')
+ # file_pn.empty? # => false # File does not exist.
+ # file_pn.write('')
+ # file_pn.empty? # => true # File exists and is empty.
+ # dir_pn.empty? # => false # Dir exists and is not empty.
+ # file_pn.write('foo')
+ # file_pn.empty? # => false # File exists and is not empty.
+ #
+ # file_pn.delete
+ # dir_pn.delete
+ # ```
#
- # See Dir#empty? and FileTest.empty?.
def empty?
if FileTest.directory?(@path)
Dir.empty?(@path)
@@ -1474,13 +1496,53 @@ def empty?
end
end
- # See FileTest.executable?.
+ # :markup: markdown
+ #
+ # call-seq:
+ # executable? -> true or false
+ #
+ # Returns whether the entry represented by `self` is executable;
+ # calls FileTest.executable? with argument `self.to_s`:
+ #
+ # ```ruby
+ # Pathname('bin/gem').executable? # => true
+ # Pathname('README.md').executable? # => false
+ # ```
+ #
def executable?() FileTest.executable?(@path) end
- # See FileTest.executable_real?.
+ # :markup: markdown
+ #
+ # call-seq:
+ # executable_real? -> true or false
+ #
+ # Returns whether the entry represented by `self` is executable
+ # by the real user and group id of the current process;
+ # calls FileTest.executable_real? with argument `self.to_s`:
+ #
+ # ```ruby
+ # pn = Pathname('example')
+ # pn.write('')
+ # pn.executable_real? # => false
+ # pn.chmod(0100)
+ # pn.executable_real? # => true
+ # ```
+ #
def executable_real?() FileTest.executable_real?(@path) end
- # See FileTest.exist?.
+ # :markup: markdown
+ #
+ # call-seq:
+ # exist? -> true or false
+ #
+ # Returns whether the entry represented by `self` exists:
+ #
+ # ```ruby
+ # Pathname('.').exist? # => true
+ # Pathname('README.md').exist? # => true
+ # Pathname('nosuch').exist? # => false
+ # ```
+ #
def exist?() FileTest.exist?(@path) end
# See FileTest.grpowned?.
diff --git a/re.c b/re.c
index b778fa08f331e6..ec337cd21cf2f6 100644
--- a/re.c
+++ b/re.c
@@ -3528,6 +3528,15 @@ rb_reg_new_ary(VALUE ary, int opt)
return rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
}
+/*
+ * Build a Regexp from the cnt values starting at elements, with options opt.
+ * The values are placed in a temporary array created with
+ * rb_ary_tmp_new_from_values(), that array is handed to rb_reg_new_ary(),
+ * and the temporary array is cleared before the result is returned.
+ */
+VALUE
+rb_reg_new_from_values(long cnt, const VALUE *elements, int opt)
+{
+    const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, elements);
+    /* opt is already an int in this signature, so it is passed through as is. */
+    VALUE val = rb_reg_new_ary(ary, opt);
+    /* Empty the temporary array now that the Regexp has been built. */
+    rb_ary_clear(ary);
+    return val;
+}
+
VALUE
rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
{
diff --git a/string.c b/string.c
index eb249662db74eb..a59340adfd7842 100644
--- a/string.c
+++ b/string.c
@@ -9772,6 +9772,7 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
unsigned int c;
const char *ptr, *end;
rb_encoding *enc;
+ int enc_asciicompat;
if (single_byte_optimizable(str))
return rb_str_enumerate_bytes(str, ary);
@@ -9780,9 +9781,15 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
enc = STR_ENC_GET(str);
+ enc_asciicompat = rb_enc_asciicompat(enc);
while (ptr < end) {
- c = rb_enc_codepoint_len(ptr, end, &n, enc);
+ /* Fast path: ASCII byte in an ASCII-compatible encoding is its own codepoint;
+ * skip rb_enc_codepoint_len and return the byte directly.
+ */
+ n = 1;
+ c = (enc_asciicompat && ISASCII(*ptr)) ?
+ (unsigned char)*ptr : rb_enc_codepoint_len(ptr, end, &n, enc);
ENUM_ELEM(ary, UINT2NUM(c));
ptr += n;
}
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index 573eb37a72f59d..2cde74facd66c1 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -123,6 +123,7 @@ fn main() {
.allowlist_function("rb_hash_aset")
.allowlist_function("rb_hash_aref")
.allowlist_function("rb_hash_bulk_insert")
+ .allowlist_function("rb_hash_new_with_bulk_insert")
.allowlist_function("rb_hash_stlike_lookup")
.allowlist_function("rb_ary_new_capa")
.allowlist_function("rb_ary_store")
@@ -215,6 +216,7 @@ fn main() {
.allowlist_function("rb_reg_match_last")
.allowlist_function("rb_reg_nth_match")
.allowlist_function("rb_reg_new_ary")
+ .allowlist_function("rb_reg_new_from_values")
.allowlist_var("ARG_ENCODING_FIXED")
.allowlist_var("ARG_ENCODING_NONE")
.allowlist_var("ONIG_OPTION_IGNORECASE")
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index f1ef17d794e7af..4eee7693150c20 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -2111,19 +2111,17 @@ fn gen_new_hash(
elements: Vec,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
-
- let cap: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long");
- let new_hash = asm_ccall!(asm, rb_hash_new_with_size, lir::Opnd::Imm(cap));
+ if elements.is_empty() {
+ gen_prepare_leaf_call_with_gc(asm, state);
+ asm_ccall!(asm, rb_hash_new,)
+ } else {
+ gen_prepare_non_leaf_call(jit, asm, state);
- if !elements.is_empty() {
let argv = gen_push_opnds(asm, &elements);
- asm_ccall!(asm, rb_hash_bulk_insert, elements.len().into(), argv, new_hash);
-
+ let hash = asm_ccall!(asm, rb_hash_new_with_bulk_insert, elements.len().into(), argv);
gen_pop_opnds(asm, &elements);
+ hash
}
-
- new_hash
}
/// Compile a new range instruction
@@ -3401,11 +3399,7 @@ fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec
gen_prepare_non_leaf_call(jit, asm, state);
let first_opnd_ptr = gen_push_opnds(asm, &values);
-
- let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
- let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into());
- asm_ccall!(asm, rb_ary_clear, tmp_ary);
-
+ let result = asm_ccall!(asm, rb_reg_new_from_values, values.len().into(), first_opnd_ptr, opt.into());
gen_pop_opnds(asm, &values);
result
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index c61e61edd1bdec..5a7c3de606c5f1 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -2060,6 +2060,11 @@ unsafe extern "C" {
pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE;
pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
+ pub fn rb_reg_new_from_values(
+ cnt: ::std::os::raw::c_long,
+ elements: *const VALUE,
+ opt: ::std::os::raw::c_int,
+ ) -> VALUE;
pub fn rb_ary_tmp_new_from_values(
arg1: VALUE,
arg2: ::std::os::raw::c_long,
@@ -2132,6 +2137,7 @@ unsafe extern "C" {
arg: st_data_t,
) -> ::std::os::raw::c_int;
pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
+ pub fn rb_hash_new_with_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE) -> VALUE;
pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
pub fn rb_hash_stlike_lookup(
hash: VALUE,