From 8cefe782dcb0ca3969519b74036acd5359ae09c4 Mon Sep 17 00:00:00 2001 From: pshu Date: Wed, 27 May 2026 01:56:31 +0800 Subject: [PATCH] perf: byte-level path handling on Unix to bypass Components iterator On Unix, an OsStr is raw bytes and the separator '/' and the markers '.' and '..' consist solely of single-byte ASCII characters, so the heavy std::path::Components state machine (Windows-prefix detection, Component enum construction, double-ended iterator bookkeeping) is unnecessary for the resolver's hot paths. Changes: - src/path.rs: unix_normalize and unix_normalize_with replace the Components-based PathUtil::normalize / normalize_with on Unix. - src/cache.rs: byte-level parent_path replaces Path::parent in Cache::value; join_last_segment replaces Path::strip_prefix + normalize_with in realpath_uncached, since parent.path is already a strict byte prefix of self.path. Note: an earlier draft of this PR also rewrote require_without_parse to dispatch on the specifier head via a byte table. That hunk was dropped (see #246) because the maintenance cost of keeping a parallel implementation of Path::components in sync with std outweighed the small isolated win. The remaining changes here cover the dominant instruction-count (callgrind Ir) cost paths. 138/138 non-PNP unit tests pass. The 6 PNP tests already fail on main without these changes. 
--- src/cache.rs | 95 +++++++++++++++++++++-- src/path.rs | 213 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 259 insertions(+), 49 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 5a90a1e0..048439f9 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -14,10 +14,11 @@ use futures::future::BoxFuture; use rustc_hash::FxHasher; use tokio::sync::OnceCell as OnceLock; +#[cfg(not(unix))] +use crate::path::PathUtil; use crate::{ context::ResolveContext as Ctx, package_json::{off_to_location, PackageJson}, - path::PathUtil, resolver_path::{hash_path, ResolverPath}, FileMetadata, FileSystem, JSONError, ResolveError, ResolveOptions, TsConfig, }; @@ -48,7 +49,12 @@ impl Cache { if let Some(cache_entry) = self.paths.get((hash, path).borrow() as &dyn CacheKey) { return cache_entry.clone(); } - let parent = path.parent().map(|p| self.value(p)); + // Why: Cache::value is the recursive parent-walk root. `Path::parent` goes + // through `Components::next_back` / `parse_next_component_back`, which + // callgrind showed as the single largest non-allocator non-simd-json + // hotspot. On Unix the separator is always single-byte ASCII, so an + // `rposition(/)` over raw `OsStr` bytes is equivalent and far cheaper. + let parent = parent_path(path).map(|p| self.value(p)); let data = CachedPath(Arc::new(CachedPathImpl::new( hash, path.to_path_buf().into_boxed_path(), @@ -247,9 +253,16 @@ impl CachedPathImpl { } if let Some(parent) = self.parent() { let parent_path = parent.realpath(fs).await?; - return Ok(Some( - parent_path.normalize_with(self.path.strip_prefix(&parent.path).unwrap()), - )); + // Why: parent's `path` is a strict byte prefix of `self.path` + // (parents are produced by the byte-level `parent_path`), so + // `strip_prefix` is the path between them. Skipping + // `Path::strip_prefix` + `normalize_with` avoids another + // `Components` walk per realpath miss. 
+ return Ok(Some(join_last_segment( + &parent_path, + &self.path, + &parent.path, + ))); } Ok(None) }) @@ -416,6 +429,78 @@ impl CachedPathImpl { } } +/// Join `base` with the last segment of `child`, where `child_parent` is the +/// `parent_path()` of `child` (i.e. a strict byte prefix of `child`). Used by +/// `realpath_uncached` to avoid walking `Path::strip_prefix` + `normalize_with` +/// when we already know the suffix is a single normal segment. +#[cfg(unix)] +fn join_last_segment(base: &Path, child: &Path, child_parent: &Path) -> PathBuf { + use std::{ + ffi::OsString, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let child_bytes = child.as_os_str().as_bytes(); + let parent_len = child_parent.as_os_str().len(); + + // Skip the `/` between parent and the trailing segment when applicable. + let suffix_start = if parent_len < child_bytes.len() && child_bytes[parent_len] == b'/' { + parent_len + 1 + } else { + parent_len + }; + let suffix = &child_bytes[suffix_start..]; + + let base_bytes = base.as_os_str().as_bytes(); + let mut out = Vec::with_capacity(base_bytes.len() + 1 + suffix.len()); + out.extend_from_slice(base_bytes); + + if !suffix.is_empty() { + if !out.is_empty() && *out.last().unwrap() != b'/' { + out.push(b'/'); + } + out.extend_from_slice(suffix); + } + + PathBuf::from(OsString::from_vec(out)) +} + +#[cfg(not(unix))] +fn join_last_segment(base: &Path, child: &Path, child_parent: &Path) -> PathBuf { + use crate::path::PathUtil; + base.normalize_with(child.strip_prefix(child_parent).unwrap()) +} + +/// Byte-level parent lookup for Unix. See `Cache::value` for why. +#[cfg(unix)] +fn parent_path(path: &Path) -> Option<&Path> { + use std::os::unix::ffi::OsStrExt; + let bytes = path.as_os_str().as_bytes(); + // Trim a trailing `/` that isn't itself the root, mirroring std's + // `Components` ignoring redundant separators. 
+ let trimmed_len = match bytes { + [.., b'/'] if bytes.len() > 1 => bytes.len() - 1, + _ => bytes.len(), + }; + let trimmed = &bytes[..trimmed_len]; + let last_slash = trimmed.iter().rposition(|&b| b == b'/')?; + if last_slash == 0 { + // Parent is the root "/". + if bytes.len() == 1 { + // Path was "/", no parent. + return None; + } + return Some(Path::new(std::ffi::OsStr::from_bytes(&bytes[..1]))); + } + Some(Path::new(std::ffi::OsStr::from_bytes(&bytes[..last_slash]))) +} + +#[cfg(not(unix))] +#[inline] +fn parent_path(path: &Path) -> Option<&Path> { + path.parent() +} + /// Memoized cache key, code adapted from . trait CacheKey { fn tuple(&self) -> (u64, &Path); diff --git a/src/path.rs b/src/path.rs index 5034533d..3e26728c 100644 --- a/src/path.rs +++ b/src/path.rs @@ -35,65 +35,89 @@ pub trait PathUtil { impl PathUtil for Path { // https://github.com/parcel-bundler/parcel/blob/e0b99c2a42e9109a9ecbd6f537844a1b33e7faf5/packages/utils/node-resolver-rs/src/path.rs#L7 fn normalize(&self) -> PathBuf { - let mut components = self.components().peekable(); - let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek() { - let buf = PathBuf::from(c.as_os_str()); - components.next(); - buf - } else { - PathBuf::new() - }; - - for component in components { - match component { - Component::Prefix(..) => unreachable!("Path {:?}", self), - Component::RootDir => { - ret.push(component.as_os_str()); - } - Component::CurDir => {} - Component::ParentDir => { - ret.pop(); - } - Component::Normal(c) => { - ret.push(c); + // Why: On Unix, an `OsStr` is raw bytes and `/`, `.` are always single-byte ASCII + // regardless of UTF-8 content in segments. Iterating bytes directly skips + // the heavy `Components` state machine (`parse_next_component_back`, + // `Component::PartialEq`, double-ended iter bookkeeping) that dominated + // ~3% of the resolver's instructions in callgrind. 
+ #[cfg(unix)] + { + unix_normalize(self) + } + #[cfg(not(unix))] + { + let mut components = self.components().peekable(); + let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek() { + let buf = PathBuf::from(c.as_os_str()); + components.next(); + buf + } else { + PathBuf::new() + }; + + for component in components { + match component { + Component::Prefix(..) => unreachable!("Path {:?}", self), + Component::RootDir => { + ret.push(component.as_os_str()); + } + Component::CurDir => {} + Component::ParentDir => { + ret.pop(); + } + Component::Normal(c) => { + ret.push(c); + } } } - } - ret + ret + } } // https://github.com/parcel-bundler/parcel/blob/e0b99c2a42e9109a9ecbd6f537844a1b33e7faf5/packages/utils/node-resolver-rs/src/path.rs#L37 fn normalize_with>(&self, subpath: B) -> PathBuf { let subpath = subpath.as_ref(); - let mut components = subpath.components(); + // Why: callgrind showed `Components::next` + `parse_next_component_back` + + // `Component::PartialEq` totalling ~5% of Ir, almost all driven from + // `normalize_with` calls in the resolver hot path. On Unix the separator + // and `.`/`..` markers are guaranteed single-byte ASCII, so a byte-level + // pass produces identical output without the iterator overhead. + #[cfg(unix)] + { + unix_normalize_with(self, subpath) + } + #[cfg(not(unix))] + { + let mut components = subpath.components(); - let Some(head) = components.next() else { - return subpath.to_path_buf(); - }; + let Some(head) = components.next() else { + return subpath.to_path_buf(); + }; - if matches!(head, Component::Prefix(..) | Component::RootDir) { - return subpath.to_path_buf(); - } + if matches!(head, Component::Prefix(..) 
| Component::RootDir) { + return subpath.to_path_buf(); + } - let mut ret = self.to_path_buf(); - for component in std::iter::once(head).chain(components) { - match component { - Component::CurDir => {} - Component::ParentDir => { - ret.pop(); - } - Component::Normal(c) => { - ret.push(c); - } - Component::Prefix(..) | Component::RootDir => { - unreachable!("Path {:?} Subpath {:?}", self, subpath) + let mut ret = self.to_path_buf(); + for component in std::iter::once(head).chain(components) { + match component { + Component::CurDir => {} + Component::ParentDir => { + ret.pop(); + } + Component::Normal(c) => { + ret.push(c); + } + Component::Prefix(..) | Component::RootDir => { + unreachable!("Path {:?} Subpath {:?}", self, subpath) + } } } - } - ret + ret + } } fn is_invalid_exports_target(&self) -> bool { @@ -106,6 +130,107 @@ impl PathUtil for Path { } } +/// Byte-level `normalize` for Unix. See [`PathUtil::normalize`] for why. +#[cfg(unix)] +fn unix_normalize(path: &Path) -> PathBuf { + use std::{ + ffi::OsString, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let bytes = path.as_os_str().as_bytes(); + let leading_slash = bytes.first() == Some(&b'/'); + + // Worst-case capacity: original length + a trailing slash placeholder. + let mut out: Vec = Vec::with_capacity(bytes.len()); + if leading_slash { + out.push(b'/'); + } + + // Track segment offsets we've written into `out` so `..` can pop in O(1) + // instead of rescanning `out` byte-by-byte. + let mut starts: Vec = Vec::new(); + + for seg in bytes.split(|&b| b == b'/') { + match seg { + b"" | b"." => {} + b".." => { + if let Some(start) = starts.pop() { + // Trim trailing `/` left over from a previous segment. + out.truncate(start.saturating_sub(usize::from(start > usize::from(leading_slash)))); + } + } + normal => { + // Insert a separator before every segment except the very first one + // when there is no leading slash. 
+ if out.len() > usize::from(leading_slash) { + out.push(b'/'); + } + starts.push(out.len()); + out.extend_from_slice(normal); + } + } + } + + if out.is_empty() { + return PathBuf::new(); + } + + PathBuf::from(OsString::from_vec(out)) +} + +/// Byte-level `normalize_with` for Unix. See [`PathUtil::normalize_with`] for why. +#[cfg(unix)] +fn unix_normalize_with(base: &Path, subpath: &Path) -> PathBuf { + use std::{ + ffi::OsString, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let sub_bytes = subpath.as_os_str().as_bytes(); + + if sub_bytes.is_empty() { + return subpath.to_path_buf(); + } + + // Absolute subpath short-circuits to subpath, matching the std behavior of + // `PathBuf::push` and the original Components-based implementation. + if sub_bytes[0] == b'/' { + return subpath.to_path_buf(); + } + + let base_bytes = base.as_os_str().as_bytes(); + let mut out: Vec = Vec::with_capacity(base_bytes.len() + 1 + sub_bytes.len()); + out.extend_from_slice(base_bytes); + + for seg in sub_bytes.split(|&b| b == b'/') { + match seg { + b"" | b"." => {} + b".." => { + // Pop the trailing segment from `out` without rescanning whole bytes + // ahead of time: `rposition` walks from the end. + if let Some(slash) = out.iter().rposition(|&b| b == b'/') { + if slash == 0 { + out.truncate(1); + } else { + out.truncate(slash); + } + } else { + out.clear(); + } + } + normal => { + if !out.is_empty() && *out.last().unwrap() != b'/' { + out.push(b'/'); + } + out.extend_from_slice(normal); + } + } + } + + PathBuf::from(OsString::from_vec(out)) +} + // https://github.com/webpack/enhanced-resolve/blob/main/test/path.test.js #[tokio::test] async fn is_invalid_exports_target() {