Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 90 additions & 5 deletions src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
use rustc_hash::FxHasher;
use tokio::sync::OnceCell as OnceLock;

#[cfg(not(unix))]
use crate::path::PathUtil;

Check failure on line 18 in src/cache.rs

View workflow job for this annotation

GitHub Actions / Test wasi target

unused import: `crate::path::PathUtil`

Check failure on line 18 in src/cache.rs

View workflow job for this annotation

GitHub Actions / Check Wasm

unused import: `crate::path::PathUtil`

Check failure on line 18 in src/cache.rs

View workflow job for this annotation

GitHub Actions / Check and Build (windows-latest)

unused import: `crate::path::PathUtil`

Check failure on line 18 in src/cache.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 22)

unused import: `crate::path::PathUtil`

Check failure on line 18 in src/cache.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 20)

unused import: `crate::path::PathUtil`
use crate::{
context::ResolveContext as Ctx,
package_json::{off_to_location, PackageJson},
path::PathUtil,
resolver_path::{hash_path, ResolverPath},
FileMetadata, FileSystem, JSONError, ResolveError, ResolveOptions, TsConfig,
};
Expand Down Expand Up @@ -48,7 +49,12 @@
if let Some(cache_entry) = self.paths.get((hash, path).borrow() as &dyn CacheKey) {
return cache_entry.clone();
}
let parent = path.parent().map(|p| self.value(p));
// Why: Cache::value is the recursive parent-walk root. `Path::parent` goes
// through `Components::next_back` / `parse_next_component_back`, which
// callgrind showed as the single largest non-allocator non-simd-json
// hotspot. On Unix the separator is always single-byte ASCII, so an
// `rposition(/)` over raw `OsStr` bytes is equivalent and far cheaper.
let parent = parent_path(path).map(|p| self.value(p));
let data = CachedPath(Arc::new(CachedPathImpl::new(
hash,
path.to_path_buf().into_boxed_path(),
Expand Down Expand Up @@ -261,9 +267,16 @@
}
if let Some(parent) = self.parent() {
let parent_path = parent.realpath(fs).await?;
return Ok(Some(
parent_path.normalize_with(self.path.strip_prefix(&parent.path).unwrap()),
));
// Why: parent's `path` is a strict byte prefix of `self.path`
// (parents are produced by the byte-level `parent_path`), so
// `strip_prefix` is the path between them. Skipping
// `Path::strip_prefix` + `normalize_with` avoids another
// `Components` walk per realpath miss.
return Ok(Some(join_last_segment(
&parent_path,
&self.path,
&parent.path,
)));
}
Ok(None)
})
Expand Down Expand Up @@ -430,6 +443,78 @@
}
}

/// Join `base` with the part of `child` that follows `child_parent`.
///
/// `child_parent` is expected to be the `parent_path()` of `child`, i.e. a
/// byte prefix of `child`. Used by `realpath_uncached` to avoid walking
/// `Path::strip_prefix` + `normalize_with` through std's `Components`.
///
/// The remainder of `child` is normally a single segment, but that is not
/// guaranteed for non-normalized input (`/a/b/`, `/a/.`, `/a/..`). It is
/// therefore applied segment by segment with `normalize_with` semantics:
/// empty and `.` segments are skipped and `..` removes the last segment of the
/// result, so this stays equivalent to the non-Unix fallback.
///
/// If `child_parent` is longer than `child`, the remainder is treated as empty
/// and `base` is returned unchanged instead of panicking.
#[cfg(unix)]
fn join_last_segment(base: &Path, child: &Path, child_parent: &Path) -> PathBuf {
    use std::{
        ffi::OsString,
        os::unix::ffi::{OsStrExt, OsStringExt},
    };

    /// Remove the last segment of `buf` like `PathBuf::pop`, ignoring
    /// redundant separators and never removing the root `/`.
    fn pop_segment(buf: &mut Vec<u8>) {
        let mut keep = buf.len();
        // Trailing separators do not form a segment of their own.
        while keep > 1 && buf[keep - 1] == b'/' {
            keep -= 1;
        }
        // The segment itself.
        while keep > 0 && buf[keep - 1] != b'/' {
            keep -= 1;
        }
        // Separators in front of the removed segment, except the root.
        while keep > 1 && buf[keep - 1] == b'/' {
            keep -= 1;
        }
        buf.truncate(keep);
    }

    let child_bytes = child.as_os_str().as_bytes();
    let parent_len = child_parent.as_os_str().len();
    // Checked slicing: a parent longer than the child yields an empty suffix.
    let suffix = child_bytes.get(parent_len..).unwrap_or(&[]);

    let base_bytes = base.as_os_str().as_bytes();
    let mut out = Vec::with_capacity(base_bytes.len() + 1 + suffix.len());
    out.extend_from_slice(base_bytes);

    // Splitting on `/` also swallows the separator between parent and suffix.
    for segment in suffix.split(|&b| b == b'/') {
        match segment {
            b"" | b"." => {}
            b".." => pop_segment(&mut out),
            name => {
                if matches!(out.last(), Some(&last) if last != b'/') {
                    out.push(b'/');
                }
                out.extend_from_slice(name);
            }
        }
    }

    PathBuf::from(OsString::from_vec(out))
}

/// Non-Unix fallback of `join_last_segment`: joins `base` with the part of
/// `child` that follows `child_parent` using the `Components`-based
/// `PathUtil::normalize_with`.
///
/// # Panics
///
/// Panics if `child_parent` is not a prefix of `child`.
#[cfg(not(unix))]
fn join_last_segment(base: &Path, child: &Path, child_parent: &Path) -> PathBuf {
    // `PathUtil` is brought into scope by the file-level
    // `#[cfg(not(unix))] use crate::path::PathUtil;`. Importing it again here
    // left that import unused and failed the non-Unix CI jobs.
    base.normalize_with(child.strip_prefix(child_parent).unwrap())
}

/// Byte-level parent lookup for Unix. See `Cache::value` for why.
///
/// Returns exactly what [`Path::parent`] returns, computed by scanning the raw
/// bytes backwards instead of driving std's `Components` iterator:
///
/// * the root (`/`, `//`, `/.`) and the empty path have no parent;
/// * a single relative segment (`a`, `.`, `..`) has the empty path as parent;
/// * redundant separators and `.` segments are skipped both when locating the
///   last segment and at the end of the returned parent, so `/a/b///`,
///   `/a//b` and `/a/./b` all yield `/a`.
///
/// The returned path is always a byte prefix of `path`.
#[cfg(unix)]
fn parent_path(path: &Path) -> Option<&Path> {
    use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

    /// Split the last `/`-delimited segment off `bytes[floor..end]`, returning
    /// the segment and the end offset of what precedes its separator.
    fn split_last(bytes: &[u8], floor: usize, end: usize) -> (&[u8], usize) {
        let segment_start = bytes[floor..end]
            .iter()
            .rposition(|&b| b == b'/')
            .map_or(floor, |i| floor + i + 1);
        // Drop the separator in front of the segment, if there was one.
        let rest_end = if segment_start > floor { segment_start - 1 } else { floor };
        (&bytes[segment_start..end], rest_end)
    }

    /// Segments that std's `Components` does not report in the path body.
    fn is_skipped(segment: &[u8]) -> bool {
        segment.is_empty() || segment == b"."
    }

    let bytes = path.as_os_str().as_bytes();
    let has_root = bytes.first() == Some(&b'/');
    // A leading `.` is the only `.` that counts as a component of its own.
    let has_cur_dir = !has_root && matches!(bytes, [b'.'] | [b'.', b'/', ..]);
    // Bytes in front of the body: the root `/` or the leading `.`.
    let floor = usize::from(has_root || has_cur_dir);

    // Remove the last real segment.
    let mut end = bytes.len();
    loop {
        if end <= floor {
            // Only the root, a leading `.`, or nothing is left: the root and
            // the empty path have no parent, a lone `.` has the empty parent.
            return if has_cur_dir {
                Some(Path::new(OsStr::from_bytes(&bytes[..0])))
            } else {
                None
            };
        }
        let (segment, rest_end) = split_last(bytes, floor, end);
        end = rest_end;
        if !is_skipped(segment) {
            break;
        }
    }

    // Trim separators and `.` segments left dangling at the end of the parent.
    while end > floor {
        let (segment, rest_end) = split_last(bytes, floor, end);
        if !is_skipped(segment) {
            break;
        }
        end = rest_end;
    }

    Some(Path::new(OsStr::from_bytes(&bytes[..end])))
}

/// Parent lookup for non-Unix targets: defers to the standard library, which
/// understands platform prefixes and separators.
#[cfg(not(unix))]
#[inline]
fn parent_path(path: &Path) -> Option<&Path> {
    Path::parent(path)
}

/// Memoized cache key, code adapted from <https://stackoverflow.com/a/50478038>.
trait CacheKey {
fn tuple(&self) -> (u64, &Path);
Expand Down
213 changes: 169 additions & 44 deletions src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,89 @@ pub trait PathUtil {
impl PathUtil for Path {
// https://github.com/parcel-bundler/parcel/blob/e0b99c2a42e9109a9ecbd6f537844a1b33e7faf5/packages/utils/node-resolver-rs/src/path.rs#L7
fn normalize(&self) -> PathBuf {
let mut components = self.components().peekable();
let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek() {
let buf = PathBuf::from(c.as_os_str());
components.next();
buf
} else {
PathBuf::new()
};

for component in components {
match component {
Component::Prefix(..) => unreachable!("Path {:?}", self),
Component::RootDir => {
ret.push(component.as_os_str());
}
Component::CurDir => {}
Component::ParentDir => {
ret.pop();
}
Component::Normal(c) => {
ret.push(c);
// Why: On Unix, an `OsStr` is raw bytes and `/`, `.` are always single-byte ASCII
// regardless of UTF-8 content in segments. Iterating bytes directly skips
// the heavy `Components` state machine (`parse_next_component_back`,
// `Component::PartialEq`, double-ended iter bookkeeping) that dominated
// ~3% of the resolver's instructions in callgrind.
#[cfg(unix)]
{
unix_normalize(self)
}
#[cfg(not(unix))]
{
let mut components = self.components().peekable();
let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek() {
let buf = PathBuf::from(c.as_os_str());
components.next();
buf
} else {
PathBuf::new()
};

for component in components {
match component {
Component::Prefix(..) => unreachable!("Path {:?}", self),
Component::RootDir => {
ret.push(component.as_os_str());
}
Component::CurDir => {}
Component::ParentDir => {
ret.pop();
}
Component::Normal(c) => {
ret.push(c);
}
}
}
}

ret
ret
}
}

// https://github.com/parcel-bundler/parcel/blob/e0b99c2a42e9109a9ecbd6f537844a1b33e7faf5/packages/utils/node-resolver-rs/src/path.rs#L37
fn normalize_with<B: AsRef<Self>>(&self, subpath: B) -> PathBuf {
let subpath = subpath.as_ref();

let mut components = subpath.components();
// Why: callgrind showed `Components::next` + `parse_next_component_back` +
// `Component::PartialEq` totalling ~5% of Ir, almost all driven from
// `normalize_with` calls in the resolver hot path. On Unix the separator
// and `.`/`..` markers are guaranteed single-byte ASCII, so a byte-level
// pass produces identical output without the iterator overhead.
#[cfg(unix)]
{
unix_normalize_with(self, subpath)
}
#[cfg(not(unix))]
{
let mut components = subpath.components();

let Some(head) = components.next() else {
return subpath.to_path_buf();
};
let Some(head) = components.next() else {
return subpath.to_path_buf();
};

if matches!(head, Component::Prefix(..) | Component::RootDir) {
return subpath.to_path_buf();
}
if matches!(head, Component::Prefix(..) | Component::RootDir) {
return subpath.to_path_buf();
}

let mut ret = self.to_path_buf();
for component in std::iter::once(head).chain(components) {
match component {
Component::CurDir => {}
Component::ParentDir => {
ret.pop();
}
Component::Normal(c) => {
ret.push(c);
}
Component::Prefix(..) | Component::RootDir => {
unreachable!("Path {:?} Subpath {:?}", self, subpath)
let mut ret = self.to_path_buf();
for component in std::iter::once(head).chain(components) {
match component {
Component::CurDir => {}
Component::ParentDir => {
ret.pop();
}
Component::Normal(c) => {
ret.push(c);
}
Component::Prefix(..) | Component::RootDir => {
unreachable!("Path {:?} Subpath {:?}", self, subpath)
}
}
}
}

ret
ret
}
}

fn is_invalid_exports_target(&self) -> bool {
Expand All @@ -106,6 +130,107 @@ impl PathUtil for Path {
}
}

/// Byte-level `normalize` for Unix. See [`PathUtil::normalize`] for why.
///
/// Splits the raw path bytes on `/`, drops empty and `.` segments, and lets
/// each `..` remove the most recently kept segment. A `..` with nothing left
/// to remove is ignored, so the result never climbs above the root or above
/// the start of a relative path.
#[cfg(unix)]
fn unix_normalize(path: &Path) -> PathBuf {
    use std::{
        ffi::OsString,
        os::unix::ffi::{OsStrExt, OsStringExt},
    };

    let raw = path.as_os_str().as_bytes();
    // 1 for an absolute path (the root `/` sits at index 0), 0 otherwise.
    let root_len = usize::from(raw.starts_with(b"/"));

    let mut normalized: Vec<u8> = Vec::with_capacity(raw.len());
    normalized.extend_from_slice(&raw[..root_len]);

    // Offsets in `normalized` where each kept segment begins, so `..` can
    // truncate directly instead of searching for the previous separator.
    let mut segment_starts: Vec<usize> = Vec::new();

    for segment in raw.split(|&byte| byte == b'/') {
        if segment.is_empty() || segment == b"." {
            continue;
        }

        if segment == b".." {
            if let Some(start) = segment_starts.pop() {
                // Also drop the separator written in front of the segment;
                // the first segment after the root has none.
                let keep = if start > root_len { start - 1 } else { start };
                normalized.truncate(keep);
            }
            continue;
        }

        // Every segment except the first one after the (optional) root is
        // preceded by a separator.
        if normalized.len() > root_len {
            normalized.push(b'/');
        }
        segment_starts.push(normalized.len());
        normalized.extend_from_slice(segment);
    }

    PathBuf::from(OsString::from_vec(normalized))
}

/// Byte-level `normalize_with` for Unix. See [`PathUtil::normalize_with`] for why.
///
/// * An empty or absolute `subpath` is returned as is, matching
///   `PathBuf::push` and the `Components`-based implementation.
/// * Otherwise the result starts as `base` and each `/`-separated segment of
///   `subpath` is applied in order: empty and `.` segments are skipped, `..`
///   removes the last segment of the result, anything else is appended.
///
/// `..` behaves like `PathBuf::pop` for separators: trailing or repeated
/// separators in the result are ignored when locating the segment to remove,
/// and the root `/` is never removed. `.` segments already present in `base`
/// are treated as ordinary names, so callers should pass a normalized base.
#[cfg(unix)]
fn unix_normalize_with(base: &Path, subpath: &Path) -> PathBuf {
    use std::{
        ffi::OsString,
        os::unix::ffi::{OsStrExt, OsStringExt},
    };

    /// Remove the last segment of `buf`, scanning from the end only.
    fn pop_segment(buf: &mut Vec<u8>) {
        let mut keep = buf.len();
        // Trailing separators do not form a segment of their own; without
        // this step `/a/b/` would lose only its final `/`.
        while keep > 1 && buf[keep - 1] == b'/' {
            keep -= 1;
        }
        // The segment itself.
        while keep > 0 && buf[keep - 1] != b'/' {
            keep -= 1;
        }
        // Separators in front of the removed segment, except the root.
        while keep > 1 && buf[keep - 1] == b'/' {
            keep -= 1;
        }
        buf.truncate(keep);
    }

    let sub_bytes = subpath.as_os_str().as_bytes();

    // Empty or absolute subpath short-circuits to the subpath itself.
    if matches!(sub_bytes.first(), None | Some(&b'/')) {
        return subpath.to_path_buf();
    }

    let base_bytes = base.as_os_str().as_bytes();
    let mut out: Vec<u8> = Vec::with_capacity(base_bytes.len() + 1 + sub_bytes.len());
    out.extend_from_slice(base_bytes);

    for segment in sub_bytes.split(|&b| b == b'/') {
        match segment {
            b"" | b"." => {}
            b".." => pop_segment(&mut out),
            name => {
                if matches!(out.last(), Some(&last) if last != b'/') {
                    out.push(b'/');
                }
                out.extend_from_slice(name);
            }
        }
    }

    PathBuf::from(OsString::from_vec(out))
}

// https://github.com/webpack/enhanced-resolve/blob/main/test/path.test.js
#[tokio::test]
async fn is_invalid_exports_target() {
Expand Down
Loading