Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 103 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,10 @@ criterion = { version = "4.3.0", package = "codspeed-criterion-compat", default-
] }

normalize-path = { version = "0.2.1" }
proptest = { version = "1.10.0", default-features = false, features = ["std"] }
rayon = { version = "1.11.0" }
regex = "1.12.2"
vfs = "0.13.0" # for testing with in memory file system
vfs = "0.13.0" # for testing with in memory file system

# Benchmark allocator features kept aligned with rspack's `xtask/benchmark`.
[target.'cfg(target_os = "linux")'.dev-dependencies]
Expand Down
19 changes: 9 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,20 +371,19 @@ impl<Fs: FileSystem + Send + Sync> ResolverGeneric<Fs> {
return Ok(path);
}

let result = match Path::new(specifier).components().next() {
// 2. If X begins with '/'
Some(Component::RootDir | Component::Prefix(_)) => {
// Why: `Path::new(specifier).components().next()` runs the full
// `Components` state machine just to look at the first character.
// `classify_specifier_head` does the same dispatch over raw bytes; see
// its property test for the equivalence proof.
let result = match specifier::classify_specifier_head(specifier) {
specifier::SpecifierHead::Absolute => {
self.require_absolute(cached_path, specifier, ctx).await
}
// 3. If X begins with './' or '/' or '../'
Some(Component::CurDir | Component::ParentDir) => {
specifier::SpecifierHead::Relative => {
self.require_relative(cached_path, specifier, ctx).await
}
// 4. If X begins with '#'
Some(Component::Normal(_)) if specifier.as_bytes()[0] == b'#' => {
self.require_hash(cached_path, specifier, ctx).await
}
_ => {
specifier::SpecifierHead::Hash => self.require_hash(cached_path, specifier, ctx).await,
specifier::SpecifierHead::Bare => {
// 1. If X is a core module,
// a. return the core module
// b. STOP
Expand Down
131 changes: 130 additions & 1 deletion src/specifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,82 @@ impl<'a> Specifier<'a> {
}
}

/// Classification of a specifier's leading bytes, used by the resolver to
/// dispatch a fresh specifier into the matching resolution path.
///
/// The result is the same as inspecting
/// `Path::new(specifier).components().next()` for `RootDir`/`Prefix`,
/// `CurDir`/`ParentDir`, and `Normal`, but without running the full
/// `std::path::Components` state machine on what is effectively a 1- to
/// 3-byte decision. The separator and the `.`/`..` markers are always
/// single-byte ASCII, so the dispatch can be made by direct byte inspection.
///
/// On Windows `\` is a path separator just like `/`, so it is honoured in
/// every position where `/` is (leading separator, after `.`, after `..`).
/// Drive-letter prefixes such as `C:` are detected by deferring to the std
/// path API.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SpecifierHead {
    /// Starts with a path separator (`/`, or `\` on Windows) or, on Windows,
    /// a drive/UNC prefix.
    Absolute,
    /// Is exactly `.` or `..`, or starts with `.` / `..` followed by a path
    /// separator (`./`, `../`, and on Windows also `.\`, `..\`).
    Relative,
    /// Starts with `#` — package imports / subpath imports.
    Hash,
    /// Anything else — bare specifier, empty string, or starts with `.`
    /// followed by a non-separator (e.g. `.foo`, `..foo`), which `Components`
    /// reports as a `Normal` segment.
    Bare,
}

/// Classify the leading bytes of a module specifier.
///
/// Returns the [`SpecifierHead`] that the std `Path::components`-based
/// dispatch would select for `specifier`; see [`SpecifierHead`] for the
/// meaning of each variant. Never panics: the empty string is `Bare`.
///
/// `#[inline]`: the call site in `require_without_parse` is per-resolve and
/// previously contained the entire `Path::components()` match inline. Without
/// this hint, CodSpeed (x86_64) saw the cross-module call survive LTO and
/// added a small per-call regression even though the body itself is cheaper.
#[inline]
pub fn classify_specifier_head(specifier: &str) -> SpecifierHead {
    // `std::path` accepts both `/` and `\` as separators on Windows and only
    // `/` elsewhere. Using one predicate for every position keeps `.\foo` and
    // `..\foo` relative on Windows, exactly as `Components` reports them.
    const fn is_separator(byte: u8) -> bool {
        byte == b'/' || (cfg!(windows) && byte == b'\\')
    }

    match specifier.as_bytes() {
        // Empty specifier: `Components` yields nothing.
        [] => SpecifierHead::Bare,
        [b'#', ..] => SpecifierHead::Hash,
        [first, ..] if is_separator(*first) => SpecifierHead::Absolute,
        // `.` or `..` alone.
        [b'.'] | [b'.', b'.'] => SpecifierHead::Relative,
        // `./...` (and `.\...` on Windows).
        [b'.', sep, ..] if is_separator(*sep) => SpecifierHead::Relative,
        // `../...` (and `..\...` on Windows).
        [b'.', b'.', sep, ..] if is_separator(*sep) => SpecifierHead::Relative,
        // `.foo`, `..foo`, `...` — `Components` reports these as `Normal`.
        [b'.', ..] => SpecifierHead::Bare,
        _ => {
            // On Windows, drive-letter specifiers like `C:` are reported as
            // `Component::Prefix`. Defer to the std path API to stay correct.
            #[cfg(windows)]
            {
                use std::path::{Component, Path};
                if matches!(
                    Path::new(specifier).components().next(),
                    Some(Component::RootDir | Component::Prefix(_))
                ) {
                    return SpecifierHead::Absolute;
                }
            }
            SpecifierHead::Bare
        }
    }
}

#[cfg(test)]
mod tests {
use super::{Specifier, SpecifierError};
use super::{classify_specifier_head, Specifier, SpecifierError, SpecifierHead};

#[test]
fn debug() {
Expand Down Expand Up @@ -278,4 +351,60 @@ mod tests {

Ok(())
}

/// Oracle for the tests: classifies `specifier` by asking the std path API
/// for its first component (`Path::new(s).components().next()`) and mapping
/// that component to a `SpecifierHead`. `classify_specifier_head` must remain
/// equivalent to this on Unix and for `/`-prefixed inputs on Windows.
fn reference_head(specifier: &str) -> SpecifierHead {
    use std::path::{Component, Path};

    // No component at all (empty specifier) is treated as bare.
    let Some(leading) = Path::new(specifier).components().next() else {
        return SpecifierHead::Bare;
    };

    match leading {
        Component::RootDir | Component::Prefix(_) => SpecifierHead::Absolute,
        Component::CurDir | Component::ParentDir => SpecifierHead::Relative,
        // `#` is ASCII, so a `str` prefix check equals a first-byte check.
        Component::Normal(_) if specifier.starts_with('#') => SpecifierHead::Hash,
        Component::Normal(_) => SpecifierHead::Bare,
    }
}

/// Pins the classification of representative specifiers and checks that the
/// std-based `reference_head` agrees with each pinned expectation.
#[test]
fn classify_specifier_head_known_cases() {
    // (specifier, expected classification), checked in this order.
    const CASES: &[(&str, SpecifierHead)] = &[
        ("", SpecifierHead::Bare),
        ("/abs", SpecifierHead::Absolute),
        ("/", SpecifierHead::Absolute),
        (".", SpecifierHead::Relative),
        ("..", SpecifierHead::Relative),
        ("./foo", SpecifierHead::Relative),
        ("../foo", SpecifierHead::Relative),
        (".foo", SpecifierHead::Bare),
        ("..foo", SpecifierHead::Bare),
        ("#imports/sub", SpecifierHead::Hash),
        ("react", SpecifierHead::Bare),
        ("@scope/pkg", SpecifierHead::Bare),
        ("中文/包", SpecifierHead::Bare),
    ];

    for &(input, expected) in CASES {
        // Fast path first, then the std-based oracle, against the same value.
        assert_eq!(classify_specifier_head(input), expected, "{input:?}");
        assert_eq!(reference_head(input), expected, "{input:?} reference");
    }
}

// Property tests: on generated inputs, `classify_specifier_head` must return
// the same `SpecifierHead` as `reference_head`, the `Path::components`-based
// dispatch it replaces.
proptest::proptest! {
/// Verify `classify_specifier_head` matches the std `Path::components`-based
/// dispatch (`reference_head`) for strings of 0 to 32 printable ASCII
/// characters (bytes `0x20..=0x7e`). This test has no `cfg` gate, so it is
/// compiled on every platform.
#[test]
fn classify_matches_components_for_ascii(s in "[\\x20-\\x7e]{0,32}") {
proptest::prop_assert_eq!(classify_specifier_head(&s), reference_head(&s));
}

/// Same equivalence over strings generated from the `.*` pattern, to cover
/// non-ASCII segment bytes.
/// Compiled only on Unix (`#[cfg(unix)]`): it is skipped on Windows, where
/// drive/UNC prefixes (e.g. `C:`) bypass the fast path and need std parsing.
#[cfg(unix)]
#[test]
fn classify_matches_components_for_utf8(s in ".*") {
proptest::prop_assert_eq!(classify_specifier_head(&s), reference_head(&s));
}
}
}
Loading