diff --git a/Cargo.lock b/Cargo.lock index 3ccc3897..df7b3fd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,7 +192,7 @@ dependencies = [ "anyhow", "cc", "colored", - "getrandom", + "getrandom 0.2.17", "glob", "libc", "nix", @@ -528,6 +528,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "glob" version = "0.3.3" @@ -897,6 +909,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -906,6 +927,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bitflags", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "unarray", +] + [[package]] name = "quote" version = "1.0.41" @@ -915,6 +951,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "radix_trie" version = "0.3.0" @@ -925,6 +967,44 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" 
+version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.12.0" @@ -1032,6 +1112,7 @@ dependencies = [ "mimalloc", "normalize-path", "pnp", + "proptest", "rayon", "regex", "rustc-hash", @@ -1304,6 +1385,12 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.20" @@ -1353,6 +1440,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -1515,6 +1611,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "zerocopy" version = "0.8.27" diff --git a/Cargo.toml b/Cargo.toml index 44f5447c..445398f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,9 +120,10 @@ criterion = { version = "4.3.0", package = "codspeed-criterion-compat", default- ] } normalize-path = { version = "0.2.1" } +proptest = { version = "1.10.0", default-features = false, features = ["std"] } rayon = { version = "1.11.0" } regex = "1.12.2" -vfs = "0.13.0" # for testing with in memory file system +vfs = "0.13.0" # for testing with in memory file system # Benchmark allocator features kept aligned with rspack's `xtask/benchmark`. [target.'cfg(target_os = "linux")'.dev-dependencies] diff --git a/src/lib.rs b/src/lib.rs index 19f4dbf9..cf035345 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -371,20 +371,19 @@ impl ResolverGeneric { return Ok(path); } - let result = match Path::new(specifier).components().next() { - // 2. If X begins with '/' - Some(Component::RootDir | Component::Prefix(_)) => { + // Why: `Path::new(specifier).components().next()` runs the full + // `Components` state machine just to look at the first character. + // `classify_specifier_head` does the same dispatch over raw bytes; see + // its property test for the equivalence proof. + let result = match specifier::classify_specifier_head(specifier) { + specifier::SpecifierHead::Absolute => { self.require_absolute(cached_path, specifier, ctx).await } - // 3. If X begins with './' or '/' or '../' - Some(Component::CurDir | Component::ParentDir) => { + specifier::SpecifierHead::Relative => { self.require_relative(cached_path, specifier, ctx).await } - // 4. If X begins with '#' - Some(Component::Normal(_)) if specifier.as_bytes()[0] == b'#' => { - self.require_hash(cached_path, specifier, ctx).await - } - _ => { + specifier::SpecifierHead::Hash => self.require_hash(cached_path, specifier, ctx).await, + specifier::SpecifierHead::Bare => { // 1. 
/// Coarse classification of a module specifier by its leading bytes.
///
/// The resolver uses this to dispatch a fresh specifier into the matching
/// resolution path. The classification mirrors a dispatch on
/// `Path::new(specifier).components().next()`:
///
/// | first `Component`            | classification              |
/// |------------------------------|-----------------------------|
/// | `RootDir` / `Prefix`         | [`SpecifierHead::Absolute`] |
/// | `CurDir` / `ParentDir`       | [`SpecifierHead::Relative`] |
/// | `Normal` starting with `#`   | [`SpecifierHead::Hash`]     |
/// | anything else (incl. empty)  | [`SpecifierHead::Bare`]     |
///
/// The separator and the `.` / `..` markers are single-byte ASCII, so the
/// decision can be made by inspecting at most three bytes instead of running
/// the `std::path::Components` state machine.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SpecifierHead {
    /// Starts with a path separator or, on Windows, a drive/UNC prefix.
    Absolute,
    /// Is exactly `.` or `..`, or starts with `.` / `..` followed by a path
    /// separator (`/` everywhere, additionally `\` on Windows).
    Relative,
    /// Starts with `#` — package imports / subpath imports.
    Hash,
    /// Anything else: a bare specifier, the empty string, or a leading `.`
    /// that is part of a normal segment (e.g. `.foo`, `..foo`).
    Bare,
}

/// Returns `true` if `byte` is a path separator on the current platform.
///
/// Delegates to [`std::path::is_separator`] so the byte-level fast path uses
/// the same notion of "separator" as `std::path::Components`: `/` on every
/// platform, plus `\` on Windows.
#[inline]
fn is_separator_byte(byte: u8) -> bool {
    std::path::is_separator(char::from(byte))
}

/// Classify the head of a module specifier.
///
/// See [`SpecifierHead`] for the contract. The result is intended to match
/// the `Path::components()`-based dispatch on every platform; in particular
/// `.\foo` and `..\foo` are [`SpecifierHead::Relative`] on Windows, where `\`
/// is a path separator, and [`SpecifierHead::Bare`] elsewhere.
///
/// `#[inline]`: the call site in `require_without_parse` is per-resolve and
/// previously contained the entire `Path::components()` match inline. Without
/// this hint, CodSpeed (x86_64) saw the cross-module call survive LTO and
/// added a small per-call regression even though the body itself is cheaper.
#[inline]
pub fn classify_specifier_head(specifier: &str) -> SpecifierHead {
    match specifier.as_bytes() {
        [] => SpecifierHead::Bare,
        [b'#', ..] => SpecifierHead::Hash,
        // `.` or `..` on their own.
        [b'.'] | [b'.', b'.'] => SpecifierHead::Relative,
        // `./...` (and `.\...` on Windows).
        [b'.', sep, ..] if is_separator_byte(*sep) => SpecifierHead::Relative,
        // `../...` (and `..\...` on Windows).
        [b'.', b'.', sep, ..] if is_separator_byte(*sep) => SpecifierHead::Relative,
        // `.foo`, `..foo`, `...` — `Components` reports these as `Normal`.
        [b'.', ..] => SpecifierHead::Bare,
        // Leading `/` (and leading `\` on Windows).
        [first, ..] if is_separator_byte(*first) => SpecifierHead::Absolute,
        _ => {
            // On Windows, drive-letter specifiers like `C:` are reported as
            // `Component::Prefix`. Defer to the std path API to stay correct.
            #[cfg(windows)]
            {
                use std::path::{Component, Path};
                if matches!(
                    Path::new(specifier).components().next(),
                    Some(Component::RootDir | Component::Prefix(_))
                ) {
                    return SpecifierHead::Absolute;
                }
            }
            SpecifierHead::Bare
        }
    }
}
+ fn reference_head(specifier: &str) -> SpecifierHead { + use std::path::{Component, Path}; + match Path::new(specifier).components().next() { + Some(Component::RootDir | Component::Prefix(_)) => SpecifierHead::Absolute, + Some(Component::CurDir | Component::ParentDir) => SpecifierHead::Relative, + Some(Component::Normal(_)) if specifier.as_bytes().first() == Some(&b'#') => { + SpecifierHead::Hash + } + _ => SpecifierHead::Bare, + } + } + + #[test] + fn classify_specifier_head_known_cases() { + let cases = [ + ("", SpecifierHead::Bare), + ("/abs", SpecifierHead::Absolute), + ("/", SpecifierHead::Absolute), + (".", SpecifierHead::Relative), + ("..", SpecifierHead::Relative), + ("./foo", SpecifierHead::Relative), + ("../foo", SpecifierHead::Relative), + (".foo", SpecifierHead::Bare), + ("..foo", SpecifierHead::Bare), + ("#imports/sub", SpecifierHead::Hash), + ("react", SpecifierHead::Bare), + ("@scope/pkg", SpecifierHead::Bare), + ("中文/包", SpecifierHead::Bare), + ]; + for (input, expected) in cases { + assert_eq!(classify_specifier_head(input), expected, "{input:?}"); + assert_eq!(reference_head(input), expected, "{input:?} reference"); + } + } + + proptest::proptest! { + /// Verify `classify_specifier_head` matches the std `Path::components`-based + /// dispatch for any ASCII input (the universe the resolver actually sees). + #[test] + fn classify_matches_components_for_ascii(s in "[\\x20-\\x7e]{0,32}") { + proptest::prop_assert_eq!(classify_specifier_head(&s), reference_head(&s)); + } + + /// Same equivalence over arbitrary UTF-8 to cover non-ASCII segment bytes. + /// Skipped on Windows, where drive/UNC prefixes (e.g. `C:`) bypass the + /// fast path and need std parsing. + #[cfg(unix)] + #[test] + fn classify_matches_components_for_utf8(s in ".*") { + proptest::prop_assert_eq!(classify_specifier_head(&s), reference_head(&s)); + } + } }