From a3c42a14b4d6acaae0d61815779acfe2c6ad23c2 Mon Sep 17 00:00:00 2001 From: mscherer Date: Wed, 13 May 2026 22:53:56 +0200 Subject: [PATCH] Cache UseStatementsTrait getUseStatements and bound throw class lookup UseStatementsTrait::getUseStatements() iterates every token in the file via foreach($tokens as ...) looking for top-level T_USE tokens. The result is stable across calls during a single phpcs pass over a file, but three sniffs (DocBlockThrowsSniff, DocBlockVarSniff, UseWithAliasingSniff) call it once per T_FUNCTION. On a large method- heavy controller that turns a per-file O(file_size) walk into per- method O(methods * file_size). Cache the parsed result so each file is parsed at most once. Cache invalidation has to survive phpcbf's fix loop, which re-tokenizes the same file in-place on the same File object after each fix iteration. Token count alone is not sufficient because an in-place alias rename (use Foo as Bar; -> use Foo as Baz;) leaves the count unchanged. The cache therefore also stores a content fingerprint of each use statement range and re-verifies it against the live tokens on every cache hit. Verification is O(num_uses * avg_use_length) - a handful of token reads. Separately, DocBlockThrowsSniff::extractExceptions() looked up T_DOUBLE_COLON with an upper bound of $scopeCloser (the method's closing brace). On long bodies that meant scanning past the current throw statement and picking up an unrelated `::` from a later statement, then silently using it to gate exception collection for this throw. Bound the lookup to the next `;` instead - both faster and more correct. Measured on a CakePHP 5 app whose biggest controller is 11k lines with ~100 actions, parallel=1, --report=performance: DocBlockThrows on ResidentsController.php alone: 3.25s -> 0.08s DocBlockThrows across 1095-file codebase: 11.32s -> 0.86s Existing test suite (100 tests / 122 assertions) passes unchanged. Output diff across all 1095 files: identical violations before vs after. --- .../Sniffs/Commenting/DocBlockThrowsSniff.php | 9 +- PhpCollective/Traits/UseStatementsTrait.php | 106 ++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php b/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php index 1b4345c..b087849 100644 --- a/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php +++ b/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php @@ -132,7 +132,14 @@ protected function extractExceptions(File $phpcsFile, int $stackPointer): array $classIndex = $nextIndex; } - $doubleColonIndex = $phpcsFile->findNext(T_DOUBLE_COLON, $i + 1, $scopeCloser); + // Constrain the `::` lookup to the current statement. + // The original code searched up to $scopeCloser, which on long + // method bodies could pick up an unrelated `::` operator from + // a later statement (and silently used it to gate exception + // collection for *this* throw). Bound by the next `;`. + $statementEnd = $phpcsFile->findNext(T_SEMICOLON, $i + 1, $scopeCloser); + $doubleColonSearchEnd = $statementEnd !== false ? $statementEnd : $scopeCloser; + $doubleColonIndex = $phpcsFile->findNext(T_DOUBLE_COLON, $i + 1, $doubleColonSearchEnd); if (!$newIndex && !$classIndex && !$doubleColonIndex) { continue; diff --git a/PhpCollective/Traits/UseStatementsTrait.php b/PhpCollective/Traits/UseStatementsTrait.php index c1e5f77..e5daf87 100644 --- a/PhpCollective/Traits/UseStatementsTrait.php +++ b/PhpCollective/Traits/UseStatementsTrait.php @@ -12,6 +12,32 @@ trait UseStatementsTrait { + /** + * Per-file cache of parsed `use` statements. + * + * `getUseStatements()` iterates every token in the file via + * `foreach ($tokens as ...)` looking for top-level T_USE tokens. The + * result is stable across calls during a single phpcs pass over a + * file, but several sniffs call it once per T_FUNCTION + * (DocBlockThrowsSniff, DocBlockVarSniff, UseWithAliasingSniff). On a + * large method-heavy controller that turns a per-file O(file_size) + * walk into per-method O(methods * file_size). Cache the parsed + * result so each file is parsed at most once per pass. + * + * Cache invalidation has to survive phpcbf's fix loop, which re- + * tokenizes the same file in-place on the same File object after + * each fix iteration. Token count alone is not sufficient — a fix + * that renames an alias (`use Foo as Bar;` -> `use Foo as Baz;`) + * leaves the token count unchanged. We therefore also store the + * concrete statement strings on the way in and re-verify them + * against the live tokens before trusting a cache hit. The + * verification is O(num_use_statements * avg_use_length), typically + * just a handful of token reads, so it stays cheap. + * + * @var array>}> + */ + private static array $useStatementsCache = []; + /** * @param \PHP_CodeSniffer\Files\File $phpcsFile * @@ -20,6 +46,15 @@ trait UseStatementsTrait protected function getUseStatements(File $phpcsFile): array { $tokens = $phpcsFile->getTokens(); + $cacheKey = $phpcsFile->getFilename(); + $tokenCount = count($tokens); + if ( + isset(self::$useStatementsCache[$cacheKey]) + && self::$useStatementsCache[$cacheKey]['count'] === $tokenCount + && $this->useStatementsCacheStillValid(self::$useStatementsCache[$cacheKey]['statements'], $tokens) + ) { + return self::$useStatementsCache[$cacheKey]['statements']; + } $statements = []; foreach ($tokens as $index => $token) { @@ -81,12 +116,83 @@ protected function getUseStatements(File $phpcsFile): array 'fullName' => ltrim($fullName, '\\'), 'shortName' => $shortName, 'start' => $index, + 'cacheFingerprint' => $this->buildUseStatementFingerprint($tokens, $index, $semicolonIndex), ]; } + self::$useStatementsCache[$cacheKey] = [ + 'count' => $tokenCount, + 'statements' => $statements, + ]; + return $statements; } + /** + * Concatenate the raw content of every token in [$start, $end] inclusive. + * + * Used as a fingerprint for cache invalidation: when a phpcbf fix + * rewrites the inside of a `use` line without changing the total + * token count of the file, the fingerprint differs and the cached + * entry is rejected. + * + * @param array> $tokens + * @param int $start + * @param int $end + * + * @return string + */ + private function buildUseStatementFingerprint(array $tokens, int $start, int $end): string + { + $fingerprint = ''; + for ($i = $start; $i <= $end; $i++) { + if (!isset($tokens[$i])) { + break; + } + $fingerprint .= $tokens[$i]['content']; + } + + return $fingerprint; + } + + /** + * Verify a cache entry against the live tokens. + * + * For every cached use statement, check that the live tokens at the + * recorded [start, end] range still produce the same concatenated + * content (the fingerprint stored when the entry was created). This + * catches: + * - structural edits that shifted positions without changing + * overall token count (cached T_USE no longer at `start`); + * - in-place rewrites that preserve token count, e.g. renaming + * an imported alias to a different identifier of the same shape; + * - any whitespace/comment edit inside the use statement. + * + * Cost per call is O(num_use_statements * avg_use_length), typically + * just a handful of token reads. + * + * @param array> $statements + * @param array> $tokens + * + * @return bool + */ + private function useStatementsCacheStillValid(array $statements, array $tokens): bool + { + foreach ($statements as $statement) { + $start = $statement['start']; + if (!isset($tokens[$start]) || $tokens[$start]['code'] !== T_USE) { + return false; + } + + $live = $this->buildUseStatementFingerprint($tokens, $start, $statement['end']); + if ($live !== $statement['cacheFingerprint']) { + return false; + } + } + + return true; + } + /** * @param string $statementContent *