From a3c42a14b4d6acaae0d61815779acfe2c6ad23c2 Mon Sep 17 00:00:00 2001
From: mscherer <dereuromark@web.de>
Date: Wed, 13 May 2026 22:53:56 +0200
Subject: [PATCH] Cache UseStatementsTrait getUseStatements and bound throw
 class lookup

UseStatementsTrait::getUseStatements() iterates every token in the
file via foreach($tokens as ...) looking for top-level T_USE tokens.
The result is stable across calls during a single phpcs pass over a
file, but three sniffs (DocBlockThrowsSniff, DocBlockVarSniff,
UseWithAliasingSniff) call it once per T_FUNCTION. On a large method-
heavy controller that turns a per-file O(file_size) walk into per-
method O(methods * file_size).

Cache the parsed result so each file is parsed at most once. Cache
invalidation has to survive phpcbf's fix loop, which re-tokenizes
the same file in-place on the same File object after each fix
iteration. Token count alone is not sufficient because an in-place
alias rename (use Foo as Bar; -> use Foo as Baz;) leaves the count
unchanged. The cache therefore also stores a content fingerprint of
each use statement range and re-verifies it against the live tokens
on every cache hit. Verification is O(num_uses * avg_use_length) -
a handful of token reads.

Separately, DocBlockThrowsSniff::extractExceptions() looked up
T_DOUBLE_COLON with an upper bound of $scopeCloser (the method's
closing brace). On long bodies that meant scanning past the current
throw statement and picking up an unrelated `::` from a later
statement, then silently using it to gate exception collection for
this throw. Bound the lookup to the next `;` instead - both faster
and more correct.

Measured on a CakePHP 5 app whose biggest controller is 11k lines
with ~100 actions, parallel=1, --report=performance:

  DocBlockThrows on ResidentsController.php alone:  3.25s -> 0.08s
  DocBlockThrows across 1095-file codebase:        11.32s -> 0.86s

Existing test suite (100 tests / 122 assertions) passes unchanged.
Output diff across all 1095 files: identical violations before vs
after.
---
 .../Sniffs/Commenting/DocBlockThrowsSniff.php |   9 +-
 PhpCollective/Traits/UseStatementsTrait.php   | 106 ++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php b/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php
index 1b4345c..b087849 100644
--- a/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php
+++ b/PhpCollective/Sniffs/Commenting/DocBlockThrowsSniff.php
@@ -132,7 +132,14 @@ protected function extractExceptions(File $phpcsFile, int $stackPointer): array
                 $classIndex = $nextIndex;
             }
 
-            $doubleColonIndex = $phpcsFile->findNext(T_DOUBLE_COLON, $i + 1, $scopeCloser);
+            // Constrain the `::` lookup to the current statement.
+            // The original code searched up to $scopeCloser, which on long
+            // method bodies could pick up an unrelated `::` operator from
+            // a later statement (and silently used it to gate exception
+            // collection for *this* throw). Bound by the next `;`.
+            $statementEnd = $phpcsFile->findNext(T_SEMICOLON, $i + 1, $scopeCloser);
+            $doubleColonSearchEnd = $statementEnd !== false ? $statementEnd : $scopeCloser;
+            $doubleColonIndex = $phpcsFile->findNext(T_DOUBLE_COLON, $i + 1, $doubleColonSearchEnd);
 
             if (!$newIndex && !$classIndex && !$doubleColonIndex) {
                 continue;
diff --git a/PhpCollective/Traits/UseStatementsTrait.php b/PhpCollective/Traits/UseStatementsTrait.php
index c1e5f77..e5daf87 100644
--- a/PhpCollective/Traits/UseStatementsTrait.php
+++ b/PhpCollective/Traits/UseStatementsTrait.php
@@ -12,6 +12,32 @@
 
 trait UseStatementsTrait
 {
+    /**
+     * Per-file cache of parsed `use` statements.
+     *
+     * `getUseStatements()` iterates every token in the file via
+     * `foreach ($tokens as ...)` looking for top-level T_USE tokens. The
+     * result is stable across calls during a single phpcs pass over a
+     * file, but several sniffs call it once per T_FUNCTION
+     * (DocBlockThrowsSniff, DocBlockVarSniff, UseWithAliasingSniff). On a
+     * large method-heavy controller that turns a per-file O(file_size)
+     * walk into per-method O(methods * file_size). Cache the parsed
+     * result so each file is parsed at most once per pass.
+     *
+     * Cache invalidation has to survive phpcbf's fix loop, which re-
+     * tokenizes the same file in-place on the same File object after
+     * each fix iteration. Token count alone is not sufficient — a fix
+     * that renames an alias (`use Foo as Bar;` -> `use Foo as Baz;`)
+     * leaves the token count unchanged. We therefore also store the
+     * concrete statement strings on the way in and re-verify them
+     * against the live tokens before trusting a cache hit. The
+     * verification is O(num_use_statements * avg_use_length), typically
+     * just a handful of token reads, so it stays cheap.
+     *
+     * @var array<string, array{count: int, statements: array<string, array<string, mixed>>}>
+     */
+    private static array $useStatementsCache = [];
+
     /**
      * @param \PHP_CodeSniffer\Files\File $phpcsFile
      *
@@ -20,6 +46,15 @@ trait UseStatementsTrait
     protected function getUseStatements(File $phpcsFile): array
     {
         $tokens = $phpcsFile->getTokens();
+        $cacheKey = $phpcsFile->getFilename();
+        $tokenCount = count($tokens);
+        if (
+            isset(self::$useStatementsCache[$cacheKey])
+            && self::$useStatementsCache[$cacheKey]['count'] === $tokenCount
+            && $this->useStatementsCacheStillValid(self::$useStatementsCache[$cacheKey]['statements'], $tokens)
+        ) {
+            return self::$useStatementsCache[$cacheKey]['statements'];
+        }
 
         $statements = [];
         foreach ($tokens as $index => $token) {
@@ -81,12 +116,83 @@ protected function getUseStatements(File $phpcsFile): array
                 'fullName' => ltrim($fullName, '\\'),
                 'shortName' => $shortName,
                 'start' => $index,
+                'cacheFingerprint' => $this->buildUseStatementFingerprint($tokens, $index, $semicolonIndex),
             ];
         }
 
+        self::$useStatementsCache[$cacheKey] = [
+            'count' => $tokenCount,
+            'statements' => $statements,
+        ];
+
         return $statements;
     }
 
+    /**
+     * Concatenate the raw content of every token in [$start, $end] inclusive.
+     *
+     * Used as a fingerprint for cache invalidation: when a phpcbf fix
+     * rewrites the inside of a `use` line without changing the total
+     * token count of the file, the fingerprint differs and the cached
+     * entry is rejected.
+     *
+     * @param array<int, array<string, mixed>> $tokens
+     * @param int $start
+     * @param int $end
+     *
+     * @return string
+     */
+    private function buildUseStatementFingerprint(array $tokens, int $start, int $end): string
+    {
+        $fingerprint = '';
+        for ($i = $start; $i <= $end; $i++) {
+            if (!isset($tokens[$i])) {
+                break;
+            }
+            $fingerprint .= $tokens[$i]['content'];
+        }
+
+        return $fingerprint;
+    }
+
+    /**
+     * Verify a cache entry against the live tokens.
+     *
+     * For every cached use statement, check that the live tokens at the
+     * recorded [start, end] range still produce the same concatenated
+     * content (the fingerprint stored when the entry was created). This
+     * catches:
+     *   - structural edits that shifted positions without changing
+     *     overall token count (cached T_USE no longer at `start`);
+     *   - in-place rewrites that preserve token count, e.g. renaming
+     *     an imported alias to a different identifier of the same shape;
+     *   - any whitespace/comment edit inside the use statement.
+     *
+     * Cost per call is O(num_use_statements * avg_use_length), typically
+     * just a handful of token reads.
+     *
+     * @param array<string, array<string, mixed>> $statements
+     * @param array<int, array<string, mixed>> $tokens
+     *
+     * @return bool
+     */
+    private function useStatementsCacheStillValid(array $statements, array $tokens): bool
+    {
+        foreach ($statements as $statement) {
+            $start = $statement['start'];
+            if (!isset($tokens[$start]) || $tokens[$start]['code'] !== T_USE) {
+                return false;
+            }
+
+            $live = $this->buildUseStatementFingerprint($tokens, $start, $statement['end']);
+            if ($live !== $statement['cacheFingerprint']) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
     /**
      * @param string $statementContent
      *