From dc52bd8977b9d5f92cf8a9c3594d3ee47c612988 Mon Sep 17 00:00:00 2001 From: mscherer Date: Wed, 13 May 2026 21:31:10 +0200 Subject: [PATCH 1/2] Use O(1) conditions lookups in ConsistentIndentSniff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three helpers in ConsistentIndentSniff (isInsideClosure, isInsideSwitchCase, couldBeInCaseBlock) walked the token stream backwards up to 2000 / 500 / 500 tokens for every indented line. The tokens they searched for are already tracked in the per-token 'conditions' map built by the phpcs PHP tokenizer: - T_CLOSURE: PHP::processAdditional() rewrites the conditions of every token inside an anonymous function from T_FUNCTION to T_CLOSURE (PHP.php around line 2840). - T_SWITCH / T_CASE / T_DEFAULT: all scope openers, so createLevelMap() adds them to inner tokens' conditions. So the inner-loop scans were redundant. Replace with O(depth) walks of the conditions array (typically <10 entries). T_FN is the one exception — arrow functions get scope_opener/scope_closer but don't propagate to inner tokens' conditions — so keep a bounded fallback scan for it. Removed the outdated "Case blocks don't show up in conditions" comment; they do. Measured on the same workload as #62 (CakePHP 5 app, 1095 files, biggest controller 11k lines): parallel=1, single file: 20s -> 16.7s parallel=16, whole codebase: 1m30s -> 1m16s Sniff perf on the slowest file: 4.12s (24%) -> 1.09s (8%). Existing test suite (100 tests / 122 assertions) passes unchanged. Output diff across 1095 real-world files: identical violations. --- .../WhiteSpace/ConsistentIndentSniff.php | 106 ++++++++---------- 1 file changed, 48 insertions(+), 58 deletions(-) diff --git a/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php b/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php index 1d328bb..db6d3d5 100644 --- a/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php +++ b/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php @@ -107,8 +107,7 @@ public function process(File $phpcsFile, $stackPtr): void return; } - // Additional safety: if this looks like it could be in a case block, skip it - // Case blocks don't show up in conditions, so expected might be wrong + // Additional safety: if this looks like it could be in a case block, skip it. if ($this->couldBeInCaseBlock($phpcsFile, $stackPtr, $tokens)) { return; } @@ -307,8 +306,16 @@ protected function isValidContinuation(int $prevToken, array $tokens): bool } /** - * Check if the current position is inside a closure/anonymous function. - * PHPCS doesn't properly track closures in the conditions array, so we need to detect them manually. + * Check if the current position is inside a closure or arrow function. + * + * Fast path: phpcs's PHP tokenizer rewrites the `conditions` map of every + * token inside an anonymous function so it contains `T_CLOSURE` rather + * than `T_FUNCTION` (see `PHP::processAdditional()`), so the common case + * resolves with an O(depth) walk of the (typically <10-entry) conditions + * array. T_FN (arrow functions) is not added to `conditions`, so we still + * scan backwards for it — but arrow function expressions are short by + * language construction (a single expression), so a tight scan window + * suffices. * * @param \PHP_CodeSniffer\Files\File $phpcsFile * @param int $stackPtr @@ -318,41 +325,26 @@ protected function isValidContinuation(int $prevToken, array $tokens): bool */ protected function isInsideClosure(File $phpcsFile, int $stackPtr, array $tokens): bool { - // Look backward for a closure token (T_CLOSURE, T_FN, or T_FUNCTION for anonymous functions) - $closureTypes = [T_CLOSURE, T_FN]; - - // Search backward for a closure opening - for ($i = $stackPtr - 1; $i >= 0; $i--) { - // Check for T_FUNCTION (could be a named method or anonymous function) - if ($tokens[$i]['code'] === T_FUNCTION) { - // Check if this is a named function (not a closure) - $nextNonWhitespace = $phpcsFile->findNext(T_WHITESPACE, $i + 1, null, true); - if ($nextNonWhitespace !== false && $tokens[$nextNonWhitespace]['code'] === T_STRING) { - // Named function/method - stop searching, we're not in a closure - return false; - } - - // It's an anonymous function (closure) - check if we're inside it - if (isset($tokens[$i]['scope_opener']) && isset($tokens[$i]['scope_closer'])) { - if ($stackPtr > $tokens[$i]['scope_opener'] && $stackPtr < $tokens[$i]['scope_closer']) { - return true; - } + if (!empty($tokens[$stackPtr]['conditions'])) { + foreach ($tokens[$stackPtr]['conditions'] as $code) { + if ($code === T_CLOSURE) { + return true; } } + } - if (in_array($tokens[$i]['code'], $closureTypes, true)) { - // Found a closure, check if our position is within its scope - if (isset($tokens[$i]['scope_opener']) && isset($tokens[$i]['scope_closer'])) { - if ($stackPtr > $tokens[$i]['scope_opener'] && $stackPtr < $tokens[$i]['scope_closer']) { - return true; - } - } + // Fallback for T_FN: bounded scan, since arrow function expressions + // can't realistically span more than a few hundred tokens. + $limit = max(0, $stackPtr - 500); + for ($i = $stackPtr - 1; $i >= $limit; $i--) { + if ($tokens[$i]['code'] !== T_FN) { + continue; } - - // Stop searching if we've gone too far (more than 2000 tokens back) - // Large closures with validation logic can easily exceed 500 tokens - if ($stackPtr - $i > 2000) { - break; + if (!isset($tokens[$i]['scope_opener'], $tokens[$i]['scope_closer'])) { + continue; + } + if ($stackPtr > $tokens[$i]['scope_opener'] && $stackPtr < $tokens[$i]['scope_closer']) { + return true; } } @@ -416,6 +408,10 @@ protected function isInsideArray(File $phpcsFile, int $stackPtr, array $tokens): * Check if the current position is inside a switch/case block. * Case blocks have special indentation rules per PER Coding Style. * + * Uses the pre-computed `conditions` map: T_SWITCH is a scope opener + * in phpcs's PHP tokenizer, so every token inside a switch's scope + * already carries T_SWITCH in its conditions array — no scan needed. + * * @param \PHP_CodeSniffer\Files\File $phpcsFile * @param int $stackPtr * @param array> $tokens @@ -424,20 +420,13 @@ protected function isInsideArray(File $phpcsFile, int $stackPtr, array $tokens): */ protected function isInsideSwitchCase(File $phpcsFile, int $stackPtr, array $tokens): bool { - // Look backward for a switch statement - for ($i = $stackPtr - 1; $i >= 0; $i--) { - if ($tokens[$i]['code'] === T_SWITCH) { - // Found a switch, check if we're within its scope - if (isset($tokens[$i]['scope_opener']) && isset($tokens[$i]['scope_closer'])) { - if ($stackPtr > $tokens[$i]['scope_opener'] && $stackPtr < $tokens[$i]['scope_closer']) { - return true; - } - } - } + if (empty($tokens[$stackPtr]['conditions'])) { + return false; + } - // Stop if we've gone too far back - if ($stackPtr - $i > 500) { - break; + foreach ($tokens[$stackPtr]['conditions'] as $code) { + if ($code === T_SWITCH) { + return true; } } @@ -514,7 +503,11 @@ protected function isInMultiLineConstruct(File $phpcsFile, int $stackPtr, array } /** - * Check if this could be in a case block by looking for case/default keywords nearby. + * Check if this could be in a case block. + * + * Uses the pre-computed `conditions` map: T_CASE and T_DEFAULT are scope + * openers in phpcs's PHP tokenizer, so a token lexically inside a case + * arm already carries T_CASE / T_DEFAULT in its conditions array. * * @param \PHP_CodeSniffer\Files\File $phpcsFile * @param int $stackPtr @@ -524,17 +517,14 @@ protected function isInMultiLineConstruct(File $phpcsFile, int $stackPtr, array */ protected function couldBeInCaseBlock(File $phpcsFile, int $stackPtr, array $tokens): bool { - // Look backward for case/default keywords (not too far) - // Using 500 to match isInsideSwitchCase() limit, as case blocks can be large - for ($i = $stackPtr - 1; $i >= max(0, $stackPtr - 500); $i--) { - if ($tokens[$i]['code'] === T_CASE || $tokens[$i]['code'] === T_DEFAULT) { - // Found a case/default, likely in a case block + if (empty($tokens[$stackPtr]['conditions'])) { + return false; + } + + foreach ($tokens[$stackPtr]['conditions'] as $code) { + if ($code === T_CASE || $code === T_DEFAULT) { return true; } - // If we hit another control structure, stop - if ($tokens[$i]['code'] === T_IF || $tokens[$i]['code'] === T_WHILE || $tokens[$i]['code'] === T_FOR) { - break; - } } return false; From f97fbbda5af86c9d996098900100da53ac915972 Mon Sep 17 00:00:00 2001 From: mscherer Date: Wed, 13 May 2026 21:40:50 +0200 Subject: [PATCH 2/2] Cache T_FN scope ranges instead of bounded backward scan Initial draft used a 500-token backward scan to find a containing T_FN (arrow function) when the conditions-based fast path didn't match. Codex review correctly flagged that as a behaviour regression: arrow function bodies can legitimately exceed 500 tokens (a long match expression, a deeply nested array as the body, a long method chain), and the bounded scan would then fail to detect that we're inside the arrow and could emit false-positive over-indentation errors on lines the original implementation correctly skipped. Replace the bounded scan with a per-file cache of [scope_opener, scope_closer] ranges for every T_FN token. Built lazily on first use in O(file) and checked in O(arrow_count) per query (typically a handful of arrows per file). No scan limit, no false negatives. Cache stores the token count alongside the ranges so it self- invalidates if the file is re-tokenized inside a phpcbf fix loop. Net effect vs the bounded-scan draft: parallel=1 ResidentsController.php: 16.7s -> 6.5s parallel=16 whole codebase wall: 1m16s -> 27.9s The big extra speedup comes from cheap empty-array iteration in files with no arrow functions at all, which was previously paying for a 500-token scan per indented line. --- .../WhiteSpace/ConsistentIndentSniff.php | 77 +++++++++++++++---- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php b/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php index db6d3d5..2f6446d 100644 --- a/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php +++ b/PhpCollective/Sniffs/WhiteSpace/ConsistentIndentSniff.php @@ -305,6 +305,25 @@ protected function isValidContinuation(int $prevToken, array $tokens): bool return false; } + /** + * Per-file cache of arrow function (T_FN) scope ranges. + * + * T_FN, unlike T_CLOSURE, isn't propagated into inner tokens' `conditions` + * map by phpcs, so we can't use the O(depth) conditions check for arrow + * functions. Instead we collect every arrow function's [scope_opener, + * scope_closer] range once per file and check each line against this list. + * Arrow function scopes can legitimately be long (think a multi-line + * `match` expression or a deeply nested array literal as the body), so a + * scan window would be incorrect; the cached-ranges approach is both + * unbounded and faster. + * + * The cache stores the token count alongside the scopes so it self- + * invalidates if the file is re-tokenized during a phpcbf fix loop. + * + * @var array}> + */ + private static array $arrowFunctionScopesCache = []; + /** * Check if the current position is inside a closure or arrow function. * @@ -312,10 +331,7 @@ protected function isValidContinuation(int $prevToken, array $tokens): bool * token inside an anonymous function so it contains `T_CLOSURE` rather * than `T_FUNCTION` (see `PHP::processAdditional()`), so the common case * resolves with an O(depth) walk of the (typically <10-entry) conditions - * array. T_FN (arrow functions) is not added to `conditions`, so we still - * scan backwards for it — but arrow function expressions are short by - * language construction (a single expression), so a tight scan window - * suffices. + * array. T_FN is handled via a per-file scope cache built on first use. * * @param \PHP_CodeSniffer\Files\File $phpcsFile * @param int $stackPtr @@ -333,22 +349,55 @@ protected function isInsideClosure(File $phpcsFile, int $stackPtr, array $tokens } } - // Fallback for T_FN: bounded scan, since arrow function expressions - // can't realistically span more than a few hundred tokens. - $limit = max(0, $stackPtr - 500); - for ($i = $stackPtr - 1; $i >= $limit; $i--) { - if ($tokens[$i]['code'] !== T_FN) { - continue; + foreach ($this->getArrowFunctionScopes($phpcsFile, $tokens) as $range) { + if ($stackPtr > $range[0] && $stackPtr < $range[1]) { + return true; } - if (!isset($tokens[$i]['scope_opener'], $tokens[$i]['scope_closer'])) { + } + + return false; + } + + /** + * Build (and cache per file) the list of arrow function scope ranges. + * + * Single O(file) walk on first invocation per file, then O(arrows) per + * lookup. Arrow function counts are typically a handful per file, so the + * per-lookup cost stays small. + * + * @param \PHP_CodeSniffer\Files\File $phpcsFile + * @param array> $tokens + * + * @return array + */ + protected function getArrowFunctionScopes(File $phpcsFile, array $tokens): array + { + $cacheKey = $phpcsFile->getFilename(); + $tokenCount = count($tokens); + if ( + isset(self::$arrowFunctionScopesCache[$cacheKey]) + && self::$arrowFunctionScopesCache[$cacheKey]['count'] === $tokenCount + ) { + return self::$arrowFunctionScopesCache[$cacheKey]['scopes']; + } + + $scopes = []; + foreach ($tokens as $token) { + if ($token['code'] !== T_FN) { continue; } - if ($stackPtr > $tokens[$i]['scope_opener'] && $stackPtr < $tokens[$i]['scope_closer']) { - return true; + if (!isset($token['scope_opener'], $token['scope_closer'])) { + continue; } + $scopes[] = [$token['scope_opener'], $token['scope_closer']]; } - return false; + self::$arrowFunctionScopesCache[$cacheKey] = [ + 'count' => $tokenCount, + 'scopes' => $scopes, + ]; + + return $scopes; } /**