WordPress · JanJakes · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-lexer.php b/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-lexer.php
@@ -2111,6 +2111,13 @@ class WP_MySQL_Lexer {
 	 */
 	private $sql;
 
+	/**
+	 * Byte length of the SQL payload.
+	 *
+	 * @var int
+	 */
+	private $sql_length;
+
 	/**
 	 * The version of the MySQL server that the SQL payload is intended for.
 	 *
@@ -2189,6 +2196,7 @@ public function __construct(
 		array $sql_modes = array()
 	) {
 		$this->sql           = $sql;
+		$this->sql_length    = strlen( $sql );
 		$this->mysql_version = $mysql_version;
 
 		foreach ( $sql_modes as $sql_mode ) {
@@ -2227,6 +2235,9 @@ public function next_token(): bool {
 			return false;
 		}
 
+		// Skip leading whitespace inline for optimal performance.
+		$this->bytes_already_read += strspn( $this->sql, self::WHITESPACE_MASK, $this->bytes_already_read );
+
 		do {
 			$this->token_starts_at = $this->bytes_already_read;
 			$this->token_type      = $this->read_next_token();
@@ -2284,10 +2295,51 @@ public function get_token(): ?WP_MySQL_Token {
 	 * @return WP_MySQL_Token[] An array of token objects representing the remaining tokens.
 	 */
 	public function remaining_tokens(): array {
-		$tokens = array();
-		while ( true === $this->next_token() ) {
-			$token    = $this->get_token();
-			$tokens[] = $token;
+		$tokens                            = array();
+		$no_backslash_escapes_sql_mode_set = $this->is_sql_mode_active(
+			self::SQL_MODE_NO_BACKSLASH_ESCAPES
+		);
+
+		while ( true ) {
+			// Bail on EOF, or on a null token type once at least one byte has
+			// been consumed (read_next_token() hit invalid input mid-stream).
+			if (
+				self::EOF === $this->token_type
+				|| ( null === $this->token_type && $this->bytes_already_read > 0 )
+			) {
+				$this->token_type = null;
+				break;
+			}
+
+			// Skip leading whitespace inline for optimal performance.
+			$this->bytes_already_read += strspn( $this->sql, self::WHITESPACE_MASK, $this->bytes_already_read );
+
+			do {
+				$this->token_starts_at = $this->bytes_already_read;
+				$this->token_type      = $this->read_next_token();
+			} while (
+				self::WHITESPACE === $this->token_type
+				|| self::COMMENT === $this->token_type
+				|| self::MYSQL_COMMENT_START === $this->token_type
+				|| self::MYSQL_COMMENT_END === $this->token_type
+			);
+
+			if ( null === $this->token_type ) { // Invalid input: stop without emitting a token.
+				break;
+			}
+
+			$tokens[] = new WP_MySQL_Token(
+				$this->token_type,
+				$this->token_starts_at,
+				$this->bytes_already_read - $this->token_starts_at, // Token length in bytes.
+				$this->sql,
+				$no_backslash_escapes_sql_mode_set
+			);
+
+			if ( self::EOF === $this->token_type ) { // The EOF token is emitted, then the loop ends.
+				$this->token_type = null;
+				break;
+			}
 		}
 		return $tokens;
 	}
@@ -2354,20 +2406,60 @@ private function read_next_token(): ?int {
 		$byte      = $this->sql[ $this->bytes_already_read ] ?? null;
 		$next_byte = $this->sql[ $this->bytes_already_read + 1 ] ?? null;
 
-		if ( "'" === $byte || '"' === $byte || '`' === $byte ) {
+		// A map for a single-byte symbol fast path.
+		static $single_byte_ops = array(
+			'(' => self::OPEN_PAR_SYMBOL,
+			')' => self::CLOSE_PAR_SYMBOL,
+			',' => self::COMMA_SYMBOL,
+			';' => self::SEMICOLON_SYMBOL,
+			'+' => self::PLUS_OPERATOR,
+			'~' => self::BITWISE_NOT_OPERATOR,
+			'%' => self::MOD_OPERATOR,
+			'^' => self::BITWISE_XOR_OPERATOR,
+			'?' => self::PARAM_MARKER,
+			'{' => self::OPEN_CURLY_SYMBOL,
+			'}' => self::CLOSE_CURLY_SYMBOL,
+			'=' => self::EQUAL_OPERATOR,
+		);
+
+		// Fast path for keywords and identifiers.
+		// `$byte > "\x7F"` catches UTF-8 multi-byte starters (U+0080-U+FFFF).
+		// `"'" !== $next_byte` defers x'..', n'..' and similar special
+		// literals to their dedicated branches below; only single quotes
+		// form those, regardless of SQL mode.
+		if (
+			(
+				( $byte >= 'a' && $byte <= 'z' )
+				|| ( $byte >= 'A' && $byte <= 'Z' )
+				|| $byte > "\x7F"
+			)
+			&& "'" !== $next_byte
+		) {
+			$started_at = $this->bytes_already_read;
+			$type       = $this->read_identifier();
+			if ( self::IDENTIFIER === $type ) {
+				// When preceded by a dot, it is always an identifier.
+				if ( $started_at > 0 && '.' === $this->sql[ $started_at - 1 ] ) {
+					$type = self::IDENTIFIER;
+				} else {
+					$type = $this->determine_identifier_or_keyword_type( $this->get_current_token_bytes() );
+				}
+			}
+		} elseif ( null !== $byte && isset( $single_byte_ops[ $byte ] ) ) {
+			// Fast path for single-byte symbols.
+			$this->bytes_already_read += 1;
+			$type                      = $single_byte_ops[ $byte ];
+		} elseif ( "'" === $byte || '"' === $byte || '`' === $byte ) {
 			$type = $this->read_quoted_text();
-		} elseif ( null !== $byte && strspn( $byte, self::DIGIT_MASK ) > 0 ) {
+		} elseif ( null !== $byte && $byte >= '0' && $byte <= '9' ) {
 			$type = $this->read_number();
 		} elseif ( '.' === $byte ) {
-			if ( null !== $next_byte && strspn( $next_byte, self::DIGIT_MASK ) > 0 ) {
+			if ( null !== $next_byte && $next_byte >= '0' && $next_byte <= '9' ) {
 				$type = $this->read_number();
 			} else {
 				$this->bytes_already_read += 1;
 				$type                      = self::DOT_SYMBOL;
 			}
-		} elseif ( '=' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::EQUAL_OPERATOR;
 		} elseif ( ':' === $byte ) {
 			$this->bytes_already_read += 1; // Consume the ':'.
 			if ( '=' === $next_byte ) {
@@ -2414,14 +2506,17 @@ private function read_next_token(): ?int {
 			} else {
 				$type = self::LOGICAL_NOT_OPERATOR;
 			}
-		} elseif ( '+' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::PLUS_OPERATOR;
 		} elseif ( '-' === $byte ) {
+			$third_byte = $this->sql[ $this->bytes_already_read + 2 ] ?? null;
 			if (
 				'-' === $next_byte
-				&& $this->bytes_already_read + 2 < strlen( $this->sql )
-				&& strspn( $this->sql[ $this->bytes_already_read + 2 ], self::WHITESPACE_MASK ) > 0
+				&& (
+					' ' === $third_byte
+					|| "\t" === $third_byte
+					|| "\n" === $third_byte
+					|| "\r" === $third_byte
+					|| "\f" === $third_byte
+				)
 			) {
 				$type = $this->read_line_comment();
 			} elseif ( '>' === $next_byte ) {
@@ -2466,9 +2561,6 @@ private function read_next_token(): ?int {
 				$this->bytes_already_read += 1;
 				$type                      = self::DIV_OPERATOR;
 			}
-		} elseif ( '%' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::MOD_OPERATOR;
 		} elseif ( '&' === $byte ) {
 			$this->bytes_already_read += 1; // Consume the '&'.
 			if ( '&' === $next_byte ) {
@@ -2477,9 +2569,6 @@ private function read_next_token(): ?int {
 			} else {
 				$type = self::BITWISE_AND_OPERATOR;
 			}
-		} elseif ( '^' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::BITWISE_XOR_OPERATOR;
 		} elseif ( '|' === $byte ) {
 			$this->bytes_already_read += 1; // Consume the '|'.
 			if ( '|' === $next_byte ) {
@@ -2490,27 +2579,6 @@ private function read_next_token(): ?int {
 			} else {
 				$type = self::BITWISE_OR_OPERATOR;
 			}
-		} elseif ( '~' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::BITWISE_NOT_OPERATOR;
-		} elseif ( ',' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::COMMA_SYMBOL;
-		} elseif ( ';' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::SEMICOLON_SYMBOL;
-		} elseif ( '(' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::OPEN_PAR_SYMBOL;
-		} elseif ( ')' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::CLOSE_PAR_SYMBOL;
-		} elseif ( '{' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::OPEN_CURLY_SYMBOL;
-		} elseif ( '}' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::CLOSE_CURLY_SYMBOL;
 		} elseif ( '@' === $byte ) {
 			$this->bytes_already_read += 1; // Consume the '@'.
 
@@ -2534,9 +2602,6 @@ private function read_next_token(): ?int {
 					$type = self::AT_SIGN_SYMBOL;
 				}
 			}
-		} elseif ( '?' === $byte ) {
-			$this->bytes_already_read += 1;
-			$type                      = self::PARAM_MARKER;
 		} elseif ( '\\' === $byte ) {
 			$this->bytes_already_read += 1; // Consume the '\'.
 			if ( 'N' === $next_byte ) {
@@ -2547,7 +2612,13 @@ private function read_next_token(): ?int {
 			}
 		} elseif ( '#' === $byte ) {
 			$type = $this->read_line_comment();
-		} elseif ( null !== $byte && strspn( $byte, self::WHITESPACE_MASK ) > 0 ) {
+		} elseif (
+			' ' === $byte
+			|| "\t" === $byte
+			|| "\n" === $byte
+			|| "\r" === $byte
+			|| "\f" === $byte
+		) {
 			$this->bytes_already_read += strspn( $this->sql, self::WHITESPACE_MASK, $this->bytes_already_read );
 			$type                      = self::WHITESPACE;
 		} elseif ( ( 'x' === $byte || 'X' === $byte || 'b' === $byte || 'B' === $byte ) && "'" === $next_byte ) {
@@ -2561,13 +2632,9 @@ private function read_next_token(): ?int {
 		} elseif ( null === $byte ) {
 			$type = self::EOF;
 		} else {
-			$started_at = $this->bytes_already_read;
-			$type       = $this->read_identifier();
+			$type = $this->read_identifier();
 			if ( self::IDENTIFIER === $type ) {
-				// When preceded by a dot, it is always an identifier.
-				if ( $started_at > 0 && '.' === $this->sql[ $started_at - 1 ] ) {
-					$type = self::IDENTIFIER;
-				} elseif ( '_' === $byte && isset( self::UNDERSCORE_CHARSETS[ strtolower( $this->get_current_token_bytes() ) ] ) ) {
+				if ( '_' === $byte && isset( self::UNDERSCORE_CHARSETS[ strtolower( $this->get_current_token_bytes() ) ] ) ) {
 					$type = self::UNDERSCORE_CHARSET;
 				} else {
 					$type = $this->determine_identifier_or_keyword_type( $this->get_current_token_bytes() );
@@ -2675,7 +2742,7 @@ private function read_number(): ?int {
 				'0' === $byte
 				&& 'x' === $next_byte
 				&& null !== $third_byte
-				&& strspn( $third_byte, self::HEX_DIGIT_MASK ) > 0
+				&& false !== strpos( self::HEX_DIGIT_MASK, $third_byte )
 			)
 			// HEX number in the form of x'N' or X'N'.
 			|| ( ( 'x' === $byte || 'X' === $byte ) && "'" === $next_byte )
@@ -2685,7 +2752,7 @@ private function read_number(): ?int {
 			$this->bytes_already_read += strspn( $this->sql, self::HEX_DIGIT_MASK, $this->bytes_already_read );
 			if ( $is_quoted ) {
 				if (
-					$this->bytes_already_read >= strlen( $this->sql )
+					$this->bytes_already_read >= $this->sql_length
 					|| "'" !== $this->sql[ $this->bytes_already_read ]
 				) {
 					return null; // Invalid input.
@@ -2708,7 +2775,7 @@ private function read_number(): ?int {
 			$this->bytes_already_read += strspn( $this->sql, '01', $this->bytes_already_read );
 			if ( $is_quoted ) {
 				if (
-					$this->bytes_already_read >= strlen( $this->sql )
+					$this->bytes_already_read >= $this->sql_length
 					|| "'" !== $this->sql[ $this->bytes_already_read ]
 				) {
 					return null; // Invalid input.
@@ -2737,11 +2804,12 @@ private function read_number(): ?int {
 				( 'e' === $byte || 'E' === $byte )
 				&& null !== $next_byte
 				&& (
-					strspn( $next_byte, self::DIGIT_MASK ) > 0
+					( $next_byte >= '0' && $next_byte <= '9' )
 					|| (
 						( '+' === $next_byte || '-' === $next_byte )
-						&& $this->bytes_already_read + 2 < strlen( $this->sql )
-						&& strspn( $this->sql[ $this->bytes_already_read + 2 ], self::DIGIT_MASK ) > 0
+						&& $this->bytes_already_read + 2 < $this->sql_length
+						&& $this->sql[ $this->bytes_already_read + 2 ] >= '0'
+						&& $this->sql[ $this->bytes_already_read + 2 ] <= '9'
 					)
 				);
 			if ( $has_exponent ) {
@@ -2838,12 +2906,11 @@ private function read_quoted_text(): ?int {
 		// in which case the escape sequence is consumed and the loop continues.
 		$at = $this->bytes_already_read;
 		while ( true ) {
-			$at += strcspn( $this->sql, $quote, $at );
-
-			// Unclosed string - unexpected EOF.
-			if ( ( $this->sql[ $at ] ?? null ) !== $quote ) {
+			$quote_at = strpos( $this->sql, $quote, $at );
+			if ( false === $quote_at ) {
 				return null; // Invalid input.
 			}
+			$at = $quote_at;
 
 			/*
 			 * By default, quotes can be escaped with a "\".
@@ -2853,9 +2920,17 @@ private function read_quoted_text(): ?int {
 			 * The quote is escaped only when the number of preceding backslashes
 			 * is odd - "\" is an escape sequence, "\\" is an escaped backslash,
 			 * "\\\" is an escaped backslash and an escape sequence, and so on.
+			 *
+			 * The `($at - $i - 1) >= 0` guard prevents PHP's negative-string-
+			 * offset wraparound (PHP 7.1+) when the closing-quote candidate
+			 * sits at the very start of the input. The `?? null` covers
+			 * positive out-of-range indexes belt-and-suspenders.
 			 */
 			if ( ! $no_backslash_escapes ) {
-				for ( $i = 0; ( $at - $i - 1 ) >= 0 && '\\' === $this->sql[ $at - $i - 1 ]; $i += 1 );
+				$i = 0;
+				while ( ( $at - $i - 1 ) >= 0 && '\\' === ( $this->sql[ $at - $i - 1 ] ?? null ) ) {
+					$i += 1;
+				}
 				if ( 1 === $i % 2 ) {
 					$at += 1;
 					continue;
@@ -2920,17 +2995,11 @@ private function read_mysql_comment(): int {
 	}
 
 	private function read_comment_content(): void {
-		while ( true ) {
-			$this->bytes_already_read += strcspn( $this->sql, '*', $this->bytes_already_read );
-			$this->bytes_already_read += 1; // Consume the '*'.
-			$byte                      = $this->sql[ $this->bytes_already_read ] ?? null;
-			if ( null === $byte ) {
-				break;
-			}
-			if ( '/' === $byte ) {
-				$this->bytes_already_read += 1; // Consume the '/'.
-				break;
-			}
+		$comment_end = strpos( $this->sql, '*/', $this->bytes_already_read ); // Find the next "*/" at or after the current position.
+		if ( false === $comment_end ) {
+			$this->bytes_already_read = $this->sql_length; // No "*/" found: move to the end of the SQL payload.
+		} else {
+			$this->bytes_already_read = $comment_end + 2; // Move just past the two-byte "*/".
 		}
 	}
 

diff --git a/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php b/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php
@@ -14,6 +14,10 @@ class WP_MySQL_Parser extends WP_Parser {
 	 * @param array<WP_Parser_Token> $tokens The parser tokens.
 	 */
 	public function reset_tokens( array $tokens ): void {
+		$this->token_count = count( $tokens );
+		// Maintain the end-of-input sentinel that parse_recursive() relies on.
+		// See WP_Parser::__construct for the invariants.
+		$tokens[]          = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
 		$this->tokens      = $tokens;
 		$this->position    = 0;
 		$this->current_ast = null;
@@ -40,7 +44,7 @@ public function reset_tokens( array $tokens ): void {
 	 * @return bool Whether a query was successfully parsed.
 	 */
 	public function next_query(): bool {
-		if ( $this->position >= count( $this->tokens ) ) {
+		if ( $this->position >= $this->token_count ) {
 			return false;
 		}
 		$this->current_ast = $this->parse();