From 3ad9db38fe90aadae8e8c83acac7b61dcc229bd0 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Mon, 20 Apr 2026 19:16:56 +0200
Subject: [PATCH 1/3] Optimize ripper bounds

Basically a port of https://github.com/ruby/ruby/commit/c45f781771314a71856c9b348c640ba532f54349 into ruby

It's quite effective, with a ~97% hit rate for me.
This speeds it up from ~6.77x slower to only 4.07x slower.

For the lexer `on_sp` it also gives a bit of an improvement:
1.04x slower to 1.10x faster

I guess the class may be universally useful but for now I just made it nodoc.
---
 lib/prism/lex_compat.rb              | 11 ++++-
 lib/prism/parse_result.rb            |  4 +-
 lib/prism/translation/ripper.rb      | 69 ++++++++++++++++++++++++++--
 rbi/generated/prism/lex_compat.rbi   |  8 ++++
 rbi/generated/prism/parse_result.rbi |  4 +-
 sig/generated/prism/lex_compat.rbs   |  6 +++
 sig/generated/prism/parse_result.rbs |  4 +-
 7 files changed, 91 insertions(+), 15 deletions(-)

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index e1b04fc6ce..7aacec037d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -23,6 +23,12 @@ module Prism
   #            def self.[]: (Integer value) -> State
   #          end
   #        end
+  #
+  #        class LineAndColumnCache
+  #          def initialize: (Source source) -> void
+  #
+  #          def line_and_column: (Integer byte_offset) -> [Integer, Integer]
+  #        end
   #      end
   #    end
 
@@ -837,6 +843,8 @@ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
       prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
       prev_token_end = bom ? 3 : 0
 
+      cache = Translation::Ripper::LineAndColumnCache.new(source)
+
       tokens.each do |token|
         # Skip missing heredoc ends.
         next if token[1] == :on_heredoc_end && token[2] == ""
@@ -851,8 +859,7 @@ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
 
         if start_offset > prev_token_end
           sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
-          sp_line = source.line(prev_token_end)
-          sp_column = source.column(prev_token_end)
+          sp_line, sp_column = cache.line_and_column(prev_token_end)
           # Ripper reports columns on line 1 without counting the BOM
           sp_column -= 3 if sp_line == 1 && bom
           continuation_index = sp_value.byteindex("\\")
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 4f7bcf07d6..e37a8cd843 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -223,9 +223,7 @@ def deep_freeze
       freeze
     end
 
-    private
-
-    # Binary search through the offsets to find the line number for the given
+    # Binary search through the offsets to find the index for the given
     # byte offset.
     #--
     #: (Integer byte_offset) -> Integer
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index b066f3e3ac..d5dd760e58 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -446,6 +446,64 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false)
       autoload :SexpBuilder, "prism/translation/ripper/sexp"
       autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
 
+      # Resolves byte offsets to line and column, remembering the last line found.
+      # Ripper bounds are mostly accessed in a linear fashion, so a short linear
+      # scan around that line is tried first, falling back to binary search.
+      class LineAndColumnCache # :nodoc:
+        # How many lines to scan ahead of/behind the hint before falling back to binary search.
+        WINDOW = 8
+        private_constant :WINDOW
+
+        #: (Source source) -> void
+        def initialize(source)
+          @source = source
+          @offsets = source.offsets
+          @hint = 0
+        end
+
+        #: (Integer byte_offset) -> [Integer, Integer]
+        def line_and_column(byte_offset)
+          @hint = new_hint(byte_offset) || @source.find_line(byte_offset)
+          return [@hint + @source.start_line, byte_offset - @offsets[@hint]]
+        end
+
+        private
+
+        def new_hint(byte_offset)
+          if @offsets[@hint] <= byte_offset
+            # Still on the hinted line (or the hint is already the last line)?
+            if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset)
+              return @hint
+            end
+
+            # Scan forwards over at most WINDOW following line starts
+            limit = [@hint + WINDOW + 1, @offsets.size].min
+            idx = @hint + 1
+            while idx < limit
+              if @offsets[idx] > byte_offset
+                return idx - 1
+              end
+              if @offsets[idx] == byte_offset
+                return idx
+              end
+              idx += 1
+            end
+          else
+            # Scan backwards over at most WINDOW preceding line starts
+            limit = @hint > WINDOW ? @hint - WINDOW : 0
+            idx = @hint
+            while idx >= limit + 1
+              if @offsets[idx - 1] <= byte_offset
+                return idx - 1
+              end
+              idx -= 1
+            end
+          end
+
+          nil
+        end
+      end
+
       # :stopdoc:
       # This is not part of the public API but used by some gems.
 
@@ -489,6 +547,7 @@ def initialize(source, filename = "(ripper)", lineno = 1)
         @lineno = lineno
         @column = 0
         @result = nil
+        @line_and_column_cache = nil
       end
 
       ##########################################################################
@@ -4014,6 +4073,10 @@ def result
         @result ||= Prism.parse(source, partial_script: true, version: "current")
       end
 
+      def line_and_column_cache
+        @line_and_column_cache ||= LineAndColumnCache.new(result.source)
+      end
+
       ##########################################################################
       # Helpers
       ##########################################################################
@@ -4114,12 +4177,8 @@ def visit_write_value(node)
 
       # This method is responsible for updating lineno and column information
       # to reflect the current node.
-      #
-      # This method could be drastically improved with some caching on the start
-      # of every line, but for now it's good enough.
       def bounds(location)
-        @lineno = location.start_line
-        @column = location.start_column
+        @lineno, @column = line_and_column_cache.line_and_column(location.start_offset)
       end
 
       # :startdoc:
diff --git a/rbi/generated/prism/lex_compat.rbi b/rbi/generated/prism/lex_compat.rbi
index ca479b7225..b1f72a815b 100644
--- a/rbi/generated/prism/lex_compat.rbi
+++ b/rbi/generated/prism/lex_compat.rbi
@@ -20,6 +20,14 @@ module Prism
           def self.[](value); end
         end
       end
+
+      class LineAndColumnCache
+        sig { params(source: Source).void }
+        def initialize(source); end
+
+        sig { params(byte_offset: Integer).returns([Integer, Integer]) }
+        def line_and_column(byte_offset); end
+      end
     end
   end
 
diff --git a/rbi/generated/prism/parse_result.rbi b/rbi/generated/prism/parse_result.rbi
index 4d065b5be1..44fbf42c96 100644
--- a/rbi/generated/prism/parse_result.rbi
+++ b/rbi/generated/prism/parse_result.rbi
@@ -123,10 +123,10 @@ module Prism
     sig { void }
     def deep_freeze; end
 
-    # Binary search through the offsets to find the line number for the given
+    # Binary search through the offsets to find the index for the given
     # byte offset.
     sig { params(byte_offset: Integer).returns(Integer) }
-    private def find_line(byte_offset); end
+    def find_line(byte_offset); end
   end
 
   # A cache that can be used to quickly compute code unit offsets from byte
diff --git a/sig/generated/prism/lex_compat.rbs b/sig/generated/prism/lex_compat.rbs
index 707a96b9a8..1712955ff8 100644
--- a/sig/generated/prism/lex_compat.rbs
+++ b/sig/generated/prism/lex_compat.rbs
@@ -19,6 +19,12 @@ module Prism
           def self.[]: (Integer value) -> State
         end
       end
+
+      class LineAndColumnCache
+        def initialize: (Source source) -> void
+
+        def line_and_column: (Integer byte_offset) -> [ Integer, Integer ]
+      end
     end
   end
 
diff --git a/sig/generated/prism/parse_result.rbs b/sig/generated/prism/parse_result.rbs
index f005f17375..da9c7b9636 100644
--- a/sig/generated/prism/parse_result.rbs
+++ b/sig/generated/prism/parse_result.rbs
@@ -146,9 +146,7 @@ module Prism
     # : () -> void
     def deep_freeze: () -> void
 
-    private
-
-    # Binary search through the offsets to find the line number for the given
+    # Binary search through the offsets to find the index for the given
     # byte offset.
     # --
     # : (Integer byte_offset) -> Integer

From 9e93bd6bd523e43786e574eaf352b24e594dac1a Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Mon, 20 Apr 2026 19:26:30 +0200
Subject: [PATCH 2/3] Freeze the parse result for the ripper translator

It's a small, somewhat hacky performance boost. Locations are lazy; by freezing the
result they don't have to be packed/unpacked redundantly.
This gives about a 4% speed boost.

The other changes avoid modifying the now-frozen AST.
---
 lib/prism/translation/ripper.rb | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index d5dd760e58..faf2865434 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -1007,7 +1007,7 @@ def visit_begin_node(node)
             on_stmts_add(on_stmts_new, on_void_stmt)
           else
             body = node.statements.body
-            body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline)
+            body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline)
 
             bounds(node.statements.location)
             visit_statements_node_body(body)
@@ -1024,7 +1024,7 @@ def visit_begin_node(node)
                 [nil]
               else
                 body = else_clause_node.statements.body
-                body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
+                body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
                 body
               end
 
@@ -1046,7 +1046,7 @@ def visit_begin_node(node)
           on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil)
         when StatementsNode
           body = [*node.body]
-          body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline)
+          body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline)
           stmts = visit_statements_node_body(body)
 
           bounds(node.body.first.location)
@@ -1095,7 +1095,7 @@ def visit_block_node(node)
             braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
           when StatementsNode
             stmts = node.body.body
-            stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+            stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
             stmts = visit_statements_node_body(stmts)
 
             bounds(node.body.location)
@@ -2022,7 +2022,7 @@ def visit_else_node(node)
             [nil]
           else
             body = node.statements.body
-            body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
+            body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
             body
           end
 
@@ -2077,7 +2077,7 @@ def visit_ensure_node(node)
             [nil]
           else
             body = node.statements.body
-            body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
+            body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
             body
           end
 
@@ -2860,7 +2860,7 @@ def visit_lambda_node(node)
             braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
           when StatementsNode
             stmts = node.body.body
-            stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+            stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
             stmts = visit_statements_node_body(stmts)
 
             bounds(node.body.location)
@@ -3354,7 +3354,7 @@ def visit_pre_execution_node(node)
       # The top-level program node.
       def visit_program_node(node)
         body = node.statements.body
-        body << nil if body.empty?
+        body = [nil] if body.empty?
         statements = visit_statements_node_body(body)
 
         bounds(node.location)
@@ -4070,7 +4070,7 @@ def visit_yield_node(node)
 
       # Lazily initialize the parse result.
       def result
-        @result ||= Prism.parse(source, partial_script: true, version: "current")
+        @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true)
       end
 
       def line_and_column_cache

From d611aa9d1161a3da3a109972d18784304476f5cc Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Tue, 21 Apr 2026 19:38:06 +0200
Subject: [PATCH 3/3] Optimize ripper `visit_token`

It was showing up in profiles.

So:
* Don't splat `KEYWORDS` (also did the same for `BINARY_OPERATORS`)
* Use `start_with?` if possible

Overall gives a ~5% speed boost
---
 lib/prism/translation/ripper.rb | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index faf2865434..0f5608b1ec 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -347,7 +347,7 @@ def self.coerce_source(source) # :nodoc:
         "__ENCODING__",
         "__FILE__",
         "__LINE__"
-      ]
+      ].to_set
 
       # A list of all of the Ruby binary operators.
       BINARY_OPERATORS = [
@@ -372,7 +372,7 @@ def self.coerce_source(source) # :nodoc:
         :/,
         :*,
         :**
-      ]
+      ].to_set
 
       private_constant :KEYWORDS, :BINARY_OPERATORS
 
@@ -1295,7 +1295,7 @@ def visit_call_node(node)
               bounds(node.location)
               on_unary(:!, receiver)
             end
-          when *BINARY_OPERATORS
+          when BINARY_OPERATORS
             receiver = visit(node.receiver)
 
             bounds(node.message_loc)
@@ -4095,24 +4095,23 @@ def void_stmt?(left, right, allow_newline)
       # Visit the string content of a particular node. This method is used to
       # split into the various token types.
       def visit_token(token, allow_keywords = true)
-        case token
-        when "."
+        if token == "."
           on_period(token)
-        when "`"
+        elsif token == "`"
           on_backtick(token)
-        when *(allow_keywords ? KEYWORDS : [])
+        elsif allow_keywords && KEYWORDS.include?(token)
           on_kw(token)
-        when /^_/
+        elsif token.start_with?("_")
           on_ident(token)
-        when /^[[:upper:]]\w*$/
+        elsif token.match?(/^[[:upper:]]\w*$/)
           on_const(token)
-        when /^@@/
+        elsif token.start_with?("@@")
           on_cvar(token)
-        when /^@/
+        elsif token.start_with?("@")
           on_ivar(token)
-        when /^\$/
+        elsif token.start_with?("$")
           on_gvar(token)
-        when /^[[:punct:]]/
+        elsif token.match?(/^[[:punct:]]/)
           on_op(token)
         else
           on_ident(token)