diff --git a/Gemfile b/Gemfile
index e584ffa4..d4f9f834 100644
--- a/Gemfile
+++ b/Gemfile
@@ -6,6 +6,7 @@ gemspec
 # Development dependencies
 gem "debug", ">= 1.11.1"
 gem "guard-rspec", "~> 4.0"
+gem "prism", "~> 1.2"
 gem "rake", "~> 13.0"
 gem "rspec", "~> 3.0"
 gem "rspec_junit_formatter", "~> 0.6.0"
diff --git a/lib/rufo.rb b/lib/rufo.rb
index 24d94b06..b7044f49 100644
--- a/lib/rufo.rb
+++ b/lib/rufo.rb
@@ -14,7 +14,13 @@ def initialize(message, lineno)
   end
 
   def self.format(code, **options)
-    Formatter.format(code, **options)
+    engine = options.delete(:engine)
+    case engine
+    when :prism
+      PrismFormatter.format(code, **options)
+    else
+      Formatter.format(code, **options)
+    end
   end
 end
 
@@ -25,6 +31,7 @@ def self.format(code, **options)
 require_relative "rufo/parser"
 require_relative "rufo/formatter"
 require_relative "rufo/erb_formatter"
+require_relative "rufo/prism_formatter"
 require_relative "rufo/version"
 require_relative "rufo/file_list"
 require_relative "rufo/file_finder"
diff --git a/lib/rufo/prism_formatter.rb b/lib/rufo/prism_formatter.rb
new file mode 100644
index 00000000..a728c60d
--- /dev/null
+++ b/lib/rufo/prism_formatter.rb
@@ -0,0 +1,388 @@
+# frozen_string_literal: true
+
+require "prism"
+
+# Formatter engine built on the Prism parser. Selected through
+# `Rufo.format(code, engine: :prism)`.
+class Rufo::PrismFormatter
+  include Rufo::Settings
+
+  INDENT_SIZE = 2
+
+  # Prism reports some semantic-validity issues with :syntax level even
+  # though it still builds a complete AST. The formatter can handle these
+  # inputs (matching the existing Ripper-based formatter, which formats
+  # syntactically-well-formed-but-semantically-invalid code).
+  NON_FATAL_ERROR_TYPES = [
+    :invalid_block_exit, # redo / break / next outside a loop
+    :invalid_retry_without_rescue, # retry outside rescue
+  ].freeze
+
+  # Formats `code` and returns the formatted source as a String.
+  # Raises Rufo::SyntaxError when Prism reports a fatal parse error.
+  def self.format(code, **options)
+    formatter = new(code, **options)
+    formatter.format
+    formatter.result
+  end
+
+  # Parses `code` eagerly, so a fatal syntax error is raised here with the
+  # line number of the first such error.
+  def initialize(code, **options)
+    @code = code
+    @parse_result = Prism.parse(code)
+    fatal_errors = @parse_result.errors.reject { |e| NON_FATAL_ERROR_TYPES.include?(e.type) }
+    unless fatal_errors.empty?
+      error = fatal_errors.first
+      raise Rufo::SyntaxError.new(error.message, error.location.start_line)
+    end
+
+    init_settings(options)
+  end
+
+  # Walks the AST and stores the formatted source, readable via `result`.
+  def format
+    visitor = FormatVisitor.new(@code, @parse_result.comments)
+    @parse_result.value.accept(visitor)
+    visitor.finish
+    @output = visitor.output
+
+    @output.chomp! if @output.end_with?("\n\n")
+    @output.lstrip!
+    @output << "\n" unless @output.end_with?("\n")
+  end
+
+  # The output produced by the last call to `format`.
+  def result
+    @output
+  end
+
+  # Emits formatted source while walking the Prism AST. Offsets taken from
+  # Prism locations are byte offsets, so source text is always extracted
+  # with `source_slice` (byte based) rather than `String#[]`.
+  class FormatVisitor < Prism::Visitor
+    attr_reader :output
+
+    def initialize(code, comments)
+      super()
+      @code = code
+      @output = +""
+      @comments = comments
+      @comment_index = 0
+      @source_offset = 0
+      @indent = 0
+      @column = 0
+      @indent_pending = true
+      @pending_heredocs = []
+    end
+
+    # Drains comments after the last node and any heredoc bodies still queued.
+    def finish
+      consume_source_up_to(@code.bytesize)
+      flush_pending_heredocs
+    end
+
+    def visit_nil_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_true_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_false_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_integer_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_float_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_rational_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_imaginary_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_symbol_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_interpolated_symbol_node(node)
+      write_code_at(node.location)
+    end
+
+    # A heredoc's opening is written in place; its body and terminator are
+    # queued and emitted after the current output line ends.
+    def visit_string_node(node)
+      if heredoc?(node)
+        write_code_at(node.opening_loc)
+        @pending_heredocs << node
+        @source_offset = node.closing_loc.end_offset
+      else
+        write_code_at(node.location)
+      end
+    end
+
+    def visit_x_string_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_regular_expression_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_interpolated_regular_expression_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_class_variable_read_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_global_variable_read_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_numbered_reference_read_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_local_variable_read_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_local_variable_write_node(node)
+      consume_source_up_to(node.location.start_offset)
+      write(node.name.to_s)
+      write(" = ")
+      node.value.accept(self)
+    end
+
+    def visit_hash_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_instance_variable_read_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_undef_node(node)
+      consume_source_up_to(node.location.start_offset)
+      write("undef ")
+      node.names.each_with_index do |name, i|
+        if i > 0
+          write(", ")
+        end
+        name.accept(self)
+      end
+    end
+
+    def visit_redo_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_retry_node(node)
+      write_code_at(node.location)
+    end
+
+    def visit_alias_method_node(node)
+      visit_alias(node)
+    end
+
+    def visit_alias_global_variable_node(node)
+      visit_alias(node)
+    end
+
+    def visit_parentheses_node(node)
+      write_code_at(node.opening_loc)
+      # `()` has no body.
+      node.body&.accept(self)
+      write_code_at(node.closing_loc)
+    end
+
+    # NOTE(review): arguments, parentheses and blocks of the call are not
+    # written by this method yet.
+    def visit_call_node(node)
+      if node.receiver && node.call_operator_loc
+        node.receiver.accept(self)
+        write_code_at(node.call_operator_loc)
+        write_code_at(node.message_loc)
+      elsif node.receiver
+        # Unary prefix operator (e.g. -x, +x): message before receiver.
+        write_code_at(node.message_loc)
+        node.receiver.accept(self)
+      else
+        write_code_at(node.message_loc)
+      end
+    end
+
+    # Formats a block-form `if`. `else` / `elsif` continuations hang off
+    # `subsequent`; an `elsif` is itself an IfNode, and only the outermost
+    # node writes the closing `end`.
+    # NOTE(review): modifier and ternary forms are not handled here yet.
+    def visit_if_node(node)
+      consume_source_up_to(node.location.start_offset)
+      write_code_at(node.if_keyword_loc)
+      write(" ")
+      node.predicate.accept(self)
+      write_newline
+      indent do
+        node.statements&.accept(self)
+      end
+      write_newline_unless_pending
+      node.subsequent&.accept(self)
+      write_code_at(node.end_keyword_loc) unless node.if_keyword_loc.slice == "elsif"
+    end
+
+    def visit_unless_node(node)
+      consume_source_up_to(node.location.start_offset)
+      write_code_at(node.keyword_loc)
+      write(" ")
+      node.predicate.accept(self)
+      write_newline
+      indent do
+        node.statements&.accept(self)
+      end
+      write_newline_unless_pending
+      node.else_clause&.accept(self)
+      write_code_at(node.end_keyword_loc)
+    end
+
+    def visit_else_node(node)
+      write_code_at(node.else_keyword_loc)
+      write_newline
+      indent do
+        node.statements&.accept(self)
+      end
+      write_newline_unless_pending
+    end
+
+    def visit_statements_node(node)
+      node.body.each_with_index do |child, i|
+        consume_source_up_to(child.location.start_offset)
+        write_newline if i > 0 && !@indent_pending
+        child.accept(self)
+      end
+    end
+
+    private
+
+    def visit_alias(node)
+      consume_source_up_to(node.location.start_offset)
+      write_code_at(node.keyword_loc)
+      write(" ")
+      node.new_name.accept(self)
+      write(" ")
+      node.old_name.accept(self)
+    end
+
+    # Append `value` to the output. Emits the pending indent first if we are
+    # at the start of a line. `value` is assumed not to contain "\n" — use
+    # `write_newline` to end a line.
+    def write(value)
+      return if value.empty?
+      if @indent_pending
+        pad = " " * @indent
+        @output << pad
+        @column += pad.length
+        @indent_pending = false
+      end
+      @output << value
+      @column += value.length
+    end
+
+    def write_newline
+      @output << "\n"
+      @column = 0
+      @indent_pending = true
+      flush_pending_heredocs
+    end
+
+    def write_newline_unless_pending
+      write_newline unless @indent_pending
+    end
+
+    def write_code_at(location)
+      consume_source_up_to(location.start_offset)
+      write(source_slice(location.start_offset, location.end_offset))
+      @source_offset = location.end_offset
+    end
+
+    # Returns the source between two byte offsets ("" for an empty or
+    # inverted range). `String#[]` would index characters, not bytes.
+    def source_slice(start_offset, end_offset)
+      return "" if end_offset <= start_offset
+      @code.byteslice(start_offset, end_offset - start_offset)
+    end
+
+    def indent
+      @indent += Rufo::PrismFormatter::INDENT_SIZE
+      yield
+    ensure
+      @indent -= Rufo::PrismFormatter::INDENT_SIZE
+    end
+
+    # Drain comments that occur before `offset` and advance the source cursor.
+    # `@source_offset` is the position past the last source bytes already
+    # accounted for in `@output` (either copied verbatim, or skipped as
+    # discardable whitespace between AST nodes).
+    def consume_source_up_to(offset)
+      return if offset <= @source_offset
+      while @comment_index < @comments.size && @comments[@comment_index].location.start_offset < offset
+        emit_comment(@comments[@comment_index])
+        @comment_index += 1
+      end
+      @source_offset = offset if offset > @source_offset
+    end
+
+    def heredoc?(node)
+      node.opening_loc&.slice&.start_with?("<<")
+    end
+
+    # Append the body and closing of pending heredocs after the current
+    # output line. Prism keeps the opening, body, and closing in separate
+    # source locations because they are interleaved with whatever follows the
+    # opening on the same source line.
+    def flush_pending_heredocs
+      return if @pending_heredocs.empty?
+      @output << "\n" unless @output.empty? || @output.end_with?("\n")
+      heredocs = @pending_heredocs
+      @pending_heredocs = []
+      heredocs.each do |heredoc|
+        @output << source_slice(heredoc.content_loc.start_offset, heredoc.content_loc.end_offset)
+        @output << source_slice(heredoc.closing_loc.start_offset, heredoc.closing_loc.end_offset)
+      end
+      @column = 0
+      @indent_pending = true
+    end
+
+    def emit_comment(comment)
+      # `start_column` is the byte column, so this is the byte offset of the
+      # first character on the comment's line.
+      line_start = comment.location.start_offset - comment.location.start_column
+      before_on_line = source_slice(line_start, comment.location.start_offset)
+
+      if before_on_line.match?(/\A\s*\z/)
+        # Standalone comment — emit on its own line.
+        write_newline_unless_pending
+        write(comment.slice)
+        write_newline
+      else
+        # Trailing comment — preserve the spacing between the preceding code
+        # and the comment as it appears in the source.
+        gap_start = [@source_offset, line_start].max
+        write(source_slice(gap_start, comment.location.start_offset))
+        write(comment.slice)
+        write_newline
+      end
+      @source_offset = comment.location.end_offset
+    end
+  end
+end
diff --git a/spec/lib/rufo/prism_formatter_source_specs/alias.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/alias.rb.spec
new file mode 100644
index 00000000..2301cc5a
--- /dev/null
+++ b/spec/lib/rufo/prism_formatter_source_specs/alias.rb.spec
@@ -0,0 +1,27 @@
+#~# ORIGINAL
+
+alias foo bar
+
+#~# EXPECTED
+alias foo bar
+
+#~# ORIGINAL
+
+alias :foo :bar
+
+#~# EXPECTED
+alias :foo :bar
+
+#~# ORIGINAL
+
+alias store []=
+
+#~# EXPECTED
+alias store []=
+
+#~# ORIGINAL
+
+alias $foo $bar
+
+#~# EXPECTED
+alias $foo $bar
diff --git a/spec/lib/rufo/prism_formatter_source_specs/backtick_strings.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/backtick_strings.rb.spec
new file mode 100644
index 00000000..b485ea2f
--- /dev/null
+++ b/spec/lib/rufo/prism_formatter_source_specs/backtick_strings.rb.spec
@@ -0,0 +1,13 @@
+#~# ORIGINAL
+
+`cat meow`
+
+#~# EXPECTED
+`cat meow`
+
+#~# ORIGINAL
+
+ %x( cat meow )
+
+#~# EXPECTED
+%x(
cat meow ) diff --git a/spec/lib/rufo/prism_formatter_source_specs/booleans.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/booleans.rb.spec new file mode 100644 index 00000000..218d87dc --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/booleans.rb.spec @@ -0,0 +1,13 @@ +#~# ORIGINAL false + +false + +#~# EXPECTED +false + +#~# ORIGINAL true + +true + +#~# EXPECTED +true diff --git a/spec/lib/rufo/prism_formatter_source_specs/calls_with_receiver.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/calls_with_receiver.rb.spec new file mode 100644 index 00000000..5c4b2f11 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/calls_with_receiver.rb.spec @@ -0,0 +1,20 @@ +#~# ORIGINAL simple dot + +a.foo + +#~# EXPECTED +a.foo + +#~# ORIGINAL chained dots collapse spaces + +foo . bar . baz + +#~# EXPECTED +foo.bar.baz + +#~# ORIGINAL safe navigation + +a&.foo + +#~# EXPECTED +a&.foo diff --git a/spec/lib/rufo/prism_formatter_source_specs/chars.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/chars.rb.spec new file mode 100644 index 00000000..839f899d --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/chars.rb.spec @@ -0,0 +1,6 @@ +#~# ORIGINAL char + +?a + +#~# EXPECTED +?a diff --git a/spec/lib/rufo/prism_formatter_source_specs/class_variables.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/class_variables.rb.spec new file mode 100644 index 00000000..90feed28 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/class_variables.rb.spec @@ -0,0 +1,6 @@ +#~# ORIGINAL + +@@foo + +#~# EXPECTED +@@foo diff --git a/spec/lib/rufo/prism_formatter_source_specs/comments.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/comments.rb.spec new file mode 100644 index 00000000..a40e18c3 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/comments.rb.spec @@ -0,0 +1,40 @@ +#~# ORIGINAL standalone single + +# foo + +#~# EXPECTED +# foo + +#~# ORIGINAL two consecutive standalones + +# foo +# bar + +#~# EXPECTED +# 
foo +# bar + +#~# ORIGINAL trailing after integer + +1 # foo + +#~# EXPECTED +1 # foo + +#~# ORIGINAL standalone before code + +# a +1 + +#~# EXPECTED +# a +1 + +#~# ORIGINAL trailing then standalone + +1 # a +# b + +#~# EXPECTED +1 # a +# b diff --git a/spec/lib/rufo/prism_formatter_source_specs/floats.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/floats.rb.spec new file mode 100644 index 00000000..c837f0ea --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/floats.rb.spec @@ -0,0 +1,13 @@ +#~# ORIGINAL + +12.34 + +#~# EXPECTED +12.34 + +#~# ORIGINAL + +12.34e-10 + +#~# EXPECTED +12.34e-10 diff --git a/spec/lib/rufo/prism_formatter_source_specs/heredoc.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/heredoc.rb.spec new file mode 100644 index 00000000..28ae8bb4 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/heredoc.rb.spec @@ -0,0 +1,71 @@ +#~# ORIGINAL bare heredoc + +< 2 } + +#~# EXPECTED +{ 1 => 2 } + diff --git a/spec/lib/rufo/prism_formatter_source_specs/special_global_variables.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/special_global_variables.rb.spec new file mode 100644 index 00000000..6d280ac3 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/special_global_variables.rb.spec @@ -0,0 +1,27 @@ +#~# ORIGINAL + +$~ + +#~# EXPECTED +$~ + +#~# ORIGINAL + +$1 + +#~# EXPECTED +$1 + +#~# ORIGINAL + +$! + +#~# EXPECTED +$! 
+ +#~# ORIGINAL + +$@ + +#~# EXPECTED +$@ diff --git a/spec/lib/rufo/prism_formatter_source_specs/symbol_literals.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/symbol_literals.rb.spec new file mode 100644 index 00000000..91549979 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/symbol_literals.rb.spec @@ -0,0 +1,27 @@ +#~# ORIGINAL + +:foo + +#~# EXPECTED +:foo + +#~# ORIGINAL + +:"foo" + +#~# EXPECTED +:"foo" + +#~# ORIGINAL + +:"foo#{1}" + +#~# EXPECTED +:"foo#{1}" + +#~# ORIGINAL + +:* + +#~# EXPECTED +:* diff --git a/spec/lib/rufo/prism_formatter_source_specs/unary_operators.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/unary_operators.rb.spec new file mode 100644 index 00000000..0bed1d69 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/unary_operators.rb.spec @@ -0,0 +1,34 @@ +#~# ORIGINAL + +- x + +#~# EXPECTED +-x + +#~# ORIGINAL + ++ x + +#~# EXPECTED ++x + +#~# ORIGINAL + ++x + +#~# EXPECTED ++x + +#~# ORIGINAL + ++(x) + +#~# EXPECTED ++(x) + +#~# ORIGINAL + ++ (x) + +#~# EXPECTED ++(x) diff --git a/spec/lib/rufo/prism_formatter_source_specs/undef.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/undef.rb.spec new file mode 100644 index 00000000..70888a86 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/undef.rb.spec @@ -0,0 +1,13 @@ +#~# ORIGINAL + +undef foo + +#~# EXPECTED +undef foo + +#~# ORIGINAL + +undef foo , bar + +#~# EXPECTED +undef foo, bar diff --git a/spec/lib/rufo/prism_formatter_source_specs/unless.rb.spec b/spec/lib/rufo/prism_formatter_source_specs/unless.rb.spec new file mode 100644 index 00000000..b429c290 --- /dev/null +++ b/spec/lib/rufo/prism_formatter_source_specs/unless.rb.spec @@ -0,0 +1,23 @@ +#~# ORIGINAL + +unless 1 +2 +end + +#~# EXPECTED +unless 1 + 2 +end + +#~# ORIGINAL + +unless 1 +2 +else +end + +#~# EXPECTED +unless 1 + 2 +else +end diff --git a/spec/lib/rufo/prism_formatter_source_specs/variables.rb.spec 
b/spec/lib/rufo/prism_formatter_source_specs/variables.rb.spec
new file mode 100644
index 00000000..b9609f1c
--- /dev/null
+++ b/spec/lib/rufo/prism_formatter_source_specs/variables.rb.spec
@@ -0,0 +1,15 @@
+#~# ORIGINAL
+
+a = 1
+ a
+
+#~# EXPECTED
+a = 1
+a
+
+#~# ORIGINAL
+
+@foo
+
+#~# EXPECTED
+@foo
diff --git a/spec/lib/rufo/prism_formatter_spec.rb b/spec/lib/rufo/prism_formatter_spec.rb
new file mode 100644
index 00000000..fc3d99f3
--- /dev/null
+++ b/spec/lib/rufo/prism_formatter_spec.rb
@@ -0,0 +1,148 @@
+require "fileutils"
+require "pathname"
+
+# NOTE(review): VERSION, FILE_PATH and the helper methods below live at the
+# top level — confirm they do not clash with definitions in other spec files.
+VERSION = Gem::Version.new(RUBY_VERSION)
+FILE_PATH = Pathname.new(File.dirname(__FILE__))
+
+# Defines one example per "#~# ORIGINAL" / "#~# EXPECTED" pair found in
+# `source_specs`. Each example checks the formatted output and that
+# formatting that output again leaves it unchanged.
+def assert_source_specs(source_specs)
+  relative_path = Pathname.new(source_specs).relative_path_from(FILE_PATH).to_s
+
+  describe relative_path do
+    tests = []
+    current_test = nil
+
+    File.foreach(source_specs).with_index do |line, index|
+      case
+      when line =~ /^#~# ORIGINAL ?([\w\s()]+)$/
+        # save old test
+        tests.push current_test if current_test
+
+        # start a new test
+
+        name = $~[1].strip
+        name = "unnamed test" if name.empty?
+
+        current_test = { name: name, line: index + 1, options: {}, original: "" }
+      when line =~ /^#~# EXPECTED$/
+        current_test[:expected] = ""
+      when line =~ /^#~# PENDING$/
+        # :nocov:
+        current_test[:pending] = true
+        # :nocov:
+      when line =~ /^#~# (.+)$/
+        # Options are evaluated as Ruby; only point this at trusted fixtures.
+        current_test[:options] = eval("{ #{$~[1]} }")
+      when current_test[:expected]
+        current_test[:expected] += line
+      when current_test[:original]
+        current_test[:original] += line
+      end
+    end
+
+    # An empty spec file leaves current_test nil; skip it instead of crashing.
+    tests.push(current_test) if current_test
+
+    tests.each do |test|
+      it "formats #{test[:name]} (line: #{test[:line]})" do
+        pending if test[:pending]
+        formatted = described_class.format(test[:original], **test[:options])
+        expected = test[:expected].rstrip + "\n"
+        expect(formatted).to eq(expected)
+        idempotency_check = described_class.format(formatted, **test[:options])
+        expect(idempotency_check).to eq(formatted)
+      end
+    end
+  end
+end
+
+# Defines an example asserting that `code` formats to `expected` with the
+# prism engine, and that formatting the result again is a no-op.
+def assert_format(code, expected = code, **options)
+  expected = expected.rstrip + "\n"
+
+  line = caller_locations[0].lineno
+
+  opts = options.merge(engine: :prism)
+  ex = it "formats #{code.inspect} (line: #{line})" do
+    actual = Rufo.format(code, **opts)
+    if actual != expected
+      fail "Expected\n\n~~~\n#{code}\n~~~\nto format to:\n\n~~~\n#{expected}\n~~~\n\nbut got:\n\n~~~\n#{actual}\n~~~\n\n diff = #{expected.inspect}\n #{actual.inspect}"
+    end
+
+    second = Rufo.format(actual, **opts)
+    if second != actual
+      fail "Idempotency check failed. Expected\n\n~~~\n#{actual}\n~~~\nto format to:\n\n~~~\n#{actual}\n~~~\n\nbut got:\n\n~~~\n#{second}\n~~~\n\n diff = #{second.inspect}\n #{actual.inspect}"
+    end
+  end
+
+  # This is so we can do `rspec spec/rufo_spec.rb:26` and
+  # refer to line numbers for assert_format
+  ex.metadata[:line_number] = line
+end
+
+# Specs that PrismFormatter (engine: :prism) is expected to pass.
+#
+# Files in spec/lib/rufo/prism_formatter_source_specs/ mirror the layout of
+# spec/lib/rufo/formatter_source_specs/ (the legacy Ripper-based engine).
+# When every case in a legacy file passes, the prism copy is verbatim;
+# when only some pass, the prism file is a hand-curated subset (and may
+# add prism-specific cases). No prism file exists for topics PrismFormatter
+# does not yet handle — the PR checklist tracks adoption.
+#
+# Syncing is manual: a new test case added to a legacy file does not
+# automatically appear here. Until PrismFormatter reaches feature parity
+# and a per-engine PENDING marker is introduced, the prism owner reviews
+# legacy changes and either mirrors the case or records the gap.
+RSpec.describe Rufo::PrismFormatter do
+  Dir[File.join(FILE_PATH, "/prism_formatter_source_specs/*")].each do |source_specs|
+    assert_source_specs(source_specs) if File.file?(source_specs)
+  end
+
+  # if VERSION >= Gem::Version.new("3.0")
+  #   Dir[File.join(FILE_PATH, "/prism_formatter_source_specs/3.0/*")].each do |source_specs|
+  #     assert_source_specs(source_specs) if File.file?(source_specs)
+  #   end
+  # end
+
+  # if VERSION >= Gem::Version.new("3.1")
+  #   Dir[File.join(FILE_PATH, "/prism_formatter_source_specs/3.1/*")].each do |source_specs|
+  #     assert_source_specs(source_specs) if File.file?(source_specs)
+  #   end
+  # end
+
+  # if VERSION >= Gem::Version.new("3.2")
+  #   Dir[File.join(FILE_PATH, "/prism_formatter_source_specs/3.2/*")].each do |source_specs|
+  #     assert_source_specs(source_specs) if File.file?(source_specs)
+  #   end
+  # end
+
+  # Empty
+  describe "empty" do
+    assert_format "", ""
+    assert_format " ", " "
+    assert_format "\n", ""
+    assert_format "\n\n", ""
+    assert_format "\n\n\n", ""
+  end
+
+  describe "Syntax errors not handled by Prism", pending: "no test-case for prism" do
+    it "raises an unknown syntax error" do
+      expect {
+        Rufo.format("def foo; FOO = 1; end", engine: :prism)
+      }.to raise_error(Rufo::UnknownSyntaxError)
+    end
+  end
+
+  describe "Syntax errors handled by Prism" do
+    it "raises a syntax error" do
+      expect {
+        Rufo.format("def foo; FOO = 1; end", engine: :prism)
+      }.to raise_error(Rufo::SyntaxError)
+    end
+  end
+end