From 1f46aefc4ee6abf4418e78a19edc7e67ac8b65a4 Mon Sep 17 00:00:00 2001 From: tompng Date: Tue, 26 May 2026 04:09:46 +0900 Subject: [PATCH] XPath nodeset always in document-order In XPath 1.0 spec, nodeset are unordered set, but many operations are required to use document ordered. Functions such as local-name should use first node in document order. Filter expressions such as `(preceding::foo)[1]` need to use document-order. JavaScript's `XPathResult.ORDERED_NODE_ITERATOR_TYPE` `XPathResult.FIRST_ORDERED_NODE_TYPE` always orders in document order. This commit will make REXML match to this ordering, and also fixes inconsistent ordering bug: same xpath may return document-ordered nodes or reverse-document-ordered nodes depending on `nodesets.size`. --- lib/rexml/xpath_parser.rb | 23 ++++---- test/xpath/test_axis_preceding_sibling.rb | 2 +- test/xpath/test_base.rb | 65 +++++++++++++++++------ 3 files changed, 59 insertions(+), 31 deletions(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 02cd5056..82c175e9 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -262,7 +262,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :ancestor - nodeset = step(path_stack) do + nodeset = step(path_stack, order: :reversed) do nodesets = [] # new_nodes = {} nodeset.each do |node| @@ -280,7 +280,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :ancestor_or_self - nodeset = step(path_stack) do + nodeset = step(path_stack, order: :reversed) do nodesets = [] # new_nodes = {} nodeset.each do |node| @@ -325,7 +325,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding_sibling - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack, order: :reversed) do nodesets = [] nodeset.each do |node| raw_node = node.raw_node @@ -342,7 +342,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack, order: :reversed) do unnode(nodeset) do |node| preceding(node) end @@ -460,7 +460,7 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end - def step(path_stack, any_type: :element, order: :forward) + def step(path_stack, any_type: :element, order: :ordered) nodesets = yield begin enter(:step, path_stack, nodesets) if @debug @@ -471,7 +471,7 @@ def step(path_stack, any_type: :element, order: :forward) nodesets = evaluate_predicate(predicate_expression, nodesets) end if nodesets.size == 1 - ordered_nodeset = nodesets[0] + ordered_nodeset = order == :ordered ? nodesets.first : nodesets.first.reverse else seen = {}.compare_by_identity raw_nodes = [] @@ -483,8 +483,9 @@ def step(path_stack, any_type: :element, order: :forward) raw_nodes << raw_node end end - ordered_nodeset = sort(raw_nodes, order) + ordered_nodeset = sort(raw_nodes) end + new_nodeset = [] ordered_nodeset.each do |node| new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) @@ -667,7 +668,7 @@ def leave(tag, *args) # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. - def sort(array_of_nodes, order) + def sort(array_of_nodes) new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -679,11 +680,7 @@ def sort(array_of_nodes, order) new_arry << [ node_idx.reverse, node ] } ordered = new_arry.sort_by do |index, node| - if order == :forward - index - else - index.map(&:-@) - end + index end ordered.collect do |_index, node| node diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 9c44ad63..2f0ba5d1 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -23,7 +23,7 @@ def test_preceding_sibling_axis assert_equal "6", context.attributes["id"] prev = XPath.first(context, "preceding-sibling::f") - assert_equal "5", prev.attributes["id"] + assert_equal "3", prev.attributes["id"] prev = XPath.first(context, "preceding-sibling::f[1]") assert_equal "5", prev.attributes["id"] diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index c7049d67..3d96215f 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -382,32 +382,39 @@ def test_preceding start = XPath.first( d, "/a/b[@id='1']" ) assert_equal 'b', start.name c = XPath.first( start, "preceding::c" ) - assert_equal '2', c.attributes['id'] + assert_equal '0', c.attributes['id'] - c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) - assert_equal '1', c1.attributes['id'] + b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) + assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'c', c0.name + assert_equal 'c', c1.name + + assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] assert_equal '0', c0.attributes['id'] + assert_equal '1', c1.attributes['id'] - c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" ) + b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" ) - assert_equal 'c', c2.name - assert_equal 'c', c1.name assert_equal 'c', c0.name - assert_equal 'b', b.name - assert_equal 'b', b2.name + assert_equal 'c', c1.name + assert_equal 'c', c2.name assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'b', b.name - assert_equal '2', c2.attributes['id'] - assert_equal '1', c1.attributes['id'] assert_equal '0', c0.attributes['id'] - assert b.attributes.empty? - assert_equal '2', b2.attributes['id'] + assert_equal '1', c1.attributes['id'] + assert_equal '2', c2.attributes['id'] assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] + assert b.attributes.empty? d = REXML::Document.new("") matches = REXML::XPath.match(d, "/a/d/preceding::node()") - assert_equal("c", matches[0].name) - assert_equal("b", matches[1].name) + assert_equal("b", matches[0].name) + assert_equal("c", matches[1].name) s = "" d = REXML::Document.new(s) @@ -425,7 +432,7 @@ def test_preceding_multiple XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/d/preceding::*") - assert_equal(["d", "c", "b"], matches.map(&:name)) + assert_equal(["b", "c", "d"], matches.map(&:name)) end def test_following_multiple @@ -498,7 +505,7 @@ def test_preceding_sibling_across_multiple_nodes XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "e"], matches.map(&:name)) end def test_preceding_sibling_within_single_node @@ -511,7 +518,7 @@ def test_preceding_sibling_within_single_node XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "x", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "x", "e"], matches.map(&:name)) end def test_following @@ -901,6 +908,30 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end + def test_order_consistency + doc = REXML::Document.new('') + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name)) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name)) + + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name)) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name)) + + doc = REXML::Document.new('') + assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name)) + + assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name)) + end + def test_descendant_or_self_ordering source = "