Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions lib/rexml/xpath_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def expr( path_stack, nodeset, context=nil )
nodesets
end
when :ancestor
nodeset = step(path_stack) do
nodeset = step(path_stack, order: :reversed) do
nodesets = []
# new_nodes = {}
nodeset.each do |node|
Expand All @@ -280,7 +280,7 @@ def expr( path_stack, nodeset, context=nil )
nodesets
end
when :ancestor_or_self
nodeset = step(path_stack) do
nodeset = step(path_stack, order: :reversed) do
nodesets = []
# new_nodes = {}
nodeset.each do |node|
Expand Down Expand Up @@ -325,7 +325,7 @@ def expr( path_stack, nodeset, context=nil )
nodesets
end
when :preceding_sibling
nodeset = step(path_stack, order: :reverse) do
nodeset = step(path_stack, order: :reversed) do
nodesets = []
nodeset.each do |node|
raw_node = node.raw_node
Expand All @@ -342,7 +342,7 @@ def expr( path_stack, nodeset, context=nil )
nodesets
end
when :preceding
nodeset = step(path_stack, order: :reverse) do
nodeset = step(path_stack, order: :reversed) do
unnode(nodeset) do |node|
preceding(node)
end
Expand Down Expand Up @@ -460,7 +460,7 @@ def expr( path_stack, nodeset, context=nil )
leave(:expr, path_stack, nodeset) if @debug
end

def step(path_stack, any_type: :element, order: :forward)
def step(path_stack, any_type: :element, order: :ordered)
nodesets = yield
begin
enter(:step, path_stack, nodesets) if @debug
Expand All @@ -471,7 +471,7 @@ def step(path_stack, any_type: :element, order: :forward)
nodesets = evaluate_predicate(predicate_expression, nodesets)
end
if nodesets.size == 1
ordered_nodeset = nodesets[0]
ordered_nodeset = order == :ordered ? nodesets.first : nodesets.first.reverse
else
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tompng
Thank you for this PR.

But, performance has decreased because the optimization for nodesets.size == 1 has been removed.
Is there any way to improve this?

$ git checkout master
$ rake build
rexml 3.4.5 built to pkg/rexml-3.4.5.gem.
$ gem install pkg/rexml-3.4.5.gem
$ git checkout no_reverse_order
$ benchmark-driver benchmark/xpath_no_reverse_order.yaml
Calculating -------------------------------------
                     master(a98066c)       after  master(a98066c,YJIT)  after(YJIT) 
               child         280.227      19.016               544.496       31.084 i/s -     100.000 times in 0.356853s 5.258603s 0.183656s 3.217105s
          descendant          18.445       9.682                28.523       15.258 i/s -     100.000 times in 5.421532s 10.327928s 3.505883s 6.554030s

Comparison:
                            child
master(a98066c,YJIT):       544.5 i/s 
     master(a98066c):       280.2 i/s - 1.94x  slower
         after(YJIT):        31.1 i/s - 17.52x  slower
               after:        19.0 i/s - 28.63x  slower

                       descendant
master(a98066c,YJIT):        28.5 i/s 
     master(a98066c):        18.4 i/s - 1.55x  slower
         after(YJIT):        15.3 i/s - 1.87x  slower
               after:         9.7 i/s - 2.95x  slower
  • benchmark/xpath_no_reverse_order.yaml
loop_count: 100
contexts:
  - name: master(a98066c)
    gems:
      rexml: 3.4.5
    require: false
    prelude: require 'rexml'
  - name: after
    prelude: |
      $LOAD_PATH.unshift(File.expand_path("lib"))
      require 'rexml'
  - name: master(a98066c,YJIT)
    gems:
      rexml: 3.4.5
    require: false
    prelude: |
      require 'rexml'
      RubyVM::YJIT.enable
  - name: after(YJIT)
    prelude: |
      $LOAD_PATH.unshift(File.expand_path("lib"))
      require 'rexml'
      RubyVM::YJIT.enable

prelude: |
  require "rexml"
  xml = "<root>" + (1..2000).map { |i| "<item id='#{i}'/>" }.join + "</root>"
  doc = REXML::Document.new(xml)

benchmark:
  child: REXML::XPath.match(doc, "/root/item")
  descendant: REXML::XPath.match(doc, "//item")

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to improve this?

Yes, there are several ways.

  • A. Improve performance of sort

    • Should be done in a separate PR
  • B. Mark nodesets ordering

    • ordered_nodeset = marked_as_ordered? ? nodesets.first : nodesets.first.reverse
    • Ordering is currently :ordered | :reversed but in Optimize XPath step #315 we need to add :unordered
    • Conflicts with C
  • C. Delay nodeset ordering on demand

    • Sort the final result and in some functions such number(xpath)
    • Requires predicate position-independency analytics, requires sorted nodeset or not
    • After doing this, optimization of B needs to be extended. We need to re-implement B in a different way again.

I implemented B with changing the meaning of order: keyword parameter

  • Before: key to specify the output order: :forward | :reverse
  • After: key to specify the state of nodesets ordering: :ordered | :reversed | :unordered(future)

Perhaps this approach will be a blocker implementing C(delayed nodeset ordering), and optimizing XPathParser#step (#315) but we can consider it later.

seen = {}.compare_by_identity
raw_nodes = []
Expand All @@ -483,8 +483,9 @@ def step(path_stack, any_type: :element, order: :forward)
raw_nodes << raw_node
end
end
ordered_nodeset = sort(raw_nodes, order)
ordered_nodeset = sort(raw_nodes)
end

new_nodeset = []
ordered_nodeset.each do |node|
new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
Expand Down Expand Up @@ -667,7 +668,7 @@ def leave(tag, *args)
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
def sort(array_of_nodes, order)
def sort(array_of_nodes)
new_arry = []
array_of_nodes.each { |node|
node_idx = []
Comment on lines 672 to 674
Expand All @@ -679,11 +680,7 @@ def sort(array_of_nodes, order)
new_arry << [ node_idx.reverse, node ]
}
ordered = new_arry.sort_by do |index, node|
if order == :forward
index
else
index.map(&:-@)
end
index
end
ordered.collect do |_index, node|
node
Expand Down
2 changes: 1 addition & 1 deletion test/xpath/test_axis_preceding_sibling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_preceding_sibling_axis
assert_equal "6", context.attributes["id"]

prev = XPath.first(context, "preceding-sibling::f")
assert_equal "5", prev.attributes["id"]
assert_equal "3", prev.attributes["id"]

prev = XPath.first(context, "preceding-sibling::f[1]")
assert_equal "5", prev.attributes["id"]
Expand Down
65 changes: 48 additions & 17 deletions test/xpath/test_base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -382,32 +382,39 @@ def test_preceding
start = XPath.first( d, "/a/b[@id='1']" )
assert_equal 'b', start.name
c = XPath.first( start, "preceding::c" )
assert_equal '2', c.attributes['id']
assert_equal '0', c.attributes['id']

c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" )
assert_equal '1', c1.attributes['id']
b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" )
assert_equal 'b', b0.name
assert_equal 'b', b2.name
assert_equal 'c', c0.name
assert_equal 'c', c1.name

assert_equal '0', b0.attributes['id']
assert_equal '2', b2.attributes['id']
assert_equal '0', c0.attributes['id']
assert_equal '1', c1.attributes['id']

c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" )
b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" )

assert_equal 'c', c2.name
assert_equal 'c', c1.name
assert_equal 'c', c0.name
assert_equal 'b', b.name
assert_equal 'b', b2.name
assert_equal 'c', c1.name
assert_equal 'c', c2.name
assert_equal 'b', b0.name
assert_equal 'b', b2.name
assert_equal 'b', b.name

assert_equal '2', c2.attributes['id']
assert_equal '1', c1.attributes['id']
assert_equal '0', c0.attributes['id']
assert b.attributes.empty?
assert_equal '2', b2.attributes['id']
assert_equal '1', c1.attributes['id']
assert_equal '2', c2.attributes['id']
assert_equal '0', b0.attributes['id']
assert_equal '2', b2.attributes['id']
assert b.attributes.empty?

d = REXML::Document.new("<a><b/><c/><d/></a>")
matches = REXML::XPath.match(d, "/a/d/preceding::node()")
assert_equal("c", matches[0].name)
assert_equal("b", matches[1].name)
assert_equal("b", matches[0].name)
assert_equal("c", matches[1].name)

s = "<a><b><c id='1'/></b><b><b><c id='2'/><c id='3'/></b><c id='4'/></b><c id='NOMATCH'><c id='5'/></c></a>"
d = REXML::Document.new(s)
Expand All @@ -425,7 +432,7 @@ def test_preceding_multiple
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/d/preceding::*")
assert_equal(["d", "c", "b"], matches.map(&:name))
assert_equal(["b", "c", "d"], matches.map(&:name))
end

def test_following_multiple
Expand Down Expand Up @@ -498,7 +505,7 @@ def test_preceding_sibling_across_multiple_nodes
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
assert_equal(["e", "d", "c"], matches.map(&:name))
assert_equal(["c", "d", "e"], matches.map(&:name))
end

def test_preceding_sibling_within_single_node
Expand All @@ -511,7 +518,7 @@ def test_preceding_sibling_within_single_node
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
assert_equal(["e", "x", "d", "c"], matches.map(&:name))
assert_equal(["c", "d", "x", "e"], matches.map(&:name))
end

def test_following
Expand Down Expand Up @@ -901,6 +908,30 @@ def test_ordering
r.collect {|element| element.attribute("id").value})
end

def test_order_consistency
doc = REXML::Document.new('<a><b><c><d id="1"><d id="2"/></d></c></b></a>')
assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name)
assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name)
assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name))
assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name))

assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name)
assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name)
assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name))
assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name))

doc = REXML::Document.new('<a><b/><c/><d id="1"/><d id="2"/></a>')
assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name)
assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name)
assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name))
assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name))

assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name)
assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name)
assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name))
assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name))
end

def test_descendant_or_self_ordering
source = "<a>
<b>
Expand Down
Loading