From 13174671f8a6a54f1f648b1ebe06834c895ecd33 Mon Sep 17 00:00:00 2001 From: tompng Date: Tue, 26 May 2026 04:09:46 +0900 Subject: [PATCH 1/2] pull320 --- lib/rexml/xpath_parser.rb | 40 +++++++------- test/xpath/test_axis_preceding_sibling.rb | 2 +- test/xpath/test_base.rb | 65 +++++++++++++++++------ 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 02cd5056..c839d265 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -325,7 +325,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding_sibling - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do nodesets = [] nodeset.each do |node| raw_node = node.raw_node @@ -342,7 +342,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do unnode(nodeset) do |node| preceding(node) end @@ -460,7 +460,7 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end - def step(path_stack, any_type: :element, order: :forward) + def step(path_stack, any_type: :element) nodesets = yield begin enter(:step, path_stack, nodesets) if @debug @@ -470,21 +470,19 @@ def step(path_stack, any_type: :element, order: :forward) predicate_expression = path_stack.shift.dclone nodesets = evaluate_predicate(predicate_expression, nodesets) end - if nodesets.size == 1 - ordered_nodeset = nodesets[0] - else - seen = {}.compare_by_identity - raw_nodes = [] - nodesets.each do |nodeset| - nodeset.each do |node| - raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) - seen[raw_node] = true - raw_nodes << raw_node - end + + seen = {}.compare_by_identity + raw_nodes = [] + nodesets.each do |nodeset| + nodeset.each do |node| + raw_node = node.respond_to?(:raw_node) ? node.raw_node : node + next if seen.key?(raw_node) + seen[raw_node] = true + raw_nodes << raw_node end - ordered_nodeset = sort(raw_nodes, order) end + ordered_nodeset = sort(raw_nodes) + new_nodeset = [] ordered_nodeset.each do |node| new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) @@ -667,7 +665,9 @@ def leave(tag, *args) # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. - def sort(array_of_nodes, order) + def sort(array_of_nodes) + return array_of_nodes if array_of_nodes.size <= 1 + new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -679,11 +679,7 @@ def sort(array_of_nodes, order) new_arry << [ node_idx.reverse, node ] } ordered = new_arry.sort_by do |index, node| - if order == :forward - index - else - index.map(&:-@) - end + index end ordered.collect do |_index, node| node diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 9c44ad63..2f0ba5d1 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -23,7 +23,7 @@ def test_preceding_sibling_axis assert_equal "6", context.attributes["id"] prev = XPath.first(context, "preceding-sibling::f") - assert_equal "5", prev.attributes["id"] + assert_equal "3", prev.attributes["id"] prev = XPath.first(context, "preceding-sibling::f[1]") assert_equal "5", prev.attributes["id"] diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index c7049d67..3d96215f 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -382,32 +382,39 @@ def test_preceding start = XPath.first( d, "/a/b[@id='1']" ) assert_equal 'b', start.name c = XPath.first( start, "preceding::c" ) - assert_equal '2', c.attributes['id'] + assert_equal '0', c.attributes['id'] - c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) - assert_equal '1', c1.attributes['id'] + b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) + assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'c', c0.name + assert_equal 'c', c1.name + + assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] assert_equal '0', c0.attributes['id'] + assert_equal '1', c1.attributes['id'] - c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" ) + b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" ) - assert_equal 'c', c2.name - assert_equal 'c', c1.name assert_equal 'c', c0.name - assert_equal 'b', b.name - assert_equal 'b', b2.name + assert_equal 'c', c1.name + assert_equal 'c', c2.name assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'b', b.name - assert_equal '2', c2.attributes['id'] - assert_equal '1', c1.attributes['id'] assert_equal '0', c0.attributes['id'] - assert b.attributes.empty? - assert_equal '2', b2.attributes['id'] + assert_equal '1', c1.attributes['id'] + assert_equal '2', c2.attributes['id'] assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] + assert b.attributes.empty? d = REXML::Document.new("") matches = REXML::XPath.match(d, "/a/d/preceding::node()") - assert_equal("c", matches[0].name) - assert_equal("b", matches[1].name) + assert_equal("b", matches[0].name) + assert_equal("c", matches[1].name) s = "" d = REXML::Document.new(s) @@ -425,7 +432,7 @@ def test_preceding_multiple XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/d/preceding::*") - assert_equal(["d", "c", "b"], matches.map(&:name)) + assert_equal(["b", "c", "d"], matches.map(&:name)) end def test_following_multiple @@ -498,7 +505,7 @@ def test_preceding_sibling_across_multiple_nodes XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "e"], matches.map(&:name)) end def test_preceding_sibling_within_single_node @@ -511,7 +518,7 @@ def test_preceding_sibling_within_single_node XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "x", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "x", "e"], matches.map(&:name)) end def test_following @@ -901,6 +908,30 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end + def test_order_consistency + doc = REXML::Document.new('') + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name)) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name)) + + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name)) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name)) + + doc = REXML::Document.new('') + assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name)) + + assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name)) + end + def test_descendant_or_self_ordering source = " From 739cba0449ad0eb9d737ee20e410300dcb133472 Mon Sep 17 00:00:00 2001 From: tompng Date: Thu, 21 May 2026 02:12:55 +0900 Subject: [PATCH 2/2] Optimize XPath step If a predicate of xpath is position-independent, we don't need to create nodesets that has many duplicated nodes with different positions. Implements optimization of preceding-sibling/following-sibling with simple positional predicates as an example. We can add more optimizations for other axis in the future. --- lib/rexml/functions.rb | 2 +- lib/rexml/xpath_parser.rb | 740 ++++++++++++---------- test/xpath/test_attribute.rb | 13 + test/xpath/test_axis_preceding_sibling.rb | 68 ++ test/xpath/test_base.rb | 8 + test/xpath/test_predicate.rb | 38 ++ 6 files changed, 541 insertions(+), 328 deletions(-) diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index 60ae34e7..6eae58ee 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -53,7 +53,7 @@ def Functions::last( ) end def Functions::position( ) - @@context[:index] + @@context[:position] end # Returns the size of the given list of nodes. diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index c839d265..60e57360 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -151,11 +151,11 @@ def first( path_stack, node ) def match(path_stack, node) - nodeset = [XPathNode.new(node, position: 1)] + nodeset = [node] result = expr(path_stack, nodeset) case result when Array # nodeset - unnode(result).uniq + result.uniq else [result] end @@ -195,32 +195,26 @@ def expr( path_stack, nodeset, context=nil ) op = path_stack.shift case op when :document - first_raw_node = nodeset.first.raw_node - nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)] + nodeset = [nodeset.first.root_node] when :self nodeset = step(path_stack) do - [nodeset] + [:iterate_nodesets, [nodeset]] end when :child nodeset = step(path_stack) do - child(nodeset) + [:iterate_nodesets, child(nodeset)] end when :literal trace(:literal, path_stack, nodeset) if @debug return path_stack.shift when :attribute nodeset = step(path_stack, any_type: :attribute) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.node_type == :element - attributes = raw_node.attributes - next if attributes.empty? - nodesets << attributes.each_attribute.collect.with_index do |attribute, i| - XPathNode.new(attribute, position: i + 1) - end - end - nodesets + nodesets = nodeset.map do |node| + next unless node.node_type == :element + attributes = node.attributes + attributes.each_attribute.to_a unless attributes.empty? + end.compact + [:iterate_nodesets, nodesets] end when :namespace pre_defined_namespaces = { @@ -229,129 +223,44 @@ def expr( path_stack, nodeset, context=nil ) nodeset = step(path_stack, any_type: :namespace) do nodesets = [] nodeset.each do |node| - raw_node = node.raw_node - case raw_node.node_type + case node.node_type when :element if @namespaces nodesets << pre_defined_namespaces.merge(@namespaces) else - nodesets << pre_defined_namespaces.merge(raw_node.namespaces) + nodesets << pre_defined_namespaces.merge(node.namespaces) end when :attribute if @namespaces nodesets << pre_defined_namespaces.merge(@namespaces) else - nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces) + nodesets << pre_defined_namespaces.merge(node.element.namespaces) end end end - nodesets + # Not working at all, so just return an empty nodesets for now. + # Needs Namespace-node class + [:iterate_nodesets, []] end when :parent nodeset = step(path_stack) do - nodesets = [] + parents = {}.compare_by_identity nodeset.each do |node| - raw_node = node.raw_node - if raw_node.node_type == :attribute - parent = raw_node.element + if node.node_type == :attribute + parent = node.element else - parent = raw_node.parent - end - nodesets << [XPathNode.new(parent, position: 1)] if parent - end - nodesets - end - when :ancestor - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - new_nodeset = [] - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? - end - nodesets - end - when :ancestor_or_self - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.node_type == :element - new_nodeset = [XPathNode.new(raw_node, position: 1)] - # new_nodes[node] = true - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? - end - nodesets - end - when :descendant_or_self - nodeset = step(path_stack) do - descendant(nodeset, true) - end - when :descendant - nodeset = step(path_stack) do - descendant(nodeset, false) - end - when :following_sibling - nodeset = step(path_stack) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? - all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - following_siblings = all_siblings[(current_index + 1)..-1] - next if following_siblings.empty? - nodesets << following_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) + parent = node.parent end + parents[parent] = true if parent end - nodesets + [:iterate_nodesets, parents.keys.map {|parent| [parent] }] end - when :preceding_sibling + when :ancestor, :ancestor_or_self, + :descendant, :descendant_or_self, + :preceding, :preceding_sibling, + :following, :following_sibling nodeset = step(path_stack) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? - all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - preceding_siblings = all_siblings[0, current_index].reverse - next if preceding_siblings.empty? - nodesets << preceding_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) - end - end - nodesets - end - when :preceding - nodeset = step(path_stack) do - unnode(nodeset) do |node| - preceding(node) - end - end - when :following - nodeset = step(path_stack) do - unnode(nodeset) do |node| - following(node) - end + [op, nodeset] end when :variable var_name = path_stack.shift @@ -379,8 +288,6 @@ def expr( path_stack, nodeset, context=nil ) when :div, :mod, :mult, :plus, :minus left = expr(path_stack.shift, nodeset, context) right = expr(path_stack.shift, nodeset, context) - left = unnode(left) if left.is_a?(Array) - right = unnode(right) if right.is_a?(Array) left = Functions::number(left) right = Functions::number(right) case op @@ -400,12 +307,9 @@ def expr( path_stack, nodeset, context=nil ) when :union left = expr( path_stack.shift, nodeset, context ) right = expr( path_stack.shift, nodeset, context ) - left = unnode(left) if left.is_a?(Array) - right = unnode(right) if right.is_a?(Array) return (left | right) when :neg res = expr( path_stack, nodeset, context ) - res = unnode(res) if res.is_a?(Array) return -Functions.number(res) when :not when :function @@ -424,18 +328,11 @@ def expr( path_stack, nodeset, context=nil ) target_context = context else target_context = {:size => nodeset.size} - if node.is_a?(XPathNode) - target_context[:node] = node.raw_node - target_context[:index] = node.position - else - target_context[:node] = node - target_context[:index] = 1 - end + target_context[:node] = node + target_context[:position] = 1 end args = arguments.dclone.collect do |arg| - result = expr(arg, nodeset, target_context) - result = unnode(result) if result.is_a?(Array) - result + expr(arg, nodeset, target_context) end Functions.context = target_context return Functions.send(func_name, *args) @@ -446,7 +343,7 @@ def expr( path_stack, nodeset, context=nil ) # If result is a nodeset, apply following predicates path_stack.unshift(:node) nodeset = step(path_stack) do - [result] + [:iterate_nodesets, [result]] end else return result @@ -460,145 +357,350 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end + # Determines if a predicate expression is dependent on the position of nodes. + # nil if the predicate is position-independent, + # :simple if the predicate is a simple position query that can be optimized in axis scanning + # :complex if the predicate is a complex query that might be dependent on the position of nodes + def position_dependency(predicate_expr) + # [number], [position()=number], [position() < number], [position() > number] + return :simple if position_operation(predicate_expr) + + # expressions that return number. + return :complex if %i[div mod mult plus minus neg].include?(predicate_expr[0]) + return :complex if predicate_expr[0] == :function && %w[number ceiling round floor string-length sum count].include?(predicate_expr[1]) + # Numeric literal including Integer and Float: [2] means [position() = 2] + return :complex if predicate_expr[0] == :literal && Numeric === predicate_expr[1] + # A variable could resolve to a number at runtime: [$n] means [position() = $n]. + return :complex if predicate_expr[0] == :variable + + # expressions that contain position-dependent functions + return :complex if calls_position_dependent_function?(predicate_expr) + end + + # Recursively checks if the expression contains position-dependent functions such as position() or last() + def calls_position_dependent_function?(expr) + return false unless Array === expr + return true if expr[0] == :function && (expr[1] == 'position' || expr[1] == 'last') + expr.any? {|part| calls_position_dependent_function?(part) } + end + + # Detects simple position-based predicates that can be optimized in axis scanning, such as [1], [position()=1], [position() < 2], [position() > 3] + # Returns operators and values such as [:==, 1], [:<, 2], [:>, 3] + # Returns nil if the predicate is not a simple position-based predicate + def position_operation(predicate_expr) + return [:==, predicate_expr[1]] if predicate_expr[0] == :literal && predicate_expr[1].is_a?(Integer) + + op, left, right = predicate_expr + return unless op == :eq || op == :lt || op == :lteq || op == :gt || op == :gteq + return unless [left, right].include?([:function, 'position', []]) + + literal = [left, right].find {|part| part[0] == :literal && part[1].is_a?(Integer) } + return unless literal + + value = literal[1] + case op + when :eq + [:==, value] + when :lt + literal == right ? [:<, value] : [:>, value] + when :lteq + literal == right ? [:<, value + 1] : [:>, value - 1] + when :gt + literal == right ? [:>, value]: [:<, value] + when :gteq + literal == right ? [:>, value - 1] : [:<, value + 1] + end + end + + # Pseudo scanner for axis scanning step that nodesets are already collected + def iterate_nodesets(nodesets, tester, selector) + non_optimized_nodesets_select(nodesets, tester, selector) + end + + # Scanner for ancestor-or-self axis + def ancestor_or_self(nodeset, tester, selector) + ancestor(nodeset, tester, selector, include_self: true) + end + + # Scanner for preceding-sibling axis + def preceding_sibling(nodeset, tester, selector) + preceding_following_sibling(nodeset, tester, selector, reverse: true) + end + + # Scanner for following-sibling axis + def following_sibling(nodeset, tester, selector) + preceding_following_sibling(nodeset, tester, selector, reverse: false) + end + + def preceding_following_sibling(nodeset, tester, selector, reverse:) + nodeset = nodeset.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + nodeset.group_by(&:parent).flat_map do |parent, sibling_nodes| + sets = {}.compare_by_identity + sibling_nodes.each {|sibling| sets[sibling] = true } + children = parent.children + children = children.reverse if reverse + children.drop_while {|child| !sets.key?(child) }.drop(1) + end.select(&tester) + when :nodesets + nodesets = nodeset.map do |node| + parent = node.parent + index = parent.children.index(node) + reverse ? parent.children[0...index].reverse : parent.children[index + 1..-1] + end + non_optimized_nodesets_select(nodesets, tester, selector) + else + operator, value = selector + nodeset.group_by(&:parent).flat_map do |parent, sibling_nodes| + anchors = {}.compare_by_identity + sibling_nodes.each {|sibling| anchors[sibling] = true } + children = parent.children + children = children.reverse if reverse + followings = children.drop_while {|child| !anchors.key?(child) }.drop(1) + anchor_indexes = { 0 => true } + last_anchor = 0 + index = 0 + matched = [] + followings.each do |node| + if tester.call(node) + case operator + when :== + matched << node if anchor_indexes.include?(index - value + 1) + when :< + # Position from the last anchor will be the minimum possible position for the node + matched << node if index - last_anchor + 1 < value + when :> + # Position from the first anchor(==0) will be the maximum possible position for the node + matched << node if index + 1 > value + end + index += 1 + end + if anchors.key?(node) + anchor_indexes[index] = true + last_anchor = index + end + end + matched + end + end + end + + # Scanner for ancestor axis + def ancestor(nodeset, tester, selector, include_self: false) + nodeset = nodeset.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + ancestors = {}.compare_by_identity + nodeset.each do |node| + ancestors[node] = true if include_self + parent = node.parent + while parent + break if ancestors.key?(parent) + ancestors[parent] = true + parent = parent.parent + end + end + ancestors.keys.select(&tester) + else + # Slow pass + nodesets = nodeset.map do |node| + ancestors = [] + ancestors << node if include_self + parent = node.parent + while parent + ancestors << parent + parent = parent.parent + end + ancestors + end + non_optimized_nodesets_select(nodesets, tester, selector) + end + end + + # Scanner fallback step for axis that is not optimized for position-based predicates. + def non_optimized_nodesets_select(nodesets, tester, selector) + nodesets = nodesets.map do |nodeset| + nodeset.select(&tester) + end.reject(&:empty?) + case selector + when :nodesets + nodesets + when :uniq + seen = {}.compare_by_identity + nodesets.flatten.each {|node| seen[node] = true } + seen.keys + else + operator, value = selector + nodes = nodesets.flatten + nodes = + case operator + when :== + nodesets.map {|nodeset| nodeset[value - 1] if value >= 1 }.compact + when :< + nodesets.map {|nodeset| nodeset[0...value - 1] if value >= 1 }.compact.flatten + when :> + nodesets.map {|nodeset| value < 0 ? nodeset : nodeset[value..-1] }.flatten + end + seen = {}.compare_by_identity + nodes.each {|node| seen[node] = true } + seen.keys + end + end + + # Split predicates into several groups based on their dependency on the position of nodes + # If there are no position-based predicates, + # return [position_independent_predicates, nil, [], nil] + # If there are only one simple position-based predicate, + # return [position_independent_predicates, position_operator, post_position_independent_predicates, nil] + # If there are multiple position-based predicates or complex position-based predicates, + # return [position_independent_predicates, nil, nil, complex_predicates] + def split_positional_predicates(predicates) + pre_independent = predicates.take_while {|predicate| position_dependency(predicate).nil? } + predicates = predicates.drop(pre_independent.size) + return [pre_independent, nil, [], nil] if predicates.empty? + + op = position_operation(predicates.first) + if op && predicates[1..-1].all? {|predicate| position_dependency(predicate).nil? } + [pre_independent, op, predicates[1..-1], nil] + else + [pre_independent, nil, nil, predicates] + end + end + + # Performs an axis scanning step. + # The caller provides a scanner method and its argument, which determines the axis to scan and the nodes to scan from: + # step(path_stack) { [scanner_method, scanner_argument] } + # Scanner method are called with `(scanner_argument, tester_block, selector)` + # selector is a flag for the scanner to determine how to return the scan result. + # It can be: `:uniq`, `:nodesets` or `[position_comparator, value]`. + # `:uniq` means the scanner should return unique nodes. Predicates are position-independent. + # `:nodesets` means the scanner should return nodesets. Predicates are complex position queries that can't be optimized in axis scanning. + # `[position_comparator, value]` means the scanner should return nodes matching the position comparator and value. + # Each scanner method can implement optimized scanning strategy for each selector. + def step(path_stack, any_type: :element) - nodesets = yield + scanner, scanner_argument = yield begin - enter(:step, path_stack, nodesets) if @debug - nodesets = node_test(path_stack, nodesets, any_type: any_type) - while path_stack[0] == :predicate - path_stack.shift # :predicate - predicate_expression = path_stack.shift.dclone - nodesets = evaluate_predicate(predicate_expression, nodesets) + enter(:step, path_stack, scanner, scanner_argument) if @debug + tester = node_test(path_stack, any_type: any_type) + predicates = [] + while path_stack.first == :predicate + path_stack.shift + predicates << path_stack.shift + end + pre_predicates, position_operator, post_predicates, complex_predicates = split_positional_predicates(predicates) + + if pre_predicates.any? + original_tester = tester + tester = -> (node) do + original_tester.call(node) && + pre_predicates.all? do |predicate_expr| + evaluate_predicate(predicate_expr.dclone, [[node]]).flatten.size == 1 + end + end + end + if complex_predicates + nodesets = send(scanner, scanner_argument, tester, :nodesets) + elsif position_operator + nodeset = send(scanner, scanner_argument, tester, position_operator) + nodesets = [nodeset] + else + nodeset = send(scanner, scanner_argument, tester, :uniq) + nodesets = [nodeset] end + (complex_predicates || post_predicates).each do |predicate_expr| + nodesets = evaluate_predicate(predicate_expr.dclone, nodesets) + end seen = {}.compare_by_identity - raw_nodes = [] nodesets.each do |nodeset| nodeset.each do |node| - raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) - seen[raw_node] = true - raw_nodes << raw_node + seen[node] = true end end - ordered_nodeset = sort(raw_nodes) - - new_nodeset = [] - ordered_nodeset.each do |node| - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) - end - new_nodeset + ordered = sort(seen.keys) ensure - leave(:step, path_stack, new_nodeset) if @debug + leave(:step, path_stack, ordered) if @debug end end - def node_test(path_stack, nodesets, any_type: :element) - enter(:node_test, path_stack, nodesets) if @debug + def node_test(path_stack, any_type: :element) + enter(:node_test, path_stack) if @debug operator = path_stack.shift case operator when :qname prefix = path_stack.shift name = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - if prefix.nil? - raw_node.name == name - elsif prefix.empty? - if strict? - raw_node.name == name and raw_node.namespace == "" - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - when :attribute - if prefix.nil? - raw_node.name == name - elsif prefix.empty? - raw_node.name == name and raw_node.namespace == "" + ->(node) do + case node.node_type + when :element + if prefix.nil? + node.name == name + elsif prefix.empty? + if strict? + node.name == name and node.namespace == "" else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) + node.name == name and node.namespace == get_namespace(node, prefix) end else - false + node.name == name and node.namespace == get_namespace(node, prefix) + end + when :attribute + if prefix.nil? + node.name == name + elsif prefix.empty? + node.name == name and node.namespace == "" + else + node.name == name and node.namespace == get_namespace(node.element, prefix) end + else + false end end when :namespace prefix = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - namespaces = @namespaces || raw_node.namespaces - raw_node.namespace == namespaces[prefix] - when :attribute - namespaces = @namespaces || raw_node.element.namespaces - raw_node.namespace == namespaces[prefix] - else - false - end + ->(node) do + case node.node_type + when :element + namespaces = @namespaces || node.namespaces + node.namespace == namespaces[prefix] + when :attribute + namespaces = @namespaces || node.element.namespaces + node.namespace == namespaces[prefix] + else + false end end when :any - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == any_type - end + ->(node) do + node.node_type == any_type end when :comment - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :comment - end + ->(node) do + node.node_type == :comment end when :text - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :text - end + ->(node) do + node.node_type == :text end when :processing_instruction target = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - (raw_node.node_type == :processing_instruction) and - (target.empty? or (raw_node.target == target)) - end + ->(node) do + (node.node_type == :processing_instruction) and + (target.empty? or (node.target == target)) end when :node - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - true - end + ->(_node) do + true end else message = "[BUG] Unexpected node test: " + "<#{operator.inspect}>: <#{path_stack.inspect}>" raise message end - new_nodesets ensure - leave(:node_test, path_stack, new_nodesets) if @debug - end - - def filter_nodeset(nodeset) - new_nodeset = [] - nodeset.each do |node| - next unless yield(node) - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) - end - new_nodeset + leave(:node_test, path_stack) if @debug end def evaluate_predicate(expression, nodesets) @@ -606,30 +708,25 @@ def evaluate_predicate(expression, nodesets) new_nodesets = nodesets.collect do |nodeset| new_nodeset = [] subcontext = { :size => nodeset.size } - nodeset.each_with_index do |node, index| - if node.is_a?(XPathNode) - subcontext[:node] = node.raw_node - subcontext[:index] = node.position - else - subcontext[:node] = node - subcontext[:index] = index + 1 - end + nodeset.each.with_index(1) do |node, position| + subcontext[:node] = node + subcontext[:position] = position result = expr(expression.dclone, [node], subcontext) trace(:predicate_evaluate, expression, node, subcontext, result) if @debug result = result[0] if result.kind_of? Array and result.length == 1 if result.kind_of? Numeric - if result == node.position - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + if result == position + new_nodeset << node end elsif result.instance_of? Array if result.size > 0 and result.inject(false) {|k,s| s or k} if result.size > 0 - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + new_nodeset << node end end else if result - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + new_nodeset << node end end end @@ -673,7 +770,7 @@ def sort(array_of_nodes) node_idx = [] np = node.node_type == :attribute ? node.element : node while np.parent and np.parent.node_type == :element - node_idx << np.parent.index( np ) + node_idx << np.parent.index(np) np = np.parent end new_arry << [ node_idx.reverse, node ] @@ -686,37 +783,71 @@ def sort(array_of_nodes) end end - def descendant(nodeset, include_self) - nodesets = [] - nodeset.each do |node| - new_nodeset = [] - new_nodes = {} - descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self) - nodesets << new_nodeset unless new_nodeset.empty? + # Scanner for descendant-or-self axis + def descendant_or_self(nodeset, tester, selector) + descendant(nodeset, tester, selector, include_self: true) + end + + # Scanner for descendant axis + def descendant(nodeset, tester, selector, include_self: false) + nodeset = nodeset.select {|node| node.respond_to?(:children) } + case selector + when :uniq + seen = {}.compare_by_identity + recursive = ->(node) do + node_type = node.node_type + return if seen[node] + seen[node] = true + return unless node_type == :element || node_type == :document + node.children.each do |child| + recursive.call(child) + end + end + nodeset.each do |node| + if include_self + recursive.call(node) + else + node.children.each(&recursive) + end + end + seen.keys.select(&tester) + else + nodesets = nodeset.map do |node| + new_nodeset = [] + new_nodes = {} + descendant_recursive(node, new_nodeset, new_nodes, include_self) + new_nodeset + end + non_optimized_nodesets_select(nodesets, tester, selector) end - nodesets end - def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) + def descendant_recursive(node, new_nodeset, new_nodes, include_self) if include_self - return if new_nodes.key?(raw_node) - new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1) - new_nodes[raw_node] = true + return if new_nodes.key?(node) + new_nodeset << node + new_nodes[node] = true end - node_type = raw_node.node_type + node_type = node.node_type if node_type == :element or node_type == :document - raw_node.children.each do |child| + node.children.each do |child| descendant_recursive(child, new_nodeset, new_nodes, true) end end end + # Scanner for preceding axis + def preceding(nodeset, tester, selector) + nodesets = nodeset.select {|node| node.respond_to?(:parent) }.map {|node| preceding_nodes(node) } + non_optimized_nodesets_select(nodesets, tester, selector) + end + # Builds a nodeset of all of the preceding nodes of the supplied node, # in reverse document order # preceding:: includes every element in the document that precedes this node, # except for ancestors - def preceding(node) + def preceding_nodes(node) ancestors = [] parent = node.parent while parent @@ -730,8 +861,7 @@ def preceding(node) if ancestors.include?(preceding_node) ancestors.delete(preceding_node) else - precedings << XPathNode.new(preceding_node, - position: precedings.size + 1) + precedings << preceding_node end preceding_node = preceding_node_of(preceding_node) end @@ -753,12 +883,19 @@ def preceding_node_of( node ) psn end - def following(node) + # Scanner for following axis + def following(nodeset, tester, selector) + nodesets = nodeset.select {|node| node.respond_to?(:parent) }.map do |node| + following_nodes(node) + end + non_optimized_nodesets_select(nodesets, tester, selector) + end + + def following_nodes(node) followings = [] following_node = next_sibling_node(node) while following_node - followings << XPathNode.new(following_node, - position: followings.size + 1) + followings << following_node following_node = following_node_of(following_node) end followings @@ -781,30 +918,18 @@ def next_sibling_node(node) end def child(nodeset) - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - node_type = raw_node.node_type + nodeset.map do |node| + node_type = node.node_type # trace(:child, node_type, node) case node_type when :element - nodesets << raw_node.children.collect.with_index do |child_node, i| - XPathNode.new(child_node, position: i + 1) - end + node.children when :document - new_nodeset = [] - raw_node.children.each do |child| - case child - when XMLDecl, Text - # Ignore - else - new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1) - end + node.children.reject do |child| + XMLDecl === child || Text === child end - nodesets << new_nodeset unless new_nodeset.empty? end - end - nodesets + end.compact end def norm b @@ -821,9 +946,6 @@ def norm b end def equality_relational_compare(set1, op, set2) - set1 = unnode(set1) if set1.is_a?(Array) - set2 = unnode(set2) if set2.is_a?(Array) - if set1.kind_of? Array and set2.kind_of? Array # If both objects to be compared are node-sets, then the # comparison will be true if and only if there is a node in the @@ -852,22 +974,22 @@ def equality_relational_compare(set1, op, set2) case b when true, false - each_unnode(a).any? do |unnoded| - compare(Functions.boolean(unnoded), op, b) + a.any? do |node| + compare(Functions.boolean(node), op, b) end when Numeric - each_unnode(a).any? do |unnoded| - compare(Functions.number(unnoded), op, b) + a.any? do |node| + compare(Functions.number(node), op, b) end when /\A\d+(\.\d+)?\z/ b = Functions.number(b) - each_unnode(a).any? do |unnoded| - compare(Functions.number(unnoded), op, b) + a.any? do |node| + compare(Functions.number(node), op, b) end else b = Functions::string(b) - each_unnode(a).any? do |unnoded| - compare(Functions::string(unnoded), op, b) + a.any? do |node| + compare(Functions::string(node), op, b) end end else @@ -942,41 +1064,5 @@ def compare(a, operator, b) raise message end end - - def each_unnode(nodeset) - return to_enum(__method__, nodeset) unless block_given? - nodeset.each do |node| - if node.is_a?(XPathNode) - unnoded = node.raw_node - else - unnoded = node - end - yield(unnoded) - end - end - - def unnode(nodeset) - each_unnode(nodeset).collect do |unnoded| - unnoded = yield(unnoded) if block_given? - unnoded - end - end - end - - # @private - class XPathNode - attr_reader :raw_node, :context - def initialize(node, context=nil) - if node.is_a?(XPathNode) - @raw_node = node.raw_node - else - @raw_node = node - end - @context = context || {} - end - - def position - @context[:position] - end end end diff --git a/test/xpath/test_attribute.rb b/test/xpath/test_attribute.rb index b778ff81..458fda9f 100644 --- a/test/xpath/test_attribute.rb +++ b/test/xpath/test_attribute.rb @@ -32,5 +32,18 @@ def test_no_namespace "nothing" => "") assert_equal(["child2"], children.collect(&:text)) end + + def test_no_error + REXML::XPath.match(@document, '//attribute::*/parent::*') + # Some of those are not yet supported in REXML, but at least it shouldn't raise an error + REXML::XPath.match(@document, '//attribute::*/preceding::*') + REXML::XPath.match(@document, '//attribute::*/following::*') + REXML::XPath.match(@document, '//attribute::*/preceding-sibling::*') + REXML::XPath.match(@document, '//attribute::*/following-sibling::*') + REXML::XPath.match(@document, '//attribute::*/ancestor::*') + REXML::XPath.match(@document, '//attribute::*/descendant::*') + REXML::XPath.match(@document, '//attribute::*/ancestor-or-self::*') + REXML::XPath.match(@document, '//attribute::*/descendant-or-self::*') + end end end diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 2f0ba5d1..f4b78ebc 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -34,5 +34,73 @@ def test_preceding_sibling_axis prev = XPath.first(context, "preceding-sibling::f[3]") assert_equal "3", prev.attributes["id"] end + + def test_preceding_sibling_position_less_than + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], XPath.match(context, "preceding-sibling::f[position() < 1]")) + assert_equal(["5"], + XPath.match(context, "preceding-sibling::f[position() < 2]").map {|n| n.attributes["id"] }) + assert_equal(["4", "5"], + XPath.match(context, "preceding-sibling::f[position() < 3]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_less_than_or_equal + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], XPath.match(context, "preceding-sibling::f[position() <= 0]")) + assert_equal(["4", "5"], + XPath.match(context, "preceding-sibling::f[position() <= 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_greater_than + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() > 0]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4"], + XPath.match(context, "preceding-sibling::f[position() > 1]").map {|n| n.attributes["id"] }) + assert_equal(["3"], + XPath.match(context, "preceding-sibling::f[position() > 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_greater_than_or_equal + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() >= 0]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() >= 1]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4"], + XPath.match(context, "preceding-sibling::f[position() >= 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_multiple_anchors + doc = Document.new(<<~XML) + + + + + + + + + + + + + + + + + + + + + + + + XML + + assert_equal(%w[2 7 9], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() = 3]").map {|n| n.attributes["id"] }) + assert_equal(%w[2 3 4 7 8 9 10 11], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() <= 3]").map {|n| n.attributes["id"] }) + assert_equal(%w[1 2 3 4 5 6 7 8], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() >= 4]").map {|n| n.attributes["id"] }) + end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 3d96215f..882250a8 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -492,6 +492,14 @@ def test_following_sibling_predicates assert_equal(["w", "x", "y", "z"], matches.map(&:name)) end + def test_following_sibling_position_less_than + source = "" + doc = REXML::Document.new(source) + assert_equal([], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 1]")) + assert_equal(["b"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 2]").map(&:name)) + assert_equal(["b", "c"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 3]").map(&:name)) + end + def test_preceding_sibling_across_multiple_nodes source = <<-XML diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb index 278e3765..f25bd758 100644 --- a/test/xpath/test_predicate.rb +++ b/test/xpath/test_predicate.rb @@ -59,12 +59,50 @@ def test_predicates_multi assert_equal( "1", m[0].attributes["id"] ) end + def test_predicate_multi_position + xml = <<~XML + + + + + XML + doc = REXML::Document.new(xml) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1]") + assert_equal(%w[2 3 4 5 7 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1]") + assert_equal(%w[3 4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3']") + assert_equal(%w[4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3'][position()!=2]") + assert_equal(%w[4 8 10], result.map { |node| node.attributes["id"] }) + end + def do_path( path ) m = REXML::XPath.match( @doc, path ) #puts path, @parser.parse( path ).inspect return m end + def test_predicate_float_literal + doc = REXML::Document.new("") + # [N.0] is equivalent to [position() = N.0] = [position() = N] + assert_equal(["a"], REXML::XPath.match(doc, "/r/*[1.0]").map(&:name)) + assert_equal(["b"], REXML::XPath.match(doc, "/r/*[2.0]").map(&:name)) + # Non-integer numeric literals match no node. + assert_equal([], REXML::XPath.match(doc, "/r/*[1.5]")) + end + + def test_predicate_variable_as_position + doc = REXML::Document.new("") + parser = REXML::XPathParser.new + parser["x"] = 2 + assert_equal(["b"], parser.parse("/r/*[$x]", doc).map(&:name)) + end + def test_get_no_siblings_terminal_nodes source = <<-XML