diff --git a/NEWS.md b/NEWS.md index 51a45cab..7f95d829 100644 --- a/NEWS.md +++ b/NEWS.md @@ -386,7 +386,7 @@ * Patch by NAITOH Jun. - * Improved parse performance when an attribute has many `<`s. + * Improved parse performance when an attribute has many `>`s. * GH-126 diff --git a/benchmark/parse_cdata.yaml b/benchmark/parse_cdata.yaml new file mode 100644 index 00000000..cde04306 --- /dev/null +++ b/benchmark/parse_cdata.yaml @@ -0,0 +1,50 @@ +loop_count: 100 +contexts: + - gems: + rexml: 3.2.6 + require: false + prelude: require 'rexml' + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + - name: 3.2.6(YJIT) + gems: + rexml: 3.2.6 + require: false + prelude: | + require 'rexml' + RubyVM::YJIT.enable + - name: master(YJIT) + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + RubyVM::YJIT.enable + +prelude: | + require 'rexml/document' + require 'rexml/parsers/sax2parser' + require 'rexml/parsers/pullparser' + require 'rexml/parsers/streamparser' + require 'rexml/streamlistener' + + def build_xml(size) + xml = "\n" + + "Test\n" + + "\n" + end + xml = build_xml(100000) + + class Listener + include REXML::StreamListener + end + +benchmark: + 'dom' : REXML::Document.new(xml) + 'sax' : REXML::Parsers::SAX2Parser.new(xml).parse + 'pull' : | + parser = REXML::Parsers::PullParser.new(xml) + while parser.has_next? + parser.pull + end + 'stream' : REXML::Parsers::StreamParser.new(xml, Listener.new).parse diff --git a/benchmark/parse_comment.yaml b/benchmark/parse_comment.yaml new file mode 100644 index 00000000..a0a3a771 --- /dev/null +++ b/benchmark/parse_comment.yaml @@ -0,0 +1,36 @@ +loop_count: 100 +contexts: + - gems: + rexml: 3.2.6 + require: false + prelude: require 'rexml' + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + - name: 3.2.6(YJIT) + gems: + rexml: 3.2.6 + require: false + prelude: | + require 'rexml' + RubyVM::YJIT.enable + - name: master(YJIT) + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + RubyVM::YJIT.enable + +prelude: | + require 'rexml/document' + + SIZE = 100000 + + top_level_xml = "\n" + in_doctype_xml = "]>" + after_doctype_xml = "" + +benchmark: + 'top_level' : REXML::Document.new(top_level_xml) + 'in_doctype' : REXML::Document.new(in_doctype_xml) + 'after_doctype' : REXML::Document.new(after_doctype_xml) diff --git a/benchmark/xpath.yaml b/benchmark/xpath.yaml new file mode 100644 index 00000000..d6e970eb --- /dev/null +++ b/benchmark/xpath.yaml @@ -0,0 +1,32 @@ +loop_count: 100 +contexts: + - gems: + rexml: 3.2.6 + require: false + prelude: require 'rexml' + - name: master + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + - name: 3.2.6(YJIT) + gems: + rexml: 3.2.6 + require: false + prelude: | + require 'rexml' + RubyVM::YJIT.enable + - name: master(YJIT) + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + RubyVM::YJIT.enable + +prelude: | + require 'rexml/document' + + DEPTH = 100 + xml = '' * DEPTH + '' * DEPTH + doc = REXML::Document.new(xml) + +benchmark: + "REXML::XPath.match(REXML::Document.new(xml), 'a//a')" : REXML::XPath.match(doc, "a//a") diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index fe48745c..c5673249 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -130,10 +130,7 @@ def to_string end def doctype - if @element - doc = @element.document - doc.doctype if doc - end + @element&.document&.doctype end # Returns the attribute value, with entities replaced @@ -173,7 +170,7 @@ def element=( element ) @element = element if @normalized - Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype ) + Text.check( @normalized, NEEDS_A_SECOND_CHECK ) end self @@ -202,9 +199,11 @@ def inspect end def xpath - path = @element.xpath - path += "/@#{self.expanded_name}" - return path + @element.xpath + "/@#{self.expanded_name}" + end + + def document + @element&.document end end end diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index 997f5a08..264ad642 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -58,7 +58,7 @@ def value # c = CData.new( " Some text " ) # c.write( $stdout ) #-> def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1) + Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1) indent( output, indent ) output << START output << @string diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb index cc6e9a47..40abde87 100644 --- a/lib/rexml/child.rb +++ b/lib/rexml/child.rb @@ -83,8 +83,7 @@ def previous_sibling=(other) # Returns:: the document this child belongs to, or nil if this child # belongs to no document def document - return parent.document unless parent.nil? - nil + parent&.document end # This doesn't yet handle encodings diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb index 52c58b46..e7e104d4 100644 --- a/lib/rexml/comment.rb +++ b/lib/rexml/comment.rb @@ -48,7 +48,7 @@ def clone # ie_hack:: # Needed for conformity to the child API, but not used by this class. def write( output, indent=-1, transitive=false, ie_hack=false ) - Kernel.warn("Comment.write is deprecated. See REXML::Formatters", uplevel: 1) + Kernel.warn("#{self.class.name}#write is deprecated. See REXML::Formatters", uplevel: 1) indent( output, indent ) output << START output << @string diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index f3590484..a9cf9f7e 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -171,15 +171,11 @@ def write( output, indent=0, transitive=false, ie_hack=false ) end def context - if @parent - @parent.context - else - nil - end + @parent&.context end def entity( name ) - @entities[name].unnormalized if @entities[name] + @entities[name]&.unnormalized end def add child @@ -288,8 +284,7 @@ def initialize name, middle, pub, sys end def to_s - context = nil - context = parent.context if parent + context = parent&.context notation = " $stdout, :indent => -1, :transtive => false, :ie_hack => false, :encoding => nil}) + # doc.write(output=$stdout, indent=-1, transitive=false, ie_hack=false, encoding=nil) + # doc.write(options={:output => $stdout, :indent => -1, :transitive => false, :ie_hack => false, :encoding => nil}) # # Write the XML tree out, optionally with indent. This writes out the # entire XML document, including XML declarations, doctype declarations, @@ -415,7 +415,7 @@ def Document::entity_expansion_limit=( val ) # # Deprecated. Use REXML::Security.entity_expansion_limit= instead. def Document::entity_expansion_limit - return Security.entity_expansion_limit + Security.entity_expansion_limit end # Set the entity expansion limit. By default the limit is set to 10240. @@ -429,7 +429,7 @@ def Document::entity_expansion_text_limit=( val ) # # Deprecated. Use REXML::Security.entity_expansion_text_limit instead. def Document::entity_expansion_text_limit - return Security.entity_expansion_text_limit + Security.entity_expansion_text_limit end attr_reader :entity_expansion_count @@ -448,6 +448,20 @@ def document end private + + attr_accessor :namespaces_cache + + # New document level cache is created and available in this block. + # This API is thread unsafe. Users can't change this document in this block. + def enable_cache + @namespaces_cache = {} + begin + yield + ensure + @namespaces_cache = nil + end + end + def build( source ) Parsers::TreeParser.new( source, self ).parse end diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 4e3a60b9..0d74811e 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -473,8 +473,7 @@ def root # Related: #root, #root_node. # def document - rt = root - rt.parent if rt + root&.parent end # :call-seq: @@ -566,7 +565,7 @@ def prefixes prefixes = [] prefixes = parent.prefixes if parent prefixes |= attributes.prefixes - return prefixes + prefixes end # :call-seq: @@ -589,10 +588,12 @@ def prefixes # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} # def namespaces - namespaces = {} - namespaces = parent.namespaces if parent - namespaces = namespaces.merge( attributes.namespaces ) - return namespaces + namespaces_cache = document&.__send__(:namespaces_cache) + if namespaces_cache + namespaces_cache[self] ||= calculate_namespaces + else + calculate_namespaces + end end # :call-seq: @@ -619,19 +620,11 @@ def namespace(prefix=nil) if prefix.nil? prefix = prefix() end - if prefix == '' - prefix = "xmlns" - else - prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' - end - ns = nil - target = self - while ns.nil? and target - ns = target.attributes[prefix] - target = target.parent - end + prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") + ns = namespaces[prefix] + ns = '' if ns.nil? and prefix == 'xmlns' - return ns + ns end # :call-seq: @@ -963,7 +956,7 @@ def get_elements( xpath ) def next_element element = next_sibling element = element.next_sibling until element.nil? or element.kind_of? Element - return element + element end # :call-seq: @@ -979,7 +972,7 @@ def next_element def previous_element element = previous_sibling element = element.previous_sibling until element.nil? or element.kind_of? Element - return element + element end @@ -1029,8 +1022,7 @@ def has_text? # def text( path = nil ) rv = get_text(path) - return rv.value unless rv.nil? - nil + rv&.value end # :call-seq: @@ -1058,7 +1050,7 @@ def get_text path = nil else rv = @children.find { |node| node.kind_of? Text } end - return rv + rv end # :call-seq: @@ -1102,7 +1094,7 @@ def text=( text ) old_text.replace_with( text ) end end - return self + self end # :call-seq: @@ -1153,7 +1145,7 @@ def add_text( text ) text = Text.new( text, whitespace(), nil, raw() ) end self << text unless text.nil? - return self + self end # :call-seq: @@ -1197,7 +1189,7 @@ def xpath cur = cur.parent path_elements << __to_xpath_helper( cur ) end - return path_elements.reverse.join( "/" ) + path_elements.reverse.join( "/" ) end ################################################# @@ -1299,7 +1291,6 @@ def attribute( name, namespace=nil ) return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] ) attributes.get_attribute( name ) - end # :call-seq: @@ -1313,7 +1304,7 @@ def attribute( name, namespace=nil ) # b.has_attributes? # => false # def has_attributes? - return !@attributes.empty? + !@attributes.empty? end # :call-seq: @@ -1502,7 +1493,7 @@ def texts # doc.write( out ) #-> doc is written to the string 'out' # doc.write( $stdout ) #-> doc written to the console def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) - Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1) + Kernel.warn("#{self.class.name}#write is deprecated. See REXML::Formatters", uplevel: 1) formatter = if indent > -1 if transitive require_relative "formatters/transitive" @@ -1516,8 +1507,15 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) formatter.write( self, output ) end - private + def calculate_namespaces + if parent + parent.namespaces.merge(attributes.namespaces) + else + attributes.namespaces + end + end + def __to_xpath_helper node rv = node.expanded_name.clone if node.parent @@ -1684,11 +1682,7 @@ def []( index, name=nil) (num += 1) == index } else - return XPath::first( @element, index ) - #{ |element| - # return element if element.kind_of? Element - #} - #return nil + XPath::first( @element, index ) end end @@ -1735,7 +1729,7 @@ def []=( index, element ) else previous.replace_with element end - return previous + previous end # :call-seq: @@ -1774,7 +1768,7 @@ def index element child == element end return rv if found == element - return -1 + -1 end # :call-seq: @@ -1853,7 +1847,7 @@ def delete_all( xpath ) @element.delete element element.remove end - return rv + rv end # :call-seq: @@ -2180,8 +2174,7 @@ def initialize element # def [](name) attr = get_attribute(name) - return attr.value unless attr.nil? - return nil + attr&.value end # :call-seq: @@ -2324,11 +2317,11 @@ def get_attribute( name ) return attr end end - element_document = @element.document - if element_document and element_document.doctype + doctype = @element.document&.doctype + if doctype expn = @element.expanded_name - expn = element_document.doctype.name if expn.size == 0 - attr_val = element_document.doctype.attribute_of(expn, name) + expn = doctype.name if expn.size == 0 + attr_val = doctype.attribute_of(expn, name) return Attribute.new( name, attr_val ) if attr_val end return nil @@ -2336,7 +2329,7 @@ def get_attribute( name ) if attr.kind_of? Hash attr = attr[ @element.prefix ] end - return attr + attr end # :call-seq: @@ -2370,8 +2363,9 @@ def []=( name, value ) end unless value.kind_of? Attribute - if @element.document and @element.document.doctype - value = Text::normalize( value, @element.document.doctype ) + doctype = @element.document&.doctype + if doctype + value = Text::normalize( value, doctype ) else value = Text::normalize( value, nil ) end @@ -2389,7 +2383,7 @@ def []=( name, value ) else store value.name, value end - return @element + @element end # :call-seq: @@ -2408,10 +2402,11 @@ def prefixes each_attribute do |attribute| ns << attribute.name if attribute.prefix == 'xmlns' end - if @element.document and @element.document.doctype + doctype = @element.document&.doctype + if doctype expn = @element.expanded_name - expn = @element.document.doctype.name if expn.size == 0 - @element.document.doctype.attributes_of(expn).each { + expn = doctype.name if expn.size == 0 + doctype.attributes_of(expn).each { |attribute| ns << attribute.name if attribute.prefix == 'xmlns' } @@ -2433,10 +2428,11 @@ def namespaces each_attribute do |attribute| namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' end - if @element.document and @element.document.doctype + doctype = @element.document&.doctype + if doctype expn = @element.expanded_name - expn = @element.document.doctype.name if expn.size == 0 - @element.document.doctype.attributes_of(expn).each { + expn = doctype.name if expn.size == 0 + doctype.attributes_of(expn).each { |attribute| namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' } @@ -2491,9 +2487,7 @@ def delete( attribute ) old.each_value{|v| repl = v} store name, repl end - elsif old.nil? - return @element - else # the supplied attribute is a top-level one + elsif old # the supplied attribute is a top-level one super(name) end @element @@ -2547,7 +2541,7 @@ def delete_all( name ) rv << attribute if attribute.expanded_name == name } rv.each{ |attr| attr.remove } - return rv + rv end # :call-seq: diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index da2d70d6..7eb05f4d 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -5,7 +5,7 @@ module Encoding # ID ---> Encoding name attr_reader :encoding def encoding=(encoding) - encoding = encoding.name if encoding.is_a?(Encoding) + encoding = encoding.name if encoding.is_a?(::Encoding) if encoding.is_a?(String) original_encoding = encoding encoding = find_encoding(encoding) @@ -13,12 +13,9 @@ def encoding=(encoding) raise ArgumentError, "Bad encoding name #{original_encoding}" end end + encoding = encoding.upcase if encoding return false if defined?(@encoding) and encoding == @encoding - if encoding - @encoding = encoding.upcase - else - @encoding = 'UTF-8' - end + @encoding = encoding || "UTF-8" true end diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index 4c114616..60ae34e7 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -39,11 +39,11 @@ def Functions::context=(value); @@context = value; end def Functions::text( ) if @@context[:node].node_type == :element - return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value} + @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value} elsif @@context[:node].node_type == :text - return @@context[:node].value + @@context[:node].value else - return false + false end end diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb index 318741f0..a3dfbbec 100644 --- a/lib/rexml/instruction.rb +++ b/lib/rexml/instruction.rb @@ -49,7 +49,7 @@ def clone # See the rexml/formatters package # def write writer, indent=-1, transitive=false, ie_hack=false - Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1) + Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1) indent(writer, indent) writer << START writer << @target diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb index 2e67252a..232b7ca4 100644 --- a/lib/rexml/namespace.rb +++ b/lib/rexml/namespace.rb @@ -42,11 +42,11 @@ def name=( name ) # Compares names optionally WITH namespaces def has_name?( other, ns=nil ) if ns - return (namespace() == ns and name() == other) + namespace() == ns and name() == other elsif other.include? ":" - return fully_expanded_name == other + fully_expanded_name == other else - return name == other + name == other end end @@ -57,7 +57,7 @@ def has_name?( other, ns=nil ) def fully_expanded_name ns = prefix return "#{ns}:#@name" if ns.size > 0 - return @name + @name end end end diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index c771db70..bccacc51 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -26,7 +26,7 @@ def previous_sibling_node # REXML::Formatters package for changing the output style. def to_s indent=nil unless indent.nil? - Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1) + Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1) f = REXML::Formatters::Pretty.new( indent ) f.write( self, rv = "" ) else @@ -68,7 +68,7 @@ def find_first_recursive(&block) # :yields: node each_recursive {|node| return node if block.call(node) } - return nil + nil end # Returns the position that +self+ holds in its parent's array, indexed diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 44aacfa2..a87657b5 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -206,12 +206,12 @@ def position # Returns true if there are no more events def empty? - return (@source.empty? and @stack.empty?) + (@source.empty? and @stack.empty?) end # Returns true if there are more events. Synonymous with !empty? def has_next? - return !(@source.empty? and @stack.empty?) + !(@source.empty? and @stack.empty?) end # Push an event back on the head of the stream. This method @@ -277,14 +277,7 @@ def pull_event return process_instruction elsif @source.match?("/um, true) - if md.nil? - raise REXML::ParseException.new("Unclosed comment", @source) - end - if /--|-\z/.match?(md[1]) - raise REXML::ParseException.new("Malformed comment", @source) - end - return [ :comment, md[1] ] + return [ :comment, process_comment ] elsif @source.match?("DOCTYPE", true) base_error_message = "Malformed DOCTYPE" unless @source.match?(/\s+/um, true) @@ -417,20 +410,17 @@ def pull_event raise REXML::ParseException.new(message, @source) end return [:notationdecl, name, *id] - elsif md = @source.match(/--(.*?)-->/um, true) - case md[1] - when /--/, /-\z/ - raise REXML::ParseException.new("Malformed comment", @source) - end - return [ :comment, md[1] ] if md + elsif @source.match?("--", true) + return [ :comment, process_comment ] + else + raise REXML::ParseException.new("Malformed node: Started with '/um, true) @document_status = :after_doctype return [ :end_doctype ] - end - if @document_status == :in_doctype + else raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) end end @@ -460,23 +450,19 @@ def pull_event end return [ :end_element, last_tag ] elsif @source.match?("!", true) - md = @source.match(/([^>]*>)/um) #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" - raise REXML::ParseException.new("Malformed node", @source) unless md - if md[0][0] == ?- - md = @source.match(/--(.*?)-->/um, true) - - if md.nil? || /--|-\z/.match?(md[1]) - raise REXML::ParseException.new("Malformed comment", @source) + if @source.match?("--", true) + return [ :comment, process_comment ] + elsif @source.match?("[CDATA[", true) + text = @source.read_until("]]>") + if text.chomp!("]]>") + return [ :cdata, text ] + else + raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source) end - - return [ :comment, md[1] ] else - md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) - return [ :cdata, md[1] ] if md + raise REXML::ParseException.new("Malformed node: Started with '") + unless text.chomp!("-->") + raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source) + end + + if text.include? "--" or text.end_with?("-") + raise REXML::ParseException.new("Malformed comment", @source) + end + text + end + def process_instruction name = parse_name("Malformed XML: Invalid processing instruction node") if @source.match?(/\s+/um, true) diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index bd3b6856..a6d76fdc 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -215,7 +215,7 @@ def predicate_to_path(parsed, &block) else path << yield( parsed ) end - return path.squeeze(" ") + path.squeeze(" ") end # For backward compatibility alias_method :preciate_to_string, :predicate_to_path @@ -252,7 +252,7 @@ def LocationPath path, parsed path = path[1..-1] end end - return RelativeLocationPath( path, parsed ) if path.size > 0 + RelativeLocationPath( path, parsed ) if path.size > 0 end #RelativeLocationPath @@ -388,7 +388,7 @@ def NodeTest path, parsed else path = original_path end - return path + path end # Filters the supplied nodeset on the predicate(s) @@ -600,7 +600,7 @@ def PathExpr path, parsed end rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/ parsed.concat(n) - return rest + rest end #| FilterExpr Predicate diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb index a0466b25..cded06f5 100644 --- a/lib/rexml/quickpath.rb +++ b/lib/rexml/quickpath.rb @@ -41,7 +41,7 @@ def QuickPath::match element, path, namespaces=EMPTY_HASH else results = filter([element], path) end - return results + results end # Given an array of nodes it filters the array based on the path. The @@ -51,18 +51,18 @@ def QuickPath::filter elements, path return elements if path.nil? or path == '' or elements.size == 0 case path when /^\/\//u # Descendant - return axe( elements, "descendant-or-self", $' ) + axe( elements, "descendant-or-self", $' ) when /^\/?\b(\w[-\w]*)\b::/u # Axe - return axe( elements, $1, $' ) + axe( elements, $1, $' ) when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child rest = $' results = [] elements.each do |element| results |= filter( element.to_a, rest ) end - return results + results when /^\/?(\w[-\w]*)\(/u # / Function - return function( elements, $1, $' ) + function( elements, $1, $' ) when Namespace::NAMESPLIT # Element name name = $2 ns = $1 @@ -73,21 +73,21 @@ def QuickPath::filter elements, path (element.name == name and element.namespace == Functions.namespace_context[ns]))) end - return filter( elements, rest ) + filter( elements, rest ) when /^\/\[/u matches = [] elements.each do |element| matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element end - return matches + matches when /^\[/u # Predicate - return predicate( elements, path ) + predicate( elements, path ) when /^\/?\.\.\./u # Ancestor - return axe( elements, "ancestor", $' ) + axe( elements, "ancestor", $' ) when /^\/?\.\./u # Parent - return filter( elements.collect{|e|e.parent}, $' ) + filter( elements.collect{|e|e.parent}, $' ) when /^\/?\./u # Self - return filter( elements, $' ) + filter( elements, $' ) when /^\*/u # Any results = [] elements.each do |element| @@ -98,9 +98,10 @@ def QuickPath::filter elements, path # results |= filter( children, $' ) #end end - return results + results + else + [] end - return [] end def QuickPath::axe( elements, axe_name, rest ) @@ -138,7 +139,7 @@ def QuickPath::axe( elements, axe_name, rest ) matches = filter(elements.collect{|element| element.previous_sibling}.uniq, rest ) end - return matches.uniq + matches.uniq end OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)' @@ -200,15 +201,15 @@ def QuickPath::predicate( elements, path ) results << element end end - return filter( results, rest ) + filter( results, rest ) end def QuickPath::attribute( name ) - return Functions.node.attributes[name] if Functions.node.kind_of? Element + Functions.node.attributes[name] if Functions.node.kind_of? Element end def QuickPath::name() - return Functions.node.name if Functions.node.kind_of? Element + Functions.node.name if Functions.node.kind_of? Element end def QuickPath::method_missing( id, *args ) @@ -234,7 +235,7 @@ def QuickPath::function( elements, fname, rest ) results << element if Functions.pair[0] == res end end - return results + results end def QuickPath::parse_args( element, string ) diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index a653f028..bf3c0d32 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -31,7 +31,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.4.1" + VERSION = "3.4.2" REVISION = "" Copyright = COPYRIGHT diff --git a/lib/rexml/security.rb b/lib/rexml/security.rb index 99b74607..e8e8c6b4 100644 --- a/lib/rexml/security.rb +++ b/lib/rexml/security.rb @@ -10,7 +10,7 @@ def self.entity_expansion_limit=( val ) # Get the entity expansion limit. By default the limit is set to 10000. def self.entity_expansion_limit - return @@entity_expansion_limit + @@entity_expansion_limit end @@entity_expansion_text_limit = 10_240 @@ -22,7 +22,7 @@ def self.entity_expansion_text_limit=( val ) # Get the entity expansion limit. By default the limit is set to 10240. def self.entity_expansion_text_limit - return @@entity_expansion_text_limit + @@entity_expansion_text_limit end end end diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 5ba5ab12..3ec1141e 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -67,7 +67,7 @@ class Source module Private SCANNER_RESET_SIZE = 100000 PRE_DEFINED_TERM_PATTERNS = {} - pre_defined_terms = ["'", '"', "<"] + pre_defined_terms = ["'", '"', "<", "]]>"] if StringScanner::Version < "3.1.1" pre_defined_terms.each do |term| PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 2bf480fb..8799d89d 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -104,16 +104,16 @@ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, @entity_filter = entity_filter if entity_filter clear_cache - Text.check(@string, illegal, doctype) if @raw + Text.check(@string, illegal) if @raw end def parent= parent super(parent) - Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent + Text.check(@string, NEEDS_A_SECOND_CHECK) if @raw and @parent end # check for illegal characters - def Text.check string, pattern, doctype + def Text.check string, pattern, doctype = nil # illegal anywhere if !string.match?(VALID_XML_CHARS) @@ -177,7 +177,7 @@ def empty? def clone - return Text.new(self, true) + Text.new(self, true) end @@ -200,10 +200,7 @@ def <=>( other ) end def doctype - if @parent - doc = @parent.document - doc.doctype if doc - end + @parent&.document&.doctype end REFERENCE = /#{Entity::REFERENCE}/ @@ -264,10 +261,10 @@ def wrap(string, width, addnewline=false) # Recursively wrap string at width. return string if string.length <= width place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) + if addnewline + "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) + string[0,place] + "\n" + wrap(string[place+1..-1], width) end end @@ -280,14 +277,14 @@ def indent_text(string, level=1, style="\t", indentfirstline=true) new_string << new_line } new_string.strip! unless indentfirstline - return new_string + new_string end # == DEPRECATED # See REXML::Formatters # def write( writer, indent=-1, transitive=false, ie_hack=false ) - Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1) + Kernel.warn("#{self.class.name}#write is deprecated. See REXML::Formatters", uplevel: 1) formatter = if indent > -1 REXML::Formatters::Pretty.new( indent ) else @@ -299,9 +296,7 @@ def write( writer, indent=-1, transitive=false, ie_hack=false ) # FIXME # This probably won't work properly def xpath - path = @parent.xpath - path += "/text()" - return path + @parent.xpath + "/text()" end # Writes out text, substituting special characters beforehand. diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb index f29a2c05..c6894dcb 100644 --- a/lib/rexml/validation/relaxng.rb +++ b/lib/rexml/validation/relaxng.rb @@ -157,16 +157,16 @@ def next( event ) if ( @events[@current].matches?(event) ) @current += 1 if @events[@current].nil? - return @previous.pop + @previous.pop elsif @events[@current].kind_of? State @current += 1 @events[@current-1].previous = self - return @events[@current-1] + @events[@current-1] else - return self + self end else - return nil + nil end end @@ -186,7 +186,7 @@ def inspect end def expected - return [@events[@current]] + [@events[@current]] end def <<( event ) @@ -244,7 +244,7 @@ def generate_event( event ) evt = :end_attribute end end - return Event.new( evt, arg ) + Event.new( evt, arg ) end end @@ -262,9 +262,10 @@ def next( event ) rv = super return rv if rv @prior = @previous.pop - return @prior.next( event ) + @prior.next( event ) + else + super end - super end def matches?(event) @@ -274,7 +275,7 @@ def matches?(event) def expected return [ @prior.expected, @events[0] ].flatten if @current == 0 - return [@events[@current]] + [@events[@current]] end end @@ -286,24 +287,24 @@ def next( event ) @current += 1 if @events[@current].nil? @current = 0 - return self + self elsif @events[@current].kind_of? State @current += 1 @events[@current-1].previous = self - return @events[@current-1] + @events[@current-1] else - return self + self end else @prior = @previous.pop return @prior.next( event ) if @current == 0 - return nil + nil end end def expected return [ @prior.expected, @events[0] ].flatten if @current == 0 - return [@events[@current]] + [@events[@current]] end end @@ -326,17 +327,17 @@ def next( event ) @ord += 1 if @events[@current].nil? @current = 0 - return self + self elsif @events[@current].kind_of? State @current += 1 @events[@current-1].previous = self - return @events[@current-1] + @events[@current-1] else - return self + self end else return @previous.pop.next( event ) if @current == 0 and @ord > 0 - return nil + nil end end @@ -347,9 +348,9 @@ def matches?( event ) def expected if @current == 0 and @ord > 0 - return [@previous[-1].expected, @events[0]].flatten + [@previous[-1].expected, @events[0]].flatten else - return [@events[@current]] + [@events[@current]] end end end @@ -403,7 +404,7 @@ def matches?( event ) def expected return [@events[@current]] if @events.size > 0 - return @choices.collect do |x| + @choices.collect do |x| if x[0].kind_of? State x[0].expected else @@ -490,16 +491,16 @@ def next( event ) @current += 1 if @events[@current].nil? return self unless @choices[@choice].nil? - return @previous.pop + @previous.pop elsif @events[@current].kind_of? State @current += 1 @events[@current-1].previous = self - return @events[@current-1] + @events[@current-1] else - return self + self end else - return nil + nil end end @@ -510,7 +511,7 @@ def matches?( event ) def expected return [@events[@current]] if @events[@current] - return @choices[@choice..-1].collect do |x| + @choices[@choice..-1].collect do |x| if x[0].kind_of? State x[0].expected else diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb index 0ad6ada4..6475c628 100644 --- a/lib/rexml/validation/validation.rb +++ b/lib/rexml/validation/validation.rb @@ -80,26 +80,26 @@ def done? end def single? - return (@event_type != :start_element and @event_type != :start_attribute) + (@event_type != :start_element and @event_type != :start_attribute) end def matches?( event ) return false unless event[0] == @event_type case event[0] when nil - return true + true when :start_element - return true if event[1] == @event_arg + event[1] == @event_arg when :end_element - return true + true when :start_attribute - return true if event[1] == @event_arg + event[1] == @event_arg when :end_attribute - return true + true when :end_document - return true + true when :text - return (@event_arg.nil? or @event_arg == event[1]) + @event_arg.nil? || @event_arg == event[1] =begin when :processing_instruction false diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb index a0921bd8..eed0300c 100644 --- a/lib/rexml/xpath.rb +++ b/lib/rexml/xpath.rb @@ -31,12 +31,7 @@ class XPath def XPath::first(element, path=nil, namespaces=nil, variables={}, options={}) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash) - parser = XPathParser.new(**options) - parser.namespaces = namespaces - parser.variables = variables - path = "*" unless path - element = [element] unless element.kind_of? Array - parser.parse(path, element).flatten[0] + match(element, path, namespaces, variables, options).flatten[0] end # Iterates over nodes that match the given path, calling the supplied @@ -60,12 +55,7 @@ def XPath::first(element, path=nil, namespaces=nil, variables={}, options={}) def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash) - parser = XPathParser.new(**options) - parser.namespaces = namespaces - parser.variables = variables - path = "*" unless path - element = [element] unless element.kind_of? Array - parser.parse(path, element).each( &block ) + match(element, path, namespaces, variables, options).each( &block ) end # Returns an array of nodes matching a given XPath. @@ -74,7 +64,6 @@ def XPath::match(element, path=nil, namespaces=nil, variables={}, options={}) parser.namespaces = namespaces parser.variables = variables path = "*" unless path - element = [element] unless element.kind_of? Array parser.parse(path,element) end end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 5eb1e5a9..64c8846a 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -76,19 +76,32 @@ def variables=( vars={} ) @variables = vars end - def parse path, nodeset + def parse path, node path_stack = @parser.parse( path ) - match( path_stack, nodeset ) + if node.is_a?(Array) + Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1) + return [] if node.empty? + node = node.first + end + + document = node.document + if document + document.__send__(:enable_cache) do + match( path_stack, node ) + end + else + match( path_stack, node ) + end end - def get_first path, nodeset + def get_first path, node path_stack = @parser.parse( path ) - first( path_stack, nodeset ) + first( path_stack, node ) end - def predicate path, nodeset + def predicate path, node path_stack = @parser.parse( path ) - match( path_stack, nodeset ) + match( path_stack, node ) end def []=( variable_name, value ) @@ -106,7 +119,7 @@ def first( path_stack, node ) case path[0] when :document # do nothing - return first( path[1..-1], node ) + first( path[1..-1], node ) when :child for c in node.children r = first( path[1..-1], c ) @@ -116,9 +129,9 @@ def first( path_stack, node ) name = path[2] if node.name == name return node if path.size == 3 - return first( path[3..-1], node ) + first( path[3..-1], node ) else - return nil + nil end when :descendant_or_self r = first( path[1..-1], node ) @@ -128,23 +141,21 @@ def first( path_stack, node ) return r if r end when :node - return first( path[1..-1], node ) + first( path[1..-1], node ) when :any - return first( path[1..-1], node ) + first( path[1..-1], node ) + else + nil end - return nil end - def match(path_stack, nodeset) - nodeset = nodeset.collect.with_index do |node, i| - position = i + 1 - XPathNode.new(node, position: position) - end + def match(path_stack, node) + nodeset = [XPathNode.new(node, position: 1)] result = expr(path_stack, nodeset) case result when Array # nodeset - unnode(result) + unnode(result).uniq else [result] end @@ -162,10 +173,10 @@ def strict? # 2. If no mapping was supplied, use the context node to look up the namespace def get_namespace( node, prefix ) if @namespaces - return @namespaces[prefix] || '' + @namespaces[prefix] || '' else return node.namespace( prefix ) if node.node_type == :element - return '' + '' end end @@ -492,14 +503,10 @@ def node_test(path_stack, nodesets, any_type: :element) if strict? raw_node.name == name and raw_node.namespace == "" else - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace(raw_node, prefix) - raw_node.name == name and raw_node.namespace == ns + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) end else - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace(raw_node, prefix) - raw_node.name == name and raw_node.namespace == ns + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) end when :attribute if prefix.nil? @@ -507,9 +514,7 @@ def node_test(path_stack, nodesets, any_type: :element) elsif prefix.empty? raw_node.name == name and raw_node.namespace == "" else - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace(raw_node.element, prefix) - raw_node.name == name and raw_node.namespace == ns + raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) end else false @@ -671,7 +676,7 @@ def sort(array_of_nodes, order) if order == :forward index else - -index + index.map(&:-@) end end ordered.collect do |_index, node| @@ -758,22 +763,19 @@ def following(node) end def following_node_of( node ) - if node.kind_of? Element and node.children.size > 0 - return node.children[0] - end - return next_sibling_node(node) + return node.children[0] if node.kind_of?(Element) and node.children.size > 0 + + next_sibling_node(node) end def next_sibling_node(node) psn = node.next_sibling_node while psn.nil? - if node.parent.nil? or node.parent.class == Document - return nil - end + return nil if node.parent.nil? or node.parent.class == Document node = node.parent psn = node.next_sibling_node end - return psn + psn end def child(nodeset) @@ -806,13 +808,13 @@ def child(nodeset) def norm b case b when true, false - return b + b when 'true', 'false' - return Functions::boolean( b ) + Functions::boolean( b ) when /^\d+(\.\d+)?$/, Numeric - return Functions::number( b ) + Functions::number( b ) else - return Functions::string( b ) + Functions::string( b ) end end diff --git a/test/parse/test_cdata.rb b/test/parse/test_cdata.rb index b5f1a3bc..c742d6a1 100644 --- a/test/parse/test_cdata.rb +++ b/test/parse/test_cdata.rb @@ -7,10 +7,28 @@ module REXMLTests class TestParseCData < Test::Unit::TestCase include Test::Unit::CoreAssertions + def parse(xml) + REXML::Document.new(xml) + end + def test_linear_performance_gt seq = [10000, 50000, 100000, 150000, 200000] assert_linear_performance(seq, rehearsal: 10) do |n| - REXML::Document.new('" * n + ' ]]>') + parse('" * n + ' ]]>') + end + end + + class TestInvalid < self + def test_unclosed_cdata + exception = assert_raise(REXML::ParseException) do + parse("") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed CDATA: Missing end ']]>' + Line: 1 + Position: 25 + Last 80 unconsumed characters: + DETAIL end end end diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb index 4475dca7..6339835d 100644 --- a/test/parse/test_comment.rb +++ b/test/parse/test_comment.rb @@ -17,7 +17,7 @@ def test_toplevel_unclosed_comment parse("' Line: 1 Position: 4 Last 80 unconsumed characters: @@ -48,6 +48,31 @@ def test_toplevel_malformed_comment_end DETAIL end + def test_doctype_malformed_node + exception = assert_raise(REXML::ParseException) do + parse("' + Line: 1 + Position: 19 + Last 80 unconsumed characters: + DETAIL + end + def test_doctype_malformed_comment_inner exception = assert_raise(REXML::ParseException) do parse("") @@ -72,16 +97,28 @@ def test_doctype_malformed_comment_end DETAIL end - def test_after_doctype_malformed_comment_short + def test_after_doctype_malformed_node exception = assert_raise(REXML::ParseException) do - parse("") + parse("") + end + assert_equal(<<~DETAIL, exception.to_s) + Unclosed comment: Missing end '-->' Line: 1 Position: 8 Last 80 unconsumed characters: - --> DETAIL end diff --git a/test/parser/test_xpath.rb b/test/parser/test_xpath.rb index 9143d25c..5d62afee 100644 --- a/test/parser/test_xpath.rb +++ b/test/parser/test_xpath.rb @@ -4,7 +4,7 @@ require "rexml/parsers/xpathparser" module REXMLTests - class TestXPathParser < Test::Unit::TestCase + class TestParserXPathParser < Test::Unit::TestCase sub_test_case("#abbreviate") do def abbreviate(xpath) parser = REXML::Parsers::XPathParser.new diff --git a/test/test_core.rb b/test/test_core.rb index 34fe9e07..651056f2 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -653,18 +653,23 @@ def test_namespace assert_equal "Some text", out end - def test_add_namespace e = Element.new 'a' + assert_equal("", e.namespace) + assert_nil(e.namespace('foo')) e.add_namespace 'someuri' e.add_namespace 'foo', 'otheruri' e.add_namespace 'xmlns:bar', 'thirduri' - assert_equal 'someuri', e.attributes['xmlns'] - assert_equal 'otheruri', e.attributes['xmlns:foo'] - assert_equal 'thirduri', e.attributes['xmlns:bar'] + assert_equal("someuri", e.namespace) + assert_equal("otheruri", e.namespace('foo')) + assert_equal("otheruri", e.namespace('xmlns:foo')) + assert_equal("thirduri", e.namespace('bar')) + assert_equal("thirduri", e.namespace('xmlns:bar')) + assert_equal('someuri', e.attributes['xmlns']) + assert_equal('otheruri', e.attributes['xmlns:foo']) + assert_equal('thirduri', e.attributes['xmlns:bar']) end - def test_big_documentation d = File.open(fixture_path("documentation.xml")) {|f| Document.new f } assert_equal "Sean Russell", d.elements["documentation/head/author"].text.tr("\n\t", " ").squeeze(" ") @@ -764,9 +769,15 @@ def test_attributes_each def test_delete_namespace doc = Document.new "" + assert_equal("1", doc.root.namespace) + assert_equal("2", doc.root.namespace('x')) + assert_equal("2", doc.root.namespace('xmlns:x')) doc.root.delete_namespace doc.root.delete_namespace 'x' - assert_equal "", doc.to_s + assert_equal("", doc.to_s) + assert_equal("", doc.root.namespace) + assert_nil(doc.root.namespace('x')) + assert_nil(doc.root.namespace('xmlns:x')) end def test_each_element_with_attribute diff --git a/test/test_jaxen.rb b/test/test_jaxen.rb index 6038e88e..548120d6 100644 --- a/test/test_jaxen.rb +++ b/test/test_jaxen.rb @@ -56,7 +56,9 @@ def process_test_case(name) # processes a tests/document/context node def process_context(doc, context) - test_context = XPath.match(doc, context.attributes["select"]) + matched = XPath.match(doc, context.attributes["select"]) + assert_equal(1, matched.size) + test_context = matched.first namespaces = context.namespaces namespaces.delete("var") namespaces = nil if namespaces.empty? @@ -101,10 +103,14 @@ def process_nominal_test(context, variables, namespaces, test) assert_equal(Integer(expected, 10), matched.size, user_message(context, xpath, matched)) + else + assert_operator(matched.size, :>, 0, user_message(context, xpath, matched)) end XPath.each(test, "valueOf") do |value_of| - process_value_of(matched, variables, namespaces, value_of) + matched.each do |subcontext| + process_value_of(subcontext, variables, namespaces, value_of) + end end end @@ -118,10 +124,8 @@ def process_exceptional_test(context, variables, namespaces, test) def user_message(context, xpath, matched) message = "" - context.each_with_index do |node, i| - message << "Node#{i}:\n" - message << "#{node}\n" - end + message << "Node:\n" + message << "#{context}\n" message << "XPath: <#{xpath}>\n" message << "Matched <#{matched}>" message diff --git a/test/test_source.rb b/test/test_source.rb new file mode 100644 index 00000000..86755f37 --- /dev/null +++ b/test/test_source.rb @@ -0,0 +1,36 @@ +require "rexml/source" + +module REXMLTests + class TestSource < Test::Unit::TestCase + def setup + @source = REXML::Source.new(+"") + end + + sub_test_case("#encoding=") do + test("String") do + @source.encoding = "UTF-8" + assert_equal("UTF-8", @source.encoding) + end + + test("encoding_updated") do + def @source.n_encoding_updated_called + @n_encoding_updated_called + end + def @source.encoding_updated + super + @n_encoding_updated_called ||= 0 + @n_encoding_updated_called += 1 + end + @source.encoding = "shift-jis" + assert_equal(1, @source.n_encoding_updated_called) + @source.encoding = "Shift-JIS" + assert_equal(1, @source.n_encoding_updated_called) + end + + test("Encoding") do + @source.encoding = Encoding::UTF_8 + assert_equal("UTF-8", @source.encoding) + end + end + end +end diff --git a/test/test_text_check.rb b/test/test_text_check.rb index 11cf65a3..3f2f7864 100644 --- a/test/test_text_check.rb +++ b/test/test_text_check.rb @@ -4,7 +4,7 @@ module REXMLTests class TextCheckTester < Test::Unit::TestCase def check(string) - REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK, nil) + REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK) end def assert_check(string) diff --git a/test/test_xpath_parser.rb b/test/test_xpath_parser.rb new file mode 100644 index 00000000..bcb14c34 --- /dev/null +++ b/test/test_xpath_parser.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module REXMLTests + class TestXPathParser < Test::Unit::TestCase + def setup + @root_element = make_service_element(["urn:type1", "urn:type2"], ["http://uri"]) + @element = @root_element.children[0] + @parser = REXML::XPathParser.new + end + + def make_service_element(types, uris) + root_element = REXML::Element.new + element = root_element.add_element("Service") + types.each do |type_text| + element.add_element("Type").text = type_text + end + uris.each do |uri_text| + element.add_element("URI").text = uri_text + end + root_element + end + + def test_found + res = @parser.parse("/Service", @root_element) + assert_equal([@element], + res) + end + + def test_not_found + res = @parser.parse("/nonexistent", @root_element) + assert_equal([], + res) + end + end +end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 1dacd69d..764171ab 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -411,9 +411,107 @@ def test_preceding s = "" d = REXML::Document.new(s) - c = REXML::XPath.match( d, "//c[@id = '5']") - cs = REXML::XPath.match( c, "preceding::c" ) - assert_equal( 4, cs.length ) + c = REXML::XPath.match(d, "//c[@id = '5']") + assert_equal(1, c.length) + cs = REXML::XPath.match(c.first, "preceding::c") + assert_equal(4, cs.length) + end + + def test_preceding_multiple + source = <<-XML + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/d/preceding::*") + assert_equal(["d", "c", "b"], matches.map(&:name)) + end + + def test_following_multiple + source = <<-XML + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/d/following::*") + assert_equal(["d", "e", "f"], matches.map(&:name)) + end + + def test_following_sibling_across_multiple_nodes + source = <<-XML + + + + + + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/b/x/following-sibling::*") + assert_equal(["c", "d", "e"], matches.map(&:name)) + end + + def test_following_sibling_within_single_node + source = <<-XML + + + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/b/x/following-sibling::*") + assert_equal(["c", "d", "x", "e"], matches.map(&:name)) + end + + def test_following_sibling_predicates + source = <<-XML +
+
+ +
+ + + +
+ XML + doc = REXML::Document.new(source) + # Finds a node flowing + matches = REXML::XPath.match(doc, "//a/following-sibling::*[1]") + assert_equal(["w", "x", "y", "z"], matches.map(&:name)) + end + + def test_preceding_sibling_across_multiple_nodes + source = <<-XML + + + + + + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") + assert_equal(["e", "d", "c"], matches.map(&:name)) + end + + def test_preceding_sibling_within_single_node + source = <<-XML + + + + + + XML + doc = REXML::Document.new(source) + matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") + assert_equal(["e", "x", "d", "c"], matches.map(&:name)) end def test_following @@ -1095,6 +1193,16 @@ def test_namespaces_0 assert_equal( 1, XPath.match( d, "//x:*" ).size ) end + def test_namespaces_cache + doc = Document.new("") + assert_equal("", XPath.first(doc, "//b[namespace-uri()='1']").to_s) + assert_nil(XPath.first(doc, "//b[namespace-uri()='']")) + + doc.root.delete_namespace + assert_nil(XPath.first(doc, "//b[namespace-uri()='1']")) + assert_equal("", XPath.first(doc, "//b[namespace-uri()='']").to_s) + end + def test_ticket_71 doc = Document.new(%Q{}) el = doc.root.elements[1] @@ -1158,5 +1266,15 @@ def test_or_and end assert_equal(["/"], hrefs, "Bug #3842 [ruby-core:32447]") end + + def test_match_with_deprecated_usage + verbose, $VERBOSE = $VERBOSE, nil + doc = Document.new("") + assert_equal(['b'], XPath.match([doc, doc], '//b').map(&:name)) + assert_equal(['b'], XPath.match([doc], '//b').map(&:name)) + assert_equal([], XPath.match([], '//b').map(&:name)) + ensure + $VERBOSE = verbose + end end end