diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..b18fd293
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+ - package-ecosystem: 'github-actions'
+ directory: '/'
+ schedule:
+ interval: 'weekly'
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..52349b44
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,29 @@
+name: Benchmark
+
+on:
+ - push
+ - pull_request
+
+jobs:
+ benchmark:
+ name: "Benchmark: Ruby ${{ matrix.ruby-version }}: ${{ matrix.runs-on }}"
+ strategy:
+ fail-fast: false
+ matrix:
+ ruby-version:
+ - '3.3'
+ runs-on:
+ - ubuntu-latest
+ runs-on: ${{ matrix.runs-on }}
+ steps:
+ - uses: actions/checkout@v4
+ - uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: ${{ matrix.ruby-version }}
+ - name: Install dependencies
+ run: |
+ bundle install
+ gem install rexml -v 3.2.6
+ - name: Benchmark
+ run: |
+ rake benchmark
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..20ff87e7
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,30 @@
+name: Release
+on:
+ push:
+ tags:
+ - "*"
+jobs:
+ github:
+ name: GitHub
+ runs-on: ubuntu-latest
+ timeout-minutes: 10
+ steps:
+ - uses: actions/checkout@v4
+ - name: Extract release note
+ run: |
+ ruby \
+ -e 'print("## REXML "); \
+ puts(ARGF.read.split(/^## /)[1]. \
+ gsub(/ {.+?}/, ""). \
+ gsub(/\[(.+?)\]\[.+?\]/) {$1})' \
+ NEWS.md > release-note.md
+ - name: Upload to release
+ run: |
+ title=$(head -n1 release-note.md | sed -e 's/^## //')
+ tail -n +2 release-note.md > release-note-without-version.md
+ gh release create ${GITHUB_REF_NAME} \
+ --discussion-category Announcements \
+ --notes-file release-note-without-version.md \
+ --title "${title}"
+ env:
+ GH_TOKEN: ${{ github.token }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 65a3bffd..0bd43457 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -3,7 +3,14 @@ on:
- push
- pull_request
jobs:
+ ruby-versions-inplace:
+ uses: ruby/actions/.github/workflows/ruby_versions.yml@master
+ with:
+ engine: cruby-jruby
+ min_version: 2.5
+
inplace:
+ needs: ruby-versions-inplace
name: "Inplace: ${{ matrix.ruby-version }} on ${{ matrix.runs-on }}"
runs-on: ${{ matrix.runs-on }}
strategy:
@@ -13,16 +20,14 @@ jobs:
- ubuntu-latest
- macos-latest
- windows-latest
- ruby-version:
- - "2.5"
- - "2.6"
- - "2.7"
- - jruby
+ ruby-version: ${{ fromJson(needs.ruby-versions-inplace.outputs.versions) }}
+ exclude:
+ - {runs-on: macos-latest, ruby-version: 2.5}
# include:
# - runs-on: ubuntu-latest
# ruby-version: truffleruby
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby-version }}
@@ -30,7 +35,26 @@ jobs:
- name: Test
run: bundle exec rake test
+ frozen-string-literal:
+ name: frozen-string-literal
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: ruby
+ bundler-cache: true
+ - name: Test
+ run: bundle exec rake test RUBYOPT="--enable-frozen-string-literal"
+
+ ruby-versions-gems:
+ uses: ruby/actions/.github/workflows/ruby_versions.yml@master
+ with:
+ engine: cruby-jruby
+ min_version: 2.6 # REXML is a default gem since Ruby 2.6
+
gem:
+ needs: ruby-versions-gems
name: "Gem: ${{ matrix.ruby-version }} on ${{ matrix.runs-on }}"
runs-on: ${{ matrix.runs-on }}
strategy:
@@ -40,17 +64,26 @@ jobs:
- ubuntu-latest
- macos-latest
- windows-latest
- ruby-version:
- - "3.0"
- - head
+ ruby-version: ${{ fromJson(needs.ruby-versions-gems.outputs.versions) }}
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby-version }}
- name: Install as gem
run: |
rake install
+ - name: Install test dependencies on non-Windows
+ if: matrix.runs-on != 'windows-latest'
+ run: |
+ for gem in $(ruby -e 'puts ARGF.read[/^group :test do(.*)^end/m, 1].scan(/"(.+?)"/)' Gemfile); do
+ gem install ${gem}
+ done
+ - name: Install test dependencies on Windows
+ if: matrix.runs-on == 'windows-latest'
+ run: |
+ gem install test-unit
+ gem install test-unit-ruby-core
- name: Test
run: |
ruby -run -e mkdir -- tmp
@@ -62,17 +95,17 @@ jobs:
name: "Document"
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: ruby/setup-ruby@v1
with:
- ruby-version: 2.7
+ ruby-version: ruby
- name: Install dependencies
run: |
bundle install
- name: Build document
run: |
bundle exec rake warning:error rdoc
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
if: |
github.event_name == 'push'
with:
diff --git a/Gemfile b/Gemfile
index 54da2c0c..67f21dfb 100644
--- a/Gemfile
+++ b/Gemfile
@@ -4,3 +4,17 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
# Specify your gem's dependencies in rexml.gemspec
gemspec
+
+group :development do
+ gem "bundler"
+ gem "rake"
+end
+
+group :benchmark do
+ gem "benchmark_driver"
+end
+
+group :test do
+ gem "test-unit"
+ gem "test-unit-ruby-core"
+end
diff --git a/NEWS.md b/NEWS.md
index 84bbde2d..72318b7f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,15 +1,349 @@
# News
+## 3.3.3 - 2024-08-01 {#version-3-3-3}
+
+### Improvements
+
+ * Added support for detecting invalid XML that has unsupported
+ content before root element
+ * GH-184
+ * Patch by NAITOH Jun.
+
+ * Added support for `REXML::Security.entity_expansion_limit=` and
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
+ parsers
+ * GH-187
+ * Patch by NAITOH Jun.
+
+ * Added more tests for invalid XMLs.
+ * GH-183
+ * Patch by Watson.
+
+ * Added more performance tests.
+ * Patch by Watson.
+
+ * Improved parse performance.
+ * GH-186
+ * Patch by tomoya ishida.
+
+### Thanks
+
+ * NAITOH Jun
+
+ * Watson
+
+ * tomoya ishida
+
+## 3.3.2 - 2024-07-16 {#version-3-3-2}
+
+### Improvements
+
+ * Improved parse performance.
+ * GH-160
+ * Patch by NAITOH Jun.
+
+ * Improved parse performance.
+ * GH-169
+ * GH-170
+ * GH-171
+ * GH-172
+ * GH-173
+ * GH-174
+ * GH-175
+ * GH-176
+ * GH-177
+ * Patch by Watson.
+
+ * Added support for raising a parse exception when an XML has extra
+ content after the root element.
+ * GH-161
+ * Patch by NAITOH Jun.
+
+ * Added support for raising a parse exception when an XML
+ declaration exists in wrong position.
+ * GH-162
+ * Patch by NAITOH Jun.
+
+ * Removed needless a space after XML declaration in pretty print mode.
+ * GH-164
+ * Patch by NAITOH Jun.
+
+ * Stopped to emit `:text` event after the root element.
+ * GH-167
+ * Patch by NAITOH Jun.
+
+### Fixes
+
+ * Fixed a bug that SAX2 parser doesn't expand predefined entities for
+ `characters` callback.
+ * GH-168
+ * Patch by NAITOH Jun.
+
+### Thanks
+
+ * NAITOH Jun
+
+ * Watson
+
+## 3.3.1 - 2024-06-25 {#version-3-3-1}
+
+### Improvements
+
+ * Added support for detecting malformed top-level comments.
+ * GH-145
+ * Patch by Hiroya Fujinami.
+
+ * Improved `REXML::Element#attribute` performance.
+ * GH-146
+ * Patch by Hiroya Fujinami.
+
+ * Added support for detecting malformed `` comments.
+ * GH-147
+ * Patch by Hiroya Fujinami.
+
+ * Added support for detecting unclosed `DOCTYPE`.
+ * GH-152
+ * Patch by Hiroya Fujinami.
+
+ * Added `changlog_uri` metadata to gemspec.
+ * GH-156
+ * Patch by fynsta.
+
+ * Improved parse performance.
+ * GH-157
+ * GH-158
+ * Patch by NAITOH Jun.
+
+### Fixes
+
+ * Fixed a bug that large XML can't be parsed.
+ * GH-154
+ * Patch by NAITOH Jun.
+
+ * Fixed a bug that private constants are visible.
+ * GH-155
+ * Patch by NAITOH Jun.
+
+### Thanks
+
+ * Hiroya Fujinami
+
+ * NAITOH Jun
+
+ * fynsta
+
+## 3.3.0 - 2024-06-11 {#version-3-3-0}
+
+### Improvements
+
+ * Added support for strscan 0.7.0 installed with Ruby 2.6.
+ * GH-142
+ * Reported by Fernando Trigoso.
+
+### Thanks
+
+ * Fernando Trigoso
+
+## 3.2.9 - 2024-06-09 {#version-3-2-9}
+
+### Improvements
+
+ * Added support for old strscan.
+ * GH-132
+ * Reported by Adam.
+
+ * Improved attribute value parse performance.
+ * GH-135
+ * Patch by NAITOH Jun.
+
+ * Improved `REXML::Node#each_recursive` performance.
+ * GH-134
+ * GH-139
+ * Patch by Hiroya Fujinami.
+
+ * Improved text parse performance.
+ * Reported by mprogrammer.
+
+### Thanks
+
+ * Adam
+ * NAITOH Jun
+ * Hiroya Fujinami
+ * mprogrammer
+
+## 3.2.8 - 2024-05-16 {#version-3-2-8}
+
+### Fixes
+
+ * Suppressed a warning
+
+## 3.2.7 - 2024-05-16 {#version-3-2-7}
+
+### Improvements
+
+ * Improve parse performance by using `StringScanner`.
+
+ * GH-106
+ * GH-107
+ * GH-108
+ * GH-109
+ * GH-112
+ * GH-113
+ * GH-114
+ * GH-115
+ * GH-116
+ * GH-117
+ * GH-118
+ * GH-119
+ * GH-121
+
+ * Patch by NAITOH Jun.
+
+ * Improved parse performance when an attribute has many `<`s.
+
+ * GH-126
+
+### Fixes
+
+ * XPath: Fixed a bug of `normalize_space(array)`.
+
+ * GH-110
+ * GH-111
+
+ * Patch by flatisland.
+
+ * XPath: Fixed a bug that wrong position is used with nested path.
+
+ * GH-110
+ * GH-122
+
+ * Reported by jcavalieri.
+ * Patch by NAITOH Jun.
+
+ * Fixed a bug that an exception message can't be generated for
+ invalid encoding XML.
+
+ * GH-29
+ * GH-123
+
+ * Reported by DuKewu.
+ * Patch by NAITOH Jun.
+
+### Thanks
+
+ * NAITOH Jun
+ * flatisland
+ * jcavalieri
+ * DuKewu
+
+## 3.2.6 - 2023-07-27 {#version-3-2-6}
+
+### Improvements
+
+ * Required Ruby 2.5 or later explicitly.
+ [GH-69][gh-69]
+ [Patch by Ivo Anjo]
+
+ * Added documentation for maintenance cycle.
+ [GH-71][gh-71]
+ [Patch by Ivo Anjo]
+
+ * Added tutorial.
+ [GH-77][gh-77]
+ [GH-78][gh-78]
+ [Patch by Burdette Lamar]
+
+ * Improved performance and memory usage.
+ [GH-94][gh-94]
+ [Patch by fatkodima]
+
+ * `REXML::Parsers::XPathParser#abbreviate`: Added support for
+ function arguments.
+ [GH-95][gh-95]
+ [Reported by pulver]
+
+ * `REXML::Parsers::XPathParser#abbreviate`: Added support for string
+ literal that contains double-quote.
+ [GH-96][gh-96]
+ [Patch by pulver]
+
+ * `REXML::Parsers::XPathParser#abbreviate`: Added missing `/` to
+ `:descendant_or_self/:self/:parent`.
+ [GH-97][gh-97]
+ [Reported by pulver]
+
+ * `REXML::Parsers::XPathParser#abbreviate`: Added support for more patterns.
+ [GH-97][gh-97]
+ [Reported by pulver]
+
+### Fixes
+
+ * Fixed a typo in NEWS.
+ [GH-72][gh-72]
+ [Patch by Spencer Goodman]
+
+ * Fixed a typo in NEWS.
+ [GH-75][gh-75]
+ [Patch by Andrew Bromwich]
+
+ * Fixed documents.
+ [GH-87][gh-87]
+ [Patch by Alexander Ilyin]
+
+ * Fixed a bug that `Attriute` convert `'` and `'` even when
+ `attribute_quote: :quote` is used.
+ [GH-92][gh-92]
+ [Reported by Edouard Brière]
+
+ * Fixed links in tutorial.
+ [GH-99][gh-99]
+ [Patch by gemmaro]
+
+
+### Thanks
+
+ * Ivo Anjo
+
+ * Spencer Goodman
+
+ * Andrew Bromwich
+
+ * Burdette Lamar
+
+ * Alexander Ilyin
+
+ * Edouard Brière
+
+ * fatkodima
+
+ * pulver
+
+ * gemmaro
+
+[gh-69]:https://github.com/ruby/rexml/issues/69
+[gh-71]:https://github.com/ruby/rexml/issues/71
+[gh-72]:https://github.com/ruby/rexml/issues/72
+[gh-75]:https://github.com/ruby/rexml/issues/75
+[gh-77]:https://github.com/ruby/rexml/issues/77
+[gh-87]:https://github.com/ruby/rexml/issues/87
+[gh-92]:https://github.com/ruby/rexml/issues/92
+[gh-94]:https://github.com/ruby/rexml/issues/94
+[gh-95]:https://github.com/ruby/rexml/issues/95
+[gh-96]:https://github.com/ruby/rexml/issues/96
+[gh-97]:https://github.com/ruby/rexml/issues/97
+[gh-98]:https://github.com/ruby/rexml/issues/98
+[gh-99]:https://github.com/ruby/rexml/issues/99
+
## 3.2.5 - 2021-04-05 {#version-3-2-5}
### Improvements
* Add more validations to XPath parser.
- * `require "rexml/docuemnt"` by default.
+ * `require "rexml/document"` by default.
[GitHub#36][Patch by Koichi ITO]
- * Don't add `#dcloe` method to core classes globally.
+ * Don't add `#dclone` method to core classes globally.
[GitHub#37][Patch by Akira Matsuda]
* Add more documentations.
diff --git a/README.md b/README.md
index 27da0e49..e8ab5082 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ REXML supports both tree and stream document parsing. Stream parsing is faster (
## API
-See the {API documentation}[https://ruby.github.io/rexml/]
+See the [API documentation](https://ruby.github.io/rexml/).
## Usage
@@ -33,6 +33,15 @@ doc = Document.new string
So parsing a string is just as easy as parsing a file.
+## Support
+
+REXML support follows the same maintenance cycle as Ruby releases, as shown on .
+
+If you are running on an end-of-life Ruby, do not expect modern REXML releases to be compatible with it; in fact, it's recommended that you DO NOT use this gem, and instead use the REXML version that came bundled with your end-of-life Ruby version.
+
+The `required_ruby_version` on the gemspec is kept updated on a [best-effort basis](https://github.com/ruby/rexml/pull/70) by the community.
+Up to version 3.2.5, this information was not set. That version [is known broken with at least Ruby < 2.3](https://github.com/ruby/rexml/issues/69).
+
## Development
After checking out the repo, run `rake test` to run the tests.
diff --git a/Rakefile b/Rakefile
index 7143e754..76a56296 100644
--- a/Rakefile
+++ b/Rakefile
@@ -28,3 +28,42 @@ RDoc::Task.new do |rdoc|
end
load "#{__dir__}/tasks/tocs.rake"
+
+benchmark_tasks = []
+namespace :benchmark do
+ Dir.glob("benchmark/*.yaml").sort.each do |yaml|
+ name = File.basename(yaml, ".*")
+ env = {
+ "RUBYLIB" => nil,
+ "BUNDLER_ORIG_RUBYLIB" => nil,
+ }
+ command_line = [
+ RbConfig.ruby, "-v", "-S", "benchmark-driver", File.expand_path(yaml),
+ ]
+
+ desc "Run #{name} benchmark"
+ task name do
+ puts("```")
+ sh(env, *command_line)
+ puts("```")
+ end
+ benchmark_tasks << "benchmark:#{name}"
+
+ case name
+ when /\Aparse/
+ namespace name do
+ desc "Run #{name} benchmark: small"
+ task :small do
+ puts("```")
+ sh(env.merge("N_ELEMENTS" => "500", "N_ATTRIBUTES" => "1"),
+ *command_line)
+ puts("```")
+ end
+ benchmark_tasks << "benchmark:#{name}:small"
+ end
+ end
+ end
+end
+
+desc "Run all benchmarks"
+task :benchmark => benchmark_tasks
diff --git a/benchmark/attribute.yaml b/benchmark/attribute.yaml
new file mode 100644
index 00000000..5dd7fded
--- /dev/null
+++ b/benchmark/attribute.yaml
@@ -0,0 +1,38 @@
+loop_count: 1000
+contexts:
+ - gems:
+ rexml: 3.2.6
+ require: false
+ prelude: require 'rexml'
+ - name: master
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ - name: 3.2.6(YJIT)
+ gems:
+ rexml: 3.2.6
+ require: false
+ prelude: |
+ require 'rexml'
+ RubyVM::YJIT.enable
+ - name: master(YJIT)
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ RubyVM::YJIT.enable
+
+prelude: |
+ require 'rexml/document'
+
+ xml_source = ""
+ 100.times do
+ xml_source = "#{xml_source}"
+ end
+ xml_source = "#{xml_source}"
+
+ document = REXML::Document.new(xml_source)
+ deepest_node = document.elements["//deepest"]
+
+benchmark:
+ with_ns: deepest_node.attribute("with_ns", "xyz")
+ without_ns: deepest_node.attribute("without_ns")
diff --git a/benchmark/each_recursive.yaml b/benchmark/each_recursive.yaml
new file mode 100644
index 00000000..c745f8ce
--- /dev/null
+++ b/benchmark/each_recursive.yaml
@@ -0,0 +1,40 @@
+loop_count: 100
+contexts:
+ - gems:
+ rexml: 3.2.6
+ require: false
+ prelude: require 'rexml'
+ - name: master
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ - name: 3.2.6(YJIT)
+ gems:
+ rexml: 3.2.6
+ require: false
+ prelude: |
+ require 'rexml'
+ RubyVM::YJIT.enable
+ - name: master(YJIT)
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ RubyVM::YJIT.enable
+
+prelude: |
+ require 'rexml/document'
+
+ xml_source = +""
+ 100.times do
+ x_node_source = ""
+ 100.times do
+ x_node_source = "#{x_node_source}"
+ end
+ xml_source << x_node_source
+ end
+ xml_source << ""
+
+ document = REXML::Document.new(xml_source)
+
+benchmark:
+ each_recursive: document.each_recursive { |_| }
diff --git a/benchmark/gt.yaml b/benchmark/gt.yaml
new file mode 100644
index 00000000..3f6af739
--- /dev/null
+++ b/benchmark/gt.yaml
@@ -0,0 +1,34 @@
+loop_count: 10
+contexts:
+ - gems:
+ rexml: 3.2.6
+ require: false
+ prelude: require "rexml"
+ - name: master
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require "rexml"
+ - name: 3.2.6(YJIT)
+ gems:
+ rexml: 3.2.6
+ require: false
+ prelude: |
+ require "rexml"
+ RubyVM::YJIT.enable
+ - name: master(YJIT)
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require "rexml"
+ RubyVM::YJIT.enable
+
+prelude: |
+ require "rexml/document"
+
+ n = 10000
+ gts = ">" * n
+ in_attribute = ""
+ in_text = "#{gts}"
+
+benchmark:
+ "attribute": REXML::Document.new(in_attribute)
+ "text": REXML::Document.new(in_text)
diff --git a/benchmark/parse.yaml b/benchmark/parse.yaml
new file mode 100644
index 00000000..f2c7d336
--- /dev/null
+++ b/benchmark/parse.yaml
@@ -0,0 +1,57 @@
+loop_count: 100
+contexts:
+ - gems:
+ rexml: 3.2.6
+ require: false
+ prelude: require 'rexml'
+ - name: master
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ - name: 3.2.6(YJIT)
+ gems:
+ rexml: 3.2.6
+ require: false
+ prelude: |
+ require 'rexml'
+ RubyVM::YJIT.enable
+ - name: master(YJIT)
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+ require 'rexml'
+ RubyVM::YJIT.enable
+
+prelude: |
+ require 'rexml/document'
+ require 'rexml/parsers/sax2parser'
+ require 'rexml/parsers/pullparser'
+ require 'rexml/parsers/streamparser'
+ require 'rexml/streamlistener'
+
+ n_elements = Integer(ENV.fetch("N_ELEMENTS", "5000"), 10)
+ n_attributes = Integer(ENV.fetch("N_ATTRIBUTES", "2"), 10)
+
+ def build_xml(n_elements, n_attributes)
+ xml = ''
+ n_elements.times do |i|
+ xml << ''
+ end
+ xml << ''
+ end
+ xml = build_xml(n_elements, n_attributes)
+
+ class Listener
+ include REXML::StreamListener
+ end
+
+benchmark:
+ 'dom' : REXML::Document.new(xml)
+ 'sax' : REXML::Parsers::SAX2Parser.new(xml).parse
+ 'pull' : |
+ parser = REXML::Parsers::PullParser.new(xml)
+ while parser.has_next?
+ parser.pull
+ end
+ 'stream' : REXML::Parsers::StreamParser.new(xml, Listener.new).parse
diff --git a/doc/rexml/tasks/rdoc/element.rdoc b/doc/rexml/tasks/rdoc/element.rdoc
index f229275f..4b3609b0 100644
--- a/doc/rexml/tasks/rdoc/element.rdoc
+++ b/doc/rexml/tasks/rdoc/element.rdoc
@@ -369,7 +369,7 @@ to retrieve the first text node in a specified element:
Use method
{Element#has_text?}[../../../../REXML/Element.html#method-i-has_text-3F]
-to determine whethe the element has text:
+to determine whether the element has text:
e = REXML::Element.new('foo')
e.has_text? # => false
@@ -486,7 +486,7 @@ to remove a specific namespace from the element:
Use method
{Element#namespace}[../../../../REXML/Element.html#method-i-namespace]
-to retrieve a speficic namespace URI for the element:
+to retrieve a specific namespace URI for the element:
xml_string = <<-EOT
diff --git a/doc/rexml/tutorial.rdoc b/doc/rexml/tutorial.rdoc
new file mode 100644
index 00000000..c85a70d0
--- /dev/null
+++ b/doc/rexml/tutorial.rdoc
@@ -0,0 +1,1358 @@
+= \REXML Tutorial
+
+== Why \REXML?
+
+- Ruby's \REXML library is part of the Ruby distribution,
+ so using it requires no gem installations.
+- \REXML is fully maintained.
+- \REXML is mature, having been in use for long years.
+
+== To Include, or Not to Include?
+
+REXML is a module.
+To use it, you must require it:
+
+ require 'rexml' # => true
+
+If you do not also include it, you must fully qualify references to REXML:
+
+ REXML::Document # => REXML::Document
+
+If you also include the module, you may optionally omit REXML:::
+
+ include REXML
+ Document # => REXML::Document
+ REXML::Document # => REXML::Document
+
+== Preliminaries
+
+All examples here assume that the following code has been executed:
+
+ require 'rexml'
+ include REXML
+
+The source XML for many examples here is from file
+{books.xml}[https://www.w3schools.com/xml/books.xml] at w3schools.com.
+You may find it convenient to open that page in a new tab
+(Ctrl-click in some browsers).
+
+Note that your browser may display the XML with modified whitespace
+and without the XML declaration, which in this case is:
+
+
+
+For convenience, we capture the XML into a string variable:
+
+ require 'open-uri'
+ source_string = URI.open('https://www.w3schools.com/xml/books.xml').read
+
+And into a file:
+
+ File.write('source_file.xml', source_string)
+
+Throughout these examples, variable +doc+ will hold only the document
+derived from these sources:
+
+ doc = Document.new(source_string)
+
+== Parsing \XML \Source
+
+=== Parsing a Document
+
+Use method REXML::Document::new to parse XML source.
+
+The source may be a string:
+
+ doc = Document.new(source_string)
+
+Or an \IO stream:
+
+ doc = File.open('source_file.xml', 'r') do |io|
+ Document.new(io)
+ end
+
+Method URI.open returns a StringIO object,
+so the source can be from a web page:
+
+ require 'open-uri'
+ io = URI.open("https://www.w3schools.com/xml/books.xml")
+ io.class # => StringIO
+ doc = Document.new(io)
+
+For any of these sources, the returned object is an REXML::Document:
+
+ doc # => ... >
+ doc.class # => REXML::Document
+
+Note: 'UNDEFINED' is the "name" displayed for a document,
+even though doc.name returns an empty string "".
+
+A parsed document may produce \REXML objects of many classes,
+but the two that are likely to be of greatest interest are
+REXML::Document and REXML::Element.
+These two classes are covered in great detail in this tutorial.
+
+=== Context (Parsing Options)
+
+The context for parsing a document is a hash that influences
+the way the XML is read and stored.
+
+The context entries are:
+
+- +:respect_whitespace+: controls treatment of whitespace.
+- +:compress_whitespace+: determines whether whitespace is compressed.
+- +:ignore_whitespace_nodes+: determines whether whitespace-only nodes are to be ignored.
+- +:raw+: controls treatment of special characters and entities.
+
+See {Element Context}[../context_rdoc.html].
+
+== Exploring the Document
+
+An REXML::Document object represents an XML document.
+
+The object inherits from its ancestor classes:
+
+- REXML::Child (includes module REXML::Node)
+ - REXML::Parent (includes module {Enumerable}[rdoc-ref:Enumerable]).
+ - REXML::Element (includes module REXML::Namespace).
+ - REXML::Document
+
+This section covers only those properties and methods that are unique to a document
+(that is, not inherited or included).
+
+=== Document Properties
+
+A document has several properties (other than its children);
+
+- Document type.
+- Node type.
+- Name.
+- Document.
+- XPath
+
+[Document Type]
+
+ A document may have a document type:
+
+ my_xml = ''
+ my_doc = Document.new(my_xml)
+ doc_type = my_doc.doctype
+ doc_type.class # => REXML::DocType
+ doc_type.to_s # => ""
+
+[Node Type]
+
+ A document also has a node type (always +:document+):
+
+ doc.node_type # => :document
+
+[Name]
+
+ A document has a name (always an empty string):
+
+ doc.name # => ""
+
+[Document]
+
+ \Method REXML::Document#document returns +self+:
+
+ doc.document == doc # => true
+
+ An object of a different class (\REXML::Element or \REXML::Child)
+ may have a document, which is the document to which the object belongs;
+ if so, that document will be an \REXML::Document object.
+
+ doc.root.document.class # => REXML::Document
+
+[XPath]
+
+ \method REXML::Element#xpath returns the string xpath to the element,
+ relative to its most distant ancestor:
+
+ doc.root.class # => REXML::Element
+ doc.root.xpath # => "/bookstore"
+ doc.root.texts.first # => "\n\n"
+ doc.root.texts.first.xpath # => "/bookstore/text()"
+
+ If there is no ancestor, returns the expanded name of the element:
+
+ Element.new('foo').xpath # => "foo"
+
+=== Document Children
+
+A document may have children of these types:
+
+- XML declaration.
+- Root element.
+- Text.
+- Processing instructions.
+- Comments.
+- CDATA.
+
+[XML Declaration]
+
+ A document may an XML declaration, which is stored as an REXML::XMLDecl object:
+
+ doc.xml_decl # =>
+ doc.xml_decl.class # => REXML::XMLDecl
+
+ Document.new('').xml_decl # =>
+
+ my_xml = '"'
+ my_doc = Document.new(my_xml)
+ xml_decl = my_doc.xml_decl
+ xml_decl.to_s # => ""
+
+ The version, encoding, and stand-alone values may be retrieved separately:
+
+ my_doc.version # => "1.0"
+ my_doc.encoding # => "UTF-8"
+ my_doc.stand_alone? # => "yes"
+
+[Root Element]
+
+ A document may have a single element child, called the _root_ _element_,
+ which is stored as an REXML::Element object;
+ it may be retrieved with method +root+:
+
+ doc.root # => ... >
+ doc.root.class # => REXML::Element
+
+ Document.new('').root # => nil
+
+[Text]
+
+ A document may have text passages, each of which is stored
+ as an REXML::Text object:
+
+ doc.texts.each {|t| p [t.class, t] }
+
+ Output:
+
+ [REXML::Text, "\n"]
+
+[Processing Instructions]
+
+ A document may have processing instructions, which are stored
+ as REXML::Instruction objects:
+
+
+
+ Output:
+
+ [REXML::Instruction, ]
+ [REXML::Instruction, ]
+
+[Comments]
+
+ A document may have comments, which are stored
+ as REXML::Comment objects:
+
+ my_xml = <<-EOT
+
+
+ EOT
+ my_doc = Document.new(my_xml)
+ my_doc.comments.each {|c| p [c.class, c] }
+
+ Output:
+
+ [REXML::Comment, # ... >, @string="foo">]
+ [REXML::Comment, # ... >, @string="bar">]
+
+[CDATA]
+
+ A document may have CDATA entries, which are stored
+ as REXML::CData objects:
+
+ my_xml = <<-EOT
+
+
+ EOT
+ my_doc = Document.new(my_xml)
+ my_doc.cdatas.each {|cd| p [cd.class, cd] }
+
+ Output:
+
+ [REXML::CData, "foo"]
+ [REXML::CData, "bar"]
+
+The payload of a document is a tree of nodes, descending from the root element:
+
+ doc.root.children.each do |child|
+ p [child, child.class]
+ end
+
+Output:
+
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+
+== Exploring an Element
+
+An REXML::Element object represents an XML element.
+
+The object inherits from its ancestor classes:
+
+- REXML::Child (includes module REXML::Node)
+ - REXML::Parent (includes module {Enumerable}[rdoc-ref:Enumerable]).
+ - REXML::Element (includes module REXML::Namespace).
+
+This section covers methods:
+
+- Defined in REXML::Element itself.
+- Inherited from REXML::Parent and REXML::Child.
+- Included from REXML::Node.
+
+=== Inside the Element
+
+[Brief String Representation]
+
+ Use method REXML::Element#inspect to retrieve a brief string representation.
+
+ doc.root.inspect # => " ... >"
+
+ The ellipsis (...) indicates that the element has children.
+ When there are no children, the ellipsis is omitted:
+
+ Element.new('foo').inspect # => ""
+
+ If the element has attributes, those are also included:
+
+ doc.root.elements.first.inspect # => " ... >"
+
+[Extended String Representation]
+
+ Use inherited method REXML::Child.bytes to retrieve an extended
+ string representation.
+
+ doc.root.bytes # => "\n\n\n Codestin Search App\n Giada De Laurentiis\n 2005\n 30.00\n\n\n\n Codestin Search App\n J K. Rowling\n 2005\n 29.99\n\n\n\n Codestin Search App\n James McGovern\n Per Bothner\n Kurt Cagle\n James Linn\n Vaidyanathan Nagarajan\n 2003\n 49.99\n\n\n\n Codestin Search App\n Erik T. Ray\n 2003\n 39.95\n\n\n"
+
+[Node Type]
+
+ Use method REXML::Element#node_type to retrieve the node type (always +:element+):
+
+ doc.root.node_type # => :element
+
+[Raw Mode]
+
+ Use method REXML::Element#raw to retrieve whether (+true+ or +nil+)
+ raw mode is set.
+
+ doc.root.raw # => nil
+
+[Context]
+
+ Use method REXML::Element#context to retrieve the context hash
+ (see {Element Context}[../context_rdoc.html]):
+
+ doc.root.context # => {}
+
+=== Relationships
+
+An element may have:
+
+- Ancestors.
+- Siblings.
+- Children.
+
+==== Ancestors
+
+[Containing Document]
+
+ Use method REXML::Element#document to retrieve the containing document, if any:
+
+ ele = doc.root.elements.first # => ... >
+ ele.document # => ... >
+ ele = Element.new('foo') # =>
+ ele.document # => nil
+
+[Root Element]
+
+ Use method REXML::Element#root to retrieve the root element:
+
+ ele = doc.root.elements.first # => ... >
+ ele.root # => ... >
+ ele = Element.new('foo') # =>
+ ele.root # =>
+
+[Root Node]
+
+ Use method REXML::Element#root_node to retrieve the most distant ancestor,
+ which is the containing document, if any, otherwise the root element:
+
+ ele = doc.root.elements.first # => ... >
+ ele.root_node # => ... >
+ ele = Element.new('foo') # =>
+ ele.root_node # =>
+
+[Parent]
+
+ Use inherited method REXML::Child#parent to retrieve the parent
+
+ ele = doc.root # => ... >
+ ele.parent # => ... >
+ ele = doc.root.elements.first # => ... >
+ ele.parent # => ... >
+
+ Use included method REXML::Node#index_in_parent to retrieve the index
+ of the element among all of its parents children (not just the element children).
+ Note that while the index for doc.root.elements[n] is 1-based,
+ the returned index is 0-based.
+
+ doc.root.children # =>
+ # ["\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n"]
+ ele = doc.root.elements[1] # => ... >
+ ele.index_in_parent # => 2
+ ele = doc.root.elements[2] # => ... >
+ ele.index_in_parent# => 4
+
+==== Siblings
+
+[Next Element]
+
+ Use method REXML::Element#next_element to retrieve the first following
+ sibling that is itself an element (+nil+ if there is none):
+
+ ele = doc.root.elements[1]
+ while ele do
+ p [ele.class, ele]
+ ele = ele.next_element
+ end
+ p ele
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+
+[Previous Element]
+
+ Use method REXML::Element#previous_element to retrieve the first preceding
+ sibling that is itself an element (+nil+ if there is none):
+
+ ele = doc.root.elements[4]
+ while ele do
+ p [ele.class, ele]
+ ele = ele.previous_element
+ end
+ p ele
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+
+[Next Node]
+
+ Use included method REXML::Node.next_sibling_node
+ (or its alias next_sibling) to retrieve the first following node
+ regardless of its class:
+
+ node = doc.root.children[0]
+ while node do
+ p [node.class, node]
+ node = node.next_sibling
+ end
+ p node
+
+ Output:
+
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+
+[Previous Node]
+
+ Use included method REXML::Node.previous_sibling_node
+ (or its alias previous_sibling) to retrieve the first preceding node
+ regardless of its class:
+
+ node = doc.root.children[-1]
+ while node do
+ p [node.class, node]
+ node = node.previous_sibling
+ end
+ p node
+
+ Output:
+
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+
+==== Children
+
+[Child Count]
+
+ Use inherited method REXML::Parent.size to retrieve the count
+ of nodes (of all types) in the element:
+
+ doc.root.size # => 9
+
+[Child Nodes]
+
+ Use inherited method REXML::Parent.children to retrieve an array
+ of the child nodes (of all types):
+
+ doc.root.children # =>
+ # ["\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n",
+ # ... >,
+ # "\n\n"]
+
+[Child at Index]
+
+ Use method REXML::Element#[] to retrieve the child at a given numerical index,
+ or +nil+ if there is no such child:
+
+ doc.root[0] # => "\n\n"
+ doc.root[1] # => ... >
+ doc.root[7] # => ... >
+ doc.root[8] # => "\n\n"
+
+ doc.root[-1] # => "\n\n"
+ doc.root[-2] # => ... >
+
+ doc.root[50] # => nil
+
+[Index of Child]
+
+ Use method REXML::Parent#index to retrieve the zero-based child index
+ of the given object, or #size - 1 if there is no such child:
+
+ ele = doc.root # => ... >
+ ele.index(ele[0]) # => 0
+ ele.index(ele[1]) # => 1
+ ele.index(ele[7]) # => 7
+ ele.index(ele[8]) # => 8
+
+ ele.index(ele[-1]) # => 8
+ ele.index(ele[-2]) # => 7
+
+ ele.index(ele[50]) # => 8
+
+[Element Children]
+
+ Use method REXML::Element#has_elements? to retrieve whether the element
+ has element children:
+
+ doc.root.has_elements? # => true
+ REXML::Element.new('foo').has_elements? # => false
+
+ Use method REXML::Element#elements to retrieve the REXML::Elements object
+ containing the element children:
+
+ eles = doc.root.elements
+ eles # => # ... >>
+ eles.size # => 4
+ eles.each {|e| p [e.class], e }
+
+ Output:
+
+ [ ... >,
+ ... >,
+ ... >,
+ ... >
+ ]
+
+Note that while in this example, all the element children of the root element are
+elements of the same name, 'book', that is not true of all documents;
+a root element (or any other element) may have any mixture of child elements.
+
+[CDATA Children]
+
+ Use method REXML::Element#cdatas to retrieve a frozen array of CDATA children:
+
+ my_xml = <<-EOT
+
+
+
+
+ EOT
+ my_doc = REXML::Document.new(my_xml)
+ cdatas my_doc.root.cdatas
+ cdatas.frozen? # => true
+ cdatas.map {|cd| cd.class } # => [REXML::CData, REXML::CData]
+
+[Comment Children]
+
+ Use method REXML::Element#comments to retrieve a frozen array of comment children:
+
+ my_xml = <<-EOT
+
+
+
+
+ EOT
+ my_doc = REXML::Document.new(my_xml)
+ comments = my_doc.root.comments
+ comments.frozen? # => true
+ comments.map {|c| c.class } # => [REXML::Comment, REXML::Comment]
+ comments.map {|c| c.to_s } # => ["foo", "bar"]
+
+[Processing Instruction Children]
+
+ Use method REXML::Element#instructions to retrieve a frozen array
+ of processing instruction children:
+
+ my_xml = <<-EOT
+
+
+
+
+ EOT
+ my_doc = REXML::Document.new(my_xml)
+ instrs = my_doc.root.instructions
+ instrs.frozen? # => true
+ instrs.map {|i| i.class } # => [REXML::Instruction, REXML::Instruction]
+ instrs.map {|i| i.to_s } # => ["", ""]
+
+[Text Children]
+
+ Use method REXML::Element#has_text? to retrieve whether the element
+ has text children:
+
+ doc.root.has_text? # => true
+ REXML::Element.new('foo').has_text? # => false
+
+ Use method REXML::Element#texts to retrieve a frozen array of text children:
+
+ my_xml = 'textmore'
+ my_doc = REXML::Document.new(my_xml)
+ texts = my_doc.root.texts
+ texts.frozen? # => true
+ texts.map {|t| t.class } # => [REXML::Text, REXML::Text]
+ texts.map {|t| t.to_s } # => ["text", "more"]
+
+[Parenthood]
+
+ Use inherited method REXML::Parent.parent? to retrieve whether the element is a parent;
+ always returns +true+; only REXML::Child#parent returns +false+.
+
+ doc.root.parent? # => true
+
+=== Element Attributes
+
+Use method REXML::Element#has_attributes? to return whether the element
+has attributes:
+
+ ele = doc.root # => ... >
+ ele.has_attributes? # => false
+ ele = ele.elements.first # => ... >
+ ele.has_attributes? # => true
+
+Use method REXML::Element#attributes to return the hash
+containing the attributes for the element.
+Each hash key is a string attribute name;
+each hash value is an REXML::Attribute object.
+
+ ele = doc.root # => ... >
+ attrs = ele.attributes # => {}
+
+ ele = ele.elements.first # => ... >
+ attrs = ele.attributes # => {"category"=>category='cooking'}
+ attrs.size # => 1
+ attr_name = attrs.keys.first # => "category"
+ attr_name.class # => String
+ attr_value = attrs.values.first # => category='cooking'
+ attr_value.class # => REXML::Attribute
+
+Use method REXML::Element#[] to retrieve the string value for a given attribute,
+which may be given as either a string or a symbol:
+
+ ele = doc.root.elements.first # => ... >
+ attr_value = ele['category'] # => "cooking"
+ attr_value.class # => String
+ ele['nosuch'] # => nil
+
+Use method REXML::Element#attribute to retrieve the value of a named attribute:
+
+ my_xml = ""
+ my_doc = REXML::Document.new(my_xml)
+ my_doc.root.attribute("x") # => x='x'
+ my_doc.root.attribute("x", "a") # => a:x='a:x'
+
+== Whitespace
+
+Use method REXML::Element#ignore_whitespace_nodes to determine whether
+whitespace nodes were ignored when the XML was parsed;
+returns +true+ if so, +nil+ otherwise.
+
+Use method REXML::Element#whitespace to determine whether whitespace
+is respected for the element; returns +true+ if so, +false+ otherwise.
+
+== Namespaces
+
+Use method REXML::Element#namespace to retrieve the string namespace URI
+for the element, which may derive from one of its ancestors:
+
+ xml_string = <<-EOT
+
+
+
+
+
+
+ EOT
+ d = Document.new(xml_string)
+ b = d.elements['//b']
+ b.namespace # => "1"
+ b.namespace('y') # => "2"
+ b.namespace('nosuch') # => nil
+
+Use method REXML::Element#namespaces to retrieve a hash of all defined namespaces
+in the element and its ancestors:
+
+ xml_string = <<-EOT
+
+
+
+
+
+
+ EOT
+ d = Document.new(xml_string)
+ d.elements['//a'].namespaces # => {"x"=>"1", "y"=>"2"}
+ d.elements['//b'].namespaces # => {"x"=>"1", "y"=>"2"}
+ d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
+
+Use method REXML::Element#prefixes to retrieve an array of the string prefixes (names)
+of all defined namespaces in the element and its ancestors:
+
+ xml_string = <<-EOT
+
+
+
+
+
+
+ EOT
+ d = Document.new(xml_string, {compress_whitespace: :all})
+ d.elements['//a'].prefixes # => ["x", "y"]
+ d.elements['//b'].prefixes # => ["x", "y"]
+ d.elements['//c'].prefixes # => ["x", "y", "z"]
+
+== Traversing
+
+You can use certain methods to traverse children of the element.
+Each child that meets given criteria is yielded to the given block.
+
+[Traverse All Children]
+
+ Use inherited method REXML::Parent#each (or its alias #each_child) to traverse
+ all children of the element:
+
+ doc.root.each {|child| p [child.class, child] }
+
+ Output:
+
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+ [REXML::Element, ... >]
+ [REXML::Text, "\n\n"]
+
+[Traverse Element Children]
+
+ Use method REXML::Element#each_element to traverse only the element children
+ of the element:
+
+ doc.root.each_element {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+
+[Traverse Element Children with Attribute]
+
+ Use method REXML::Element#each_element_with_attribute with the single argument
+ +attr_name+ to traverse each element child that has the given attribute:
+
+ my_doc = Document.new ''
+ my_doc.root.each_element_with_attribute('id') {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ]
+ [REXML::Element, ]
+ [REXML::Element, ]
+
+ Use the same method with a second argument +value+ to traverse
+ each element child element that has the given attribute and value:
+
+ my_doc.root.each_element_with_attribute('id', '1') {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ]
+ [REXML::Element, ]
+
+ Use the same method with a third argument +max+ to traverse
+ no more than the given number of element children:
+
+ my_doc.root.each_element_with_attribute('id', '1', 1) {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ]
+
+ Use the same method with a fourth argument +xpath+ to traverse
+ only those element children that match the given xpath:
+
+ my_doc.root.each_element_with_attribute('id', '1', 2, '//d') {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ]
+
+[Traverse Element Children with Text]
+
+ Use method REXML::Element#each_element_with_text with no arguments
+ to traverse those element children that have text:
+
+ my_doc = Document.new 'bbd'
+ my_doc.root.each_element_with_text {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+
+ Use the same method with the single argument +text+ to traverse
+ those element children that have exactly that text:
+
+ my_doc.root.each_element_with_text('b') {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, ... >]
+
+ Use the same method with additional second argument +max+ to traverse
+ no more than the given number of element children:
+
+ my_doc.root.each_element_with_text('b', 1) {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ... >]
+
+ Use the same method with additional third argument +xpath+ to traverse
+ only those element children that also match the given xpath:
+
+ my_doc.root.each_element_with_text('b', 2, '//c') {|e| p [e.class, e] }
+
+ Output:
+
+ [REXML::Element, ... >]
+
+[Traverse Element Children's Indexes]
+
+ Use inherited method REXML::Parent#each_index to traverse all children's indexes
+ (not just those of element children):
+
+ doc.root.each_index {|i| print i }
+
+ Output:
+
+ 012345678
+
+[Traverse Children Recursively]
+
+ Use included method REXML::Node#each_recursive to traverse all children recursively:
+
+ doc.root.each_recursive {|child| p [child.class, child] }
+
+ Output:
+
+ [REXML::Element, ... >]
+ [REXML::Element, Codestin Search App
+ Giada De Laurentiis
+ 2005
+ 30.00
+
+
+
+ Codestin Search App
+ J K. Rowling
+ 2005
+ 29.99
+
+
+
+ Codestin Search App
+ James McGovern
+ Per Bothner
+ Kurt Cagle
+ James Linn
+ Vaidyanathan Nagarajan
+ 2003
+ 49.99
+
+
+
+ Codestin Search App
+ Erik T. Ray
+ 2003
+ 39.95
+
+
+
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 8933a013..11893a95 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require_relative "namespace"
require_relative 'text'
@@ -13,9 +13,6 @@ class Attribute
# The element to which this attribute belongs
attr_reader :element
- # The normalized value of this attribute. That is, the attribute with
- # entities intact.
- attr_writer :normalized
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
@@ -122,10 +119,13 @@ def hash
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
+ value = to_s
if @element and @element.context and @element.context[:attribute_quote] == :quote
- %Q^#@expanded_name="#{to_s().gsub(/"/, '"')}"^
+ value = value.gsub('"', '"') if value.include?('"')
+ %Q^#@expanded_name="#{value}"^
else
- "#@expanded_name='#{to_s().gsub(/'/, ''')}'"
+ value = value.gsub("'", ''') if value.include?("'")
+ "#@expanded_name='#{value}'"
end
end
@@ -141,7 +141,6 @@ def to_s
return @normalized if @normalized
@normalized = Text::normalize( @unnormalized, doctype )
- @unnormalized = nil
@normalized
end
@@ -150,10 +149,16 @@ def to_s
def value
return @unnormalized if @unnormalized
@unnormalized = Text::unnormalize( @normalized, doctype )
- @normalized = nil
@unnormalized
end
+ # The normalized value of this attribute. That is, the attribute with
+ # entities intact.
+ def normalized=(new_normalized)
+ @normalized = new_normalized
+ @unnormalized = nil
+ end
+
# Returns a copy of this attribute
def clone
Attribute.new self
@@ -190,7 +195,7 @@ def node_type
end
def inspect
- rv = ""
+ rv = +""
write( rv )
rv
end
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 2edeb987..b1caa020 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -69,7 +69,7 @@ class Document < Element
# d.to_s # => "FooBar"
#
# When argument +document+ is given, it must be an existing
- # document object, whose context and attributes (but not chidren)
+ # document object, whose context and attributes (but not children)
# are cloned into the new document:
#
# d = REXML::Document.new(xml_string)
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 4c21dbd5..a5808d7c 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -7,14 +7,6 @@
require_relative "parseexception"
module REXML
- # An implementation note about namespaces:
- # As we parse, when we find namespaces we put them in a hash and assign
- # them a unique ID. We then convert the namespace prefix for the node
- # to the unique ID. This makes namespace lookup much faster for the
- # cost of extra memory use. We save the namespace prefix for the
- # context node and convert it back when we write it.
- @@namespaces = {}
-
# An \REXML::Element object represents an XML element.
#
# An element:
@@ -989,7 +981,7 @@ def previous_element
# :call-seq:
# has_text? -> true or false
#
- # Returns +true if the element has one or more text noded,
+ # Returns +true+ if the element has one or more text noded,
# +false+ otherwise:
#
# d = REXML::Document.new 'text'
@@ -1006,7 +998,7 @@ def has_text?
# text(xpath = nil) -> text_string or nil
#
# Returns the text string from the first text node child
- # in a specified element, if it exists, # +nil+ otherwise.
+ # in a specified element, if it exists, +nil+ otherwise.
#
# With no argument, returns the text from the first text node in +self+:
#
@@ -1014,7 +1006,7 @@ def has_text?
# d.root.text.class # => String
# d.root.text # => "some text "
#
- # With argument +xpath+, returns text from the the first text node
+ # With argument +xpath+, returns text from the first text node
# in the element that matches +xpath+:
#
# d.root.text(1) # => "this is bold!"
@@ -1284,16 +1276,11 @@ def [](name_or_index)
# document.root.attribute("x", "a") # => a:x='a:x'
#
def attribute( name, namespace=nil )
- prefix = nil
- if namespaces.respond_to? :key
- prefix = namespaces.key(namespace) if namespace
- else
- prefix = namespaces.index(namespace) if namespace
- end
+ prefix = namespaces.key(namespace) if namespace
prefix = nil if prefix == 'xmlns'
ret_val =
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
return ret_val unless ret_val.nil?
return nil if prefix.nil?
diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb
index 89a9e84c..573db691 100644
--- a/lib/rexml/entity.rb
+++ b/lib/rexml/entity.rb
@@ -132,24 +132,34 @@ def to_s
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
- if @value
- matches = @value.scan(PEREFERENCE_RE)
- rv = @value.clone
- if @parent
- sum = 0
- matches.each do |entity_reference|
- entity_value = @parent.entity( entity_reference[0] )
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
- raise "entity expansion has grown too large"
- else
- sum += entity_value.bytesize
- end
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
+ @resolved_value ||= resolve_value
+ end
+
+ def parent=(other)
+ @resolved_value = nil
+ super
+ end
+
+ private
+ def resolve_value
+ return nil if @value.nil?
+ return @value unless @value.match?(PEREFERENCE_RE)
+
+ matches = @value.scan(PEREFERENCE_RE)
+ rv = @value.clone
+ if @parent
+ sum = 0
+ matches.each do |entity_reference|
+ entity_value = @parent.entity( entity_reference[0] )
+ if sum + entity_value.bytesize > Security.entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ else
+ sum += entity_value.bytesize
end
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
- return rv
end
- nil
+ rv
end
end
diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb
index 562ef946..a838d835 100644
--- a/lib/rexml/formatters/pretty.rb
+++ b/lib/rexml/formatters/pretty.rb
@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require_relative 'default'
module REXML
@@ -58,7 +58,7 @@ def write_element(node, output)
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
- string = ""
+ string = +""
old_level = @level
@level = 0
node.children.each { |child| write( child, string ) }
@@ -111,7 +111,7 @@ def write_document( node, output )
# itself, then we don't need a carriage return... which makes this
# logic more complex.
node.children.each { |child|
- next if child == node.children[-1] and child.instance_of?(Text)
+ next if child.instance_of?(Text)
unless child == node.children[0] or child.instance_of?(Text) or
(child == node.children[1] and !node.children[0].writethis)
output << "\n"
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index 77926bf2..4c114616 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -262,11 +262,10 @@ def Functions::string_length( string )
string(string).length
end
- # UNTESTED
def Functions::normalize_space( string=nil )
string = string(@@context[:node]) if string.nil?
if string.kind_of? Array
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
+ string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
else
string.to_s.strip.gsub(/\s+/um, ' ')
end
diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb
index 924edf95..2e67252a 100644
--- a/lib/rexml/namespace.rb
+++ b/lib/rexml/namespace.rb
@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require_relative 'xmltokens'
@@ -10,13 +10,17 @@ module Namespace
# The expanded name of the object, valid if name is set
attr_accessor :prefix
include XMLTokens
+ NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
# Sets the name and the expanded name
def name=( name )
@expanded_name = name
- case name
- when NAMESPLIT
+ if name.match?(NAME_WITHOUT_NAMESPACE)
+ @prefix = ""
+ @namespace = ""
+ @name = name
+ elsif name =~ NAMESPLIT
if $1
@prefix = $1
else
@@ -24,7 +28,7 @@ def name=( name )
@namespace = ""
end
@name = $2
- when ""
+ elsif name == ""
@prefix = nil
@namespace = nil
@name = nil
diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb
index 081caba6..c771db70 100644
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@@ -52,10 +52,14 @@ def parent?
# Visit all subnodes of +self+ recursively
def each_recursive(&block) # :yields: node
- self.elements.each {|node|
- block.call(node)
- node.each_recursive(&block)
- }
+ stack = []
+ each { |child| stack.unshift child if child.node_type == :element }
+ until stack.empty?
+ child = stack.pop
+ yield child
+ n = stack.size
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
+ end
end
# Find (and return) first subnode (recursively) for which the block
diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
index 7b16cd1a..e57d05fd 100644
--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@@ -29,6 +29,7 @@ def to_s
err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
+ err.force_encoding("ASCII-8BIT")
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 305b1207..44dc6580 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require_relative '../parseexception'
require_relative '../undefinednamespaceexception'
require_relative '../source'
@@ -7,6 +7,17 @@
module REXML
module Parsers
+ if StringScanner::Version < "3.0.8"
+ module StringScannerCaptures
+ refine StringScanner do
+ def captures
+ values_at(*(1...size))
+ end
+ end
+ end
+ using StringScannerCaptures
+ end
+
# = Using the Pull Parser
# This API is experimental, and subject to change.
# parser = PullParser.new( "texttxet" )
@@ -96,7 +107,7 @@ class BaseParser
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = ""
GEDECL = ""
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
NOTATIONDECL_START = /\A\s* [/'/, "'", "'", /'/]
}
+ module Private
+ TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
+ CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
+ ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NAME_PATTERN = /#{NAME}/um
+ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
+ CHARACTER_REFERENCES = /*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ DEFAULT_ENTITIES_PATTERNS = {}
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
+ default_entities.each do |term|
+ DEFAULT_ENTITIES_PATTERNS[term] = /{term};/
+ end
+ end
+ private_constant :Private
+
def initialize( source )
self.stream = source
@listeners = []
+ @prefixes = Set.new
+ @entity_expansion_count = 0
end
def add_listener( listener )
@@ -122,10 +153,12 @@ def add_listener( listener )
end
attr_reader :source
+ attr_reader :entity_expansion_count
def stream=( source )
@source = SourceFactory.create_from( source )
@closed = nil
+ @have_root = false
@document_status = nil
@tags = []
@stack = []
@@ -180,6 +213,8 @@ def peek depth=0
# Returns the next event. This is a +PullEvent+ object.
def pull
+ @source.drop_parsed_content
+
pull_event.tap do |event|
@listeners.each do |listener|
listener.receive event
@@ -192,236 +227,269 @@ def pull_event
x, @closed = @closed, nil
return [ :end_element, x ]
end
- return [ :end_document ] if empty?
+ if empty?
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
+ end
+ return [ :end_document ]
+ end
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+
+ @source.ensure_buffer
if @document_status == nil
- word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
- word = word[1] unless word.nil?
- #STDERR.puts "WORD = #{word.inspect}"
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- #STDERR.puts "XMLDECL"
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- if need_source_encoding_update?(encoding)
- @source.encoding = encoding
- end
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
- encoding = "UTF-16"
- end
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone ]
- when INSTRUCTION_START
+ start_position = @source.position
+ if @source.match("", true)
return process_instruction
- when DOCTYPE_START
- base_error_message = "Malformed DOCTYPE"
- @source.match(DOCTYPE_START, true)
- @nsstack.unshift(curr_ns=Set.new)
- name = parse_name(base_error_message)
- if @source.match(/\A\s*\[/um, true)
- id = [nil, nil, nil]
- @document_status = :in_doctype
- elsif @source.match(/\A\s*>/um, true)
- id = [nil, nil, nil]
- @document_status = :after_doctype
- else
- id = parse_id(base_error_message,
- accept_external_id: true,
- accept_public_id: false)
- if id[0] == "SYSTEM"
- # For backward compatibility
- id[1], id[2] = id[2], nil
+ elsif @source.match("/um, true)
+ if md.nil?
+ raise REXML::ParseException.new("Unclosed comment", @source)
end
- if @source.match(/\A\s*\[/um, true)
+ if /--|-\z/.match?(md[1])
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+ return [ :comment, md[1] ]
+ elsif @source.match("DOCTYPE", true)
+ base_error_message = "Malformed DOCTYPE"
+ unless @source.match(/\s+/um, true)
+ if @source.match(">")
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid name"
+ end
+ @source.position = start_position
+ raise REXML::ParseException.new(message, @source)
+ end
+ @nsstack.unshift(Set.new)
+ name = parse_name(base_error_message)
+ if @source.match(/\s*\[/um, true)
+ id = [nil, nil, nil]
@document_status = :in_doctype
- elsif @source.match(/\A\s*>/um, true)
+ elsif @source.match(/\s*>/um, true)
+ id = [nil, nil, nil]
@document_status = :after_doctype
+ @source.ensure_buffer
else
- message = "#{base_error_message}: garbage after external ID"
- raise REXML::ParseException.new(message, @source)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: false)
+ if id[0] == "SYSTEM"
+ # For backward compatibility
+ id[1], id[2] = id[2], nil
+ end
+ if @source.match(/\s*\[/um, true)
+ @document_status = :in_doctype
+ elsif @source.match(/\s*>/um, true)
+ @document_status = :after_doctype
+ @source.ensure_buffer
+ else
+ message = "#{base_error_message}: garbage after external ID"
+ raise REXML::ParseException.new(message, @source)
+ end
end
- end
- args = [:start_doctype, name, *id]
- if @document_status == :after_doctype
- @source.match(/\A\s*/um, true)
- @stack << [ :end_doctype ]
- end
- return args
- when /\A\s+/
- else
- @document_status = :after_doctype
- if @source.encoding == "UTF-8"
- @source.buffer.force_encoding(::Encoding::UTF_8)
+ args = [:start_doctype, name, *id]
+ if @document_status == :after_doctype
+ @source.match(/\s*/um, true)
+ @stack << [ :end_doctype ]
+ end
+ return args
+ else
+ message = "Invalid XML"
+ raise REXML::ParseException.new(message, @source)
end
end
end
if @document_status == :in_doctype
- md = @source.match(/\A\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
-
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
-
- when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- match.delete_at(5) if match.size > 5 # Chop out NDATA decl
- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
- else
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
-
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- if attdef[0] =~ /^xmlns:(.*)/
- @nsstack[0] << $1
- end
+ @source.match(/\s*/um, true) # skip spaces
+ start_position = @source.position
+ if @source.match("/um, true)
+ raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
+ return [ :elementdecl, "/um)
- message = "#{base_error_message}: name is missing"
+ match = [:entitydecl, *match_data.captures.compact]
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
else
- message = "#{base_error_message}: invalid declaration name"
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
end
- raise REXML::ParseException.new(message, @source)
- end
- name = parse_name(base_error_message)
- id = parse_id(base_error_message,
- accept_external_id: true,
- accept_public_id: true)
- unless @source.match(/\A\s*>/um, true)
- message = "#{base_error_message}: garbage before end >"
- raise REXML::ParseException.new(message, @source)
+ match << '%' if ref
+ return match
+ elsif @source.match("ATTLIST", true)
+ md = @source.match(Private::ATTLISTDECL_END, true)
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
+
+ pairs = {}
+ values = md[0].strip.scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ elsif @source.match("NOTATION", true)
+ base_error_message = "Malformed notation declaration"
+ unless @source.match(/\s+/um, true)
+ if @source.match(">")
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid name"
+ end
+ @source.position = start_position
+ raise REXML::ParseException.new(message, @source)
+ end
+ name = parse_name(base_error_message)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: true)
+ unless @source.match(/\s*>/um, true)
+ message = "#{base_error_message}: garbage before end >"
+ raise REXML::ParseException.new(message, @source)
+ end
+ return [:notationdecl, name, *id]
+ elsif md = @source.match(/--(.*?)-->/um, true)
+ case md[1]
+ when /--/, /-\z/
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+ return [ :comment, md[1] ] if md
end
- return [:notationdecl, name, *id]
- when DOCTYPE_END
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
+ return [ :externalentity, match[1] ]
+ elsif @source.match(/\]\s*>/um, true)
@document_status = :after_doctype
- @source.match( DOCTYPE_END, true )
return [ :end_doctype ]
end
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
+ end
end
if @document_status == :after_doctype
- @source.match(/\A\s*/um, true)
+ @source.match(/\s*/um, true)
end
begin
- @source.read if @source.buffer.size<2
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
+ start_position = @source.position
+ if @source.match("<", true)
+ # :text's read_until may remain only "<" in buffer. In the
+ # case, buffer is empty here. So we need to fill buffer
+ # here explicitly.
+ @source.ensure_buffer
+ if @source.match("/", true)
@nsstack.shift
last_tag = @tags.pop
- md = @source.match( CLOSE_MATCH, true )
+ md = @source.match(Private::CLOSE_PATTERN, true)
if md and !last_tag
message = "Unexpected top-level end tag (got '#{md[1]}')"
raise REXML::ParseException.new(message, @source)
end
if md.nil? or last_tag != md[1]
message = "Missing end tag for '#{last_tag}'"
- message << " (got '#{md[1]}')" if md
+ message += " (got '#{md[1]}')" if md
+ @source.position = start_position if md.nil?
raise REXML::ParseException.new(message, @source)
end
return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- md = @source.match(/\A(\s*[^>]*>)/um)
+ elsif @source.match("!", true)
+ md = @source.match(/([^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
+ if md[0][0] == ?-
+ md = @source.match(/--(.*?)-->/um, true)
- case md[1]
- when /--/, /-\z/
+ if md.nil? || /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end
- return [ :comment, md[1] ] if md
+ return [ :comment, md[1] ]
else
- md = @source.match( CDATA_PATTERN, true )
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
return [ :cdata, md[1] ] if md
end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
+ elsif @source.match("?", true)
return process_instruction
else
# Get the next tag
- md = @source.match(TAG_MATCH, true)
+ md = @source.match(Private::TAG_PATTERN, true)
unless md
+ @source.position = start_position
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
+ tag = md[1]
@document_status = :in_element
- prefixes = Set.new
- prefixes << md[2] if md[2]
+ @prefixes.clear
+ @prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
- attributes, closed = parse_attributes(prefixes, curr_ns)
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
# Verify that all of the prefixes have been defined
- for prefix in prefixes
+ for prefix in @prefixes
unless @nsstack.find{|k| k.member?(prefix)}
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if closed
- @closed = md[1]
+ @closed = tag
@nsstack.shift
else
- @tags.push( md[1] )
+ if @tags.empty? and @have_root
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
+ end
+ @tags.push( tag )
end
- return [ :start_element, md[1], attributes ]
+ @have_root = true
+ return [ :start_element, tag, attributes ]
end
else
- md = @source.match( TEXT_PATTERN, true )
- if md[0].length == 0
- @source.match( /(\s+)/, true )
+ text = @source.read_until("<")
+ if text.chomp!("<")
+ @source.position -= "<".bytesize
+ end
+ if @tags.empty?
+ unless /\A\s*\z/.match?(text)
+ if @have_root
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
+ else
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
+ end
+ end
+ return pull_event if @have_root
end
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
- #return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
+ return [ :text, text ]
end
rescue REXML::UndefinedNamespaceException
raise
@@ -438,7 +506,9 @@ def pull_event
def entity( reference, entities )
value = nil
value = entities[ reference ] if entities
- if not value
+ if value
+ record_entity_expansion
+ else
value = DEFAULT_ENTITIES[ reference ]
value = value[2] if value
end
@@ -463,35 +533,51 @@ def normalize( input, entities=nil, entity_filter=nil )
# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
+ if string.include?("\r")
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
+ else
+ rv = string.dup
+ end
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
- rv.gsub!( /*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
if matches.size > 0
+ sum = 0
matches.each do |entity_reference|
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
- re = /{entity_reference};/
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /{entity_reference};/
rv.gsub!( re, entity_value )
+ sum += rv.bytesize
+ if sum > Security.entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ end
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
end
- rv.gsub!( /&/, '&' )
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
private
+
+ def record_entity_expansion
+ @entity_expansion_count += 1
+ if @entity_expansion_count > Security.entity_expansion_limit
+ raise "number of entity expansions exceeded, processing aborted."
+ end
+ end
+
def need_source_encoding_update?(xml_declaration_encoding)
return false if xml_declaration_encoding.nil?
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -499,16 +585,16 @@ def need_source_encoding_update?(xml_declaration_encoding)
end
def parse_name(base_error_message)
- md = @source.match(/\A\s*#{NAME}/um, true)
+ md = @source.match(Private::NAME_PATTERN, true)
unless md
- if @source.match(/\A\s*\S/um)
+ if @source.match(/\S/um)
message = "#{base_error_message}: invalid name"
else
message = "#{base_error_message}: name is missing"
end
raise REXML::ParseException.new(message, @source)
end
- md[1]
+ md[0]
end
def parse_id(base_error_message,
@@ -578,96 +664,99 @@ def parse_id_invalid_details(accept_external_id:,
end
def process_instruction
- match_data = @source.match(INSTRUCTION_PATTERN, true)
- unless match_data
- message = "Invalid processing instruction node"
- raise REXML::ParseException.new(message, @source)
+ name = parse_name("Malformed XML: Invalid processing instruction node")
+ if @source.match(/\s+/um, true)
+ match_data = @source.match(/(.*?)\?>/um, true)
+ unless match_data
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
+ content = match_data[1]
+ else
+ content = nil
+ unless @source.match("?>", true)
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
+ end
+ if name == "xml"
+ if @document_status
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
+ end
+ version = VERSION.match(content)
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(content)
+ encoding = encoding[1] unless encoding.nil?
+ if need_source_encoding_update?(encoding)
+ @source.encoding = encoding
+ end
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
+ end
+ standalone = STANDALONE.match(content)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone ]
end
- [:processing_instruction, match_data[1], match_data[2]]
+ [:processing_instruction, name, content]
end
def parse_attributes(prefixes, curr_ns)
attributes = {}
closed = false
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
- if match_data.nil?
- message = "Start tag isn't ended"
- raise REXML::ParseException.new(message, @source)
- end
-
- raw_attributes = match_data[1]
- closed = !match_data[2].nil?
- return attributes, closed if raw_attributes.nil?
- return attributes, closed if raw_attributes.empty?
-
- scanner = StringScanner.new(raw_attributes)
- until scanner.eos?
- if scanner.scan(/\s+/)
- break if scanner.eos?
- end
-
- pos = scanner.pos
- loop do
- break if scanner.scan(ATTRIBUTE_PATTERN)
- unless scanner.scan(QNAME)
- message = "Invalid attribute name: <#{scanner.rest}>"
- raise REXML::ParseException.new(message, @source)
- end
- name = scanner[0]
- unless scanner.scan(/\s*=\s*/um)
+ while true
+ if @source.match(">", true)
+ return attributes, closed
+ elsif @source.match("/>", true)
+ closed = true
+ return attributes, closed
+ elsif match = @source.match(QNAME, true)
+ name = match[1]
+ prefix = match[2]
+ local_part = match[3]
+
+ unless @source.match(/\s*=\s*/um, true)
message = "Missing attribute equal: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
- quote = scanner.scan(/['"]/)
- unless quote
+ unless match = @source.match(/(['"])/, true)
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
- unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
- if match_data
- scanner << "/" if closed
- scanner << ">"
- scanner << match_data[1]
- scanner.pos = pos
- closed = !match_data[2].nil?
- next
- end
- message =
- "Missing attribute value end quote: <#{name}>: <#{quote}>"
+ quote = match[1]
+ start_position = @source.position
+ value = @source.read_until(quote)
+ unless value.chomp!(quote)
+ @source.position = start_position
+ message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
- end
- name = scanner[1]
- prefix = scanner[2]
- local_part = scanner[3]
- # quote = scanner[4]
- value = scanner[5]
- if prefix == "xmlns"
- if local_part == "xml"
- if value != "http://www.w3.org/XML/1998/namespace"
- msg = "The 'xml' prefix must not be bound to any other namespace "+
+ @source.match(/\s*/um, true)
+ if prefix == "xmlns"
+ if local_part == "xml"
+ if value != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif local_part == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self )
+ raise REXML::ParseException.new( msg, @source, self)
end
- elsif local_part == "xmlns"
- msg = "The 'xmlns' prefix must not be declared "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self)
+ curr_ns << local_part
+ elsif prefix
+ prefixes << prefix unless prefix == "xml"
end
- curr_ns << local_part
- elsif prefix
- prefixes << prefix unless prefix == "xml"
- end
- if attributes.has_key?(name)
- msg = "Duplicate attribute #{name.inspect}"
- raise REXML::ParseException.new(msg, @source, self)
- end
+ if attributes[name]
+ msg = "Duplicate attribute #{name.inspect}"
+ raise REXML::ParseException.new(msg, @source, self)
+ end
- attributes[name] = value
+ attributes[name] = value
+ else
+ message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
+ raise REXML::ParseException.new(message, @source)
+ end
end
- return attributes, closed
end
end
end
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index f8b232a2..36b45953 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -47,6 +47,10 @@ def add_listener( listener )
@listeners << listener
end
+ def entity_expansion_count
+ @parser.entity_expansion_count
+ end
+
def each
while has_next?
yield self.pull
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index 6a24ce22..cec9d2fc 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -22,6 +22,10 @@ def source
@parser.source
end
+ def entity_expansion_count
+ @parser.entity_expansion_count
+ end
+
def add_listener( listener )
@parser.add_listener( listener )
end
@@ -157,25 +161,8 @@ def parse
end
end
when :text
- #normalized = @parser.normalize( event[1] )
- #handle( :characters, normalized )
- copy = event[1].clone
-
- esub = proc { |match|
- if @entities.has_key?($1)
- @entities[$1].gsub(Text::REFERENCE, &esub)
- else
- match
- end
- }
-
- copy.gsub!( Text::REFERENCE, &esub )
- copy.gsub!( Text::NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- handle( :characters, copy )
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ handle( :characters, unnormalized )
when :entitydecl
handle_entitydecl( event )
when :processing_instruction, :comment, :attlistdecl,
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index 9e0eb0b3..fa3ac496 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -36,8 +36,8 @@ def parse
@listener.tag_end( event[1] )
@tag_stack.pop
when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
+ unnormalized = @parser.unnormalize( event[1] )
+ @listener.text( unnormalized )
when :processing_instruction
@listener.instruction( *event[1,2] )
when :start_doctype
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index bf9a4254..0cb6f7cc 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -16,7 +16,6 @@ def add_listener( listener )
def parse
tag_stack = []
- in_doctype = false
entities = nil
begin
while true
@@ -39,17 +38,15 @@ def parse
tag_stack.pop
@build_context = @build_context.parent
when :text
- if not in_doctype
- if @build_context[-1].instance_of? Text
- @build_context[-1] << event[1]
- else
- @build_context.add(
- Text.new(event[1], @build_context.whitespace, nil, true)
- ) unless (
- @build_context.ignore_whitespace_nodes and
- event[1].strip.size==0
- )
- end
+ if @build_context[-1].instance_of? Text
+ @build_context[-1] << event[1]
+ else
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
+ ) unless (
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
+ )
end
when :comment
c = Comment.new( event[1] )
@@ -60,14 +57,12 @@ def parse
when :processing_instruction
@build_context.add( Instruction.new( event[1], event[2] ) )
when :end_doctype
- in_doctype = false
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
doctype = DocType.new( event[1..-1], @build_context )
@build_context = doctype
entities = {}
- in_doctype = true
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
@build_context.add( n )
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index d92678fe..bd3b6856 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: false
+
require_relative '../namespace'
require_relative '../xmltokens'
@@ -38,108 +39,143 @@ def predicate path
parsed
end
- def abbreviate( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
- document = false
- while path.size > 0
- op = path.shift
+ def abbreviate(path_or_parsed)
+ if path_or_parsed.kind_of?(String)
+ parsed = parse(path_or_parsed)
+ else
+ parsed = path_or_parsed
+ end
+ components = []
+ component = nil
+ while parsed.size > 0
+ op = parsed.shift
case op
when :node
+ component << "node()"
when :attribute
- string << "/" if string.size > 0
- string << "@"
+ component = "@"
+ components << component
when :child
- string << "/" if string.size > 0
+ component = ""
+ components << component
when :descendant_or_self
- string << "/"
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ component = ""
+ components << component
+ else
+ component = "descendant-or-self::"
+ components << component
+ end
when :self
- string << "."
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ components << "."
+ else
+ component = "self::"
+ components << component
+ end
when :parent
- string << ".."
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ components << ".."
+ else
+ component = "parent::"
+ components << component
+ end
when :any
- string << "*"
+ component << "*"
when :text
- string << "text()"
+ component << "text()"
when :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
+ component = op.to_s.tr("_", "-") << "::"
+ components << component
when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
+ prefix = parsed.shift
+ name = parsed.shift
+ component << prefix+":" if prefix.size > 0
+ component << name
when :predicate
- string << '['
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
- string << ']'
+ component << '['
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
+ component << ']'
when :document
- document = true
+ components << ""
when :function
- string << path.shift
- string << "( "
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
- string << " )"
+ component << parsed.shift
+ component << "( "
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
+ component << " )"
when :literal
- string << %Q{ "#{path.shift}" }
+ component << quote_literal(parsed.shift)
else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
+ component << "UNKNOWN("
+ component << op.inspect
+ component << ")"
end
end
- string = "/"+string if document
- return string
+ case components
+ when [""]
+ "/"
+ when ["", ""]
+ "//"
+ else
+ components.join("/")
+ end
end
- def expand( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
+ def expand(path_or_parsed)
+ if path_or_parsed.kind_of?(String)
+ parsed = parse(path_or_parsed)
+ else
+ parsed = path_or_parsed
+ end
+ path = ""
document = false
- while path.size > 0
- op = path.shift
+ while parsed.size > 0
+ op = parsed.shift
case op
when :node
- string << "node()"
+ path << "node()"
when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
+ path << "/" unless path.size == 0
+ path << op.to_s.tr("_", "-")
+ path << "::"
when :any
- string << "*"
+ path << "*"
when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
+ prefix = parsed.shift
+ name = parsed.shift
+ path << prefix+":" if prefix.size > 0
+ path << name
when :predicate
- string << '['
- string << predicate_to_string( path.shift ) { |x| expand(x) }
- string << ']'
+ path << '['
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
+ path << ']'
when :document
document = true
else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
+ path << "UNKNOWN("
+ path << op.inspect
+ path << ")"
end
end
- string = "/"+string if document
- return string
+ path = "/"+path if document
+ path
end
- def predicate_to_string( path, &block )
- string = ""
- case path[0]
+ def predicate_to_path(parsed, &block)
+ path = ""
+ case parsed[0]
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
- op = path.shift
+ op = parsed.shift
case op
when :eq
op = "="
@@ -156,36 +192,50 @@ def predicate_to_string( path, &block )
when :union
op = "|"
end
- left = predicate_to_string( path.shift, &block )
- right = predicate_to_string( path.shift, &block )
- string << " "
- string << left
- string << " "
- string << op.to_s
- string << " "
- string << right
- string << " "
+ left = predicate_to_path( parsed.shift, &block )
+ right = predicate_to_path( parsed.shift, &block )
+ path << left
+ path << " "
+ path << op.to_s
+ path << " "
+ path << right
when :function
- path.shift
- name = path.shift
- string << name
- string << "( "
- string << predicate_to_string( path.shift, &block )
- string << " )"
+ parsed.shift
+ name = parsed.shift
+ path << name
+ path << "("
+ parsed.shift.each_with_index do |argument, i|
+ path << ", " if i > 0
+ path << predicate_to_path(argument, &block)
+ end
+ path << ")"
when :literal
- path.shift
- string << " "
- string << path.shift.inspect
- string << " "
+ parsed.shift
+ path << quote_literal(parsed.shift)
else
- string << " "
- string << yield( path )
- string << " "
+ path << yield( parsed )
end
- return string.squeeze(" ")
+ return path.squeeze(" ")
end
+ # For backward compatibility
+ alias_method :preciate_to_string, :predicate_to_path
private
+ def quote_literal( literal )
+ case literal
+ when String
+ # XPath 1.0 does not support escape characters.
+ # Assumes literal does not contain both single and double quotes.
+ if literal.include?("'")
+ "\"#{literal}\""
+ else
+ "'#{literal}'"
+ end
+ else
+ literal.inspect
+ end
+ end
+
#LocationPath
# | RelativeLocationPath
# | '/' RelativeLocationPath?
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 8a01f0e1..39e92a57 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -26,10 +26,12 @@
# - REXML::Document.
# - REXML::Element.
#
+# There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
+#
module REXML
COPYRIGHT = "Copyright © 2001-2008 Sean Russell "
DATE = "2008/019"
- VERSION = "3.2.5"
+ VERSION = "3.3.3"
REVISION = ""
Copyright = COPYRIGHT
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 90b370b9..ff887fc0 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -1,8 +1,28 @@
# coding: US-ASCII
# frozen_string_literal: false
+
+require "strscan"
+
require_relative 'encoding'
module REXML
+ if StringScanner::Version < "1.0.0"
+ module StringScannerCheckScanString
+ refine StringScanner do
+ def check(pattern)
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+ super(pattern)
+ end
+
+ def scan(pattern)
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+ super(pattern)
+ end
+ end
+ end
+ using StringScannerCheckScanString
+ end
+
# Generates Source-s. USE THIS CLASS.
class SourceFactory
# Generates a Source object
@@ -30,18 +50,27 @@ def SourceFactory::create_from(arg)
# objects and provides consumption of text
class Source
include Encoding
- # The current buffer (what we're going to read next)
- attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
+ module Private
+ SCANNER_RESET_SIZE = 100000
+ PRE_DEFINED_TERM_PATTERNS = {}
+ pre_defined_terms = ["'", '"', "<"]
+ pre_defined_terms.each do |term|
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
+ end
+ end
+ private_constant :Private
+
# Constructor
# @param arg must be a String, and should be a valid XML document
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
- @orig = @buffer = arg
+ @orig = arg
+ @scanner = StringScanner.new(@orig)
if encoding
self.encoding = encoding
else
@@ -50,6 +79,20 @@ def initialize(arg, encoding=nil)
@line = 0
end
+ # The current buffer (what we're going to read next)
+ def buffer
+ @scanner.rest
+ end
+
+ def drop_parsed_content
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
+ @scanner.string = @scanner.rest
+ end
+ end
+
+ def buffer_encoding=(encoding)
+ @scanner.string.force_encoding(encoding)
+ end
# Inherited from Encoding
# Overridden to support optimized en/decoding
@@ -58,98 +101,78 @@ def encoding=(enc)
encoding_updated
end
- # Scans the source for a given pattern. Note, that this is not your
- # usual scan() method. For one thing, the pattern argument has some
- # requirements; for another, the source can be consumed. You can easily
- # confuse this method. Originally, the patterns were easier
- # to construct and this method more robust, because this method
- # generated search regexps on the fly; however, this was
- # computationally expensive and slowed down the entire REXML package
- # considerably, since this is by far the most commonly called method.
- # @param pattern must be a Regexp, and must be in the form of
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
- # will be returned; the second group is used if the consume flag is
- # set.
- # @param consume if true, the pattern returned will be consumed, leaving
- # everything after it in the Source.
- # @return the pattern, if found, or nil if the Source is empty or the
- # pattern is not found.
- def scan(pattern, cons=false)
- return nil if @buffer.nil?
- rv = @buffer.scan(pattern)
- @buffer = $' if cons and rv.size>0
- rv
+ def read(term = nil)
end
- def read
+ def read_until(term)
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+ data = @scanner.scan_until(pattern)
+ unless data
+ data = @scanner.rest
+ @scanner.pos = @scanner.string.bytesize
+ end
+ data
end
- def consume( pattern )
- @buffer = $' if pattern.match( @buffer )
+ def ensure_buffer
end
- def match_to( char, pattern )
- return pattern.match(@buffer)
+ def match(pattern, cons=false)
+ if cons
+ @scanner.scan(pattern).nil? ? nil : @scanner
+ else
+ @scanner.check(pattern).nil? ? nil : @scanner
+ end
end
- def match_to_consume( char, pattern )
- md = pattern.match(@buffer)
- @buffer = $'
- return md
+ def position
+ @scanner.pos
end
- def match(pattern, cons=false)
- md = pattern.match(@buffer)
- @buffer = $' if cons and md
- return md
+ def position=(pos)
+ @scanner.pos = pos
end
# @return true if the Source is exhausted
def empty?
- @buffer == ""
- end
-
- def position
- @orig.index( @buffer )
+ @scanner.eos?
end
# @return the current line in the source
def current_line
lines = @orig.split
- res = lines.grep @buffer[0..30]
+ res = lines.grep @scanner.rest[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
private
+
def detect_encoding
- buffer_encoding = @buffer.encoding
+ scanner_encoding = @scanner.rest.encoding
detected_encoding = "UTF-8"
begin
- @buffer.force_encoding("ASCII-8BIT")
- if @buffer[0, 2] == "\xfe\xff"
- @buffer[0, 2] = ""
+ @scanner.string.force_encoding("ASCII-8BIT")
+ if @scanner.scan(/\xfe\xff/n)
detected_encoding = "UTF-16BE"
- elsif @buffer[0, 2] == "\xff\xfe"
- @buffer[0, 2] = ""
+ elsif @scanner.scan(/\xff\xfe/n)
detected_encoding = "UTF-16LE"
- elsif @buffer[0, 3] == "\xef\xbb\xbf"
- @buffer[0, 3] = ""
+ elsif @scanner.scan(/\xef\xbb\xbf/n)
detected_encoding = "UTF-8"
end
ensure
- @buffer.force_encoding(buffer_encoding)
+ @scanner.string.force_encoding(scanner_encoding)
end
self.encoding = detected_encoding
end
def encoding_updated
if @encoding != 'UTF-8'
- @buffer = decode(@buffer)
+ @scanner.string = decode(@scanner.rest)
@to_utf = true
else
@to_utf = false
- @buffer.force_encoding ::Encoding::UTF_8
+ @scanner.string.force_encoding(::Encoding::UTF_8)
end
end
end
@@ -172,7 +195,7 @@ def initialize(arg, block_size=500, encoding=nil)
end
if !@to_utf and
- @buffer.respond_to?(:force_encoding) and
+ @orig.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
@@ -181,65 +204,72 @@ def initialize(arg, block_size=500, encoding=nil)
end
end
- def scan(pattern, cons=false)
- rv = super
- # You'll notice that this next section is very similar to the same
- # section in match(), but just a liiittle different. This is
- # because it is a touch faster to do it this way with scan()
- # than the way match() does it; enough faster to warrant duplicating
- # some code
- if rv.size == 0
- until @buffer =~ pattern or @source.nil?
- begin
- @buffer << readline
- rescue Iconv::IllegalSequence
- raise
- rescue
- @source = nil
+ def read(term = nil, min_bytes = 1)
+ term = encode(term) if term
+ begin
+ str = readline(term)
+ @scanner << str
+ read_bytes = str.bytesize
+ begin
+ while read_bytes < min_bytes
+ str = readline(term)
+ @scanner << str
+ read_bytes += str.bytesize
end
+ rescue IOError
end
- rv = super
+ true
+ rescue Exception, NameError
+ @source = nil
+ false
end
- rv.taint if RUBY_VERSION < '2.7'
- rv
end
- def read
- begin
- @buffer << readline
- rescue Exception, NameError
- @source = nil
+ def read_until(term)
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+ term = encode(term)
+ until str = @scanner.scan_until(pattern)
+ break if @source.nil?
+ break if @source.eof?
+ @scanner << readline(term)
+ end
+ if str
+ read if @scanner.eos? and !@source.eof?
+ str
+ else
+ rest = @scanner.rest
+ @scanner.pos = @scanner.string.bytesize
+ rest
end
end
- def consume( pattern )
- match( pattern, true )
+ def ensure_buffer
+ read if @scanner.eos? && @source
end
def match( pattern, cons=false )
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- while !rv and @source
- begin
- @buffer << readline
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- rescue
- @source = nil
+ # To avoid performance issue, we need to increase bytes to read per scan
+ min_bytes = 1
+ while true
+ if cons
+ md = @scanner.scan(pattern)
+ else
+ md = @scanner.check(pattern)
end
+ break if md
+ return nil if pattern.is_a?(String)
+ return nil if @source.nil?
+ return nil unless read(nil, min_bytes)
+ min_bytes *= 2
end
- rv.taint if RUBY_VERSION < '2.7'
- rv
+
+ md.nil? ? nil : @scanner
end
def empty?
super and ( @source.nil? || @source.eof? )
end
- def position
- @er_source.pos rescue 0
- end
-
# @return the current line in the source
def current_line
begin
@@ -263,8 +293,8 @@ def current_line
end
private
- def readline
- str = @source.readline(@line_break)
+ def readline(term = nil)
+ str = @source.readline(term || @line_break)
if @pending_buffer
if str.nil?
str = @pending_buffer
@@ -290,7 +320,7 @@ def encoding_updated
@source.set_encoding(@encoding, @encoding)
end
@line_break = encode(">")
- @pending_buffer, @buffer = @buffer, ""
+ @pending_buffer, @scanner.string = @scanner.rest, ""
@pending_buffer.force_encoding(@encoding)
super
end
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 050b09c9..7e0befe9 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -1,4 +1,4 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require_relative 'security'
require_relative 'entity'
require_relative 'doctype'
@@ -131,7 +131,7 @@ def parent= parent
def Text.check string, pattern, doctype
# illegal anywhere
- if string !~ VALID_XML_CHARS
+ if !string.match?(VALID_XML_CHARS)
if String.method_defined? :encode
string.chars.each do |c|
case c.ord
@@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
end
end
- # context sensitive
- string.scan(pattern) do
- if $1[-1] != ?;
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
- elsif $1[0] == ?&
- if $5 and $5[0] == ?#
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
- when *VALID_CHAR
+ pos = 0
+ while (index = string.index(/<|&/, pos))
+ if string[index] == "<"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ unless (end_index = string.index(/[^\s];/, index + 1))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ value = string[(index + 1)..end_index]
+ if /\s/.match?(value)
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ if value[0] == "#"
+ character_reference = value[1..-1]
+
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
+ if character_reference[0] == "x" || character_reference[-1] == "x"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
else
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
end
- # FIXME: below can't work but this needs API change.
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
- # if !doctype or !doctype.entities.has_key?($3)
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
- # end
end
+
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
+ end
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end
+
+ pos = end_index + 1
end
+
+ string
end
def node_type
@@ -371,7 +391,7 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&' )
- copy = copy.gsub( "&", "&" )
+ copy = copy.gsub( "&", "&" ) if copy.include?("&")
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
@@ -382,7 +402,9 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
else
# Replace all ampersands that aren't part of an entity
DocType::DEFAULT_ENTITIES.each_value do |entity|
- copy = copy.gsub(entity.value, "{entity.name};" )
+ if copy.include?(entity.value)
+ copy = copy.gsub(entity.value, "{entity.name};" )
+ end
end
end
copy
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index d8b88e7a..5eb1e5a9 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -590,6 +590,7 @@ def filter_nodeset(nodeset)
def evaluate_predicate(expression, nodesets)
enter(:predicate, expression, nodesets) if @debug
+ new_nodeset_count = 0
new_nodesets = nodesets.collect do |nodeset|
new_nodeset = []
subcontext = { :size => nodeset.size }
@@ -606,17 +607,20 @@ def evaluate_predicate(expression, nodesets)
result = result[0] if result.kind_of? Array and result.length == 1
if result.kind_of? Numeric
if result == node.position
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
end
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
if result.size > 0
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
end
end
else
if result
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
end
end
end
diff --git a/rexml.gemspec b/rexml.gemspec
index 620a8981..0de3e845 100644
--- a/rexml.gemspec
+++ b/rexml.gemspec
@@ -16,6 +16,10 @@ Gem::Specification.new do |spec|
spec.homepage = "https://github.com/ruby/rexml"
spec.license = "BSD-2-Clause"
+ spec.metadata = {
+ "changelog_uri" => "#{spec.homepage}/releases/tag/v#{spec.version}"
+ }
+
files = [
"LICENSE.txt",
"NEWS.md",
@@ -52,10 +56,8 @@ Gem::Specification.new do |spec|
spec.files = files
spec.rdoc_options.concat(["--main", "README.md"])
spec.extra_rdoc_files = rdoc_files
- spec.bindir = "exe"
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
- spec.add_development_dependency "bundler"
- spec.add_development_dependency "rake"
- spec.add_development_dependency "test-unit"
+ spec.required_ruby_version = '>= 2.5.0'
+
+ spec.add_runtime_dependency("strscan")
end
diff --git a/test/data/much_ado.xml b/test/data/much_ado.xml
index f008fadb..0040088c 100644
--- a/test/data/much_ado.xml
+++ b/test/data/much_ado.xml
@@ -4735,7 +4735,7 @@ CLAUDIO, BENEDICK, HERO, BEATRICE, and Attendants
But they shall find, awaked in such a kind,
Both strength of limb and policy of mind,
Ability in means and choice of friends,
-To quit me of them throughly.
+To quit me of them thoroughly.
diff --git a/test/data/ofbiz-issues-full-177.xml b/test/data/ofbiz-issues-full-177.xml
index bfff771d..e1f7bdfd 100644
--- a/test/data/ofbiz-issues-full-177.xml
+++ b/test/data/ofbiz-issues-full-177.xml
@@ -152,8 +152,8 @@
-
-
+
+
diff --git a/test/data/test/tests.xml b/test/data/test/tests.xml
index cf03b42b..fd415679 100644
--- a/test/data/test/tests.xml
+++ b/test/data/test/tests.xml
@@ -299,7 +299,7 @@
-
+
web-app
web-app
web-app
@@ -318,7 +318,7 @@
-
+
web-app
web-app
web-app
diff --git a/test/data/tutorial.xml b/test/data/tutorial.xml
index bf5783d0..9c4639b9 100644
--- a/test/data/tutorial.xml
+++ b/test/data/tutorial.xml
@@ -286,7 +286,7 @@ el1 << Text.new(" cruel world")
strings.
I can't emphasize this enough, because people do have problems with
- this. REXML can't possibly alway guess correctly how your text is
+ this. REXML can't possibly always guess correctly how your text is
encoded, so it always assumes the text is UTF-8. It also does not warn
you when you try to add text which isn't properly encoded, for the
same reason. You must make sure that you are adding UTF-8 text.
diff --git a/test/formatter/test_default.rb b/test/formatter/test_default.rb
index 321d8180..aa403dbe 100644
--- a/test/formatter/test_default.rb
+++ b/test/formatter/test_default.rb
@@ -2,7 +2,7 @@ module REXMLTests
class DefaultFormatterTest < Test::Unit::TestCase
def format(node)
formatter = REXML::Formatters::Default.new
- output = ""
+ output = +""
formatter.write(node, output)
output
end
diff --git a/test/functions/test_base.rb b/test/functions/test_base.rb
index 74dc1a31..daa38156 100644
--- a/test/functions/test_base.rb
+++ b/test/functions/test_base.rb
@@ -229,8 +229,30 @@ def test_normalize_space
assert_equal( [REXML::Comment.new("COMMENT A")], m )
end
+ def test_normalize_space_strings
+ source = <<-XML
+breakfast boosts\t\t
+
+concentration
+Coffee beans
+ aroma
+
+
+
+ Dessert
+ \t\t after dinner
+ XML
+ normalized_texts = REXML::XPath.each(REXML::Document.new(source), "normalize-space(//text())").to_a
+ assert_equal([
+ "breakfast boosts concentration",
+ "Coffee beans aroma",
+ "Dessert after dinner",
+ ],
+ normalized_texts)
+ end
+
def test_string_nil_without_context
- doc = REXML::Document.new(<<-XML)
+ doc = REXML::Document.new(<<~XML)
diff --git a/test/parse/test_attribute_list_declaration.rb b/test/parse/test_attribute_list_declaration.rb
new file mode 100644
index 00000000..43882528
--- /dev/null
+++ b/test/parse/test_attribute_list_declaration.rb
@@ -0,0 +1,30 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseAttributeListDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_space
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("]>")
+ end
+ end
+
+ def test_linear_performance_tab_and_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("" * n +
+ "\">]>")
+ end
+ end
+ end
+end
diff --git a/test/parse/test_cdata.rb b/test/parse/test_cdata.rb
new file mode 100644
index 00000000..b5f1a3bc
--- /dev/null
+++ b/test/parse/test_cdata.rb
@@ -0,0 +1,17 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseCData < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('" * n + ' ]]>')
+ end
+ end
+ end
+end
diff --git a/test/parse/test_character_reference.rb b/test/parse/test_character_reference.rb
new file mode 100644
index 00000000..bf8d2190
--- /dev/null
+++ b/test/parse/test_character_reference.rb
@@ -0,0 +1,17 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseCharacterReference < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_many_preceding_zeros
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('')
+ end
+ end
+ end
+end
diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb
new file mode 100644
index 00000000..4475dca7
--- /dev/null
+++ b/test/parse/test_comment.rb
@@ -0,0 +1,151 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseComment < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def parse(xml)
+ REXML::Document.new(xml)
+ end
+
+ class TestInvalid < self
+ def test_toplevel_unclosed_comment
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 11
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_toplevel_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 9
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_doctype_malformed_comment_inner
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 26
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_doctype_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 24
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_short
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 8
+ Last 80 unconsumed characters:
+ -->
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_inner
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 14
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 12
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+ end
+
+ def test_before_root
+ parser = REXML::Parsers::BaseParser.new('')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal(" ok comment ", events[:comment])
+ end
+
+ def test_after_root
+ parser = REXML::Parsers::BaseParser.new('')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal(" ok comment ", events[:comment])
+ end
+
+ def test_linear_performance_top_level_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('')
+ end
+ end
+
+ def test_linear_performance_in_element_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('')
+ end
+ end
+ end
+end
diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb
index 55713909..99c23745 100644
--- a/test/parse/test_document_type_declaration.rb
+++ b/test/parse/test_document_type_declaration.rb
@@ -1,9 +1,13 @@
# frozen_string_literal: false
require "test/unit"
+require "core_assertions"
+
require "rexml/document"
module REXMLTests
class TestParseDocumentTypeDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
private
def parse(doctype)
REXML::Document.new(<<-XML).doctype
@@ -36,6 +40,66 @@ def test_garbage_plus_before_name_at_line_start
+ r SYSTEM "urn:x-rexml:test" [ ]>
DETAIL
end
+
+ def test_no_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: name is missing
+Line: 3
+Position: 17
+Last 80 unconsumed characters:
+
+ DETAIL
+ end
+ end
+
+ class TestUnclosed < self
+ def test_no_extra_node
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("
+ DOCTYPE
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed DOCTYPE: invalid declaration
+ Line: 1
+ Position: 20
+ Last 80 unconsumed characters:
+ #{' '}
+ DETAIL
+ end
+
+ def test_text
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new(<<~DOCTYPE)
+ " * n + "]>")
+ rescue
+ end
+ end
+ end
+
+ def test_linear_performance_comment_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("" * n + " -->]>")
+ end
+ end
+
+ def test_linear_performance_external_entity_right_bracket_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("" * n + ";]>")
+ end
+ end
end
end
diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb
index 9f172a28..2b0746ea 100644
--- a/test/parse/test_element.rb
+++ b/test/parse/test_element.rb
@@ -1,8 +1,12 @@
require "test/unit"
+require "core_assertions"
+
require "rexml/document"
module REXMLTests
class TestParseElement < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
def parse(xml)
REXML::Document.new(xml)
end
@@ -41,9 +45,22 @@ def test_empty_namespace_attribute_name
assert_equal(<<-DETAIL.chomp, exception.to_s)
Invalid attribute name: <:a="">
Line: 1
-Position: 9
+Position: 13
Last 80 unconsumed characters:
+:a="">
+ DETAIL
+ end
+ def test_empty_namespace_attribute_name_with_utf8_character
+ exception = assert_raise(REXML::ParseException) do
+ parse("") # U+200B ZERO WIDTH SPACE
+ end
+ assert_equal(<<-DETAIL.chomp.force_encoding("ASCII-8BIT"), exception.to_s)
+Invalid attribute name: <:\xE2\x80\x8B>
+Line: 1
+Position: 8
+Last 80 unconsumed characters:
+:\xE2\x80\x8B>
DETAIL
end
@@ -72,6 +89,47 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start
DETAIL
end
+
+ def test_after_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra tag at the end of the document (got '')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra tag at the end of the document (got '" * n + '">')
+ end
end
end
end
diff --git a/test/parse/test_entity_declaration.rb b/test/parse/test_entity_declaration.rb
new file mode 100644
index 00000000..81d95b58
--- /dev/null
+++ b/test/parse/test_entity_declaration.rb
@@ -0,0 +1,557 @@
+# frozen_string_literal: false
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseEntityDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ private
+ def xml(internal_subset)
+ <<-XML
+
+
+ XML
+ end
+
+ def parse(internal_subset)
+ REXML::Document.new(xml(internal_subset)).doctype
+ end
+
+ public
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-GEDecl
+ class TestGeneralEntityDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
+ class TestName < self
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ invalid&name "valid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityDef
+ class TestEntityDefinition < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue
+ class TestEntityValue < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 59
+Last 80 unconsumed characters:
+ valid-name invalid-entity-value>]>
+ DETAIL
+ end
+
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 44
+Last 80 unconsumed characters:
+ valid-name "% &">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ valid-name "invalid-entity-value'>]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID
+ class TestExternalID < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral
+ class TestSystemLiteral < self
+ def test_no_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 68
+Last 80 unconsumed characters:
+ valid-name SYSTEM invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_no_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 90
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 70
+Last 80 unconsumed characters:
+ valid-name SYSTEM 'invalid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" "invalid-system-literal'>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 45
+Last 80 unconsumed characters:
+ valid-name SYSTEM>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 67
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar
+ class TestPublicIDLiteral < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 90
+Last 80 unconsumed characters:
+ valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_prohibited_pubid_character
+ exception = assert_raise(REXML::ParseException) do
+ # U+3042 HIRAGANA LETTER A
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8'))
+Malformed entity declaration
+Line: 1
+Position: 74
+Last 80 unconsumed characters:
+ valid-name PUBLIC "\u3042" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ valid-name PUBLIC "invalid-pubid-literal' "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 45
+Last 80 unconsumed characters:
+ valid-name PUBLIC>]>
+ DETAIL
+ end
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NDataDecl
+ class TestNotationDataDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 109
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" "valid-system-literal" NDATA invalid&nam
+ DETAIL
+ end
+ end
+
+ def test_entity_value_and_notation_data_declaration
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 83
+Last 80 unconsumed characters:
+ valid-name "valid-entity-value" NDATA valid-ndata-value>]>
+ DETAIL
+ end
+ end
+
+ def test_no_space
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 102
+Last 80 unconsumed characters:
+ valid-namePUBLIC"valid-pubid-literal""valid-system-literal"NDATAvalid-name>]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDecl
+ class TestParsedEntityDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
+ class TestName < self
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 63
+Last 80 unconsumed characters:
+ % invalid&name "valid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDef
+ class TestParsedEntityDefinition < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue
+ class TestEntityValue < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ % valid-name invalid-entity-value>]>
+ DETAIL
+ end
+
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 46
+Last 80 unconsumed characters:
+ % valid-name "% &">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 63
+Last 80 unconsumed characters:
+ % valid-name 'invalid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID
+ class TestExternalID < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral
+ class TestSystemLiteral < self
+ def test_no_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 70
+Last 80 unconsumed characters:
+ % valid-name SYSTEM invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_no_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 72
+Last 80 unconsumed characters:
+ % valid-name SYSTEM "invalid-system-literal'>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 94
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal" 'invalid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 47
+Last 80 unconsumed characters:
+ % valid-name SYSTEM>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 69
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar
+ class TestPublicIDLiteral < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ % valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_prohibited_pubid_character
+ exception = assert_raise(REXML::ParseException) do
+ # U+3042 HIRAGANA LETTER A
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8'))
+Malformed entity declaration
+Line: 1
+Position: 76
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "\u3042" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 94
+Last 80 unconsumed characters:
+ % valid-name PUBLIC 'invalid-pubid-literal" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 47
+Last 80 unconsumed characters:
+ % valid-name PUBLIC>]>
+ DETAIL
+ end
+ end
+ end
+
+ def test_entity_value_and_notation_data_declaration
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 85
+Last 80 unconsumed characters:
+ % valid-name "valid-entity-value" NDATA valid-ndata-value>]>
+ DETAIL
+ end
+ end
+
+ def test_no_space
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 67
+Last 80 unconsumed characters:
+ %valid-nameSYSTEM"valid-system-literal">]>
+ DETAIL
+ end
+ end
+
+ def test_empty
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 5
+Position: 70
+Last 80 unconsumed characters:
+> ]>
+ DETAIL
+ end
+
+ def test_linear_performance_entity_value_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_entity_value_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("]" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_system_literal_in_system_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("]" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_system_literal_in_public_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("]" * n +
+ "\">]>")
+ end
+ end
+ end
+end
diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb
index 19a0536d..9e81b6a4 100644
--- a/test/parse/test_notation_declaration.rb
+++ b/test/parse/test_notation_declaration.rb
@@ -35,7 +35,7 @@ def test_no_name
Line: 5
Position: 72
Last 80 unconsumed characters:
- ]>
+ ]>
DETAIL
end
diff --git a/test/parse/test_processing_instruction.rb b/test/parse/test_processing_instruction.rb
index f0c0c24e..ba381dc4 100644
--- a/test/parse/test_processing_instruction.rb
+++ b/test/parse/test_processing_instruction.rb
@@ -1,8 +1,12 @@
require "test/unit"
+require "core_assertions"
+
require "rexml/document"
module REXMLTests
- class TestParseProcessinInstruction < Test::Unit::TestCase
+ class TestParseProcessingInstruction < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
def parse(xml)
REXML::Document.new(xml)
end
@@ -13,31 +17,110 @@ def test_no_name
parse("?>")
end
assert_equal(<<-DETAIL.chomp, exception.to_s)
-Invalid processing instruction node
+Malformed XML: Invalid processing instruction node: invalid name
Line: 1
Position: 4
Last 80 unconsumed characters:
-?>
+?>
+ DETAIL
+ end
+
+ def test_unclosed_content
+ exception = assert_raise(REXML::ParseException) do
+ parse("')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: XML declaration is not at the start
+ Line: 1
+ Position: 25
+ Last 80 unconsumed characters:
+
DETAIL
end
+ end
- def test_garbage_text
- # TODO: This should be parse error.
- # Create test/parse/test_document.rb or something and move this to it.
- doc = parse(<<-XML)
-x?>
- XML
- pi = doc.children[1]
- assert_equal([
- "x",
- "y\n"]],
+ [[doc.children[0].target, doc.children[0].content],
+ [doc.children[1].target, doc.children[1].content]])
+ end
+
+ def test_before_root
+ parser = REXML::Parsers::BaseParser.new('')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal("abc", events[:processing_instruction])
+ end
+
+ def test_after_root
+ parser = REXML::Parsers::BaseParser.new('')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal("abc", events[:processing_instruction])
+ end
+
+ def test_content_question
+ document = REXML::Document.new("")
+ assert_equal("con?tent", document.root.children.first.content)
+ end
+
+ def test_linear_performance_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("" * n + " ?>")
+ end
+ end
+
+ def test_linear_performance_tab
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new(" ?>")
end
end
end
diff --git a/test/parse/test_text.rb b/test/parse/test_text.rb
new file mode 100644
index 00000000..04f553ae
--- /dev/null
+++ b/test/parse/test_text.rb
@@ -0,0 +1,57 @@
+require "test/unit"
+require 'rexml/parsers/baseparser'
+
+module REXMLTests
+ class TestParseText < Test::Unit::TestCase
+ class TestInvalid < self
+ def test_before_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('b')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Content at the start of the document (got 'b')
+ Line: 1
+ Position: 4
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+
+ def test_after_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('c')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra content at the end of the document (got 'c')
+ Line: 1
+ Position: 8
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+ end
+
+ def test_whitespace_characters_after_root
+ parser = REXML::Parsers::BaseParser.new('b ')
+
+ events = []
+ while parser.has_next?
+ event = parser.pull
+ case event[0]
+ when :text
+ events << event[1]
+ end
+ end
+
+ assert_equal(["b"], events)
+ end
+ end
+end
diff --git a/test/parser/test_base_parser.rb b/test/parser/test_base_parser.rb
new file mode 100644
index 00000000..17d01979
--- /dev/null
+++ b/test/parser/test_base_parser.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: false
+
+require 'rexml/parsers/baseparser'
+
+module REXMLTests
+ class BaseParserTester < Test::Unit::TestCase
+ def test_large_xml
+ large_text = "a" * 100_000
+ xml = <<-XML
+
+
+ #{large_text}
+ #{large_text}
+
+ XML
+
+ parser = REXML::Parsers::BaseParser.new(xml)
+ while parser.has_next?
+ parser.pull
+ end
+
+ assert do
+ parser.position < xml.bytesize
+ end
+ end
+ end
+end
diff --git a/test/parser/test_ultra_light.rb b/test/parser/test_ultra_light.rb
index 44fd1d1e..b3f576ff 100644
--- a/test/parser/test_ultra_light.rb
+++ b/test/parser/test_ultra_light.rb
@@ -17,7 +17,6 @@ def test_entity_declaration
[:entitydecl, "name", "value"]
],
[:start_element, :parent, "root", {}],
- [:text, "\n"],
],
parse(<<-INTERNAL_SUBSET))
diff --git a/test/parser/test_xpath.rb b/test/parser/test_xpath.rb
new file mode 100644
index 00000000..9143d25c
--- /dev/null
+++ b/test/parser/test_xpath.rb
@@ -0,0 +1,115 @@
+# frozen_string_literal: false
+
+require "test/unit"
+require "rexml/parsers/xpathparser"
+
+module REXMLTests
+ class TestXPathParser < Test::Unit::TestCase
+ sub_test_case("#abbreviate") do
+ def abbreviate(xpath)
+ parser = REXML::Parsers::XPathParser.new
+ parser.abbreviate(xpath)
+ end
+
+ def test_document
+ assert_equal("/",
+ abbreviate("/"))
+ end
+
+ def test_descendant_or_self_only
+ assert_equal("//",
+ abbreviate("/descendant-or-self::node()/"))
+ end
+
+ def test_descendant_or_self_absolute
+ assert_equal("//a/b",
+ abbreviate("/descendant-or-self::node()/a/b"))
+ end
+
+ def test_descendant_or_self_relative
+ assert_equal("a//b",
+ abbreviate("a/descendant-or-self::node()/b"))
+ end
+
+ def test_descendant_or_self_not_node
+ assert_equal("/descendant-or-self::text()",
+ abbreviate("/descendant-or-self::text()"))
+ end
+
+ def test_self_absolute
+ assert_equal("/a/./b",
+ abbreviate("/a/self::node()/b"))
+ end
+
+ def test_self_relative
+ assert_equal("a/./b",
+ abbreviate("a/self::node()/b"))
+ end
+
+ def test_self_not_node
+ assert_equal("/self::text()",
+ abbreviate("/self::text()"))
+ end
+
+ def test_parent_absolute
+ assert_equal("/a/../b",
+ abbreviate("/a/parent::node()/b"))
+ end
+
+ def test_parent_relative
+ assert_equal("a/../b",
+ abbreviate("a/parent::node()/b"))
+ end
+
+ def test_parent_not_node
+ assert_equal("/a/parent::text()",
+ abbreviate("/a/parent::text()"))
+ end
+
+ def test_any_absolute
+ assert_equal("/*/a",
+ abbreviate("/*/a"))
+ end
+
+ def test_any_relative
+ assert_equal("a/*/b",
+ abbreviate("a/*/b"))
+ end
+
+ def test_following_sibling_absolute
+ assert_equal("/following-sibling::a/b",
+ abbreviate("/following-sibling::a/b"))
+ end
+
+ def test_following_sibling_relative
+ assert_equal("a/following-sibling::b/c",
+ abbreviate("a/following-sibling::b/c"))
+ end
+
+ def test_predicate_index
+ assert_equal("a[5]/b",
+ abbreviate("a[5]/b"))
+ end
+
+ def test_attribute_relative
+ assert_equal("a/@b",
+ abbreviate("a/attribute::b"))
+ end
+
+ def test_filter_attribute
+ assert_equal("a/b[@i = 1]/c",
+ abbreviate("a/b[attribute::i=1]/c"))
+ end
+
+ def test_filter_string_single_quote
+ assert_equal("a/b[@name = \"single ' quote\"]/c",
+ abbreviate("a/b[attribute::name=\"single ' quote\"]/c"))
+ end
+
+ def test_filter_string_double_quote
+ assert_equal("a/b[@name = 'double \" quote']/c",
+ abbreviate("a/b[attribute::name='double \" quote']/c"))
+ end
+ end
+ end
+end
diff --git a/test/test_attributes.rb b/test/test_attributes.rb
index 91fc68a5..09fde442 100644
--- a/test/test_attributes.rb
+++ b/test/test_attributes.rb
@@ -178,18 +178,27 @@ def test_amp_and_lf_attributes
attr_test('name','value with LF
& ampersand')
end
- def test_quoting
+ def test_quote_root
d = Document.new(%q{})
assert_equal( %q{}, d.to_s )
d.root.context[:attribute_quote] = :quote
assert_equal( %q{}, d.to_s )
+ end
+ def test_quote_sub_element
d = Document.new(%q{})
assert_equal( %q{}, d.to_s )
d.root.context[:attribute_quote] = :quote
assert_equal( %q{}, d.to_s )
end
+ def test_quote_to_s_value
+ doc = Document.new(%q{}, {attribute_quote: :quote})
+ assert_equal(%q{}, doc.to_s)
+ assert_equal("'", doc.root.attribute("a").value)
+ assert_equal(%q{}, doc.to_s)
+ end
+
def test_ticket_127
doc = Document.new
doc.add_element 'a', { 'v' => 'x & y' }
diff --git a/test/test_contrib.rb b/test/test_contrib.rb
index f3ad0b6c..23ee35b1 100644
--- a/test/test_contrib.rb
+++ b/test/test_contrib.rb
@@ -80,7 +80,7 @@ def test_bad_doctype_Tobias
# Peter Verhage
def test_namespace_Peter
- source = <<-EOF
+ source = <<~EOF
@@ -377,7 +377,7 @@ def test_various_xpath
end
def test_entities_Holden_Glova
- document = <<-EOL
+ document = <<~EOL
diff --git a/test/test_core.rb b/test/test_core.rb
index fd3af8c2..e1fba8a7 100644
--- a/test/test_core.rb
+++ b/test/test_core.rb
@@ -15,7 +15,7 @@ class Tester < Test::Unit::TestCase
include Helper::Fixture
include REXML
def setup
- @xsa_source = <<-EOL
+ @xsa_source = <<~EOL
+
+
+ XML
+
+ expected_names = %w[
+ root
+ 1_1 1_2 1_3
+ 2_1 2_2 2_3
+ ]
+
+ document = REXML::Document.new(xml_source)
+
+ # Node#each_recursive iterates elements only.
+ # This does not iterate XML declarations, comments, attributes, CDATA sections, etc.
+ actual_names = []
+ document.each_recursive do |element|
+ actual_names << element.attributes["name"]
+ end
+ assert_equal(expected_names, actual_names)
+ end
+
class WriteTest < Test::Unit::TestCase
def setup
- @document = REXML::Document.new(<<-EOX)
+ @document = REXML::Document.new(<<-EOX.chomp)
Hello world!
EOX
@@ -212,7 +249,7 @@ class ArgumentsTest < self
def test_output
output = ""
@document.write(output)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
Hello world!
EOX
@@ -235,7 +272,7 @@ def test_transitive
indent = 2
transitive = true
@document.write(output, indent, transitive)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
Hello world!
#{japanese_text}
EOX
@@ -275,7 +312,7 @@ class OptionsTest < self
def test_output
output = ""
@document.write(:output => output)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
Hello world!
EOX
@@ -295,7 +332,7 @@ def test_indent
def test_transitive
output = ""
@document.write(:output => output, :indent => 2, :transitive => true)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
Hello world! output, :encoding => encoding)
- assert_equal(<<-EOX.encode(encoding), output)
+ assert_equal(<<-EOX.chomp.encode(encoding), output)
#{japanese_text}
EOX
@@ -401,7 +438,7 @@ def test_utf_16
actual_xml = ""
document.write(actual_xml)
- expected_xml = <<-EOX.encode("UTF-16BE")
+ expected_xml = <<-EOX.chomp.encode("UTF-16BE")
\ufeff
Hello world!
EOX
diff --git a/test/test_encoding.rb b/test/test_encoding.rb
index 09495c58..6887ffbe 100644
--- a/test/test_encoding.rb
+++ b/test/test_encoding.rb
@@ -67,7 +67,7 @@ def test_in_different_out
# * Given an encoded document, accessing text and attribute nodes
# should provide UTF-8 text.
def test_in_different_access
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
\xFF
EOL
@@ -79,7 +79,7 @@ def test_in_different_access
def test_ticket_89
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
EOL
diff --git a/test/test_light.rb b/test/test_light.rb
index 54b2c52e..c556c978 100644
--- a/test/test_light.rb
+++ b/test/test_light.rb
@@ -62,7 +62,7 @@ def test_access_child_elements
assert_equal( 'c', a[1].name )
end
- def test_itterate_over_children
+ def test_iterate_over_children
foo = make_small_document
ctr = 0
foo[0].each { ctr += 1 }
diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb
index 53a985ba..55205af8 100644
--- a/test/test_pullparser.rb
+++ b/test/test_pullparser.rb
@@ -62,6 +62,63 @@ def test_entity_replacement
end
end
+ def test_character_references
+ source = 'AB'
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+
+ assert_equal('A', events['a'])
+ assert_equal("B", events['b'])
+ end
+
+ def test_text_entity_references
+ source = '<P> <I> <B> Text </B> </I>'
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = []
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :text
+ events << event[1]
+ end
+ end
+
+ assert_equal(["
Text "], events)
+ end
+
+ def test_text_content_with_line_breaks
+ source = "AB\nC\r\n"
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+
+ assert_equal('A', events['a'])
+ assert_equal("B\n", events['b'])
+ assert_equal("C\n", events['c'])
+ end
+
def test_peek_unshift
source = ""
REXML::Parsers::PullParser.new(source)
@@ -98,5 +155,101 @@ def test_peek
end
assert_equal( 0, names.length )
end
+
+ class EntityExpansionLimitTest < Test::Unit::TestCase
+ def setup
+ @default_entity_expansion_limit = REXML::Security.entity_expansion_limit
+ end
+
+ def teardown
+ REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
+ end
+
+ class GeneralEntityTest < self
+ def test_have_value
+ source = <<-XML
+
+
+
+
+
+
+]>
+
+&a;
+
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ end
+
+ def test_empty_value
+ source = <<-XML
+
+
+
+
+
+
+]>
+
+&a;
+
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ REXML::Security.entity_expansion_limit = 100
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ assert_equal(101, parser.entity_expansion_count)
+ end
+
+ def test_with_default_entity
+ source = <<-XML
+
+
+
+]>
+
+&a;
+&a2;
+<
+
+ XML
+
+ REXML::Security.entity_expansion_limit = 4
+ parser = REXML::Parsers::PullParser.new(source)
+ while parser.has_next?
+ parser.pull
+ end
+
+ REXML::Security.entity_expansion_limit = 3
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ end
+ end
+ end
end
end
diff --git a/test/test_sax.rb b/test/test_sax.rb
index 6f775183..5e3ad75b 100644
--- a/test/test_sax.rb
+++ b/test/test_sax.rb
@@ -31,6 +31,17 @@ def test_entity_replacement
assert_equal '--1234--', results[1]
end
+ def test_characters_predefined_entities
+ source = '<P> <I> <B> Text </B> </I>'
+
+ sax = Parsers::SAX2Parser.new( source )
+ results = []
+ sax.listen(:characters) {|x| results << x }
+ sax.parse
+
+ assert_equal(["
Text "], results)
+ end
+
def test_sax2
File.open(fixture_path("documentation.xml")) do |f|
parser = Parsers::SAX2Parser.new( f )
@@ -88,6 +99,92 @@ def test_sax2
end
end
+ class EntityExpansionLimitTest < Test::Unit::TestCase
+ def setup
+ @default_entity_expansion_limit = REXML::Security.entity_expansion_limit
+ end
+
+ def teardown
+ REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
+ end
+
+ class GeneralEntityTest < self
+ def test_have_value
+ source = <<-XML
+
+
+
+
+
+
+]>
+
+&a;
+
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
+ sax.parse
+ end
+ end
+
+ def test_empty_value
+ source = <<-XML
+
+
+
+
+
+
+]>
+
+&a;
+
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ sax.parse
+ end
+
+ REXML::Security.entity_expansion_limit = 100
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ sax.parse
+ end
+ assert_equal(101, sax.entity_expansion_count)
+ end
+
+ def test_with_default_entity
+ source = <<-XML
+
+
+
+]>
+
+&a;
+&a2;
+<
+
+ XML
+
+ REXML::Security.entity_expansion_limit = 4
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ sax.parse
+
+ REXML::Security.entity_expansion_limit = 3
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ sax.parse
+ end
+ end
+ end
+ end
+
# used by test_simple_doctype_listener
# submitted by Jeff Barczewski
class SimpleDoctypeListener
@@ -109,7 +206,7 @@ def doctype(name, pub_sys, long_name, uri)
# test simple non-entity doctype in sax listener
# submitted by Jeff Barczewski
def test_simple_doctype_listener
- xml = <<-END
+ xml = <<~END
Hello, world!
@@ -140,8 +237,8 @@ def test_simple_doctype_listener
# test doctype with missing name, should throw ParseException
# submitted by Jeff Barczewseki
- def test_doctype_with_mising_name_throws_exception
- xml = <<-END
+ def test_doctype_with_missing_name_throws_exception
+ xml = <<~END
Hello, world!
diff --git a/test/test_text_check.rb b/test/test_text_check.rb
new file mode 100644
index 00000000..11cf65a3
--- /dev/null
+++ b/test/test_text_check.rb
@@ -0,0 +1,121 @@
+# frozen_string_literal: false
+
+module REXMLTests
+ class TextCheckTester < Test::Unit::TestCase
+
+ def check(string)
+ REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK, nil)
+ end
+
+ def assert_check(string)
+ assert_nothing_raised { check(string) }
+ end
+
+ def assert_check_failed(string, illegal_part)
+ message = "Illegal character #{illegal_part.inspect} in raw string #{string.inspect}"
+ assert_raise(RuntimeError.new(message)) do
+ check(string)
+ end
+ end
+
+ class TestValid < self
+ def test_entity_name_start_char_colon
+ assert_check("&:;")
+ end
+
+ def test_entity_name_start_char_under_score
+ assert_check("&_;")
+ end
+
+ def test_entity_name_mix
+ assert_check("&A.b-0123;")
+ end
+
+ def test_character_reference_decimal
+ assert_check("¢")
+ end
+
+ def test_character_reference_hex
+ assert_check("")
+ end
+
+ def test_entity_name_non_ascii
+ # U+3042 HIRAGANA LETTER A
+ # U+3044 HIRAGANA LETTER I
+ assert_check("&\u3042\u3044;")
+ end
+
+ def test_normal_string
+ assert_check("foo")
+ end
+ end
+
+ class TestInvalid < self
+ def test_lt
+ assert_check_failed("<;", "<")
+ end
+
+ def test_lt_mix
+ assert_check_failed("ab
@@ -24,7 +24,7 @@ def test_validate
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
@@ -33,7 +33,7 @@ def test_validate
def test_sequence
- rng = %q{
+ rng = <<-XML
@@ -45,7 +45,7 @@ def test_sequence
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -56,7 +56,7 @@ def test_sequence
def test_choice
- rng = %q{
+ rng = <<-XML
@@ -70,7 +70,7 @@ def test_choice
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -79,7 +79,7 @@ def test_choice
end
def test_optional
- rng = %q{
+ rng = <<-XML
@@ -90,7 +90,7 @@ def test_optional
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
@@ -100,7 +100,7 @@ def test_optional
end
def test_zero_or_more
- rng = %q{
+ rng = <<-XML
@@ -111,7 +111,7 @@ def test_zero_or_more
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
no_error( validator, %q{} )
@@ -119,7 +119,7 @@ def test_zero_or_more
error( validator, %q{} )
error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -133,7 +133,7 @@ def test_zero_or_more
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
@@ -143,7 +143,7 @@ def test_zero_or_more
end
def test_one_or_more
- rng = %q{
+ rng = <<-XML
@@ -154,7 +154,7 @@ def test_one_or_more
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -165,13 +165,13 @@ def test_one_or_more
end
def test_attribute
- rng = %q{
+ rng = <<-XML
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -181,7 +181,7 @@ def test_attribute
end
def test_choice_attributes
- rng = %q{
+ rng = <<-XML
@@ -189,7 +189,7 @@ def test_choice_attributes
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -199,7 +199,7 @@ def test_choice_attributes
end
def test_choice_attribute_element
- rng = %q{
+ rng = <<-XML
@@ -207,7 +207,7 @@ def test_choice_attribute_element
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -217,12 +217,12 @@ def test_choice_attribute_element
end
def test_empty
- rng = %q{
+ rng = <<-XML
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -231,12 +231,12 @@ def test_empty
end
def test_text_val
- rng = %q{
+ rng = <<-XML
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -245,7 +245,7 @@ def test_text_val
end
def test_choice_text
- rng = %q{
+ rng = <<-XML
@@ -253,7 +253,7 @@ def test_choice_text
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{Text} )
@@ -263,7 +263,7 @@ def test_choice_text
end
def test_group
- rng = %q{
+ rng = <<-XML
@@ -274,7 +274,7 @@ def test_group
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -282,7 +282,7 @@ def test_group
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -291,7 +291,7 @@ def test_group
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -302,14 +302,14 @@ def test_group
def test_value
# Values as text nodes
- rng = %q{
+ rng = <<-XML
VaLuE
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{X} )
@@ -317,7 +317,7 @@ def test_value
no_error( validator, %q{VaLuE} )
# Values as text nodes, via choice
- rng = %q{
+ rng = <<-XML
@@ -327,7 +327,7 @@ def test_value
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -336,14 +336,14 @@ def test_value
no_error( validator, %q{Option 2} )
# Attribute values
- rng = %q{
+ rng = <<-XML
VaLuE
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -352,7 +352,7 @@ def test_value
no_error( validator, %q{} )
# Attribute values via choice
- rng = %q{
+ rng = <<-XML
@@ -362,7 +362,7 @@ def test_value
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -372,7 +372,7 @@ def test_value
end
def test_interleave
- rng = %q{
+ rng = <<-XML
@@ -383,7 +383,7 @@ def test_interleave
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -396,7 +396,7 @@ def test_interleave
end
def test_mixed
- rng = %q{
+ rng = <<-XML
@@ -405,7 +405,7 @@ def test_mixed
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{Text} )
@@ -413,7 +413,7 @@ def test_mixed
end
def test_ref_sequence
- rng = %q{
+ rng = <<-XML
@@ -429,7 +429,7 @@ def test_ref_sequence
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
@@ -437,7 +437,7 @@ def test_ref_sequence
end
def test_ref_choice
- rng = %q{
+ rng = <<-XML
@@ -453,7 +453,7 @@ def test_ref_choice
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -461,7 +461,7 @@ def test_ref_choice
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -477,7 +477,7 @@ def test_ref_choice
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -485,7 +485,7 @@ def test_ref_choice
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -502,7 +502,7 @@ def test_ref_choice
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -513,7 +513,7 @@ def test_ref_choice
def test_ref_zero_plus
- rng = %q{
+ rng = <<-XML
@@ -530,7 +530,7 @@ def test_ref_zero_plus
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -538,7 +538,7 @@ def test_ref_zero_plus
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -555,7 +555,7 @@ def test_ref_zero_plus
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -566,7 +566,7 @@ def test_ref_zero_plus
def test_ref_one_plus
- rng = %q{
+ rng = <<-XML
@@ -583,7 +583,7 @@ def test_ref_one_plus
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -591,7 +591,7 @@ def test_ref_one_plus
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -608,7 +608,7 @@ def test_ref_one_plus
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -618,7 +618,7 @@ def test_ref_one_plus
end
def test_ref_interleave
- rng = %q{
+ rng = <<-XML
@@ -634,7 +634,7 @@ def test_ref_interleave
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -643,7 +643,7 @@ def test_ref_interleave
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -659,7 +659,7 @@ def test_ref_interleave
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -668,7 +668,7 @@ def test_ref_interleave
no_error( validator, %q{} )
no_error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -687,7 +687,7 @@ def test_ref_interleave
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -698,7 +698,7 @@ def test_ref_interleave
end
def test_ref_recurse
- rng = %q{
+ rng = <<-XML
@@ -715,7 +715,7 @@ def test_ref_recurse
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{} )
@@ -724,7 +724,7 @@ def test_ref_recurse
end
def test_ref_optional
- rng = %q{
+ rng = <<-XML
@@ -740,7 +740,7 @@ def test_ref_optional
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
@@ -748,7 +748,7 @@ def test_ref_optional
error( validator, %q{} )
error( validator, %q{} )
- rng = %q{
+ rng = <<-XML
@@ -764,7 +764,7 @@ def test_ref_optional
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{} )
diff --git a/test/test_xml_declaration.rb b/test/test_xml_declaration.rb
index 6db54bab..6a1f4df0 100644
--- a/test/test_xml_declaration.rb
+++ b/test/test_xml_declaration.rb
@@ -6,7 +6,7 @@
module REXMLTests
class TestXmlDeclaration < Test::Unit::TestCase
def setup
- xml = <<-XML
+ xml = <<~XML
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index 5156bbbe..1dacd69d 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -451,6 +451,46 @@ def test_following
# puts results
#end
+ def test_nested_predicates
+ doc = Document.new <<-EOF
+
+
+ ab
+ cd
+
+
+ ef
+ gh
+
+
+ hi
+
+
+ EOF
+
+ matches = XPath.match(doc, '(/div/div/test[0])').map(&:text)
+ assert_equal [], matches
+ matches = XPath.match(doc, '(/div/div/test[1])').map(&:text)
+ assert_equal ["ab", "ef", "hi"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])').map(&:text)
+ assert_equal ["cd", "gh"], matches
+ matches = XPath.match(doc, '(/div/div/test[3])').map(&:text)
+ assert_equal [], matches
+
+ matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text)
+ assert_equal ["ab"], matches
+ matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text)
+ assert_equal ["ef"], matches
+ matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text)
+ assert_equal ["hi"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text)
+ assert_equal ["cd"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text)
+ assert_equal ["gh"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text)
+ assert_equal [], matches
+ end
+
# Contributed by Mike Stok
def test_starts_with
source = <<-EOF
@@ -611,7 +651,7 @@ def test_comparisons
source = ""
doc = REXML::Document.new(source)
- # NOTE TO SER: check that number() is required
+ # NOTE: check that number() is required
assert_equal 2, REXML::XPath.match(doc, "//b[number(@id) > 1]").size
assert_equal 3, REXML::XPath.match(doc, "//b[number(@id) >= 1]").size
assert_equal 1, REXML::XPath.match(doc, "//b[number(@id) <= 1]").size
diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb
index c8520712..278e3765 100644
--- a/test/xpath/test_predicate.rb
+++ b/test/xpath/test_predicate.rb
@@ -6,7 +6,7 @@
module REXMLTests
class TestXPathPredicate < Test::Unit::TestCase
include REXML
- SRC=<<-EOL
+ SRC=<<~EOL