diff --git a/.document b/.document new file mode 100644 index 0000000..1d07bdf --- /dev/null +++ b/.document @@ -0,0 +1,5 @@ +BSDL +COPYING +README.md +docs/ +lib/ diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index f0e6b5d..585eb8a 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -19,20 +19,20 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Setup Ruby - uses: ruby/setup-ruby@250fcd6a742febb1123a77a841497ccaa8b9e939 # v1.152.0 + uses: ruby/setup-ruby@32110d4e311bd8996b2a82bf2a43b714ccc91777 # v1.221.0 with: - ruby-version: '3.3' + ruby-version: '3.4' bundler-cache: true - name: Setup Pages id: pages uses: actions/configure-pages@v5 - name: Build with RDoc # Outputs to the './_site' directory by default - run: bundle exec rake rdoc + run: rake rdoc - name: Upload artifact - uses: actions/upload-pages-artifact@v3 + uses: actions/upload-pages-artifact@v4 deploy: environment: diff --git a/.github/workflows/push_gem.yml b/.github/workflows/push_gem.yml index e15d54b..c6b1e47 100644 --- a/.github/workflows/push_gem.yml +++ b/.github/workflows/push_gem.yml @@ -23,11 +23,11 @@ jobs: steps: - name: Harden Runner - uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 with: egress-policy: audit - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.1.4 - name: Set up Ruby uses: ruby/setup-ruby@a6e6f86333f0a2523ece813039b8b4be04560854 # v1.190.0 @@ -36,11 +36,11 @@ jobs: ruby-version: ruby - name: Publish to RubyGems - uses: rubygems/release-gem@612653d273a73bdae1df8453e090060bb4db5f31 # v1 + uses: rubygems/release-gem@a25424ba2ba8b387abc8ef40807c2c85b96cbe32 # v1.1.1 - name: Create GitHub release run: | tag_name="$(git describe --tags --abbrev=0)" gh release create "${tag_name}" --verify-tag --generate-notes env: - GITHUB_TOKEN: ${{ secrets.MATZBOT_GITHUB_WORKFLOW_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ee9b2a5..ba860e8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: os: macos-latest runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: diff --git a/.rdoc_options b/.rdoc_options new file mode 100644 index 0000000..cd5e496 --- /dev/null +++ b/.rdoc_options @@ -0,0 +1,5 @@ +main_page: README.md +op_dir: _site +warn_missing_rdoc_ref: true +title: URI Documentation +visibility: :private diff --git a/Gemfile b/Gemfile index ba909e8..6669eae 100644 --- a/Gemfile +++ b/Gemfile @@ -6,5 +6,5 @@ group :development do gem "bundler" gem "rake" gem "test-unit" - gem "test-unit-ruby-core" + gem "test-unit-ruby-core", ">= 1.0.7" end diff --git a/README.md b/README.md index 3775f3b..5c7c0dd 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![CI](https://github.com/ruby/uri/actions/workflows/test.yml/badge.svg)](https://github.com/ruby/uri/actions/workflows/test.yml) [![Yard Docs](https://img.shields.io/badge/docs-exist-blue.svg)](https://ruby.github.io/uri/) -URI is a module providing classes to handle Uniform Resource Identifiers [RFC2396](http://tools.ietf.org/html/rfc2396). +URI is a module providing classes to handle Uniform Resource Identifiers [RFC3986](http://tools.ietf.org/html/rfc3986). ## Features diff --git a/Rakefile b/Rakefile index 19de93e..3935fdf 100644 --- a/Rakefile +++ b/Rakefile @@ -11,7 +11,7 @@ require "rdoc/task" RDoc::Task.new do |doc| doc.main = "README.md" doc.title = "URI - handle Uniform Resource Identifiers" - doc.rdoc_files = FileList.new %w[lib README.md LICENSE.txt] + doc.rdoc_files = FileList.new %w[lib README.md BSDL COPYING] doc.rdoc_dir = "_site" # for github pages end diff --git a/docs/kernel.rb b/docs/kernel.rb new file mode 100644 index 0000000..68ed335 --- /dev/null +++ b/docs/kernel.rb @@ -0,0 +1,2 @@ +# :stopdoc: +module Kernel end diff --git a/lib/uri/common.rb b/lib/uri/common.rb index cf93fb1..baa0fd2 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -13,15 +13,19 @@ require_relative "rfc3986_parser" module URI + # The default parser instance for RFC 2396. RFC2396_PARSER = RFC2396_Parser.new Ractor.make_shareable(RFC2396_PARSER) if defined?(Ractor) + # The default parser instance for RFC 3986. RFC3986_PARSER = RFC3986_Parser.new Ractor.make_shareable(RFC3986_PARSER) if defined?(Ractor) + # The default parser instance. DEFAULT_PARSER = RFC3986_PARSER Ractor.make_shareable(DEFAULT_PARSER) if defined?(Ractor) + # Set the default parser instance. def self.parser=(parser = RFC3986_PARSER) remove_const(:Parser) if defined?(::URI::Parser) const_set("Parser", parser.class) @@ -31,23 +35,24 @@ def self.parser=(parser = RFC3986_PARSER) if Parser == RFC2396_Parser const_set("REGEXP", URI::RFC2396_REGEXP) const_set("PATTERN", URI::RFC2396_REGEXP::PATTERN) - Parser.new.pattern.each_pair do |sym, str| - unless REGEXP::PATTERN.const_defined?(sym) - REGEXP::PATTERN.const_set(sym, str) - end - end end Parser.new.regexp.each_pair do |sym, str| - remove_const(sym) if const_defined?(sym) + remove_const(sym) if const_defined?(sym, false) const_set(sym, str) end end self.parser = RFC3986_PARSER - def self.const_missing(const) - if value = RFC2396_PARSER.regexp[const] - warn "URI::#{const} is obsolete. Use RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE + def self.const_missing(const) # :nodoc: + if const == :REGEXP + warn "URI::REGEXP is obsolete. Use URI::RFC2396_REGEXP explicitly.", uplevel: 1 if $VERBOSE + URI::RFC2396_REGEXP + elsif value = RFC2396_PARSER.regexp[const] + warn "URI::#{const} is obsolete. Use URI::RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE + value + elsif value = RFC2396_Parser.const_get(const) + warn "URI::#{const} is obsolete. Use URI::RFC2396_Parser::#{const} explicitly.", uplevel: 1 if $VERBOSE value else super @@ -86,7 +91,41 @@ def make_components_hash(klass, array_hash) module_function :make_components_hash end - module Schemes + module Schemes # :nodoc: + class << self + ReservedChars = ".+-" + EscapedChars = "\u01C0\u01C1\u01C2" + # Use Lo category chars as escaped chars for TruffleRuby, which + # does not allow Symbol categories as identifiers. + + def escape(name) + unless name and name.ascii_only? + return nil + end + name.upcase.tr(ReservedChars, EscapedChars) + end + + def unescape(name) + name.tr(EscapedChars, ReservedChars).encode(Encoding::US_ASCII).upcase + end + + def find(name) + const_get(name, false) if name and const_defined?(name, false) + end + + def register(name, klass) + unless scheme = escape(name) + raise ArgumentError, "invalid character as scheme - #{name}" + end + const_set(scheme, klass) + end + + def list + constants.map { |name| + [unescape(name.to_s), const_get(name)] + }.to_h + end + end end private_constant :Schemes @@ -99,7 +138,7 @@ module Schemes # Note that after calling String#upcase on +scheme+, it must be a valid # constant name. def self.register_scheme(scheme, klass) - Schemes.const_set(scheme.to_s.upcase, klass) + Schemes.register(scheme, klass) end # Returns a hash of the defined schemes: @@ -117,14 +156,14 @@ def self.register_scheme(scheme, klass) # # Related: URI.register_scheme. def self.scheme_list - Schemes.constants.map { |name| - [name.to_s.upcase, Schemes.const_get(name)] - }.to_h + Schemes.list end + # :stopdoc: INITIAL_SCHEMES = scheme_list private_constant :INITIAL_SCHEMES Ractor.make_shareable(INITIAL_SCHEMES) if defined?(Ractor) + # :startdoc: # Returns a new object constructed from the given +scheme+, +arguments+, # and +default+: @@ -143,12 +182,10 @@ def self.scheme_list # # => # # def self.for(scheme, *arguments, default: Generic) - const_name = scheme.to_s.upcase + const_name = Schemes.escape(scheme) uri_class = INITIAL_SCHEMES[const_name] - uri_class ||= if /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false) - Schemes.const_get(const_name, false) - end + uri_class ||= Schemes.find(const_name) uri_class ||= default return uri_class.new(scheme, *arguments) @@ -304,7 +341,7 @@ def self.regexp(schemes = nil)# :nodoc: 256.times do |i| TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i) end - TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze + TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze # :nodoc: TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze TBLDECWWWCOMP_ = {} # :nodoc: @@ -402,6 +439,8 @@ def self.decode_uri_component(str, enc=Encoding::UTF_8) _decode_uri_component(/%\h\h/, str, enc) end + # Returns a string derived from the given string +str+ with + # URI-encoded characters matching +regexp+ according to +table+. def self._encode_uri_component(regexp, table, str, enc) str = str.to_s.dup if str.encoding != Encoding::ASCII_8BIT @@ -416,6 +455,8 @@ def self._encode_uri_component(regexp, table, str, enc) end private_class_method :_encode_uri_component + # Returns a string decoding characters matching +regexp+ from the + # given \URL-encoded string +str+. def self._decode_uri_component(regexp, str, enc) raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) @@ -854,6 +895,7 @@ module Kernel # Returns a \URI object derived from the given +uri+, # which may be a \URI string or an existing \URI object: # + # require 'uri' # # Returns a new URI. # uri = URI('http://github.com/ruby/ruby') # # => # @@ -861,6 +903,8 @@ module Kernel # URI(uri) # # => # # + # You must require 'uri' to use this method. + # def URI(uri) if uri.is_a?(URI::Generic) uri diff --git a/lib/uri/file.rb b/lib/uri/file.rb index 940d361..47b5aef 100644 --- a/lib/uri/file.rb +++ b/lib/uri/file.rb @@ -47,7 +47,7 @@ class File < Generic # :path => '/ruby/src'}) # uri2.to_s # => "file://host.example.com/ruby/src" # - # uri3 = URI::File.build({:path => URI::escape('/path/my file.txt')}) + # uri3 = URI::File.build({:path => URI::RFC2396_PARSER.escape('/path/my file.txt')}) # uri3.to_s # => "file:///path/my%20file.txt" # def self.build(args) diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index d4bfa3b..d811c5b 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -73,7 +73,7 @@ def self.use_registry # :nodoc: # # At first, tries to create a new URI::Generic instance using # URI::Generic::build. But, if exception URI::InvalidComponentError is raised, - # then it does URI::Escape.escape all URI components and tries again. + # then it does URI::RFC2396_PARSER.escape all URI components and tries again. # def self.build2(args) begin @@ -126,9 +126,9 @@ def self.build(args) end end else - component = self.class.component rescue ::URI::Generic::COMPONENT + component = self.component rescue ::URI::Generic::COMPONENT raise ArgumentError, - "expected Array of or Hash of components of #{self.class} (#{component.join(', ')})" + "expected Array of or Hash of components of #{self} (#{component.join(', ')})" end tmp << nil @@ -284,7 +284,7 @@ def registry # :nodoc: # Returns the parser to be used. # - # Unless a URI::Parser is defined, DEFAULT_PARSER is used. + # Unless the +parser+ is defined, DEFAULT_PARSER is used. # def parser if !defined?(@parser) || !@parser @@ -315,7 +315,7 @@ def component end # - # Checks the scheme +v+ component against the URI::Parser Regexp for :SCHEME. + # Checks the scheme +v+ component against the +parser+ Regexp for :SCHEME. # def check_scheme(v) if v && parser.regexp[:SCHEME] !~ v @@ -385,7 +385,7 @@ def check_userinfo(user, password = nil) # # Checks the user +v+ component for RFC2396 compliance - # and against the URI::Parser Regexp for :USERINFO. + # and against the +parser+ Regexp for :USERINFO. # # Can not have a registry or opaque component defined, # with a user component defined. @@ -409,7 +409,7 @@ def check_user(v) # # Checks the password +v+ component for RFC2396 compliance - # and against the URI::Parser Regexp for :USERINFO. + # and against the +parser+ Regexp for :USERINFO. # # Can not have a registry or opaque component defined, # with a user component defined. @@ -586,7 +586,7 @@ def decoded_password # # Checks the host +v+ component for RFC2396 compliance - # and against the URI::Parser Regexp for :HOST. + # and against the +parser+ Regexp for :HOST. # # Can not have a registry or opaque component defined, # with a host component defined. @@ -675,7 +675,7 @@ def hostname=(v) # # Checks the port +v+ component for RFC2396 compliance - # and against the URI::Parser Regexp for :PORT. + # and against the +parser+ Regexp for :PORT. # # Can not have a registry or opaque component defined, # with a port component defined. @@ -737,18 +737,18 @@ def check_registry(v) # :nodoc: end private :check_registry - def set_registry(v) #:nodoc: + def set_registry(v) # :nodoc: raise InvalidURIError, "cannot set registry" end protected :set_registry - def registry=(v) + def registry=(v) # :nodoc: raise InvalidURIError, "cannot set registry" end # # Checks the path +v+ component for RFC2396 compliance - # and against the URI::Parser Regexp + # and against the +parser+ Regexp # for :ABS_PATH and :REL_PATH. # # Can not have a opaque component defined, @@ -853,7 +853,7 @@ def query=(v) # # Checks the opaque +v+ component for RFC2396 compliance and - # against the URI::Parser Regexp for :OPAQUE. + # against the +parser+ Regexp for :OPAQUE. # # Can not have a host, port, user, or path component defined, # with an opaque component defined. @@ -905,7 +905,7 @@ def opaque=(v) end # - # Checks the fragment +v+ component against the URI::Parser Regexp for :FRAGMENT. + # Checks the fragment +v+ component against the +parser+ Regexp for :FRAGMENT. # # # == Args @@ -1133,17 +1133,16 @@ def merge(oth) base.fragment=(nil) # RFC2396, Section 5.2, 4) - if !authority - base.set_path(merge_path(base.path, rel.path)) if base.path && rel.path - else - # RFC2396, Section 5.2, 4) - base.set_path(rel.path) if rel.path + if authority + base.set_userinfo(rel.userinfo) + base.set_host(rel.host) + base.set_port(rel.port || base.default_port) + base.set_path(rel.path) + elsif base.path && rel.path + base.set_path(merge_path(base.path, rel.path)) end # RFC2396, Section 5.2, 7) - base.set_userinfo(rel.userinfo) if rel.userinfo - base.set_host(rel.host) if rel.host - base.set_port(rel.port) if rel.port base.query = rel.query if rel.query base.fragment=(rel.fragment) if rel.fragment @@ -1392,10 +1391,12 @@ def ==(oth) end end + # Returns the hash value. def hash self.component_ary.hash end + # Compares with _oth_ for Hash. def eql?(oth) self.class == oth.class && parser == oth.parser && @@ -1438,7 +1439,7 @@ def select(*components) end end - def inspect + def inspect # :nodoc: "#<#{self.class} #{self}>" end diff --git a/lib/uri/http.rb b/lib/uri/http.rb index 900b132..3c41cd4 100644 --- a/lib/uri/http.rb +++ b/lib/uri/http.rb @@ -61,6 +61,18 @@ def self.build(args) super(tmp) end + # Do not allow empty host names, as they are not allowed by RFC 3986. + def check_host(v) + ret = super + + if ret && v.empty? + raise InvalidComponentError, + "bad component(expected host component): #{v}" + end + + ret + end + # # == Description # diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb index cb8024f..f747b79 100644 --- a/lib/uri/mailto.rb +++ b/lib/uri/mailto.rb @@ -52,7 +52,11 @@ class MailTo < Generic HEADER_REGEXP = /\A(?(?:%\h\h|[!$'-.0-;@-Z_a-z~])*=(?:%\h\h|[!$'-.0-;@-Z_a-z~])*)(?:&\g)*\z/ # practical regexp for email address # https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address - EMAIL_REGEXP = /\A[a-zA-Z0-9.!\#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z/ + EMAIL_REGEXP = %r[\A#{ + atext = %q[(?:[a-zA-Z0-9!\#$%&'*+\/=?^_`{|}~-]+)] + }(?:\.#{atext})*@#{ + label = %q[(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)] + }(?:\.#{label})*\z] # :startdoc: # diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb index a56ca34..cefd126 100644 --- a/lib/uri/rfc2396_parser.rb +++ b/lib/uri/rfc2396_parser.rb @@ -67,7 +67,7 @@ class RFC2396_Parser # # == Synopsis # - # URI::Parser.new([opts]) + # URI::RFC2396_Parser.new([opts]) # # == Args # @@ -86,7 +86,7 @@ class RFC2396_Parser # # == Examples # - # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") + # p = URI::RFC2396_Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") # u = p.parse("http://example.jp/%uABCD") #=> # # URI.parse(u.to_s) #=> raises URI::InvalidURIError # @@ -108,12 +108,12 @@ def initialize(opts = {}) # The Hash of patterns. # - # See also URI::Parser.initialize_pattern. + # See also #initialize_pattern. attr_reader :pattern # The Hash of Regexp. # - # See also URI::Parser.initialize_regexp. + # See also #initialize_regexp. attr_reader :regexp # Returns a split URI against +regexp[:ABS_URI]+. @@ -202,8 +202,7 @@ def split(uri) # # == Usage # - # p = URI::Parser.new - # p.parse("ldap://ldap.example.com/dc=example?user=john") + # URI::RFC2396_PARSER.parse("ldap://ldap.example.com/dc=example?user=john") # #=> # # def parse(uri) @@ -244,7 +243,7 @@ def join(*uris) # If no +block+ given, then returns the result, # else it calls +block+ for each element in result. # - # See also URI::Parser.make_regexp. + # See also #make_regexp. # def extract(str, schemes = nil) if block_given? @@ -263,7 +262,7 @@ def make_regexp(schemes = nil) unless schemes @regexp[:ABS_URI_REF] else - /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x + /(?=(?i:#{Regexp.union(*schemes).source}):)#{@pattern[:X_ABS_URI]}/x end end @@ -321,14 +320,14 @@ def unescape(str, escaped = @regexp[:ESCAPED]) str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(enc) } end - @@to_s = Kernel.instance_method(:to_s) - if @@to_s.respond_to?(:bind_call) - def inspect - @@to_s.bind_call(self) + TO_S = Kernel.instance_method(:to_s) # :nodoc: + if TO_S.respond_to?(:bind_call) + def inspect # :nodoc: + TO_S.bind_call(self) end else - def inspect - @@to_s.bind(self).call + def inspect # :nodoc: + TO_S.bind(self).call end end @@ -524,6 +523,8 @@ def initialize_regexp(pattern) ret end + # Returns +uri+ as-is if it is URI, or convert it to URI if it is + # a String. def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri @@ -536,4 +537,11 @@ def convert_to_uri(uri) end end # class Parser + + # Backward compatibility for URI::REGEXP::PATTERN::* + RFC2396_Parser.new.pattern.each_pair do |sym, str| + unless RFC2396_REGEXP::PATTERN.const_defined?(sym, false) + RFC2396_REGEXP::PATTERN.const_set(sym, str) + end + end end # module URI diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb index 4000f13..0b5f0c4 100644 --- a/lib/uri/rfc3986_parser.rb +++ b/lib/uri/rfc3986_parser.rb @@ -142,25 +142,25 @@ def join(*uris) # :nodoc: # Compatibility for RFC2396 parser def extract(str, schemes = nil, &block) # :nodoc: - warn "URI::RFC3986_PARSER.extract is obsoleted. Use URI::RFC2396_PARSER.extract explicitly.", uplevel: 1 if $VERBOSE + warn "URI::RFC3986_PARSER.extract is obsolete. Use URI::RFC2396_PARSER.extract explicitly.", uplevel: 1 if $VERBOSE RFC2396_PARSER.extract(str, schemes, &block) end # Compatibility for RFC2396 parser def make_regexp(schemes = nil) # :nodoc: - warn "URI::RFC3986_PARSER.make_regexp is obsoleted. Use URI::RFC2396_PARSER.make_regexp explicitly.", uplevel: 1 if $VERBOSE + warn "URI::RFC3986_PARSER.make_regexp is obsolete. Use URI::RFC2396_PARSER.make_regexp explicitly.", uplevel: 1 if $VERBOSE RFC2396_PARSER.make_regexp(schemes) end # Compatibility for RFC2396 parser def escape(str, unsafe = nil) # :nodoc: - warn "URI::RFC3986_PARSER.escape is obsoleted. Use URI::RFC2396_PARSER.escape explicitly.", uplevel: 1 if $VERBOSE + warn "URI::RFC3986_PARSER.escape is obsolete. Use URI::RFC2396_PARSER.escape explicitly.", uplevel: 1 if $VERBOSE unsafe ? RFC2396_PARSER.escape(str, unsafe) : RFC2396_PARSER.escape(str) end # Compatibility for RFC2396 parser def unescape(str, escaped = nil) # :nodoc: - warn "URI::RFC3986_PARSER.unescape is obsoleted. Use URI::RFC2396_PARSER.unescape explicitly.", uplevel: 1 if $VERBOSE + warn "URI::RFC3986_PARSER.unescape is obsolete. Use URI::RFC2396_PARSER.unescape explicitly.", uplevel: 1 if $VERBOSE escaped ? RFC2396_PARSER.unescape(str, escaped) : RFC2396_PARSER.unescape(str) end diff --git a/lib/uri/version.rb b/lib/uri/version.rb index c68c43a..b6a8ce1 100644 --- a/lib/uri/version.rb +++ b/lib/uri/version.rb @@ -1,6 +1,6 @@ module URI # :stopdoc: - VERSION_CODE = '010000'.freeze + VERSION_CODE = '010003'.freeze VERSION = VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze # :startdoc: end diff --git a/test/uri/test_common.rb b/test/uri/test_common.rb index bccdeaf..1291366 100644 --- a/test/uri/test_common.rb +++ b/test/uri/test_common.rb @@ -10,13 +10,23 @@ def setup def teardown end + EnvUtil.suppress_warning do + class Foo + # Intentionally use `URI::REGEXP`, which is for the compatibility + include URI::REGEXP::PATTERN + end + end + def test_fallback_constants - orig_verbose = $VERBOSE - $VERBOSE = nil - assert URI::ABS_URI - assert_raise(NameError) { URI::FOO } - ensure - $VERBOSE = orig_verbose + EnvUtil.suppress_warning do + assert_raise(NameError) { URI::FOO } + + assert_equal URI::ABS_URI, URI::RFC2396_PARSER.regexp[:ABS_URI] + assert_equal URI::PATTERN, URI::RFC2396_Parser::PATTERN + assert_equal URI::REGEXP, URI::RFC2396_REGEXP + assert_equal URI::REGEXP::PATTERN, URI::RFC2396_REGEXP::PATTERN + assert_equal Foo::IPV4ADDR, URI::RFC2396_REGEXP::PATTERN::IPV4ADDR + end end def test_parser_switch @@ -30,6 +40,7 @@ def test_parser_switch assert defined?(URI::REGEXP) assert defined?(URI::PATTERN) assert defined?(URI::PATTERN::ESCAPED) + assert defined?(URI::REGEXP::PATTERN::IPV6ADDR) URI.parser = URI::RFC3986_PARSER @@ -64,7 +75,7 @@ def test_ractor return unless defined?(Ractor) assert_ractor(<<~RUBY, require: 'uri') r = Ractor.new { URI.parse("https://ruby-lang.org/").inspect } - assert_equal(URI.parse("https://ruby-lang.org/").inspect, r.take) + assert_equal(URI.parse("https://ruby-lang.org/").inspect, r.value) RUBY end @@ -102,17 +113,18 @@ def test_register_scheme_lowercase def test_register_scheme_with_symbols # Valid schemes from https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml - some_uri_class = Class.new(URI::Generic) - assert_raise(NameError) { URI.register_scheme 'ms-search', some_uri_class } - assert_raise(NameError) { URI.register_scheme 'microsoft.windows.camera', some_uri_class } - assert_raise(NameError) { URI.register_scheme 'coaps+ws', some_uri_class } + list = [] + %w[ms-search microsoft.windows.camera coaps+ws].each {|name| + list << [name, URI.register_scheme(name, Class.new(URI::Generic))] + } - ms_search_class = Class.new(URI::Generic) - URI.register_scheme 'MS_SEARCH', ms_search_class - begin - assert_equal URI::Generic, URI.parse('ms-search://localhost').class - ensure - URI.const_get(:Schemes).send(:remove_const, :MS_SEARCH) + list.each do |scheme, uri_class| + assert_equal uri_class, URI.parse("#{scheme}://localhost").class + end + ensure + schemes = URI.const_get(:Schemes) + list.each do |scheme, | + schemes.send(:remove_const, schemes.escape(scheme)) end end diff --git a/test/uri/test_ftp.rb b/test/uri/test_ftp.rb index f45bb06..3ad7864 100644 --- a/test/uri/test_ftp.rb +++ b/test/uri/test_ftp.rb @@ -33,11 +33,11 @@ def test_paths # If you think what's below is wrong, please read RubyForge bug 2055, # RFC 1738 section 3.2.2, and RFC 2396. u = URI.parse('ftp://ftp.example.com/foo/bar/file.ext') - assert(u.path == 'foo/bar/file.ext') + assert_equal('foo/bar/file.ext', u.path) u = URI.parse('ftp://ftp.example.com//foo/bar/file.ext') - assert(u.path == '/foo/bar/file.ext') + assert_equal('/foo/bar/file.ext', u.path) u = URI.parse('ftp://ftp.example.com/%2Ffoo/bar/file.ext') - assert(u.path == '/foo/bar/file.ext') + assert_equal('/foo/bar/file.ext', u.path) end def test_assemble @@ -45,8 +45,8 @@ def test_assemble # assuming everyone else has implemented RFC 2396. uri = URI::FTP.build(['user:password', 'ftp.example.com', nil, '/path/file.zip', 'i']) - assert(uri.to_s == - 'ftp://user:password@ftp.example.com/%2Fpath/file.zip;type=i') + assert_equal('ftp://user:password@ftp.example.com/%2Fpath/file.zip;type=i', + uri.to_s) end def test_select diff --git a/test/uri/test_generic.rb b/test/uri/test_generic.rb index 8209363..c725116 100644 --- a/test/uri/test_generic.rb +++ b/test/uri/test_generic.rb @@ -175,6 +175,17 @@ def test_parse # must be empty string to identify as path-abempty, not path-absolute assert_equal('', url.host) assert_equal('http:////example.com', url.to_s) + + # sec-2957667 + url = URI.parse('http://user:pass@example.com').merge('//example.net') + assert_equal('http://example.net', url.to_s) + assert_nil(url.userinfo) + url = URI.join('http://user:pass@example.com', '//example.net') + assert_equal('http://example.net', url.to_s) + assert_nil(url.userinfo) + url = URI.parse('http://user:pass@example.com') + '//example.net' + assert_equal('http://example.net', url.to_s) + assert_nil(url.userinfo) end def test_parse_scheme_with_symbols @@ -229,9 +240,9 @@ def test_merge u = URI.parse('http://foo/bar/baz') assert_equal(nil, u.merge!("")) assert_equal(nil, u.merge!(u)) - assert(nil != u.merge!(".")) + refute_nil(u.merge!(".")) assert_equal('http://foo/bar/', u.to_s) - assert(nil != u.merge!("../baz")) + refute_nil(u.merge!("../baz")) assert_equal('http://foo/baz', u.to_s) url = URI.parse('http://a/b//c') + 'd//e' @@ -267,6 +278,13 @@ def test_merge assert_equal(u0, u1) end + def test_merge_authority + u = URI.parse('http://user:pass@example.com:8080') + u0 = URI.parse('http://new.example.org/path') + u1 = u.merge('//new.example.org/path') + assert_equal(u0, u1) + end + def test_route url = URI.parse('http://hoge/a.html').route_to('http://hoge/b.html') assert_equal('b.html', url.to_s) @@ -338,7 +356,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/g', url.to_s) url = @base_url.route_to('http://a/b/c/g') assert_kind_of(URI::Generic, url) - assert('./g' != url.to_s) # ok + refute_equal('./g', url.to_s) # ok assert_equal('g', url.to_s) # http://a/b/c/d;p?q @@ -357,7 +375,7 @@ def test_rfc3986_examples assert_equal('http://a/g', url.to_s) url = @base_url.route_to('http://a/g') assert_kind_of(URI::Generic, url) - assert('/g' != url.to_s) # ok + refute_equal('/g', url.to_s) # ok assert_equal('../../g', url.to_s) # http://a/b/c/d;p?q @@ -448,7 +466,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/', url.to_s) url = @base_url.route_to('http://a/b/c/') assert_kind_of(URI::Generic, url) - assert('.' != url.to_s) # ok + refute_equal('.', url.to_s) # ok assert_equal('./', url.to_s) # http://a/b/c/d;p?q @@ -467,7 +485,7 @@ def test_rfc3986_examples assert_equal('http://a/b/', url.to_s) url = @base_url.route_to('http://a/b/') assert_kind_of(URI::Generic, url) - assert('..' != url.to_s) # ok + refute_equal('..', url.to_s) # ok assert_equal('../', url.to_s) # http://a/b/c/d;p?q @@ -495,7 +513,7 @@ def test_rfc3986_examples assert_equal('http://a/', url.to_s) url = @base_url.route_to('http://a/') assert_kind_of(URI::Generic, url) - assert('../..' != url.to_s) # ok + refute_equal('../..', url.to_s) # ok assert_equal('../../', url.to_s) # http://a/b/c/d;p?q @@ -586,7 +604,7 @@ def test_rfc3986_examples assert_equal('http://a/g', url.to_s) url = @base_url.route_to('http://a/g') assert_kind_of(URI::Generic, url) - assert('../../../g' != url.to_s) # ok? yes, it confuses you + refute_equal('../../../g', url.to_s) # ok? yes, it confuses you assert_equal('../../g', url.to_s) # and it is clearly # http://a/b/c/d;p?q @@ -596,7 +614,7 @@ def test_rfc3986_examples assert_equal('http://a/g', url.to_s) url = @base_url.route_to('http://a/g') assert_kind_of(URI::Generic, url) - assert('../../../../g' != url.to_s) # ok? yes, it confuses you + refute_equal('../../../../g', url.to_s) # ok? yes, it confuses you assert_equal('../../g', url.to_s) # and it is clearly # http://a/b/c/d;p?q @@ -606,7 +624,7 @@ def test_rfc3986_examples assert_equal('http://a/b/g', url.to_s) url = @base_url.route_to('http://a/b/g') assert_kind_of(URI::Generic, url) - assert('./../g' != url.to_s) # ok + refute_equal('./../g', url.to_s) # ok assert_equal('../g', url.to_s) # http://a/b/c/d;p?q @@ -616,7 +634,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/g/', url.to_s) url = @base_url.route_to('http://a/b/c/g/') assert_kind_of(URI::Generic, url) - assert('./g/.' != url.to_s) # ok + refute_equal('./g/.', url.to_s) # ok assert_equal('g/', url.to_s) # http://a/b/c/d;p?q @@ -626,7 +644,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/g/h', url.to_s) url = @base_url.route_to('http://a/b/c/g/h') assert_kind_of(URI::Generic, url) - assert('g/./h' != url.to_s) # ok + refute_equal('g/./h', url.to_s) # ok assert_equal('g/h', url.to_s) # http://a/b/c/d;p?q @@ -636,7 +654,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/h', url.to_s) url = @base_url.route_to('http://a/b/c/h') assert_kind_of(URI::Generic, url) - assert('g/../h' != url.to_s) # ok + refute_equal('g/../h', url.to_s) # ok assert_equal('h', url.to_s) # http://a/b/c/d;p?q @@ -646,7 +664,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/g;x=1/y', url.to_s) url = @base_url.route_to('http://a/b/c/g;x=1/y') assert_kind_of(URI::Generic, url) - assert('g;x=1/./y' != url.to_s) # ok + refute_equal('g;x=1/./y', url.to_s) # ok assert_equal('g;x=1/y', url.to_s) # http://a/b/c/d;p?q @@ -656,7 +674,7 @@ def test_rfc3986_examples assert_equal('http://a/b/c/y', url.to_s) url = @base_url.route_to('http://a/b/c/y') assert_kind_of(URI::Generic, url) - assert('g;x=1/../y' != url.to_s) # ok + refute_equal('g;x=1/../y', url.to_s) # ok assert_equal('y', url.to_s) # http://a/b/c/d;p?q @@ -804,18 +822,18 @@ def test_hierarchical hierarchical = URI.parse('http://a.b.c/example') opaque = URI.parse('mailto:mduerst@ifi.unizh.ch') - assert hierarchical.hierarchical? - refute opaque.hierarchical? + assert_predicate hierarchical, :hierarchical? + refute_predicate opaque, :hierarchical? end def test_absolute abs_uri = URI.parse('http://a.b.c/') not_abs = URI.parse('a.b.c') - refute not_abs.absolute? + refute_predicate not_abs, :absolute? - assert abs_uri.absolute - assert abs_uri.absolute? + assert_predicate abs_uri, :absolute + assert_predicate abs_uri, :absolute? end def test_ipv6 @@ -828,8 +846,10 @@ def test_ipv6 assert_equal("http://[::1]/bar", u.to_s) u.hostname = "::1" assert_equal("http://[::1]/bar", u.to_s) - u.hostname = "" - assert_equal("http:///bar", u.to_s) + + u = URI("file://foo/bar") + u.hostname = '' + assert_equal("file:///bar", u.to_s) end def test_build @@ -850,6 +870,19 @@ def test_build assert_equal("http://[::1]/bar/baz", u.to_s) assert_equal("[::1]", u.host) assert_equal("::1", u.hostname) + + assert_raise_with_message(ArgumentError, /URI::Generic/) { + URI::Generic.build(nil) + } + + c = Class.new(URI::Generic) do + def self.component; raise; end + end + expected = /\(#{URI::Generic::COMPONENT.join(', ')}\)/ + message = "fallback to URI::Generic::COMPONENT if component raised" + assert_raise_with_message(ArgumentError, expected, message) { + c.build(nil) + } end def test_build2 diff --git a/test/uri/test_http.rb b/test/uri/test_http.rb index e937b1a..8816d20 100644 --- a/test/uri/test_http.rb +++ b/test/uri/test_http.rb @@ -19,6 +19,10 @@ def test_build assert_kind_of(URI::HTTP, u) end + def test_build_empty_host + assert_raise(URI::InvalidComponentError) { URI::HTTP.build(host: '') } + end + def test_parse u = URI.parse('http://a') assert_kind_of(URI::HTTP, u) @@ -33,19 +37,19 @@ def test_normalize host = 'aBcD' u1 = URI.parse('http://' + host + '/eFg?HiJ') u2 = URI.parse('http://' + host.downcase + '/eFg?HiJ') - assert(u1.normalize.host == 'abcd') - assert(u1.normalize.path == u1.path) - assert(u1.normalize == u2.normalize) - assert(!u1.normalize.host.equal?(u1.host)) - assert( u2.normalize.host.equal?(u2.host)) + assert_equal('abcd', u1.normalize.host) + assert_equal(u1.path, u1.normalize.path) + assert_equal(u2.normalize, u1.normalize) + refute_same(u1.host, u1.normalize.host) + assert_same(u2.host, u2.normalize.host) assert_equal('http://abc/', URI.parse('http://abc').normalize.to_s) end def test_equal - assert(URI.parse('http://abc') == URI.parse('http://ABC')) - assert(URI.parse('http://abc/def') == URI.parse('http://ABC/def')) - assert(URI.parse('http://abc/def') != URI.parse('http://ABC/DEF')) + assert_equal(URI.parse('http://ABC'), URI.parse('http://abc')) + assert_equal(URI.parse('http://ABC/def'), URI.parse('http://abc/def')) + refute_equal(URI.parse('http://ABC/DEF'), URI.parse('http://abc/def')) end def test_request_uri diff --git a/test/uri/test_mailto.rb b/test/uri/test_mailto.rb index e7d3142..59bb5de 100644 --- a/test/uri/test_mailto.rb +++ b/test/uri/test_mailto.rb @@ -141,6 +141,11 @@ def test_initializer def test_check_to u = URI::MailTo.build(['joe@example.com', 'subject=Ruby']) + # Valid emails + u.to = 'a@valid.com' + assert_equal(u.to, 'a@valid.com') + + # Invalid emails assert_raise(URI::InvalidComponentError) do u.to = '#1@mail.com' end @@ -148,6 +153,79 @@ def test_check_to assert_raise(URI::InvalidComponentError) do u.to = '@invalid.email' end + + assert_raise(URI::InvalidComponentError) do + u.to = '.hello@invalid.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'hello.@invalid.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'n.@invalid.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'n..t@invalid.email' + end + + # Invalid host emails + assert_raise(URI::InvalidComponentError) do + u.to = 'a@.invalid.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid.email.' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid..email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@-invalid.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid-.email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid.-email' + end + + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid.email-' + end + + u.to = 'a@'+'invalid'.ljust(63, 'd')+'.email' + assert_raise(URI::InvalidComponentError) do + u.to = 'a@'+'invalid'.ljust(64, 'd')+'.email' + end + + u.to = 'a@invalid.'+'email'.rjust(63, 'e') + assert_raise(URI::InvalidComponentError) do + u.to = 'a@invalid.'+'email'.rjust(64, 'e') + end + end + + def test_email_regexp + re = URI::MailTo::EMAIL_REGEXP + + repeat = 10 + longlabel = '.' + 'invalid'.ljust(63, 'd') + endlabel = '' + seq = (1..3).map {|i| 10**i} + rehearsal = 10 + pre = ->(n) {'a@invalid' + longlabel*(n) + endlabel} + assert_linear_performance(seq, rehearsal: rehearsal, pre: pre) do |to| + repeat.times {re =~ to or flunk} + end + endlabel = '.' + 'email'.rjust(64, 'd') + assert_linear_performance(seq, rehearsal: rehearsal, pre: pre) do |to| + repeat.times {re =~ to and flunk} + end end def test_to_s diff --git a/test/uri/test_parser.rb b/test/uri/test_parser.rb index f455a5c..c14824f 100644 --- a/test/uri/test_parser.rb +++ b/test/uri/test_parser.rb @@ -20,17 +20,17 @@ def test_compare u2 = p.parse(url) u3 = p.parse(url) - assert(u0 == u1) - assert(u0.eql?(u1)) - assert(!u0.equal?(u1)) + assert_equal(u1, u0) + assert_send([u0, :eql?, u1]) + refute_same(u1, u0) - assert(u1 == u2) - assert(!u1.eql?(u2)) - assert(!u1.equal?(u2)) + assert_equal(u2, u1) + assert_not_send([u1, :eql?, u2]) + refute_same(u1, u2) - assert(u2 == u3) - assert(u2.eql?(u3)) - assert(!u2.equal?(u3)) + assert_equal(u3, u2) + assert_send([u2, :eql?, u3]) + refute_same(u3, u2) end def test_parse_rfc2396_parser @@ -113,4 +113,12 @@ def test_rfc3986_port_check end end end + + def test_rfc2822_make_regexp + parser = URI::RFC2396_Parser.new + regexp = parser.make_regexp("HTTP") + assert_match(regexp, "HTTP://EXAMPLE.COM/") + assert_match(regexp, "http://example.com/") + refute_match(regexp, "https://example.com/") + end end diff --git a/test/uri/test_ws.rb b/test/uri/test_ws.rb index f3918f6..d63ebd4 100644 --- a/test/uri/test_ws.rb +++ b/test/uri/test_ws.rb @@ -31,19 +31,19 @@ def test_normalize host = 'aBcD' u1 = URI.parse('ws://' + host + '/eFg?HiJ') u2 = URI.parse('ws://' + host.downcase + '/eFg?HiJ') - assert(u1.normalize.host == 'abcd') - assert(u1.normalize.path == u1.path) - assert(u1.normalize == u2.normalize) - assert(!u1.normalize.host.equal?(u1.host)) - assert( u2.normalize.host.equal?(u2.host)) + assert_equal('abcd', u1.normalize.host) + assert_equal(u1.path, u1.normalize.path) + assert_equal(u2.normalize, u1.normalize) + refute_same(u1.host, u1.normalize.host) + assert_same(u2.host, u2.normalize.host) assert_equal('ws://abc/', URI.parse('ws://abc').normalize.to_s) end def test_equal - assert(URI.parse('ws://abc') == URI.parse('ws://ABC')) - assert(URI.parse('ws://abc/def') == URI.parse('ws://ABC/def')) - assert(URI.parse('ws://abc/def') != URI.parse('ws://ABC/DEF')) + assert_equal(URI.parse('ws://ABC'), URI.parse('ws://abc')) + assert_equal(URI.parse('ws://ABC/def'), URI.parse('ws://abc/def')) + refute_equal(URI.parse('ws://ABC/DEF'), URI.parse('ws://abc/def')) end def test_request_uri diff --git a/test/uri/test_wss.rb b/test/uri/test_wss.rb index 13a2583..cbef327 100644 --- a/test/uri/test_wss.rb +++ b/test/uri/test_wss.rb @@ -31,19 +31,19 @@ def test_normalize host = 'aBcD' u1 = URI.parse('wss://' + host + '/eFg?HiJ') u2 = URI.parse('wss://' + host.downcase + '/eFg?HiJ') - assert(u1.normalize.host == 'abcd') - assert(u1.normalize.path == u1.path) - assert(u1.normalize == u2.normalize) - assert(!u1.normalize.host.equal?(u1.host)) - assert( u2.normalize.host.equal?(u2.host)) + assert_equal('abcd', u1.normalize.host) + assert_equal(u1.path, u1.normalize.path) + assert_equal(u2.normalize, u1.normalize) + refute_same(u1.host, u1.normalize.host) + assert_same(u2.host, u2.normalize.host) assert_equal('wss://abc/', URI.parse('wss://abc').normalize.to_s) end def test_equal - assert(URI.parse('wss://abc') == URI.parse('wss://ABC')) - assert(URI.parse('wss://abc/def') == URI.parse('wss://ABC/def')) - assert(URI.parse('wss://abc/def') != URI.parse('wss://ABC/DEF')) + assert_equal(URI.parse('wss://ABC'), URI.parse('wss://abc')) + assert_equal(URI.parse('wss://ABC/def'), URI.parse('wss://abc/def')) + refute_equal(URI.parse('wss://ABC/DEF'), URI.parse('wss://abc/def')) end def test_request_uri diff --git a/uri.gemspec b/uri.gemspec index 9cf0a71..0d0f897 100644 --- a/uri.gemspec +++ b/uri.gemspec @@ -30,8 +30,11 @@ Gem::Specification.new do |spec| # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. + gemspec = File.basename(__FILE__) spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do - `git ls-files -z 2>#{IO::NULL}`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } + `git ls-files -z 2>#{IO::NULL}`.split("\x0").reject do |file| + (file == gemspec) || file.start_with?(*%w[bin/ test/ rakelib/ .github/ .gitignore Gemfile Rakefile]) + end end spec.bindir = "exe" spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }