Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Always issue deprecation warning when calling Regexp.new with 3rd positional argument #6976

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/racc/statetransitiontable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def mkmapexp(arr)
end
i = ii
end
Regexp.compile(map, nil, 'n')
Regexp.compile(map, Regexp::NOENCODING)
end

def set_table(entries, dummy, tbl, chk, ptr)
Expand Down
24 changes: 10 additions & 14 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -3759,10 +3759,11 @@ struct reg_init_args {

static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);

/*
* call-seq:
* Regexp.new(string, options = 0, n_flag = nil, timeout: nil) -> regexp
* Regexp.new(string, options = 0, timeout: nil) -> regexp
* Regexp.new(regexp, timeout: nil) -> regexp
*
* With argument +string+ given, returns a new regexp with the given string
Expand All @@ -3780,24 +3781,18 @@ static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
* Regexp.new('foo', 'im') # => /foo/im
*
* - The logical OR of one or more of the constants
* Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE:
* Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
* Regexp::NOENCODING:
*
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
* Regexp.new('foo', flags) # => /foo/mix
*
* - +nil+ or +false+, which is ignored.
*
* If optional argument +n_flag+ if it is a string starts with
* <code>'n'</code> or <code>'N'</code>, the encoding of +string+ is
* ignored and the new regexp encoding is fixed to +ASCII-8BIT+ or
* +US-ASCII+, by its content.
*
* Regexp.new('foo', nil, 'n') # => /foo/n
* Regexp.new("\u3042", nil, 'n') # => /\xE3\x81\x82/n
*
* If optional keyword argument +timeout+ is given,
* its float value overrides the timeout interval for the class,
* Regexp.timeout.
Expand Down Expand Up @@ -3841,7 +3836,7 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
VALUE str, src, opts = Qundef, n_flag = Qundef, kwargs;
VALUE re = Qnil;

rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);
argc = rb_scan_args(argc, argv, "12:", &src, &opts, &n_flag, &kwargs);

args->timeout = Qnil;
if (!NIL_P(kwargs)) {
Expand All @@ -3852,6 +3847,10 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
}

if (argc == 3) {
rb_warn_deprecated_to_remove("3.3", "3rd argument to Regexp.new", "2nd argument");
}

if (RB_TYPE_P(src, T_REGEXP)) {
re = src;

Expand All @@ -3876,9 +3875,6 @@ reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
enc = rb_ascii8bit_encoding();
flags |= ARG_ENCODING_NONE;
}
else {
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
}
}
str = StringValue(src);
}
Expand Down
82 changes: 43 additions & 39 deletions spec/ruby/core/regexp/shared/new.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,48 +197,50 @@ def obj.to_int() ScratchPad.record(:called) end
end
end

it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
ruby_version_is ""..."3.2" do
it "ignores the third argument if it is 'e' or 'euc' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'e').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'euc').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'E').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'EUC').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 's' or 'sjis' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 's').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'sjis').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'S').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'SJIS').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end
it "ignores the third argument if it is 'u' or 'utf8' (case-insensitive)" do
-> {
Regexp.send(@method, 'Hi', nil, 'u').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'utf8').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'U').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'UTF8').encoding.should == Encoding::US_ASCII
}.should complain(/encoding option is ignored/)
end

it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end
it "uses US_ASCII encoding if third argument is 'n' or 'none' (case insensitive) and only ascii characters" do
Regexp.send(@method, 'Hi', nil, 'n').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'none').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'N').encoding.should == Encoding::US_ASCII
Regexp.send(@method, 'Hi', nil, 'NONE').encoding.should == Encoding::US_ASCII
end

it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"
it "uses ASCII_8BIT encoding if third argument is 'n' or 'none' (case insensitive) and non-ascii characters" do
a = "(?:[\x8E\xA1-\xFE])"
str = "\A(?:#{a}|x*)\z"

Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'N').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'n').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'none').encoding.should == Encoding::BINARY
Regexp.send(@method, str, nil, 'NONE').encoding.should == Encoding::BINARY
end
end

describe "with escaped characters" do
Expand Down Expand Up @@ -598,8 +600,10 @@ def obj.to_int() ScratchPad.record(:called) end
Regexp.send(@method, /Hi/n).encoding.should == Encoding::US_ASCII
end

it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
ruby_version_is ''...'3.2' do
it "sets the encoding to source String's encoding if the Regexp literal has the 'n' option and the source String is not ASCII only" do
Regexp.send(@method, Regexp.new("\\xff", nil, 'n')).encoding.should == Encoding::BINARY
end
end
end
end
2 changes: 1 addition & 1 deletion test/psych/test_yaml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_multiline_regexp

# [ruby-core:34969]
def test_regexp_with_n
assert_cycle(Regexp.new('',0,'n'))
assert_cycle(Regexp.new('',Regexp::NOENCODING))
end
#
# Tests modified from 00basic.t in Psych.pm
Expand Down
40 changes: 32 additions & 8 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ def test_ruby_dev_24887

def test_yoshidam_net_20041111_1
s = "[\xC2\xA0-\xC3\xBE]"
r = assert_deprecated_warning(/ignored/) {Regexp.new(s, nil, "u")}
r = assert_deprecated_warning(/3\.3/) {Regexp.new(s, nil, "u")}
assert_match(r, "\xC3\xBE")
end

def test_yoshidam_net_20041111_2
assert_raise(RegexpError) do
s = "[\xFF-\xFF]".force_encoding("utf-8")
assert_warning(/ignored/) {Regexp.new(s, nil, "u")}
assert_warning(/3\.3/) {Regexp.new(s, nil, "u")}
end
end

Expand Down Expand Up @@ -646,13 +646,37 @@ def test_initialize
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/)})
assert_equal(/foo/, assert_no_warning(/ignored/) {Regexp.new(/foo/, timeout: nil)})

assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
assert_equal(//n, Regexp.new("", nil, "n"))
arg_encoding_none = //n.options # ARG_ENCODING_NONE is implementation defined value

arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
assert_deprecated_warning('') do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", Regexp::NOENCODING).encoding)
assert_equal("bar", "foobarbaz"[Regexp.new("b..", Regexp::NOENCODING)])
assert_equal(//, Regexp.new(""))
assert_equal(//, Regexp.new("", timeout: 1))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING))
assert_equal(//n, Regexp.new("", Regexp::NOENCODING, timeout: 1))

assert_equal(arg_encoding_none, Regexp.new("", Regexp::NOENCODING).options)
end

assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n", timeout: 1).encoding)
end
assert_deprecated_warning(/3\.3/) do
assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
end
assert_deprecated_warning(/3\.3/) do
assert_equal(//n, Regexp.new("", nil, "n"))
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "n").options)
end
assert_deprecated_warning(/3\.3/) do
assert_equal(arg_encoding_none, Regexp.new("", nil, "N").options)
end

assert_raise(RegexpError) { Regexp.new(")(") }
assert_raise(RegexpError) { Regexp.new('[\\40000000000') }
Expand Down