Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 22a309a

Browse files
Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
re.ASCII. It never worked.
1 parent 720b8c9 commit 22a309a

4 files changed

Lines changed: 84 additions & 17 deletions

File tree

Doc/library/re.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,11 @@ form.
521521
current locale. The use of this flag is discouraged as the locale mechanism
522522
is very unreliable, and it only handles one "culture" at a time anyway;
523523
you should use Unicode matching instead, which is the default in Python 3
524-
for Unicode (str) patterns.
524+
for Unicode (str) patterns. This flag makes sense only with bytes patterns.
525+
526+
.. deprecated-removed:: 3.5 3.6
527+
Deprecated the use of :const:`re.LOCALE` with string patterns or
528+
:const:`re.ASCII`.
525529

526530

527531
.. data:: M

Lib/sre_parse.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,13 +751,23 @@ def _parse(source, state):
751751
def fix_flags(src, flags):
752752
# Check and fix flags according to the type of pattern (str or bytes)
753753
if isinstance(src, str):
754+
if flags & SRE_FLAG_LOCALE:
755+
import warnings
756+
warnings.warn("LOCALE flag with a str pattern is deprecated. "
757+
"Will be an error in 3.6",
758+
DeprecationWarning, stacklevel=6)
754759
if not flags & SRE_FLAG_ASCII:
755760
flags |= SRE_FLAG_UNICODE
756761
elif flags & SRE_FLAG_UNICODE:
757762
raise ValueError("ASCII and UNICODE flags are incompatible")
758763
else:
759764
if flags & SRE_FLAG_UNICODE:
760765
raise ValueError("can't use UNICODE flag with a bytes pattern")
766+
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
767+
import warnings
768+
warnings.warn("ASCII and LOCALE flags are incompatible. "
769+
"Will be an error in 3.6",
770+
DeprecationWarning, stacklevel=6)
761771
return flags
762772

763773
def parse(str, flags=0, pattern=None):

Lib/test/test_re.py

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -502,10 +502,6 @@ def test_special_escapes(self):
502502
"abcd abc bcd bx", re.ASCII).group(1), "bx")
503503
self.assertEqual(re.search(r"\B(b.)\B",
504504
"abc bcd bc abxd", re.ASCII).group(1), "bx")
505-
self.assertEqual(re.search(r"\b(b.)\b",
506-
"abcd abc bcd bx", re.LOCALE).group(1), "bx")
507-
self.assertEqual(re.search(r"\B(b.)\B",
508-
"abc bcd bc abxd", re.LOCALE).group(1), "bx")
509505
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
510506
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
511507
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -526,8 +522,6 @@ def test_special_escapes(self):
526522
b"1aa! a").group(0), b"1aa! a")
527523
self.assertEqual(re.search(r"\d\D\w\W\s\S",
528524
"1aa! a", re.ASCII).group(0), "1aa! a")
529-
self.assertEqual(re.search(r"\d\D\w\W\s\S",
530-
"1aa! a", re.LOCALE).group(0), "1aa! a")
531525
self.assertEqual(re.search(br"\d\D\w\W\s\S",
532526
b"1aa! a", re.LOCALE).group(0), b"1aa! a")
533527

@@ -693,9 +687,12 @@ def test_getlower(self):
693687
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
694688
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
695689
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
690+
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
696691

697692
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
698693
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
694+
self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
695+
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
699696

700697
def test_not_literal(self):
701698
self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
@@ -780,8 +777,10 @@ def test_constants(self):
780777
self.assertEqual(re.X, re.VERBOSE)
781778

782779
def test_flags(self):
783-
for flag in [re.I, re.M, re.X, re.S, re.L]:
780+
for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
784781
self.assertTrue(re.compile('^pattern$', flag))
782+
for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
783+
self.assertTrue(re.compile(b'^pattern$', flag))
785784

786785
def test_sre_character_literals(self):
787786
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
@@ -1146,6 +1145,52 @@ def test_ascii_and_unicode_flag(self):
11461145
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
11471146
self.assertRaises(ValueError, re.compile, '(?au)\w')
11481147

1148+
def test_locale_flag(self):
1149+
import locale
1150+
_, enc = locale.getlocale(locale.LC_CTYPE)
1151+
# Search non-ASCII letter
1152+
for i in range(128, 256):
1153+
try:
1154+
c = bytes([i]).decode(enc)
1155+
sletter = c.lower()
1156+
if sletter == c: continue
1157+
bletter = sletter.encode(enc)
1158+
if len(bletter) != 1: continue
1159+
if bletter.decode(enc) != sletter: continue
1160+
bpat = re.escape(bytes([i]))
1161+
break
1162+
except (UnicodeError, TypeError):
1163+
pass
1164+
else:
1165+
bletter = None
1166+
bpat = b'A'
1167+
# Bytes patterns
1168+
pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
1169+
if bletter:
1170+
self.assertTrue(pat.match(bletter))
1171+
pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
1172+
if bletter:
1173+
self.assertTrue(pat.match(bletter))
1174+
pat = re.compile(bpat, re.IGNORECASE)
1175+
if bletter:
1176+
self.assertIsNone(pat.match(bletter))
1177+
pat = re.compile(b'\w', re.LOCALE)
1178+
if bletter:
1179+
self.assertTrue(pat.match(bletter))
1180+
pat = re.compile(b'(?L)\w')
1181+
if bletter:
1182+
self.assertTrue(pat.match(bletter))
1183+
pat = re.compile(b'\w')
1184+
if bletter:
1185+
self.assertIsNone(pat.match(bletter))
1186+
# Incompatibilities
1187+
self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
1188+
self.assertWarns(DeprecationWarning, re.compile, '(?L)')
1189+
self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
1190+
self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
1191+
self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
1192+
self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
1193+
11491194
def test_bug_6509(self):
11501195
# Replacement strings of both types must parse properly.
11511196
# all strings
@@ -1477,6 +1522,10 @@ def test_bytes(self):
14771522
self.check_flags(b'bytes pattern', re.A,
14781523
"re.compile(b'bytes pattern', re.ASCII)")
14791524

1525+
def test_locale(self):
1526+
self.check_flags(b'bytes pattern', re.L,
1527+
"re.compile(b'bytes pattern', re.LOCALE)")
1528+
14801529
def test_quotes(self):
14811530
self.check('random "double quoted" pattern',
14821531
'''re.compile('random "double quoted" pattern')''')
@@ -1590,8 +1639,16 @@ def test_re_tests(self):
15901639
pass
15911640
else:
15921641
with self.subTest('bytes pattern match'):
1593-
bpat = re.compile(bpat)
1594-
self.assertTrue(bpat.search(bs))
1642+
obj = re.compile(bpat)
1643+
self.assertTrue(obj.search(bs))
1644+
1645+
# Try the match with LOCALE enabled, and check that it
1646+
# still succeeds.
1647+
with self.subTest('locale-sensitive match'):
1648+
obj = re.compile(bpat, re.LOCALE)
1649+
result = obj.search(bs)
1650+
if result is None:
1651+
print('=== Fails on locale-sensitive match', t)
15951652

15961653
# Try the match with the search area limited to the extent
15971654
# of the match and see if it still succeeds. \B will
@@ -1609,13 +1666,6 @@ def test_re_tests(self):
16091666
obj = re.compile(pattern, re.IGNORECASE)
16101667
self.assertTrue(obj.search(s))
16111668

1612-
# Try the match with LOCALE enabled, and check that it
1613-
# still succeeds.
1614-
if '(?u)' not in pattern:
1615-
with self.subTest('locale-sensitive match'):
1616-
obj = re.compile(pattern, re.LOCALE)
1617-
self.assertTrue(obj.search(s))
1618-
16191669
# Try the match with UNICODE locale enabled, and check
16201670
# that it still succeeds.
16211671
with self.subTest('unicode-sensitive match'):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ Core and Builtins
191191
Library
192192
-------
193193

194+
- Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
195+
re.ASCII. It never worked.
196+
194197
- Issue #22902: The "ip" command is now used on Linux to determine MAC address
195198
in uuid.getnode(). Patch by Bruno Cauet.
196199

0 commit comments

Comments
 (0)