Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4f35c71

Browse files
committed
Locale data that contains regex metacharacters is now properly escaped.
Closes bug #1039270.
1 parent 579b3e2 commit 4f35c71

3 files changed

Lines changed: 22 additions & 2 deletions

File tree

Lib/_strptime.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import calendar
1616
from re import compile as re_compile
1717
from re import IGNORECASE
18+
from re import escape as re_escape
1819
from datetime import date as datetime_date
1920
try:
2021
from thread import allocate_lock as _thread_allocate_lock
@@ -232,7 +233,7 @@ def __seqToRE(self, to_convert, directive):
232233
return ''
233234
to_convert = to_convert[:]
234235
to_convert.sort(key=len, reverse=True)
235-
regex = '|'.join(to_convert)
236+
regex = '|'.join(re_escape(stuff) for stuff in to_convert)
236237
regex = '(?P<%s>%s' % (directive, regex)
237238
return '%s)' % regex
238239

@@ -245,7 +246,8 @@ def pattern(self, format):
245246
"""
246247
processed_format = ''
247248
# The sub() call escapes all characters that might be misconstrued
248-
# as regex syntax.
249+
# as regex syntax. Cannot use re.escape since we have to deal with
250+
# format directives (%m, etc.).
249251
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
250252
format = regex_chars.sub(r"\\\1", format)
251253
whitespace_replacement = re_compile('\s+')

Lib/test/test_strptime.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,19 @@ def test_matching_with_escapes(self):
176176
found = compiled_re.match("\w+ 10")
177177
self.failUnless(found, "Escaping failed of format '\w+ 10'")
178178

179+
def test_locale_data_w_regex_metacharacters(self):
    # Locale-supplied strings that contain regex metacharacters (here the
    # parentheses in the timezone names) must be escaped before they are
    # joined into the %Z pattern.
    # Discovered by bug #1039270.
    locale_time = _strptime.LocaleTime()
    # Each element must be a frozenset of whole timezone names; note the
    # one-element tuple, since frozenset("...") would instead produce a set
    # of the string's individual characters.
    locale_time.timezone = (frozenset(("utc", "gmt",
                                       "Tokyo (standard time)")),
                            frozenset(("Tokyo (daylight time)",)))
    time_re = _strptime.TimeRE(locale_time)
    self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"),
                    "locale data that contains regex metacharacters is not"
                    " properly escaped")
191+
179192
class StrptimeTests(unittest.TestCase):
180193
"""Tests for _strptime.strptime."""
181194

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ Extension modules
3434
Library
3535
-------
3636

37+
- time.strptime() now properly escapes timezones and all other locale-specific
38+
strings for regex-specific symbols. Was breaking under Japanese Windows when
39+
the timezone was specified as "Tokyo (standard time)".
40+
Closes bug #1039270.
41+
3742
- Updates for the email package:
3843
+ All deprecated APIs that in email 2.x issued warnings have been removed:
3944
_encoder argument to the MIMEText constructor, Message.add_payload(),

0 commit comments

Comments
 (0)