Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7186cc2

Browse files
bpo-30277: Replace _sre.getlower() with _sre.ascii_tolower() and _sre.unicode_tolower(). (#1468)
1 parent 76a3e51 commit 7186cc2

4 files changed

Lines changed: 83 additions & 44 deletions

File tree

Lib/sre_compile.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -69,13 +69,14 @@ def _compile(code, pattern, flags):
6969
REPEATING_CODES = _REPEATING_CODES
7070
SUCCESS_CODES = _SUCCESS_CODES
7171
ASSERT_CODES = _ASSERT_CODES
72-
if (flags & SRE_FLAG_IGNORECASE and
73-
not (flags & SRE_FLAG_LOCALE) and
74-
flags & SRE_FLAG_UNICODE and
75-
not (flags & SRE_FLAG_ASCII)):
76-
fixes = _ignorecase_fixes
77-
else:
78-
fixes = None
72+
tolower = None
73+
fixes = None
74+
if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
75+
if flags & SRE_FLAG_UNICODE and not flags & SRE_FLAG_ASCII:
76+
tolower = _sre.unicode_tolower
77+
fixes = _ignorecase_fixes
78+
else:
79+
tolower = _sre.ascii_tolower
7980
for op, av in pattern:
8081
if op in LITERAL_CODES:
8182
if not flags & SRE_FLAG_IGNORECASE:
@@ -85,7 +86,7 @@ def _compile(code, pattern, flags):
8586
emit(OP_LOC_IGNORE[op])
8687
emit(av)
8788
else:
88-
lo = _sre.getlower(av, flags)
89+
lo = tolower(av)
8990
if fixes and lo in fixes:
9091
emit(IN_IGNORE)
9192
skip = _len(code); emit(0)
@@ -102,16 +103,12 @@ def _compile(code, pattern, flags):
102103
elif op is IN:
103104
if not flags & SRE_FLAG_IGNORECASE:
104105
emit(op)
105-
fixup = None
106106
elif flags & SRE_FLAG_LOCALE:
107107
emit(IN_LOC_IGNORE)
108-
fixup = None
109108
else:
110109
emit(IN_IGNORE)
111-
def fixup(literal, flags=flags):
112-
return _sre.getlower(literal, flags)
113110
skip = _len(code); emit(0)
114-
_compile_charset(av, flags, code, fixup, fixes)
111+
_compile_charset(av, flags, code, tolower, fixes)
115112
code[skip] = _len(code) - skip
116113
elif op is ANY:
117114
if flags & SRE_FLAG_DOTALL:

Lib/test/test_re.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -883,17 +883,23 @@ def test_ignore_case_range(self):
883883
def test_category(self):
884884
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
885885

886-
def test_getlower(self):
886+
@cpython_only
887+
def test_case_helpers(self):
887888
import _sre
888-
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
889-
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
890-
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
891-
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
892-
893-
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
894-
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
895-
self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
896-
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
889+
for i in range(128):
890+
c = chr(i)
891+
lo = ord(c.lower())
892+
self.assertEqual(_sre.ascii_tolower(i), lo)
893+
self.assertEqual(_sre.unicode_tolower(i), lo)
894+
895+
for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
896+
c = chr(i)
897+
self.assertEqual(_sre.ascii_tolower(i), i)
898+
if i != 0x0130:
899+
self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
900+
901+
self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
902+
self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
897903

898904
def test_not_literal(self):
899905
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")

Modules/_sre.c

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -274,25 +274,35 @@ _sre_getcodesize_impl(PyObject *module)
274274
}
275275

276276
/*[clinic input]
277-
_sre.getlower -> int
277+
_sre.ascii_tolower -> int
278278
279279
character: int
280-
flags: int
281280
/
282281
283282
[clinic start generated code]*/
284283

285284
static int
286-
_sre_getlower_impl(PyObject *module, int character, int flags)
287-
/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
285+
_sre_ascii_tolower_impl(PyObject *module, int character)
286+
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
288287
{
289-
if (flags & SRE_FLAG_LOCALE)
290-
return sre_lower_locale(character);
291-
if (flags & SRE_FLAG_UNICODE)
292-
return sre_lower_unicode(character);
293288
return sre_lower(character);
294289
}
295290

291+
/*[clinic input]
292+
_sre.unicode_tolower -> int
293+
294+
character: int
295+
/
296+
297+
[clinic start generated code]*/
298+
299+
static int
300+
_sre_unicode_tolower_impl(PyObject *module, int character)
301+
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
302+
{
303+
return sre_lower_unicode(character);
304+
}
305+
296306
LOCAL(void)
297307
state_reset(SRE_STATE* state)
298308
{
@@ -2740,7 +2750,8 @@ static PyTypeObject Scanner_Type = {
27402750
static PyMethodDef _functions[] = {
27412751
_SRE_COMPILE_METHODDEF
27422752
_SRE_GETCODESIZE_METHODDEF
2743-
_SRE_GETLOWER_METHODDEF
2753+
_SRE_ASCII_TOLOWER_METHODDEF
2754+
_SRE_UNICODE_TOLOWER_METHODDEF
27442755
{NULL, NULL}
27452756
};
27462757

Modules/clinic/_sre.c.h

Lines changed: 37 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -29,34 +29,59 @@ _sre_getcodesize(PyObject *module, PyObject *Py_UNUSED(ignored))
2929
return return_value;
3030
}
3131

32-
PyDoc_STRVAR(_sre_getlower__doc__,
33-
"getlower($module, character, flags, /)\n"
32+
PyDoc_STRVAR(_sre_ascii_tolower__doc__,
33+
"ascii_tolower($module, character, /)\n"
3434
"--\n"
3535
"\n");
3636

37-
#define _SRE_GETLOWER_METHODDEF \
38-
{"getlower", (PyCFunction)_sre_getlower, METH_FASTCALL, _sre_getlower__doc__},
37+
#define _SRE_ASCII_TOLOWER_METHODDEF \
38+
{"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__},
3939

4040
static int
41-
_sre_getlower_impl(PyObject *module, int character, int flags);
41+
_sre_ascii_tolower_impl(PyObject *module, int character);
4242

4343
static PyObject *
44-
_sre_getlower(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
44+
_sre_ascii_tolower(PyObject *module, PyObject *arg)
4545
{
4646
PyObject *return_value = NULL;
4747
int character;
48-
int flags;
4948
int _return_value;
5049

51-
if (!_PyArg_ParseStack(args, nargs, "ii:getlower",
52-
&character, &flags)) {
50+
if (!PyArg_Parse(arg, "i:ascii_tolower", &character)) {
51+
goto exit;
52+
}
53+
_return_value = _sre_ascii_tolower_impl(module, character);
54+
if ((_return_value == -1) && PyErr_Occurred()) {
5355
goto exit;
5456
}
57+
return_value = PyLong_FromLong((long)_return_value);
58+
59+
exit:
60+
return return_value;
61+
}
62+
63+
PyDoc_STRVAR(_sre_unicode_tolower__doc__,
64+
"unicode_tolower($module, character, /)\n"
65+
"--\n"
66+
"\n");
67+
68+
#define _SRE_UNICODE_TOLOWER_METHODDEF \
69+
{"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__},
70+
71+
static int
72+
_sre_unicode_tolower_impl(PyObject *module, int character);
73+
74+
static PyObject *
75+
_sre_unicode_tolower(PyObject *module, PyObject *arg)
76+
{
77+
PyObject *return_value = NULL;
78+
int character;
79+
int _return_value;
5580

56-
if (!_PyArg_NoStackKeywords("getlower", kwnames)) {
81+
if (!PyArg_Parse(arg, "i:unicode_tolower", &character)) {
5782
goto exit;
5883
}
59-
_return_value = _sre_getlower_impl(module, character, flags);
84+
_return_value = _sre_unicode_tolower_impl(module, character);
6085
if ((_return_value == -1) && PyErr_Occurred()) {
6186
goto exit;
6287
}
@@ -690,4 +715,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
690715
{
691716
return _sre_SRE_Scanner_search_impl(self);
692717
}
693-
/*[clinic end generated code: output=e6dab3ba8864da9e input=a9049054013a1b77]*/
718+
/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/

0 commit comments

Comments
 (0)