Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit dac5849

Browse files
author
Fredrik Lundh
committed
fixed character set description in docstring (SRE uses Python
strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

skip calling the subx helper if the template is callable.
interestingly enough, this means that

    def callback(m):
        return literal
    result = pattern.sub(callback, string)

is much faster than

    result = pattern.sub(literal, string)
1 parent 0402dd1 commit dac5849

2 files changed

Lines changed: 56 additions & 176 deletions

File tree

Lib/sre.py

Lines changed: 15 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,13 @@
1717
r"""Support for regular expressions (RE).
1818
1919
This module provides regular expression matching operations similar to
20-
those found in Perl. It's 8-bit clean: the strings being processed may
21-
contain both null bytes and characters whose high bit is set. Regular
22-
expression pattern strings may not contain null bytes, but can specify
23-
the null byte using the \\number notation. Characters with the high
24-
bit set may be included.
25-
26-
Regular expressions can contain both special and ordinary
27-
characters. Most ordinary characters, like "A", "a", or "0", are the
28-
simplest regular expressions; they simply match themselves. You can
20+
those found in Perl. It supports both 8-bit and Unicode strings; both
21+
the pattern and the strings being processed can contain null bytes and
22+
characters outside the US ASCII range.
23+
24+
Regular expressions can contain both special and ordinary characters.
25+
Most ordinary characters, like "A", "a", or "0", are the simplest
26+
regular expressions; they simply match themselves. You can
2927
concatenate ordinary characters, so last matches the string 'last'.
3028
3129
The special characters are:
@@ -45,7 +43,7 @@
4543
"|" A|B, creates an RE that will match either A or B.
4644
(...) Matches the RE inside the parentheses.
4745
The contents can be retrieved or matched later in the string.
48-
(?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
46+
(?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
4947
(?:...) Non-grouping version of regular parentheses.
5048
(?P<name>...) The substring matched by the group is accessible by name.
5149
(?P=name) Matches the text matched earlier by the group named name.
@@ -54,7 +52,7 @@
5452
(?!...) Matches if ... doesn't match next.
5553
5654
The special sequences consist of "\\" and a character from the list
57-
below. If the ordinary character is not on the list, then the
55+
below. If the ordinary character is not on the list, then the
5856
resulting RE will match the second character.
5957
\number Matches the contents of the group of the same number.
6058
\A Matches only at the start of the string.
@@ -246,76 +244,13 @@ def _expand(pattern, match, template):
246244

247245
def _subx(pattern, template):
248246
# internal: pattern.sub/subn implementation helper
249-
if callable(template):
250-
filter = template
251-
else:
252-
template = _compile_repl(template, pattern)
253-
if not template[0] and len(template[1]) == 1:
254-
# literal replacement
255-
filter = template[1][0]
256-
else:
257-
def filter(match, template=template):
258-
return sre_parse.expand_template(template, match)
259-
return filter
260-
261-
def _sub(pattern, template, text, count=0):
262-
# internal: pattern.sub implementation hook
263-
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
264-
return _subn(pattern, template, text, count)[0]
265-
266-
def _subn(pattern, template, text, count=0):
267-
# internal: pattern.subn implementation hook
268-
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
269-
filter = _subx(pattern, template)
270-
if not callable(filter):
247+
template = _compile_repl(template, pattern)
248+
if not template[0] and len(template[1]) == 1:
271249
# literal replacement
272-
def filter(match, literal=filter):
273-
return literal
274-
n = i = 0
275-
s = []
276-
append = s.append
277-
c = pattern.scanner(text)
278-
while not count or n < count:
279-
m = c.search()
280-
if not m:
281-
break
282-
b, e = m.span()
283-
if i < b:
284-
append(text[i:b])
285-
elif i == b == e and n:
286-
append(text[i:b])
287-
continue # ignore empty match at previous position
288-
append(filter(m))
289-
i = e
290-
n = n + 1
291-
append(text[i:])
292-
return _join(s, text[:0]), n
293-
294-
def _split(pattern, text, maxsplit=0):
295-
# internal: pattern.split implementation hook
296-
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
297-
n = i = 0
298-
s = []
299-
append = s.append
300-
extend = s.extend
301-
c = pattern.scanner(text)
302-
g = pattern.groups
303-
while not maxsplit or n < maxsplit:
304-
m = c.search()
305-
if not m:
306-
break
307-
b, e = m.span()
308-
if b == e:
309-
if i >= len(text):
310-
break
311-
continue
312-
append(text[i:b])
313-
if g and b != e:
314-
extend(list(m.groups()))
315-
i = e
316-
n = n + 1
317-
append(text[i:])
318-
return s
250+
return template[1][0]
251+
def filter(match, template=template):
252+
return sre_parse.expand_template(template, match)
253+
return filter
319254

320255
# register myself for pickling
321256

Modules/_sre.c

Lines changed: 41 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,6 @@ static char copyright[] =
7676
/* -------------------------------------------------------------------- */
7777
/* optional features */
7878

79-
/* test: define to use sre.py helpers instead of C code */
80-
#undef USE_PYTHON_SPLIT
81-
#undef USE_PYTHON_SUB
82-
8379
/* prevent run-away recursion (bad patterns on long strings) */
8480

8581
#if !defined(USE_STACKCHECK)
@@ -1251,6 +1247,8 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
12511247
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
12521248
state->start = ptr;
12531249
state->ptr = ++ptr;
1250+
if (flags & SRE_INFO_LITERAL)
1251+
return 1; /* we got all of it */
12541252
status = SRE_MATCH(state, pattern + 2, 1);
12551253
if (status != 0)
12561254
break;
@@ -1820,66 +1818,6 @@ join(PyObject* list, PyObject* pattern)
18201818
return result;
18211819
}
18221820

1823-
1824-
#ifdef USE_PYTHON_SUB
1825-
static PyObject*
1826-
pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1827-
{
1828-
PyObject* template;
1829-
PyObject* string;
1830-
PyObject* count = Py_False; /* zero */
1831-
static char* kwlist[] = { "repl", "string", "count", NULL };
1832-
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1833-
&template, &string, &count))
1834-
return NULL;
1835-
1836-
/* delegate to Python code */
1837-
return call(
1838-
SRE_MODULE, "_sub",
1839-
Py_BuildValue("OOOO", self, template, string, count)
1840-
);
1841-
}
1842-
#endif
1843-
1844-
#ifdef USE_PYTHON_SUB
1845-
static PyObject*
1846-
pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1847-
{
1848-
PyObject* template;
1849-
PyObject* string;
1850-
PyObject* count = Py_False; /* zero */
1851-
static char* kwlist[] = { "repl", "string", "count", NULL };
1852-
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1853-
&template, &string, &count))
1854-
return NULL;
1855-
1856-
/* delegate to Python code */
1857-
return call(
1858-
SRE_MODULE, "_subn",
1859-
Py_BuildValue("OOOO", self, template, string, count)
1860-
);
1861-
}
1862-
#endif
1863-
1864-
#if defined(USE_PYTHON_SPLIT)
1865-
static PyObject*
1866-
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1867-
{
1868-
PyObject* string;
1869-
PyObject* maxsplit = Py_False; /* zero */
1870-
static char* kwlist[] = { "source", "maxsplit", NULL };
1871-
if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1872-
&string, &maxsplit))
1873-
return NULL;
1874-
1875-
/* delegate to Python code */
1876-
return call(
1877-
SRE_MODULE, "_split",
1878-
Py_BuildValue("OOO", self, string, maxsplit)
1879-
);
1880-
}
1881-
#endif
1882-
18831821
static PyObject*
18841822
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
18851823
{
@@ -1980,7 +1918,6 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
19801918

19811919
}
19821920

1983-
#if !defined(USE_PYTHON_SPLIT)
19841921
static PyObject*
19851922
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
19861923
{
@@ -2071,15 +2008,16 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
20712008
}
20722009

20732010
/* get segment following last match */
2074-
item = PySequence_GetSlice(
2075-
string, STATE_OFFSET(&state, last), state.endpos
2076-
);
2077-
if (!item)
2078-
goto error;
2079-
status = PyList_Append(list, item);
2080-
Py_DECREF(item);
2081-
if (status < 0)
2082-
goto error;
2011+
i = STATE_OFFSET(&state, last);
2012+
if (i < state.endpos) {
2013+
item = PySequence_GetSlice(string, i, state.endpos);
2014+
if (!item)
2015+
goto error;
2016+
status = PyList_Append(list, item);
2017+
Py_DECREF(item);
2018+
if (status < 0)
2019+
goto error;
2020+
}
20832021

20842022
state_fini(&state);
20852023
return list;
@@ -2090,9 +2028,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
20902028
return NULL;
20912029

20922030
}
2093-
#endif
20942031

2095-
#if !defined(USE_PYTHON_SUB)
20962032
static PyObject*
20972033
pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
20982034
int count, int subn)
@@ -2108,15 +2044,22 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
21082044
int i, b, e;
21092045
int filter_is_callable;
21102046

2111-
/* call subx helper to get the filter */
2112-
filter = call(
2113-
SRE_MODULE, "_subx",
2114-
Py_BuildValue("OO", self, template)
2115-
);
2116-
if (!filter)
2117-
return NULL;
2118-
2119-
filter_is_callable = PyCallable_Check(filter);
2047+
if (PyCallable_Check(template)) {
2048+
/* sub/subn takes either a function or a template */
2049+
filter = template;
2050+
Py_INCREF(filter);
2051+
filter_is_callable = 1;
2052+
} else {
2053+
/* if not callable, call the template compiler. it may return
2054+
either a filter function or a literal string */
2055+
filter = call(
2056+
SRE_MODULE, "_subx",
2057+
Py_BuildValue("OO", self, template)
2058+
);
2059+
if (!filter)
2060+
return NULL;
2061+
filter_is_callable = PyCallable_Check(filter);
2062+
}
21202063

21212064
string = state_init(&state, self, string, 0, INT_MAX);
21222065
if (!string)
@@ -2169,7 +2112,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
21692112
goto next;
21702113

21712114
if (filter_is_callable) {
2172-
/* filter match */
2115+
/* pass match object through filter */
21732116
match = pattern_new_match(self, &state, 1);
21742117
if (!match)
21752118
goto error;
@@ -2186,7 +2129,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
21862129
} else {
21872130
/* filter is literal string */
21882131
item = filter;
2189-
Py_INCREF(filter);
2132+
Py_INCREF(item);
21902133
}
21912134

21922135
/* add to list */
@@ -2208,18 +2151,21 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
22082151
}
22092152

22102153
/* get segment following last match */
2211-
item = PySequence_GetSlice(string, i, state.endpos);
2212-
if (!item)
2213-
goto error;
2214-
status = PyList_Append(list, item);
2215-
Py_DECREF(item);
2216-
if (status < 0)
2217-
goto error;
2154+
if (i < state.endpos) {
2155+
item = PySequence_GetSlice(string, i, state.endpos);
2156+
if (!item)
2157+
goto error;
2158+
status = PyList_Append(list, item);
2159+
Py_DECREF(item);
2160+
if (status < 0)
2161+
goto error;
2162+
}
22182163

22192164
state_fini(&state);
22202165

2221-
/* convert list to single string */
2166+
/* convert list to single string (also removes list) */
22222167
item = join(list, self->pattern);
2168+
22232169
if (!item)
22242170
return NULL;
22252171

@@ -2262,7 +2208,6 @@ pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
22622208

22632209
return pattern_subx(self, template, string, count, 1);
22642210
}
2265-
#endif
22662211

22672212
static PyObject*
22682213
pattern_copy(PatternObject* self, PyObject* args)

0 commit comments

Comments
 (0)