Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 770617b

Browse files
author
Fredrik Lundh
committed
SRE fixes for 2.1 alpha:
-- added some more docstrings -- fixed typo in scanner class (#125531) -- the multiline flag (?m) shouldn't affect the \Z operator (#127259) -- fixed non-greedy backtracking bug (#123769, #127259) -- added sre.DEBUG flag (currently dumps the parsed pattern structure) -- fixed a couple of glitches in groupdict (the #126587 memory leak had already been fixed by AMK)
1 parent 77b20f0 commit 770617b

7 files changed

Lines changed: 104 additions & 66 deletions

File tree

Lib/sre.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# re-compatible interface for the sre matching engine
55
#
6-
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
6+
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
77
#
88
# This version of the SRE library can be redistributed under CNRI's
99
# Python 1.6 license. For any other use, please contact Secret Labs
@@ -14,60 +14,83 @@
1414
# other compatibility work.
1515
#
1616

17-
# FIXME: change all FIXME's to XXX ;-)
18-
1917
import sre_compile
2018
import sre_parse
2119

2220
import string
2321

2422
# flags
25-
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
26-
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
27-
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
28-
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
29-
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
23+
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
24+
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
25+
U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
26+
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
27+
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
28+
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
3029

31-
# sre extensions (may or may not be in 1.6/2.0 final)
32-
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
33-
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
30+
# sre extensions (experimental, don't rely on these)
31+
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
32+
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
3433

3534
# sre exception
3635
error = sre_compile.error
3736

3837
# --------------------------------------------------------------------
3938
# public interface
4039

41-
# FIXME: add docstrings
42-
4340
def match(pattern, string, flags=0):
41+
"""Try to apply the pattern at the start of the string, returning
42+
a match object, or None if no match was found."""
4443
return _compile(pattern, flags).match(string)
4544

4645
def search(pattern, string, flags=0):
46+
"""Scan through string looking for a match to the pattern, returning
47+
a match object, or None if no match was found."""
4748
return _compile(pattern, flags).search(string)
4849

4950
def sub(pattern, repl, string, count=0):
51+
"""Return the string obtained by replacing the leftmost
52+
non-overlapping occurrences of the pattern in string by the
53+
replacement repl"""
5054
return _compile(pattern, 0).sub(repl, string, count)
5155

5256
def subn(pattern, repl, string, count=0):
57+
"""Return a 2-tuple containing (new_string, number).
58+
new_string is the string obtained by replacing the leftmost
59+
non-overlapping occurrences of the pattern in the source
60+
string by the replacement repl. number is the number of
61+
substitutions that were made."""
5362
return _compile(pattern, 0).subn(repl, string, count)
5463

5564
def split(pattern, string, maxsplit=0):
65+
"""Split the source string by the occurrences of the pattern,
66+
returning a list containing the resulting substrings."""
5667
return _compile(pattern, 0).split(string, maxsplit)
5768

5869
def findall(pattern, string, maxsplit=0):
70+
"""Return a list of all non-overlapping matches in the string.
71+
72+
If one or more groups are present in the pattern, return a
73+
list of groups; this will be a list of tuples if the pattern
74+
has more than one group.
75+
76+
Empty matches are included in the result."""
5977
return _compile(pattern, 0).findall(string, maxsplit)
6078

6179
def compile(pattern, flags=0):
80+
"Compile a regular expression pattern, returning a pattern object."
6281
return _compile(pattern, flags)
6382

6483
def purge():
84+
"Clear the regular expression cache"
6585
_cache.clear()
6686

6787
def template(pattern, flags=0):
88+
"Compile a template pattern, returning a pattern object"
89+
6890
return _compile(pattern, flags|T)
6991

7092
def escape(pattern):
93+
"Escape all non-alphanumeric characters in pattern."
7194
s = list(pattern)
7295
for i in range(len(pattern)):
7396
c = pattern[i]
@@ -204,7 +227,7 @@ def scan(self, string):
204227
break
205228
action = self.lexicon[m.lastindex][1]
206229
if callable(action):
207-
self.match = match
230+
self.match = m
208231
action = action(self, m.group())
209232
if action is not None:
210233
append(action)

Lib/sre_compile.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# convert template to internal format
55
#
6-
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
6+
# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
77
#
88
# See the sre.py file for information on usage and redistribution.
99
#
@@ -176,7 +176,7 @@ def _optimize_charset(charset, fixup):
176176
for i in range(fixup(av[0]), fixup(av[1])+1):
177177
charmap[i] = 1
178178
elif op is CATEGORY:
179-
# FIXME: could append to charmap tail
179+
# XXX: could append to charmap tail
180180
return charset # cannot compress
181181
except IndexError:
182182
# character set contains unicode characters
@@ -364,7 +364,7 @@ def compile(p, flags=0):
364364

365365
# print code
366366

367-
# FIXME: <fl> get rid of this limitation!
367+
# XXX: <fl> get rid of this limitation!
368368
assert p.pattern.groups <= 100,\
369369
"sorry, but this version only supports 100 named groups"
370370

Lib/sre_constants.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# various symbols used by the regular expression engine.
55
# run this script to update the _sre include files!
66
#
7-
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
7+
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
88
#
99
# See the sre.py file for information on usage and redistribution.
1010
#
@@ -54,10 +54,12 @@ class error(Exception):
5454
# positions
5555
AT_BEGINNING = "at_beginning"
5656
AT_BEGINNING_LINE = "at_beginning_line"
57+
AT_BEGINNING_STRING = "at_beginning_string"
5758
AT_BOUNDARY = "at_boundary"
5859
AT_NON_BOUNDARY = "at_non_boundary"
5960
AT_END = "at_end"
6061
AT_END_LINE = "at_end_line"
62+
AT_END_STRING = "at_end_string"
6163

6264
# categories
6365
CATEGORY_DIGIT = "category_digit"
@@ -109,8 +111,8 @@ class error(Exception):
109111
]
110112

111113
ATCODES = [
112-
AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
113-
AT_NON_BOUNDARY, AT_END, AT_END_LINE
114+
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
115+
AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
114116
]
115117

116118
CHCODES = [
@@ -178,6 +180,7 @@ def makedict(list):
178180
SRE_FLAG_DOTALL = 16 # treat target as a single string
179181
SRE_FLAG_UNICODE = 32 # use unicode locale
180182
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
183+
SRE_FLAG_DEBUG = 128 # debugging
181184

182185
# flags for INFO primitive
183186
SRE_INFO_PREFIX = 1 # has prefix
@@ -201,7 +204,7 @@ def dump(f, d, prefix):
201204
* NOTE: This file is generated by sre_constants.py. If you need
202205
* to change anything in here, edit sre_constants.py and run it.
203206
*
204-
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
207+
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
205208
*
206209
* See the _sre.c file for information on usage and redistribution.
207210
*/

Lib/sre_parse.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# convert re-style regular expression to sre pattern
55
#
6-
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
6+
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
77
#
88
# See the sre.py file for information on usage and redistribution.
99
#
@@ -34,7 +34,7 @@
3434
}
3535

3636
CATEGORIES = {
37-
r"\A": (AT, AT_BEGINNING), # start of string
37+
r"\A": (AT, AT_BEGINNING_STRING), # start of string
3838
r"\b": (AT, AT_BOUNDARY),
3939
r"\B": (AT, AT_NON_BOUNDARY),
4040
r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
@@ -43,7 +43,7 @@
4343
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
4444
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
4545
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
46-
r"\Z": (AT, AT_END), # end of string
46+
r"\Z": (AT, AT_END_STRING), # end of string
4747
}
4848

4949
FLAGS = {
@@ -421,13 +421,13 @@ def _parse(source, state):
421421
code1 = code1[1][0]
422422
set.append(code1)
423423

424-
# FIXME: <fl> move set optimization to compiler!
424+
# XXX: <fl> should move set optimization to compiler!
425425
if len(set)==1 and set[0][0] is LITERAL:
426426
subpattern.append(set[0]) # optimization
427427
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
428428
subpattern.append((NOT_LITERAL, set[1][1])) # optimization
429429
else:
430-
# FIXME: <fl> add charmap optimization
430+
# XXX: <fl> should add charmap optimization here
431431
subpattern.append((IN, set))
432432

433433
elif this and this[0] in REPEAT_CHARS:
@@ -457,7 +457,7 @@ def _parse(source, state):
457457
min = int(lo)
458458
if hi:
459459
max = int(hi)
460-
# FIXME: <fl> check that hi >= lo!
460+
# XXX: <fl> check that hi >= lo ???
461461
else:
462462
raise error, "not supported"
463463
# figure out which item to repeat
@@ -601,7 +601,8 @@ def parse(str, flags=0, pattern=None):
601601
elif tail:
602602
raise error, "bogus characters at end of regular expression"
603603

604-
# p.dump()
604+
if flags & SRE_FLAG_DEBUG:
605+
p.dump()
605606

606607
if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
607608
# the VERBOSE flag was switched on inside the pattern. to be
@@ -672,8 +673,7 @@ def parse_template(source, pattern):
672673
return p
673674

674675
def expand_template(template, match):
675-
# FIXME: <fl> this is sooooo slow. drop in the slicelist
676-
# code instead
676+
# XXX: <fl> this is sooooo slow. drop in the slicelist code instead
677677
p = []
678678
a = p.append
679679
sep = match.string[:0]

Lib/test/test_sre.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ def test(expression, result, exception=None):
4747
print 'Running tests on character literals'
4848

4949
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
50-
test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
51-
test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
52-
test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
53-
test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
54-
test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
55-
test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
50+
test(r"""sre.match(r"\%03o" % i, chr(i)) != None""", 1)
51+
test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") != None""", 1)
52+
test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") != None""", 1)
53+
test(r"""sre.match(r"\x%02x" % i, chr(i)) != None""", 1)
54+
test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") != None""", 1)
55+
test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") != None""", 1)
5656
test(r"""sre.match("\911", "")""", None, sre.error)
5757

5858
#
@@ -197,11 +197,11 @@ def bump_num(matchobj):
197197
p = ""
198198
for i in range(0, 256):
199199
p = p + chr(i)
200-
test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
200+
test(r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1)
201201
test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
202202

203203
pat = sre.compile(sre.escape(p))
204-
test(r"""pat.match(p) is not None""", 1)
204+
test(r"""pat.match(p) != None""", 1)
205205
test(r"""pat.match(p).span()""", (0,256))
206206

207207
if verbose:

0 commit comments

Comments
 (0)