Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 95e8053

Browse files
committed
1.5a3 prerelease 1 from AMK
1 parent a74ef66 commit 95e8053

8 files changed

Lines changed: 226 additions & 114 deletions

File tree

Lib/re.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,19 @@ def __init__(self):
317317

318318
class Set(Instruction):
319319
name = 'set'
320-
def __init__(self, set):
320+
def __init__(self, set, flags=0):
321321
self.set = set
322-
Instruction.__init__(self, chr(3), 33)
322+
if flags & IGNORECASE: self.set=map(string.lower, self.set)
323+
if len(set)==1:
324+
# If only one element, use the "exact" opcode (it'll be faster)
325+
Instruction.__init__(self, chr(4), 2)
326+
else:
327+
# Use the "set" opcode
328+
Instruction.__init__(self, chr(3), 33)
323329
def assemble(self, position, labels):
330+
if len(self.set)==1:
331+
# If only one character in set, generate an "exact" opcode
332+
return self.opcode + self.set[0]
324333
result = self.opcode
325334
temp = 0
326335
for i, c in map(lambda x: (x, chr(x)), range(256)):
@@ -333,14 +342,16 @@ def assemble(self, position, labels):
333342
def __repr__(self):
334343
result = '%-15s' % (self.name)
335344
self.set.sort()
345+
# XXX this should print more intelligently
336346
for char in self.set:
337347
result = result + char
338348
return result
339349

340350
class Exact(Instruction):
341351
name = 'exact'
342-
def __init__(self, char):
352+
def __init__(self, char, flags):
343353
self.char = char
354+
if flags & IGNORECASE: self.char=string.lower(self.char)
344355
Instruction.__init__(self, chr(4), 2)
345356
def assemble(self, position, labels):
346357
return self.opcode + self.char
@@ -881,7 +892,7 @@ def compile(pattern, flags=0):
881892
escape_type, value, index = expand_escape(pattern, index)
882893

883894
if escape_type == CHAR:
884-
stack.append([Exact(value)])
895+
stack.append([Exact(value, flags)])
885896
lastop = '\\' + value
886897

887898
elif escape_type == MEMORY_REFERENCE:
@@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
13061317

13071318
elif char == '.':
13081319
if flags & DOTALL:
1309-
stack.append([Set(map(chr, range(256)))])
1320+
stack.append([Set(map(chr, range(256)), flags)])
13101321
else:
13111322
stack.append([AnyChar()])
13121323
lastop = '.'
@@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
13361347
index = end + 1
13371348
# do not change lastop
13381349
else:
1339-
stack.append([Exact(char)])
1350+
stack.append([Exact(char, flags)])
13401351
lastop = '#'
13411352

13421353
elif char in string.whitespace:
13431354
if not (flags & VERBOSE):
1344-
stack.append([Exact(char)])
1355+
stack.append([Exact(char, flags)])
13451356
lastop = char
13461357

13471358
elif char == '[':
@@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
14491460
index = index + 1
14501461

14511462
if negate:
1463+
# If case is being ignored, then both upper- and lowercase
1464+
# versions of the letters must be excluded.
1465+
if flags & IGNORECASE: set=set+map(string.upper, set)
14521466
notset = []
14531467
for char in map(chr, range(256)):
14541468
if char not in set:
14551469
notset.append(char)
14561470
if len(notset) == 0:
14571471
raise error, 'empty negated set'
1458-
stack.append([Set(notset)])
1472+
stack.append([Set(notset, flags)])
14591473
else:
14601474
if len(set) == 0:
14611475
raise error, 'empty set'
1462-
stack.append([Set(set)])
1476+
stack.append([Set(set, flags)])
14631477

14641478
lastop = '[]'
14651479

14661480
else:
1467-
stack.append([Exact(char)])
1481+
stack.append([Exact(char, flags)])
14681482
lastop = char
14691483

14701484
code = []
@@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
14851499
code.append(Label(label))
14861500
label = label + 1
14871501
code.append(End())
1502+
# print code
14881503
return RegexObject(pattern, flags, code, register, groupindex)
14891504

14901505
# Replace expand_escape and _expand functions with their C equivalents.

Lib/test/re_tests.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@
318318
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
319319
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
320320
('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
321+
('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
321322
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
322323
('multiple words of text', 'uh-uh', FAIL),
323324
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
@@ -448,7 +449,6 @@
448449
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
449450
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
450451
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
451-
('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
452452
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
453453
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
454454
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
@@ -506,10 +506,21 @@
506506
('a.b', 'a\nb', FAIL),
507507
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
508508

509-
# test \w, etc.
509+
# test \w, etc. both inside and outside character classes
510510

511511
('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
512+
('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
512513
('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
514+
('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
513515
('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
514516
('[\\d-x]', '-', SYNTAX_ERROR),
517+
(r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
518+
(r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
519+
520+
(r'\xff', '\377', SUCCEED, 'found', chr(255)),
521+
(r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
522+
(r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
523+
('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
524+
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
525+
(r'[\t][\n][\v][\r][\f][\a][\A][\b][\B][\Z][\g]', '\t\n\v\r\f\aA\bBZg', SUCCEED, 'found', '\t\n\v\r\f\aA\bBZg'),
515526
]

Lib/test/regex_tests.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@
278278
('\\([xyz]*\\)x', 'abcx', SUCCEED,
279279
'found+"-"+g1', 'x-'),
280280
('\\(a\\)+b\\|aac', 'aac', SUCCEED,
281-
'found+"-"+g1', 'aac-None')
281+
'found+"-"+g1', 'aac-None'),
282+
('\<a', 'a', SUCCEED, 'found', 'a'),
283+
('\<a', '!', FAIL),
284+
('a\<b', 'ab', FAIL),
285+
('a\>', 'ab', FAIL),
286+
('a\>', 'a!', SUCCEED, 'found', 'a'),
287+
('a\>', 'a', SUCCEED, 'found', 'a'),
282288
]
283289

Lib/test/test_re.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ def bump_num(matchobj):
3131

3232
assert re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
3333

34+
assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD'
35+
assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
36+
assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
37+
3438
except AssertionError:
3539
raise TestFailed, "re.sub"
3640

@@ -120,7 +124,6 @@ def bump_num(matchobj):
120124
print 'Running re_tests test suite'
121125

122126
for t in tests:
123-
print t
124127
sys.stdout.flush()
125128
pattern=s=outcome=repl=expected=None
126129
if len(t)==5:
@@ -136,6 +139,7 @@ def bump_num(matchobj):
136139
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
137140
else:
138141
print '=== Syntax error:', t
142+
except KeyboardInterrupt: raise KeyboardInterrupt
139143
except:
140144
print '*** Unexpected error ***'
141145
if verbose:
@@ -182,3 +186,10 @@ def bump_num(matchobj):
182186
print repr(repl)+' should be '+repr(expected)
183187
else:
184188
print '=== Failed incorrectly', t
189+
190+
# Try the match with IGNORECASE enabled, and check that it
191+
# still succeeds.
192+
obj=re.compile(pattern, re.IGNORECASE)
193+
result=obj.search(s)
194+
if result==None:
195+
print '=== Fails on case-insensitive match', t

Modules/regexmodule.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,10 @@ regobj_match(re, args)
132132
re->re_lastok = NULL;
133133
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
134134
if (result < -1) {
135-
/* Failure like stack overflow */
136-
PyErr_SetString(RegexError, "match failure");
135+
/* Serious failure of some sort; if re_match didn't
136+
set an exception, raise a generic error */
137+
if (!PyErr_Occurred())
138+
PyErr_SetString(RegexError, "match failure");
137139
return NULL;
138140
}
139141
if (result >= 0) {
@@ -174,8 +176,10 @@ regobj_search(re, args)
174176
result = re_search(&re->re_patbuf, buffer, size, offset, range,
175177
&re->re_regs);
176178
if (result < -1) {
177-
/* Failure like stack overflow */
178-
PyErr_SetString(RegexError, "match failure");
179+
/* Serious failure of some sort; if re_match didn't
180+
set an exception, raise a generic error */
181+
if (!PyErr_Occurred())
182+
PyErr_SetString(RegexError, "match failure");
179183
return NULL;
180184
}
181185
if (result >= 0) {

0 commit comments

Comments
 (0)