|
16 | 16 | # matching performs on large strings. |
17 | 17 |
|
18 | 18 | benchmarks = [ |
| 19 | + |
| 20 | + # test common prefix |
| 21 | + ('Python|Perl', 'Perl'), # Alternation |
| 22 | + ('(Python|Perl)', 'Perl'), # Grouped alternation |
| 23 | + |
| 24 | + ('Python|Perl|Tcl', 'Perl'), # Alternation |
| 25 | + ('(Python|Perl|Tcl)', 'Perl'), # Grouped alternation |
| 26 | + |
| 27 | + ('(Python)\\1', 'PythonPython'), # Backreference |
| 28 | + ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization |
| 29 | + ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets |
| 30 | + |
19 | 31 | ('Python', 'Python'), # Simple text literal |
20 | 32 | ('.*Python', 'Python'), # Bad text literal |
21 | 33 | ('.*Python.*', 'Python'), # Worse text literal |
22 | 34 | ('.*(Python)', 'Python'), # Bad text literal with grouping |
23 | 35 |
|
24 | | - ('(Python|Perl|Tcl', 'Perl'), # Alternation |
25 | | - ('(Python|Perl|Tcl)', 'Perl'), # Grouped alternation |
26 | | - ('(Python)\\1', 'PythonPython'), # Backreference |
27 | | - ('([0a-z][a-z]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization |
28 | | - ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,') # A few sets |
29 | 36 | ] |
30 | 37 |
|
31 | 38 | # Test suite (for verifying correctness) |
|
79 | 86 | # Test various letter escapes |
80 | 87 | (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'), |
81 | 88 | (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'), |
82 | | - (r'\u', '', SYNTAX_ERROR), # A Perl escape |
| 89 | + # NOTE: not an error under PCRE/PRE: |
| 90 | + # (r'\u', '', SYNTAX_ERROR), # A Perl escape |
83 | 91 | (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'), |
84 | 92 | (r'\xff', '\377', SUCCEED, 'found', chr(255)), |
85 | | - (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)), |
86 | | - (r'\x00f', '\017', SUCCEED, 'found', chr(15)), |
87 | | - (r'\x00fe', '\376', SUCCEED, 'found', chr(254)), |
| 93 | + # new \x semantics: \x consumes exactly two hex digits, so any further digits are literal text |
| 94 | + (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)), |
| 95 | + (r'\x00f', '\017', FAIL, 'found', chr(15)), |
| 96 | + (r'\x00fe', '\376', FAIL, 'found', chr(254)), |
| 97 | + # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)), |
| 98 | + # (r'\x00f', '\017', SUCCEED, 'found', chr(15)), |
| 99 | + # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)), |
88 | 100 |
|
89 | 101 | (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", |
90 | 102 | SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"), |
|
138 | 150 | ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'), |
139 | 151 | ('a[-b]', 'a-', SUCCEED, 'found', 'a-'), |
140 | 152 | ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'), |
141 | | - ('a[b-]', 'a-', SYNTAX_ERROR), |
| 153 | + # NOTE: not an error under PCRE/PRE: |
| 154 | + # ('a[b-]', 'a-', SYNTAX_ERROR), |
142 | 155 | ('a[]b', '-', SYNTAX_ERROR), |
143 | 156 | ('a[', '-', SYNTAX_ERROR), |
144 | 157 | ('a\\', '-', SYNTAX_ERROR), |
|
543 | 556 |
|
544 | 557 | # Check odd placement of embedded pattern modifiers |
545 | 558 |
|
546 | | - ('w(?i)', 'W', SYNTAX_ERROR), |
| 559 | + # not an error under PCRE/PRE: |
| 560 | + ('w(?i)', 'W', SUCCEED, 'found', 'W'), |
| 561 | + # ('w(?i)', 'W', SYNTAX_ERROR), |
547 | 562 |
|
548 | 563 | # Comments using the x embedded pattern modifier |
549 | 564 |
|
|
577 | 592 | ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'), |
578 | 593 | ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'), |
579 | 594 | ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'), |
580 | | - ('[\\d-x]', '-', SYNTAX_ERROR), |
| 595 | + # not an error under PCRE/PRE: |
| 596 | + # ('[\\d-x]', '-', SYNTAX_ERROR), |
581 | 597 | (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), |
582 | 598 | (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '), |
583 | 599 |
|
584 | 600 | (r'\xff', '\377', SUCCEED, 'found', chr(255)), |
585 | | - (r'\x00ff', '\377', SUCCEED, 'found', chr(255)), |
| 601 | + # new \x semantics: \x consumes exactly two hex digits, so any further digits are literal text |
| 602 | + (r'\x00ff', '\377', FAIL, 'found', chr(255)), |
| 603 | + # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)), |
586 | 604 | (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), |
587 | 605 | ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'), |
588 | 606 | (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)), |
589 | 607 | (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'), |
590 | 608 |
|
591 | | - # additional regression tests (1.6 and later) |
| 609 | + # |
| 610 | + # post-1.5.2 additions |
592 | 611 |
|
593 | 612 | # xmllib problem |
594 | 613 | (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'), |
595 | | - |
| 614 | + # bug 111869 (PRE/PCRE fails on this one, SRE doesn't) |
| 615 | + (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'), |
| 616 | + # bug 112468 |
| 617 | + ('(', '', SYNTAX_ERROR), |
| 618 | + ('[\\41]', '!', SUCCEED, 'found', '!'), |
596 | 619 | ] |
0 commit comments