2424 * 2000-10-24 fl really fixed assert_not; reset groups in findall
2525 * 2000-12-21 fl fixed memory leak in groupdict
2626 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
27- * 2001-01-15 fl avoid recursion for MIN_UTIL ; fixed uppercase literal bug
27+ * 2001-01-15 fl avoid recursion for MIN_UNTIL ; fixed uppercase literal bug
2828 * 2001-01-16 fl fixed memory leak in pattern destructor
29+ * 2001-03-20 fl lots of fixes for 2.1b2
2930 *
3031 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
3132 *
4041
4142#ifndef SRE_RECURSIVE
4243
43- char copyright [] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB " ;
44+ char copyright [] = " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB " ;
4445
4546#include "Python.h"
4647
@@ -141,11 +142,6 @@ static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
141 142 106 , 107 , 108 , 109 , 110 , 111 , 112 , 113 , 114 , 115 , 116 , 117 , 118 , 119 ,
142 143 120 , 121 , 122 , 123 , 124 , 125 , 126 , 127 };
143144
144- static unsigned int sre_lower (unsigned int ch )
145- {
146- return ((ch ) < 128 ? sre_char_lower [ch ] : ch );
147- }
148-
149145#define SRE_IS_DIGIT (ch )\
150146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
151147#define SRE_IS_SPACE (ch )\
@@ -157,30 +153,39 @@ static unsigned int sre_lower(unsigned int ch)
157153#define SRE_IS_WORD (ch )\
158154 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
159155
160- /* locale-specific character predicates */
161-
162- static unsigned int sre_lower_locale (unsigned int ch )
156+ static unsigned int sre_lower (unsigned int ch )
163157{
164- return ((ch ) < 256 ? tolower (( ch )) : ch );
158+ return ((ch ) < 128 ? sre_char_lower [ ch ] : ch );
165159}
160+
161+ /* locale-specific character predicates */
162+
166163#define SRE_LOC_IS_DIGIT (ch ) ((ch) < 256 ? isdigit((ch)) : 0)
167164#define SRE_LOC_IS_SPACE (ch ) ((ch) < 256 ? isspace((ch)) : 0)
168165#define SRE_LOC_IS_LINEBREAK (ch ) ((ch) == '\n')
169166#define SRE_LOC_IS_ALNUM (ch ) ((ch) < 256 ? isalnum((ch)) : 0)
170167#define SRE_LOC_IS_WORD (ch ) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
171168
169+ static unsigned int sre_lower_locale (unsigned int ch )
170+ {
171+ return ((ch ) < 256 ? tolower ((ch )) : ch );
172+ }
173+
172174/* unicode-specific character predicates */
173175
174176#if defined(HAVE_UNICODE )
175- static unsigned int sre_lower_unicode (unsigned int ch )
176- {
177- return (unsigned int ) Py_UNICODE_TOLOWER ((Py_UNICODE )(ch ));
178- }
177+
179178#define SRE_UNI_IS_DIGIT (ch ) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
180179#define SRE_UNI_IS_SPACE (ch ) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
181180#define SRE_UNI_IS_LINEBREAK (ch ) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
182181#define SRE_UNI_IS_ALNUM (ch ) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
183182#define SRE_UNI_IS_WORD (ch ) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
183+
184+ static unsigned int sre_lower_unicode (unsigned int ch )
185+ {
186+ return (unsigned int ) Py_UNICODE_TOLOWER ((Py_UNICODE )(ch ));
187+ }
188+
184189#endif
185190
186191LOCAL (int )
@@ -418,6 +423,42 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
418423 this = ((void * ) ptr < state -> end ) ?
419424 SRE_IS_WORD ((int ) ptr [0 ]) : 0 ;
420425 return this == that ;
426+
427+ case SRE_AT_LOC_BOUNDARY :
428+ if (state -> beginning == state -> end )
429+ return 0 ;
430+ that = ((void * ) ptr > state -> beginning ) ?
431+ SRE_LOC_IS_WORD ((int ) ptr [-1 ]) : 0 ;
432+ this = ((void * ) ptr < state -> end ) ?
433+ SRE_LOC_IS_WORD ((int ) ptr [0 ]) : 0 ;
434+ return this != that ;
435+
436+ case SRE_AT_LOC_NON_BOUNDARY :
437+ if (state -> beginning == state -> end )
438+ return 0 ;
439+ that = ((void * ) ptr > state -> beginning ) ?
440+ SRE_LOC_IS_WORD ((int ) ptr [-1 ]) : 0 ;
441+ this = ((void * ) ptr < state -> end ) ?
442+ SRE_LOC_IS_WORD ((int ) ptr [0 ]) : 0 ;
443+ return this == that ;
444+
445+ case SRE_AT_UNI_BOUNDARY :
446+ if (state -> beginning == state -> end )
447+ return 0 ;
448+ that = ((void * ) ptr > state -> beginning ) ?
449+ SRE_UNI_IS_WORD ((int ) ptr [-1 ]) : 0 ;
450+ this = ((void * ) ptr < state -> end ) ?
451+ SRE_UNI_IS_WORD ((int ) ptr [0 ]) : 0 ;
452+ return this != that ;
453+
454+ case SRE_AT_UNI_NON_BOUNDARY :
455+ if (state -> beginning == state -> end )
456+ return 0 ;
457+ that = ((void * ) ptr > state -> beginning ) ?
458+ SRE_UNI_IS_WORD ((int ) ptr [-1 ]) : 0 ;
459+ this = ((void * ) ptr < state -> end ) ?
460+ SRE_UNI_IS_WORD ((int ) ptr [0 ]) : 0 ;
461+ return this == that ;
421462 }
422463
423464 return 0 ;
@@ -1037,7 +1078,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
1037 1078
1038 1079 /* see if the tail matches */
1039 1080 state -> repeat = rp -> prev ;
1040- if (rp -> pattern [2 ] == 65535 ) {
1081+ /* FIXME: the following fix doesn't always work (#133283) */
1082+ if (0 && rp -> pattern [2 ] == 65535 ) {
1041 1083 /* unbounded repeat */
1042 1084 for (;;) {
1043 1085 i = SRE_MATCH (state , pattern , level + 1 );
0 commit comments