@@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type;
7272#define NOT_WORD_BOUNDARY 6
7373#define BEGINNING_OF_BUFFER 7
7474#define END_OF_BUFFER 8
75-
75+ #define STRING 9
7676
7777static PcreObject *
7878newPcreObject (arg )
@@ -191,49 +191,20 @@ PyPcre_compile(self, args)
191191{
192192 PcreObject * rv ;
193193 PyObject * dictionary ;
194- char * pattern , * newpattern ;
194+ char * pattern ;
195195 const char * error ;
196196 int num_zeros , i , j ;
197197
198- int patternlen , options , erroroffset ;
199- if (!PyArg_ParseTuple (args , "s#iO !" , & pattern , & patternlen , & options ,
198+ int options , erroroffset ;
199+ if (!PyArg_ParseTuple (args , "siO !" , & pattern , & options ,
200200 & PyDict_Type , & dictionary ))
201201 return NULL ;
202202 rv = newPcreObject (args );
203203 if ( rv == NULL )
204204 return NULL ;
205205
206- /* PCRE doesn't like having null bytes in its pattern, so we have to replace
207- any zeros in the string with the characters '\000'. This increases the size
208- of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
209- num_zeros = 1 ; /* Start at 1; this will give 3 extra bytes of leeway */
210- for (i = 0 ; i < patternlen ; i ++ ) {
211- if (pattern [i ]== 0 ) num_zeros ++ ;
212- }
213- newpattern = malloc (patternlen + num_zeros * 3 + 4 );
214- if (newpattern == NULL ) {
215- PyErr_SetString (PyExc_MemoryError , "can't allocate memory for new pattern" );
216- return NULL ;
217- }
218- for (i = j = 0 ; i < patternlen ; i ++ , j ++ )
219- {
220- if (pattern [i ]!= 0 ) newpattern [j ]= pattern [i ];
221- else {
222- newpattern [j ++ ] = '\\' ;
223- newpattern [j ++ ] = '0' ;
224- newpattern [j ++ ] = '0' ;
225- newpattern [j ] = '0' ;
226- }
227- }
228- /* Keep purify happy; for pcre, one null byte is enough! */
229- newpattern [j ++ ]= '\0' ;
230- newpattern [j ++ ]= '\0' ;
231- newpattern [j ++ ]= '\0' ;
232- newpattern [j ]= '\0' ;
233-
234- rv -> regex = pcre_compile ((char * )newpattern , options ,
206+ rv -> regex = pcre_compile ((char * )pattern , options ,
235207 & error , & erroroffset , dictionary );
236- free (newpattern );
237208 if (rv -> regex == NULL )
238209 {
239210 PyMem_DEL (rv );
@@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
312283 * indexptr = index ;
313284 return Py_BuildValue ("c" , (char )8 );
314285 break ;
286+ case ('\\' ):
287+ * indexptr = index ;
288+ return Py_BuildValue ("c" , '\\' );
289+ break ;
315290
316291 case ('x' ):
317292 {
@@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
348323 case ('g' ):
349324 {
350325 int end , i ;
326+ int group_num = 0 , is_number = 0 ;
327+
351328 if (pattern_len <=index )
352329 {
353330 PyErr_SetString (ErrorObject , "unfinished symbolic reference" );
@@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
374351 PyErr_SetString (ErrorObject , "zero-length symbolic reference" );
375352 return NULL ;
376353 }
377- if (!(pcre_ctypes [pattern [index ]] & ctype_word ) /* First char. not alphanumeric */
378- || (pcre_ctypes [pattern [index ]] & ctype_digit ) ) /* First char. a digit */
354+ if ((pcre_ctypes [pattern [index ]] & ctype_digit )) /* First char. a digit */
379355 {
380- /* XXX should include the text of the reference */
381- PyErr_SetString (ErrorObject , "first character of symbolic reference not a letter or _" );
382- return NULL ;
356+ is_number = 1 ;
357+ group_num = pattern [index ] - '0' ;
383358 }
384359
385360 for (i = index + 1 ; i < end ; i ++ )
386361 {
362+ if (is_number &&
363+ !(pcre_ctypes [pattern [i ]] & ctype_digit ) )
364+ {
365+ /* XXX should include the text of the reference */
366+ PyErr_SetString (ErrorObject , "illegal non-digit character in \\g<...> starting with digit" );
367+ return NULL ;
368+ }
369+ else {group_num = group_num * 10 + pattern [i ] - '0' ;}
387370 if (!(pcre_ctypes [pattern [i ]] & ctype_word ) )
388371 {
389372 /* XXX should include the text of the reference */
@@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
394377
395378 * typeptr = MEMORY_REFERENCE ;
396379 * indexptr = end + 1 ;
380+ /* If it's a number, return the integer value of the group */
381+ if (is_number ) return Py_BuildValue ("i" , group_num );
382+ /* Otherwise, return a string containing the group name */
397383 return Py_BuildValue ("s#" , pattern + index , end - index );
398384 }
399385 break ;
@@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
478464 break ;
479465
480466 default :
467+ /* It's some unknown escape like \s, so return a string containing
468+ \s */
469+ * typeptr = STRING ;
481470 * indexptr = index ;
482- return Py_BuildValue ("c " , c );
471+ return Py_BuildValue ("s# " , pattern + index - 2 , 2 );
483472 break ;
484473 }
485474}
@@ -571,6 +560,12 @@ PyPcre_expand(self, args)
571560 Py_DECREF (result );
572561 }
573562 break ;
563+ case (STRING ):
564+ {
565+ PyList_Append (results , value );
566+ total_len += PyString_Size (value );
567+ break ;
568+ }
574569 default :
575570 Py_DECREF (results );
576571 PyErr_SetString (ErrorObject ,
0 commit comments