Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 042ff9e

Browse files
committed
AMK's latest
1 parent 104be4a commit 042ff9e

4 files changed

Lines changed: 168 additions & 90 deletions

File tree

Modules/pcre-int.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*************************************************/
44

55

6-
#define PCRE_VERSION "1.04 22-Dec-1997"
6+
#define PCRE_VERSION "1.07 16-Feb-1998"
77

88

99
/* This is a library of functions to support regular expressions whose syntax
@@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals.
1212
1313
Written by: Philip Hazel <ph10@cam.ac.uk>
1414
15-
Copyright (c) 1997 University of Cambridge
15+
Copyright (c) 1998 University of Cambridge
1616
1717
-----------------------------------------------------------------------------
1818
Permission is granted to anyone to use this software for any purpose on any
@@ -192,6 +192,7 @@ enum {
192192
OP_CRMINRANGE,
193193

194194
OP_CLASS, /* Match a character class */
195+
OP_NEGCLASS, /* Match a character class, specified negatively */
195196
OP_CLASS_L, /* Match a character class */
196197
OP_REF, /* Match a back reference */
197198

Modules/pcre.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Perl-Compatible Regular Expressions *
33
*************************************************/
44

5-
/* Copyright (c) 1997 University of Cambridge */
5+
/* Copyright (c) 1998 University of Cambridge */
66

77
#ifndef _PCRE_H
88
#define _PCRE_H
@@ -17,6 +17,12 @@ it is needed here for malloc. */
1717
#include <sys/types.h>
1818
#include <stdlib.h>
1919

20+
/* Allow for C++ users */
21+
22+
#ifdef __cplusplus
23+
extern "C" {
24+
#endif
25+
2026
/* Options */
2127

2228
#define PCRE_CASELESS 0x0001
@@ -68,4 +74,8 @@ extern int pcre_info(const pcre *, int *, int *);
6874
extern pcre_extra *pcre_study(const pcre *, int, const char **);
6975
extern const char *pcre_version(void);
7076

77+
#ifdef __cplusplus
78+
} /* extern "C" */
79+
#endif
80+
7181
#endif /* End of pcre.h */

Modules/pcremodule.c

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type;
7272
#define NOT_WORD_BOUNDARY 6
7373
#define BEGINNING_OF_BUFFER 7
7474
#define END_OF_BUFFER 8
75-
75+
#define STRING 9
7676

7777
static PcreObject *
7878
newPcreObject(arg)
@@ -191,49 +191,20 @@ PyPcre_compile(self, args)
191191
{
192192
PcreObject *rv;
193193
PyObject *dictionary;
194-
char *pattern, *newpattern;
194+
char *pattern;
195195
const char *error;
196196
int num_zeros, i, j;
197197

198-
int patternlen, options, erroroffset;
199-
if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options,
198+
int options, erroroffset;
199+
if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
200200
&PyDict_Type, &dictionary))
201201
return NULL;
202202
rv = newPcreObject(args);
203203
if ( rv == NULL )
204204
return NULL;
205205

206-
/* PCRE doesn't like having null bytes in its pattern, so we have to replace
207-
any zeros in the string with the characters '\000'. This increases the size
208-
of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
209-
num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
210-
for(i=0; i<patternlen; i++) {
211-
if (pattern[i]==0) num_zeros++;
212-
}
213-
newpattern=malloc(patternlen + num_zeros*3 + 4);
214-
if (newpattern==NULL) {
215-
PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
216-
return NULL;
217-
}
218-
for (i=j=0; i<patternlen; i++, j++)
219-
{
220-
if (pattern[i]!=0) newpattern[j]=pattern[i];
221-
else {
222-
newpattern[j++] ='\\';
223-
newpattern[j++] = '0';
224-
newpattern[j++] = '0';
225-
newpattern[j ] = '0';
226-
}
227-
}
228-
/* Keep purify happy; for pcre, one null byte is enough! */
229-
newpattern[j++]='\0';
230-
newpattern[j++]='\0';
231-
newpattern[j++]='\0';
232-
newpattern[j]='\0';
233-
234-
rv->regex = pcre_compile((char*)newpattern, options,
206+
rv->regex = pcre_compile((char*)pattern, options,
235207
&error, &erroroffset, dictionary);
236-
free(newpattern);
237208
if (rv->regex==NULL)
238209
{
239210
PyMem_DEL(rv);
@@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
312283
*indexptr=index;
313284
return Py_BuildValue("c", (char)8);
314285
break;
286+
case('\\'):
287+
*indexptr=index;
288+
return Py_BuildValue("c", '\\');
289+
break;
315290

316291
case('x'):
317292
{
@@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
348323
case('g'):
349324
{
350325
int end, i;
326+
int group_num = 0, is_number=0;
327+
351328
if (pattern_len<=index)
352329
{
353330
PyErr_SetString(ErrorObject, "unfinished symbolic reference");
@@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
374351
PyErr_SetString(ErrorObject, "zero-length symbolic reference");
375352
return NULL;
376353
}
377-
if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */
378-
|| (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
354+
if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
379355
{
380-
/* XXX should include the text of the reference */
381-
PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _");
382-
return NULL;
356+
is_number = 1;
357+
group_num = pattern[index] - '0';
383358
}
384359

385360
for(i=index+1; i<end; i++)
386361
{
362+
if (is_number &&
363+
!(pcre_ctypes[pattern[i]] & ctype_digit) )
364+
{
365+
/* XXX should include the text of the reference */
366+
PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
367+
return NULL;
368+
}
369+
else {group_num = group_num * 10 + pattern[i] - '0';}
387370
if (!(pcre_ctypes[pattern[i]] & ctype_word) )
388371
{
389372
/* XXX should include the text of the reference */
@@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
394377

395378
*typeptr = MEMORY_REFERENCE;
396379
*indexptr = end+1;
380+
/* If it's a number, return the integer value of the group */
381+
if (is_number) return Py_BuildValue("i", group_num);
382+
/* Otherwise, return a string containing the group name */
397383
return Py_BuildValue("s#", pattern+index, end-index);
398384
}
399385
break;
@@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
478464
break;
479465

480466
default:
467+
/* It's some unknown escape like \s, so return a string containing
468+
\s */
469+
*typeptr = STRING;
481470
*indexptr = index;
482-
return Py_BuildValue("c", c);
471+
return Py_BuildValue("s#", pattern+index-2, 2);
483472
break;
484473
}
485474
}
@@ -571,6 +560,12 @@ PyPcre_expand(self, args)
571560
Py_DECREF(result);
572561
}
573562
break;
563+
case(STRING):
564+
{
565+
PyList_Append(results, value);
566+
total_len += PyString_Size(value);
567+
break;
568+
}
574569
default:
575570
Py_DECREF(results);
576571
PyErr_SetString(ErrorObject,

0 commit comments

Comments
 (0)