Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 74fb303

Browse files
committed
Jeffrey's latests
1 parent 9e18ec7 commit 74fb303

3 files changed

Lines changed: 165 additions & 32 deletions

File tree

Modules/regexpr.c

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
/*
2-
* -*- mode: c-mode; c-file-style: python -*-
3-
*/
4-
51
/* regexpr.c
62
*
73
* Author: Tatu Ylonen <[email protected]>
@@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
472468
#define MAX_NESTING 100 /* max nesting level of operators */
473469

474470
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
475-
#define Sword 1
476471

477-
static char re_syntax_table[256];
472+
char re_syntax_table[256];
478473

479-
static void re_compile_initialize(void)
474+
void re_compile_initialize(void)
480475
{
481476
int a;
482477

483478
static int syntax_table_inited = 0;
484-
479+
485480
if (!syntax_table_inited)
486481
{
487482
syntax_table_inited = 1;
@@ -491,7 +486,11 @@ static void re_compile_initialize(void)
491486
for (a = 'A'; a <= 'Z'; a++)
492487
re_syntax_table[a] = Sword;
493488
for (a = '0'; a <= '9'; a++)
494-
re_syntax_table[a] = Sword;
489+
re_syntax_table[a] = Sword | Sdigit;
490+
re_syntax_table['_'] = Sword;
491+
for (a = 9; a <= 13; a++)
492+
re_syntax_table[a] = Swhitespace;
493+
re_syntax_table[' '] = Swhitespace;
495494
}
496495
re_compile_initialized = 1;
497496
for (a = 0; a < 256; a++)
@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
602601
return; /* we have already been here */
603602
visited[pos] = 1;
604603
for (;;)
605-
switch (code[pos++])
606-
{
604+
switch (code[pos++]) {
607605
case Cend:
608-
{
609-
*can_be_null = 1;
610-
return;
611-
}
606+
{
607+
*can_be_null = 1;
608+
return;
609+
}
612610
case Cbol:
613611
case Cbegbuf:
614612
case Cendbuf:
@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
16091607

16101608
NEW_STATE(state, bufp->num_registers);
16111609

1612-
if (!re_compile_initialized)
1613-
re_compile_initialize();
1614-
16151610
continue_matching:
16161611
switch (*code++)
16171612
{
@@ -1883,23 +1878,23 @@ int re_match(regexp_t bufp,
18831878
{
18841879
if (text == textend)
18851880
goto fail;
1886-
if (SYNTAX(*text) != Sword)
1881+
if (SYNTAX(*text) & Sword)
18871882
goto fail;
18881883
if (text == textstart)
18891884
goto continue_matching;
1890-
if (SYNTAX(text[-1]) != Sword)
1885+
if (!(SYNTAX(text[-1]) & Sword))
18911886
goto continue_matching;
18921887
goto fail;
18931888
}
18941889
case Cwordend:
18951890
{
18961891
if (text == textstart)
18971892
goto fail;
1898-
if (SYNTAX(text[-1]) != Sword)
1893+
if (!(SYNTAX(text[-1]) & Sword))
18991894
goto fail;
19001895
if (text == textend)
19011896
goto continue_matching;
1902-
if (SYNTAX(*text) == Sword)
1897+
if (SYNTAX(*text) & Sword)
19031898
goto fail;
19041899
goto continue_matching;
19051900
}
@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
19101905

19111906
if (text == textstart || text == textend)
19121907
goto continue_matching;
1913-
if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
1908+
if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
19141909
goto continue_matching;
19151910
goto fail;
19161911
}
@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
19201915
* beginning and end of buffer. */
19211916
if (text == textstart || text == textend)
19221917
goto fail;
1923-
if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
1918+
if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
19241919
goto fail;
19251920
goto continue_matching;
19261921
}
19271922
case Csyntaxspec:
19281923
{
19291924
NEXTCHAR(ch);
1930-
if (SYNTAX(ch) != (unsigned char)*code++)
1925+
if (!(SYNTAX(ch) & (unsigned char)*code++))
19311926
goto fail;
19321927
goto continue_matching;
19331928
}
19341929
case Cnotsyntaxspec:
19351930
{
19361931
NEXTCHAR(ch);
1937-
if (SYNTAX(ch) != (unsigned char)*code++)
1932+
if (SYNTAX(ch) & (unsigned char)*code++)
19381933
break;
19391934
goto continue_matching;
19401935
}
@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
20672062
}
20682063
return -1;
20692064
}
2065+
2066+
/*
2067+
** Local Variables:
2068+
** mode: c
2069+
** c-file-style: "python"
2070+
** End:
2071+
*/

Modules/regexpr.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,16 @@ typedef struct re_registers
6767
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
6868
#define RE_SYNTAX_EMACS 0
6969

70+
#define Sword 1
71+
#define Swhitespace 2
72+
#define Sdigit 4
73+
7074
/* Rename all exported symbols to avoid conflicts with similarly named
7175
symbols in some systems' standard C libraries... */
7276

7377
#define re_syntax _Py_re_syntax
78+
#define re_syntax_table _Py_re_syntax_table
79+
#define re_compile_initialize _Py_re_compile_initialize
7480
#define re_set_syntax _Py_re_set_syntax
7581
#define re_compile_pattern _Py_re_compile_pattern
7682
#define re_match _Py_re_match
@@ -85,6 +91,10 @@ extern int re_syntax;
8591
/* This is the actual syntax mask. It was added so that Python could do
8692
* syntax-dependent munging of patterns before compilation. */
8793

94+
extern char re_syntax_table[256];
95+
96+
void re_compile_initialize(void);
97+
8898
int re_set_syntax(int syntax);
8999
/* This sets the syntax to use and returns the previous syntax. The
90100
* syntax is specified by a bit mask of the above defined bits. */
@@ -133,6 +143,8 @@ int re_exec(char *s);
133143
#else /* HAVE_PROTOTYPES */
134144

135145
extern int re_syntax;
146+
extern char re_syntax_table[256];
147+
void re_compile_initialize();
136148
int re_set_syntax();
137149
char *re_compile_pattern();
138150
int re_match();

Modules/reopmodule.c

Lines changed: 125 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
4343

4444
static PyObject *ReopError; /* Exception */
4545

46+
#define IGNORECASE 0x01
47+
#define MULTILINE 0x02
48+
#define DOTALL 0x04
49+
#define VERBOSE 0x08
50+
51+
static char *reop_casefold;
52+
4653
static PyObject *
4754
makeresult(regs, num_regs)
4855
struct re_registers *regs;
@@ -90,6 +97,10 @@ reop_match(self, args)
9097
int flags, pos, result;
9198
struct re_pattern_buffer bufp;
9299
struct re_registers re_regs;
100+
PyObject *modules = NULL;
101+
PyObject *reopmodule = NULL;
102+
PyObject *reopdict = NULL;
103+
PyObject *casefold = NULL;
93104

94105
if (!PyArg_Parse(args, "(s#iiis#is#i)",
95106
&(bufp.buffer), &(bufp.allocated),
@@ -102,20 +113,44 @@ reop_match(self, args)
102113

103114
/* XXX sanity-check the input data */
104115
bufp.used=bufp.allocated;
105-
bufp.translate=NULL;
116+
if (flags & IGNORECASE)
117+
{
118+
if ((modules = PyImport_GetModuleDict()) == NULL)
119+
return NULL;
120+
121+
if ((reopmodule = PyDict_GetItemString(modules,
122+
"reop")) == NULL)
123+
return NULL;
124+
125+
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
126+
return NULL;
127+
128+
if ((casefold = PyDict_GetItemString(reopdict,
129+
"casefold")) == NULL)
130+
return NULL;
131+
132+
bufp.translate = PyString_AsString(casefold);
133+
}
134+
else
135+
bufp.translate=NULL;
106136
bufp.fastmap_accurate=1;
107137
bufp.can_be_null=can_be_null;
108138
bufp.uses_registers=1;
109139
bufp.anchor=anchor;
110140

111-
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
141+
for(i=0; i<bufp.num_registers; i++) {
142+
re_regs.start[i]=-1;
143+
re_regs.end[i]=-1;
144+
}
112145

113146
result = re_match(&bufp,
114147
string, stringlen, pos,
115148
&re_regs);
149+
116150
if (result < -1) {
117151
/* Failure like stack overflow */
118152
PyErr_SetString(ReopError, "match failure");
153+
119154
return NULL;
120155
}
121156
if (result == -1) {
@@ -136,6 +171,10 @@ reop_search(self, args)
136171
int flags, pos, result;
137172
struct re_pattern_buffer bufp;
138173
struct re_registers re_regs;
174+
PyObject *modules = NULL;
175+
PyObject *reopmodule = NULL;
176+
PyObject *reopdict = NULL;
177+
PyObject *casefold = NULL;
139178

140179
if (!PyArg_Parse(args, "(s#iiis#is#i)",
141180
&(bufp.buffer), &(bufp.allocated),
@@ -148,26 +187,51 @@ reop_search(self, args)
148187

149188
/* XXX sanity-check the input data */
150189
bufp.used=bufp.allocated;
151-
bufp.translate=NULL;
190+
if (flags & IGNORECASE)
191+
{
192+
if ((modules = PyImport_GetModuleDict()) == NULL)
193+
return NULL;
194+
195+
if ((reopmodule = PyDict_GetItemString(modules,
196+
"reop")) == NULL)
197+
return NULL;
198+
199+
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
200+
return NULL;
201+
202+
if ((casefold = PyDict_GetItemString(reopdict,
203+
"casefold")) == NULL)
204+
return NULL;
205+
206+
bufp.translate = PyString_AsString(casefold);
207+
}
208+
else
209+
bufp.translate=NULL;
152210
bufp.fastmap_accurate=1;
153211
bufp.can_be_null=can_be_null;
154212
bufp.uses_registers=1;
155213
bufp.anchor=anchor;
156214

157-
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
215+
for(i = 0; i < bufp.num_registers; i++) {
216+
re_regs.start[i] = -1;
217+
re_regs.end[i] = -1;
218+
}
158219

159220
result = re_search(&bufp,
160221
string, stringlen, pos, stringlen-pos,
161222
&re_regs);
223+
162224
if (result < -1) {
163225
/* Failure like stack overflow */
164226
PyErr_SetString(ReopError, "match failure");
165227
return NULL;
166228
}
229+
167230
if (result == -1) {
168231
Py_INCREF(Py_None);
169232
return Py_None;
170233
}
234+
171235
return makeresult(&re_regs, bufp.num_registers);
172236
}
173237

@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
345409
void
346410
initreop()
347411
{
348-
PyObject *m, *d, *v;
412+
PyObject *m, *d, *k, *v, *o;
349413
int i;
350414
char *s;
351-
415+
char j[2];
416+
417+
re_compile_initialize();
418+
352419
m = Py_InitModule("reop", reop_global_methods);
353420
d = PyModule_GetDict(m);
354421

@@ -370,12 +437,64 @@ initreop()
370437
else
371438
s[i] = i;
372439
}
440+
373441
if (PyDict_SetItemString(d, "casefold", v) < 0)
374442
goto finally;
375443
Py_DECREF(v);
376444

445+
/* Initialize the syntax table */
446+
447+
o = PyDict_New();
448+
if (o == NULL)
449+
goto finally;
450+
451+
j[1] = '\0';
452+
for (i = 0; i < 256; i++)
453+
{
454+
j[0] = i;
455+
k = PyString_FromStringAndSize(j, 1);
456+
if (k == NULL)
457+
goto finally;
458+
v = PyInt_FromLong(re_syntax_table[i]);
459+
if (v == NULL)
460+
goto finally;
461+
if (PyDict_SetItem(o, k, v) < 0)
462+
goto finally;
463+
Py_DECREF(k);
464+
Py_DECREF(v);
465+
}
466+
467+
if (PyDict_SetItemString(d, "syntax_table", o) < 0)
468+
goto finally;
469+
Py_DECREF(o);
470+
471+
v = PyInt_FromLong(Sword);
472+
if (v == NULL)
473+
goto finally;
474+
475+
if (PyDict_SetItemString(d, "word", v) < 0)
476+
goto finally;
477+
Py_DECREF(v);
478+
479+
v = PyInt_FromLong(Swhitespace);
480+
if (v == NULL)
481+
goto finally;
482+
483+
if (PyDict_SetItemString(d, "whitespace", v) < 0)
484+
goto finally;
485+
Py_DECREF(v);
486+
487+
v = PyInt_FromLong(Sdigit);
488+
if (v == NULL)
489+
goto finally;
490+
491+
if (PyDict_SetItemString(d, "digit", v) < 0)
492+
goto finally;
493+
Py_DECREF(v);
494+
377495
if (!PyErr_Occurred())
378496
return;
497+
379498
finally:
380499
Py_FatalError("can't initialize reop module");
381500
}

0 commit comments

Comments
 (0)