44 * regular expression matching engine
55 *
66 * partial history:
7- * 99-10-24 fl created (based on existing template matcher code)
8- * 00-03-06 fl first alpha, sort of (0.5)
9- * 00-06-30 fl added fast search optimization (0.9.3)
10- * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11- * 00-07-02 fl added charset optimizations, etc (0.9.5)
12- * 00-07-03 fl store code in pattern object, lookbehind, etc
13- * 00-07-08 fl added regs attribute
14- * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
15- * 00-08-01 fl fixes for 1.6b1 (0.9.8)
16- * 00-08-03 fl added recursion limit
17- * 00-08-07 fl use PyOS_CheckStack() if available
18- * 00-08-08 fl changed findall to return empty strings instead of None
19- * 00-08-27 fl properly propagate memory errors
20- * 00-09-02 fl return -1 instead of None for start/end/span
7+ * 1999-10-24 fl created (based on existing template matcher code)
8+ * 2000-03-06 fl first alpha, sort of (0.5)
9+ * 2000-06-30 fl added fast search optimization (0.9.3)
10+ * 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11+ * 2000-07-02 fl added charset optimizations, etc (0.9.5)
12+ * 2000-07-03 fl store code in pattern object, lookbehind, etc
13+ * 2000-07-08 fl added regs attribute
14+ * 2000-07-21 fl reset lastindex in scanner methods (0.9.6)
15+ * 2000-08-01 fl fixes for 1.6b1 (0.9.8)
16+ * 2000-08-03 fl added recursion limit
17+ * 2000-08-07 fl use PyOS_CheckStack() if available
18+ * 2000-08-08 fl changed findall to return empty strings instead of None
19+ * 2000-08-27 fl properly propagate memory errors
20+ * 2000-09-02 fl return -1 instead of None for start/end/span
21+ * 2000-09-20 fl added expand method
22+ * 2000-09-21 fl don't use the buffer interface for unicode strings
2123 *
2224 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
2325 *
@@ -1045,7 +1047,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
10451047 SRE_CHAR * end = state -> end ;
10461048 int status = 0 ;
10471049 int prefix_len = 0 ;
1048- int prefix_skip ;
1050+ int prefix_skip = 0 ;
10491051 SRE_CODE * prefix = NULL ;
10501052 SRE_CODE * charset = NULL ;
10511053 SRE_CODE * overlap = NULL ;
@@ -1291,6 +1293,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
12911293
12921294 state -> lastindex = -1 ;
12931295
1296+ #if defined(HAVE_UNICODE )
1297+ if (PyUnicode_Check (string )) {
1298+ /* unicode strings don't always support the buffer interface */
1299+ ptr = (void * ) PyUnicode_AS_DATA (string );
1300+ bytes = PyUnicode_GET_DATA_SIZE (string );
1301+ size = PyUnicode_GET_SIZE (string );
1302+ state -> charsize = sizeof (Py_UNICODE );
1303+
1304+ } else {
1305+ #endif
1306+
12941307 /* get pointer to string buffer */
12951308 buffer = string -> ob_type -> tp_as_buffer ;
12961309 if (!buffer || !buffer -> bf_getreadbuffer || !buffer -> bf_getsegcount ||
@@ -1307,7 +1320,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
13071320 }
13081321
13091322 /* determine character size */
1310-
13111323#if PY_VERSION_HEX >= 0x01060000
13121324 size = PyObject_Size (string );
13131325#else
@@ -1325,6 +1337,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
13251337 return NULL ;
13261338 }
13271339
1340+ #if defined(HAVE_UNICODE )
1341+ }
1342+ #endif
1343+
13281344 /* adjust boundaries */
13291345 if (start < 0 )
13301346 start = 0 ;
@@ -1857,6 +1873,20 @@ match_getslice(MatchObject* self, PyObject* index, PyObject* def)
18571873 return match_getslice_by_index (self , match_getindex (self , index ), def );
18581874}
18591875
1876+ static PyObject *
1877+ match_expand (MatchObject * self , PyObject * args )
1878+ {
1879+ PyObject * template ;
1880+ if (!PyArg_ParseTuple (args , "O:expand" , & template ))
1881+ return NULL ;
1882+
1883+ /* delegate to Python code */
1884+ return call (
1885+ "_expand" ,
1886+ Py_BuildValue ("OOO" , self -> pattern , self , template )
1887+ );
1888+ }
1889+
18601890static PyObject *
18611891match_group (MatchObject * self , PyObject * args )
18621892{
@@ -2094,6 +2124,7 @@ static PyMethodDef match_methods[] = {
20942124 {"span" , (PyCFunction ) match_span , 1 },
20952125 {"groups" , (PyCFunction ) match_groups , 1 },
20962126 {"groupdict" , (PyCFunction ) match_groupdict , 1 },
2127+ {"expand" , (PyCFunction ) match_expand , 1 },
20972128 {NULL , NULL }
20982129};
20992130
0 commit comments