Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b91a36b

Browse files
committed
Integrate the changes from PyXML's version of pyexpat.c revisions
1.47, 1.48, 1.49 (name interning support).
1 parent d805fef commit b91a36b

1 file changed

Lines changed: 109 additions & 46 deletions

File tree

Modules/pyexpat.c

Lines changed: 109 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ typedef struct {
6060
int ordered_attributes; /* Return attributes as a list. */
6161
int specified_attributes; /* Report only specified attributes. */
6262
int in_callback; /* Is a callback active? */
63+
PyObject *intern; /* Dictionary to intern strings */
6364
PyObject **handlers;
6465
} xmlparseobject;
6566

@@ -123,7 +124,7 @@ set_error(xmlparseobject *self)
123124
Returns None if str is a null pointer. */
124125

125126
static PyObject *
126-
conv_string_to_unicode(XML_Char *str)
127+
conv_string_to_unicode(const XML_Char *str)
127128
{
128129
/* XXX currently this code assumes that XML_Char is 8-bit,
129130
and hence in UTF-8. */
@@ -132,8 +133,7 @@ conv_string_to_unicode(XML_Char *str)
132133
Py_INCREF(Py_None);
133134
return Py_None;
134135
}
135-
return PyUnicode_DecodeUTF8((const char *)str,
136-
strlen((const char *)str),
136+
return PyUnicode_DecodeUTF8(str, strlen(str),
137137
"strict");
138138
}
139139

@@ -155,7 +155,7 @@ conv_string_len_to_unicode(const XML_Char *str, int len)
155155
Returns None if str is a null pointer. */
156156

157157
static PyObject *
158-
conv_string_to_utf8(XML_Char *str)
158+
conv_string_to_utf8(const XML_Char *str)
159159
{
160160
/* XXX currently this code assumes that XML_Char is 8-bit,
161161
and hence in UTF-8. */
@@ -164,7 +164,7 @@ conv_string_to_utf8(XML_Char *str)
164164
Py_INCREF(Py_None);
165165
return Py_None;
166166
}
167-
return PyString_FromString((const char *)str);
167+
return PyString_FromString(str);
168168
}
169169

170170
static PyObject *
@@ -275,6 +275,25 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
275275
? conv_string_to_unicode : conv_string_to_utf8)
276276
#endif
277277

278+
static PyObject*
279+
string_intern(xmlparseobject *self, const char* str)
280+
{
281+
PyObject *result = STRING_CONV_FUNC(str);
282+
PyObject *value;
283+
if (!self->intern)
284+
return result;
285+
value = PyDict_GetItem(self->intern, result);
286+
if (!value) {
287+
if (PyDict_SetItem(self->intern, result, result) == 0)
288+
return result;
289+
else
290+
return NULL;
291+
}
292+
Py_INCREF(value);
293+
Py_DECREF(result);
294+
return value;
295+
}
296+
278297
static void
279298
my_StartElementHandler(void *userData,
280299
const XML_Char *name, const XML_Char **atts)
@@ -307,7 +326,7 @@ my_StartElementHandler(void *userData,
307326
return;
308327
}
309328
for (i = 0; i < max; i += 2) {
310-
PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]);
329+
PyObject *n = string_intern(self, (XML_Char *) atts[i]);
311330
PyObject *v;
312331
if (n == NULL) {
313332
flag_error(self);
@@ -336,7 +355,7 @@ my_StartElementHandler(void *userData,
336355
Py_DECREF(v);
337356
}
338357
}
339-
args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container);
358+
args = Py_BuildValue("(NN)", string_intern(self, name), container);
340359
if (args == NULL) {
341360
Py_DECREF(container);
342361
return;
@@ -394,13 +413,13 @@ my_##NAME##Handler PARAMS {\
394413

395414
VOID_HANDLER(EndElement,
396415
(void *userData, const XML_Char *name),
397-
("(O&)", STRING_CONV_FUNC, name))
416+
("(N)", string_intern(self, name)))
398417

399418
VOID_HANDLER(ProcessingInstruction,
400419
(void *userData,
401420
const XML_Char *target,
402421
const XML_Char *data),
403-
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
422+
("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
404423

405424
#ifndef Py_USING_UNICODE
406425
VOID_HANDLER(CharacterData,
@@ -421,10 +440,10 @@ VOID_HANDLER(UnparsedEntityDecl,
421440
const XML_Char *systemId,
422441
const XML_Char *publicId,
423442
const XML_Char *notationName),
424-
("(O&O&O&O&O&)",
425-
STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,
426-
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,
427-
STRING_CONV_FUNC,notationName))
443+
("(NNNNN)",
444+
string_intern(self, entityName), string_intern(self, base),
445+
string_intern(self, systemId), string_intern(self, publicId),
446+
string_intern(self, notationName)))
428447

429448
#ifndef Py_USING_UNICODE
430449
VOID_HANDLER(EntityDecl,
@@ -437,11 +456,12 @@ VOID_HANDLER(EntityDecl,
437456
const XML_Char *systemId,
438457
const XML_Char *publicId,
439458
const XML_Char *notationName),
440-
("O&iNO&O&O&O&",
441-
STRING_CONV_FUNC,entityName, is_parameter_entity,
459+
("NiNNNNN",
460+
string_intern(self, entityName), is_parameter_entity,
442461
conv_string_len_to_utf8(value, value_length),
443-
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
444-
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
462+
string_intern(self, base), string_intern(self, systemId),
463+
string_intern(self, publicId),
464+
string_intern(self, notationName)))
445465
#else
446466
VOID_HANDLER(EntityDecl,
447467
(void *userData,
@@ -453,13 +473,14 @@ VOID_HANDLER(EntityDecl,
453473
const XML_Char *systemId,
454474
const XML_Char *publicId,
455475
const XML_Char *notationName),
456-
("O&iNO&O&O&O&",
457-
STRING_CONV_FUNC,entityName, is_parameter_entity,
476+
("NiNNNNN",
477+
string_intern(self, entityName), is_parameter_entity,
458478
(self->returns_unicode
459479
? conv_string_len_to_unicode(value, value_length)
460480
: conv_string_len_to_utf8(value, value_length)),
461-
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
462-
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
481+
string_intern(self, base), string_intern(self, systemId),
482+
string_intern(self, publicId),
483+
string_intern(self, notationName)))
463484
#endif
464485

465486
VOID_HANDLER(XmlDecl,
@@ -473,7 +494,7 @@ VOID_HANDLER(XmlDecl,
473494

474495
static PyObject *
475496
conv_content_model(XML_Content * const model,
476-
PyObject *(*conv_string)(XML_Char *))
497+
PyObject *(*conv_string)(const XML_Char *))
477498
{
478499
PyObject *result = NULL;
479500
PyObject *children = PyTuple_New(model->numchildren);
@@ -514,17 +535,17 @@ VOID_HANDLER(ElementDecl,
514535
(void *userData,
515536
const XML_Char *name,
516537
XML_Content *model),
517-
("O&O&",
518-
STRING_CONV_FUNC,name,
538+
("NO&",
539+
string_intern(self, name),
519540
(self->returns_unicode ? conv_content_model_unicode
520541
: conv_content_model_utf8),model))
521542
#else
522543
VOID_HANDLER(ElementDecl,
523544
(void *userData,
524545
const XML_Char *name,
525546
XML_Content *model),
526-
("O&O&",
527-
STRING_CONV_FUNC,name, conv_content_model_utf8,model))
547+
("NO&",
548+
string_intern(self, name), conv_content_model_utf8,model))
528549
#endif
529550

530551
VOID_HANDLER(AttlistDecl,
@@ -534,8 +555,8 @@ VOID_HANDLER(AttlistDecl,
534555
const XML_Char *att_type,
535556
const XML_Char *dflt,
536557
int isrequired),
537-
("(O&O&O&O&i)",
538-
STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname,
558+
("(NNO&O&i)",
559+
string_intern(self, elname), string_intern(self, attname),
539560
STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
540561
isrequired))
541562

@@ -545,24 +566,25 @@ VOID_HANDLER(NotationDecl,
545566
const XML_Char *base,
546567
const XML_Char *systemId,
547568
const XML_Char *publicId),
548-
("(O&O&O&O&)",
549-
STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,
550-
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
569+
("(NNNN)",
570+
string_intern(self, notationName), string_intern(self, base),
571+
string_intern(self, systemId), string_intern(self, publicId)))
551572

552573
VOID_HANDLER(StartNamespaceDecl,
553574
(void *userData,
554575
const XML_Char *prefix,
555576
const XML_Char *uri),
556-
("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri))
577+
("(NN)",
578+
string_intern(self, prefix), string_intern(self, uri)))
557579

558580
VOID_HANDLER(EndNamespaceDecl,
559581
(void *userData,
560582
const XML_Char *prefix),
561-
("(O&)", STRING_CONV_FUNC,prefix))
583+
("(N)", string_intern(self, prefix)))
562584

563585
VOID_HANDLER(Comment,
564-
(void *userData, const XML_Char *prefix),
565-
("(O&)", STRING_CONV_FUNC,prefix))
586+
(void *userData, const XML_Char *data),
587+
("(O&)", STRING_CONV_FUNC,data))
566588

567589
VOID_HANDLER(StartCdataSection,
568590
(void *userData),
@@ -605,9 +627,9 @@ RC_HANDLER(int, ExternalEntityRef,
605627
const XML_Char *systemId,
606628
const XML_Char *publicId),
607629
int rc=0;,
608-
("(O&O&O&O&)",
609-
STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,
610-
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId),
630+
("(O&NNN)",
631+
STRING_CONV_FUNC,context, string_intern(self, base),
632+
string_intern(self, systemId), string_intern(self, publicId)),
611633
rc = PyInt_AsLong(rv);, rc,
612634
XML_GetUserData(parser))
613635

@@ -617,8 +639,8 @@ VOID_HANDLER(StartDoctypeDecl,
617639
(void *userData, const XML_Char *doctypeName,
618640
const XML_Char *sysid, const XML_Char *pubid,
619641
int has_internal_subset),
620-
("(O&O&O&i)", STRING_CONV_FUNC,doctypeName,
621-
STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid,
642+
("(NNNi)", string_intern(self, doctypeName),
643+
string_intern(self, sysid), string_intern(self, pubid),
622644
has_internal_subset))
623645

624646
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
@@ -856,6 +878,8 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
856878
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
857879
encoding);
858880
new_parser->handlers = 0;
881+
new_parser->intern = self->intern;
882+
Py_XINCREF(new_parser->intern);
859883
#ifdef Py_TPFLAGS_HAVE_GC
860884
PyObject_GC_Track(new_parser);
861885
#else
@@ -988,7 +1012,7 @@ XML_Encoding * info)
9881012
#endif
9891013

9901014
static PyObject *
991-
newxmlparseobject(char *encoding, char *namespace_separator)
1015+
newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
9921016
{
9931017
int i;
9941018
xmlparseobject *self;
@@ -1022,6 +1046,8 @@ newxmlparseobject(char *encoding, char *namespace_separator)
10221046
else {
10231047
self->itself = XML_ParserCreate(encoding);
10241048
}
1049+
self->intern = intern;
1050+
Py_XINCREF(self->intern);
10251051
#ifdef Py_TPFLAGS_HAVE_GC
10261052
PyObject_GC_Track(self);
10271053
#else
@@ -1074,6 +1100,7 @@ xmlparse_dealloc(xmlparseobject *self)
10741100
}
10751101
free(self->handlers);
10761102
}
1103+
Py_XDECREF(self->intern);
10771104
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
10781105
/* Code for versions before 1.6 */
10791106
free(self);
@@ -1118,6 +1145,16 @@ xmlparse_getattr(xmlparseobject *self, char *name)
11181145
return PyInt_FromLong((long) self->returns_unicode);
11191146
if (strcmp(name, "specified_attributes") == 0)
11201147
return PyInt_FromLong((long) self->specified_attributes);
1148+
if (strcmp(name, "intern") == 0) {
1149+
if (self->intern == NULL) {
1150+
Py_INCREF(Py_None);
1151+
return Py_None;
1152+
}
1153+
else {
1154+
Py_INCREF(self->intern);
1155+
return self->intern;
1156+
}
1157+
}
11211158

11221159
handlernum = handlername2int(name);
11231160

@@ -1138,6 +1175,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)
11381175
PyList_Append(rc, PyString_FromString("ordered_attributes"));
11391176
PyList_Append(rc, PyString_FromString("returns_unicode"));
11401177
PyList_Append(rc, PyString_FromString("specified_attributes"));
1178+
PyList_Append(rc, PyString_FromString("intern"));
11411179

11421180
return rc;
11431181
}
@@ -1221,6 +1259,8 @@ static int
12211259
xmlparse_clear(xmlparseobject *op)
12221260
{
12231261
clear_handlers(op, 0);
1262+
Py_XDECREF(op->intern);
1263+
op->intern = 0;
12241264
return 0;
12251265
}
12261266
#endif
@@ -1275,10 +1315,14 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
12751315
{
12761316
char *encoding = NULL;
12771317
char *namespace_separator = NULL;
1278-
static char *kwlist[] = {"encoding", "namespace_separator", NULL};
1279-
1280-
if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist,
1281-
&encoding, &namespace_separator))
1318+
PyObject *intern = NULL;
1319+
PyObject *result;
1320+
int intern_decref = 0;
1321+
static char *kwlist[] = {"encoding", "namespace_separator",
1322+
"intern", NULL};
1323+
1324+
if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1325+
&encoding, &namespace_separator, &intern))
12821326
return NULL;
12831327
if (namespace_separator != NULL
12841328
&& strlen(namespace_separator) > 1) {
@@ -1287,7 +1331,26 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
12871331
" character, omitted, or None");
12881332
return NULL;
12891333
}
1290-
return newxmlparseobject(encoding, namespace_separator);
1334+
/* Explicitly passing None means no interning is desired.
1335+
Not passing anything means that a new dictionary is used. */
1336+
if (intern == Py_None)
1337+
intern = NULL;
1338+
else if (intern == NULL) {
1339+
intern = PyDict_New();
1340+
if (!intern)
1341+
return NULL;
1342+
intern_decref = 1;
1343+
}
1344+
else if (!PyDict_Check(intern)) {
1345+
PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1346+
return NULL;
1347+
}
1348+
1349+
result = newxmlparseobject(encoding, namespace_separator, intern);
1350+
if (intern_decref) {
1351+
Py_DECREF(intern);
1352+
}
1353+
return result;
12911354
}
12921355

12931356
PyDoc_STRVAR(pyexpat_ErrorString__doc__,

0 commit comments

Comments
 (0)