Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit ef82d2f

Browse files
committed
Patch #923098: Share interned strings in marshal.
1 parent 8d97e33 commit ef82d2f

6 files changed

Lines changed: 122 additions & 31 deletions

File tree

Doc/api/utilities.tex

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,20 +283,31 @@ \section{Data marshalling support \label{marshalling-utils}}
283283

284284
Numeric values are stored with the least significant byte first.
285285

286-
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file}
286+
The module supports two versions of the data format: version 0 is the
287+
historical version, version 1 (new in Python 2.4) shares interned
288+
strings in the file, and upon unmarshalling. \var{Py_MARSHAL_VERSION}
289+
indicates the current file format (currently 1).
290+
291+
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file, int version}
287292
Marshal a \ctype{long} integer, \var{value}, to \var{file}. This
288293
will only write the least-significant 32 bits of \var{value},
289294
regardless of the size of the native \ctype{long} type.
295+
296+
\versionchanged[\var{version} indicates the file format]{2.4}
290297
\end{cfuncdesc}
291298

292299
\begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value,
293-
FILE *file}
300+
FILE *file, int version}
294301
Marshal a Python object, \var{value}, to \var{file}.
302+
303+
\versionchanged[\var{version} indicates the file format]{2.4}
295304
\end{cfuncdesc}
296305

297-
\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value}
306+
\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value, int version}
298307
Return a string object containing the marshalled representation of
299308
\var{value}.
309+
310+
\versionchanged[\var{version} indicates the file format]{2.4}
300311
\end{cfuncdesc}
301312

302313
The following functions allow marshalled values to be read back in.

Doc/lib/libmarshal.tex

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ \section{\module{marshal} ---
7373
a \exception{ValueError} exception is raised --- but garbage data
7474
will also be written to the file. The object will not be properly
7575
read back by \function{load()}.
76+
77+
\versionadded[The \var{version} argument indicates the data
78+
format that \code{dump} should use.]{2.4}
7679
\end{funcdesc}
7780

7881
\begin{funcdesc}{load}{file}
@@ -86,15 +89,28 @@ \section{\module{marshal} ---
8689
\code{None} for the unmarshallable type.}
8790
\end{funcdesc}
8891

89-
\begin{funcdesc}{dumps}{value}
92+
\begin{funcdesc}{dumps}{value\optional{, version}}
9093
Return the string that would be written to a file by
9194
\code{dump(\var{value}, \var{file})}. The value must be a supported
9295
type. Raise a \exception{ValueError} exception if value has (or
9396
contains an object that has) an unsupported type.
97+
98+
\versionadded[The \var{version} argument indicates the data
99+
format that \code{dumps} should use.]{2.4}
94100
\end{funcdesc}
95101

96102
\begin{funcdesc}{loads}{string}
97103
Convert the string to a value. If no valid value is found, raise
98104
\exception{EOFError}, \exception{ValueError} or
99105
\exception{TypeError}. Extra characters in the string are ignored.
100106
\end{funcdesc}
107+
108+
In addition, the following constants are defined:
109+
110+
\begin{datadesc}{version}
111+
Indicates the format that the module uses. Version 0 is the
112+
historical format, version 1 (added in Python 2.4) shares
113+
interned strings. The current version is 1.
114+
115+
\versionadded{2.4}
116+
\end{datadesc}

Include/marshal.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
extern "C" {
88
#endif
99

10-
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *);
11-
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *);
12-
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *);
10+
#define Py_MARSHAL_VERSION 1
11+
12+
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int);
13+
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int);
14+
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int);
1315

1416
PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *);
1517
PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *);

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
1212
Core and builtins
1313
-----------------
1414

15+
- marshal now shares interned strings. This change introduces
16+
a new .pyc magic.
17+
1518
- Bug #966623. classes created with type() in an exec(, {}) don't
1619
have a __module__, but code in typeobject assumed it would always
1720
be there.

Python/import.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
2626
a .pyc file in text mode the magic number will be wrong; also, the
2727
Apple MPW compiler swaps their values, botching string constants.
2828
29-
Apparently, there was a distinction made between even and odd
30-
bytecodes that is related to Unicode. The details aren't clear,
31-
but the magic number has been odd for a long time.
29+
The magic numbers must be spaced apart by at least 2 values, as the
30+
-U interpreter flag will cause MAGIC+1 to be used. They have been
31+
odd numbers for some time now.
3232
3333
There were a variety of old schemes for setting the magic number.
3434
The current working scheme is to increment the previous value by
@@ -47,9 +47,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
4747
Python 2.3a0: 62011
4848
Python 2.3a0: 62021
4949
Python 2.3a0: 62011 (!)
50-
Python 2.4a0: 62031
50+
Python 2.4a0: 62041
5151
*/
52-
#define MAGIC (62031 | ((long)'\r'<<16) | ((long)'\n'<<24))
52+
#define MAGIC (62041 | ((long)'\r'<<16) | ((long)'\n'<<24))
5353

5454
/* Magic word as global; note that _PyImport_Init() can change the
5555
value of this global to accommodate for alterations of how the
@@ -797,10 +797,10 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
797797
"# can't create %s\n", cpathname);
798798
return;
799799
}
800-
PyMarshal_WriteLongToFile(pyc_magic, fp);
800+
PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION);
801801
/* First write a 0 for mtime */
802-
PyMarshal_WriteLongToFile(0L, fp);
803-
PyMarshal_WriteObjectToFile((PyObject *)co, fp);
802+
PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
803+
PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);
804804
if (fflush(fp) != 0 || ferror(fp)) {
805805
if (Py_VerboseFlag)
806806
PySys_WriteStderr("# can't write %s\n", cpathname);
@@ -811,7 +811,7 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
811811
}
812812
/* Now write the true mtime */
813813
fseek(fp, 4L, 0);
814-
PyMarshal_WriteLongToFile(mtime, fp);
814+
PyMarshal_WriteLongToFile(mtime, fp, Py_MARSHAL_VERSION);
815815
fflush(fp);
816816
fclose(fp);
817817
if (Py_VerboseFlag)

Python/marshal.c

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#define TYPE_COMPLEX 'x'
2828
#define TYPE_LONG 'l'
2929
#define TYPE_STRING 's'
30+
#define TYPE_INTERNED 't'
31+
#define TYPE_STRINGREF 'R'
3032
#define TYPE_TUPLE '('
3133
#define TYPE_LIST '['
3234
#define TYPE_DICT '{'
@@ -42,6 +44,7 @@ typedef struct {
4244
PyObject *str;
4345
char *ptr;
4446
char *end;
47+
PyObject *strings; /* dict on marshal, list on unmarshal */
4548
} WFILE;
4649

4750
#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
@@ -189,7 +192,24 @@ w_object(PyObject *v, WFILE *p)
189192
}
190193
#endif
191194
else if (PyString_Check(v)) {
192-
w_byte(TYPE_STRING, p);
195+
if (p->strings && PyString_CHECK_INTERNED(v)) {
196+
PyObject *o = PyDict_GetItem(p->strings, v);
197+
if (o) {
198+
long w = PyInt_AsLong(o);
199+
w_byte(TYPE_STRINGREF, p);
200+
w_long(w, p);
201+
goto exit;
202+
}
203+
else {
204+
o = PyInt_FromLong(PyDict_Size(p->strings));
205+
PyDict_SetItem(p->strings, v, o);
206+
Py_DECREF(o);
207+
w_byte(TYPE_INTERNED, p);
208+
}
209+
}
210+
else {
211+
w_byte(TYPE_STRING, p);
212+
}
193213
n = PyString_GET_SIZE(v);
194214
w_long((long)n, p);
195215
w_string(PyString_AS_STRING(v), n, p);
@@ -269,28 +289,32 @@ w_object(PyObject *v, WFILE *p)
269289
w_byte(TYPE_UNKNOWN, p);
270290
p->error = 1;
271291
}
272-
292+
exit:
273293
p->depth--;
274294
}
275295

296+
/* version currently has no effect for writing longs. */
276297
void
277-
PyMarshal_WriteLongToFile(long x, FILE *fp)
298+
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
278299
{
279300
WFILE wf;
280301
wf.fp = fp;
281302
wf.error = 0;
282303
wf.depth = 0;
304+
wf.strings = NULL;
283305
w_long(x, &wf);
284306
}
285307

286308
void
287-
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp)
309+
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
288310
{
289311
WFILE wf;
290312
wf.fp = fp;
291313
wf.error = 0;
292314
wf.depth = 0;
315+
wf.strings = (version > 0) ? PyDict_New() : NULL;
293316
w_object(x, &wf);
317+
Py_XDECREF(wf.strings);
294318
}
295319

296320
typedef WFILE RFILE; /* Same struct with different invariants */
@@ -491,6 +515,7 @@ r_object(RFILE *p)
491515
}
492516
#endif
493517

518+
case TYPE_INTERNED:
494519
case TYPE_STRING:
495520
n = r_long(p);
496521
if (n < 0) {
@@ -506,6 +531,16 @@ r_object(RFILE *p)
506531
"EOF read where object expected");
507532
}
508533
}
534+
if (type == TYPE_INTERNED) {
535+
PyString_InternInPlace(&v);
536+
PyList_Append(p->strings, v);
537+
}
538+
return v;
539+
540+
case TYPE_STRINGREF:
541+
n = r_long(p);
542+
v = PyList_GET_ITEM(p->strings, n);
543+
Py_INCREF(v);
509544
return v;
510545

511546
#ifdef Py_USING_UNICODE
@@ -673,6 +708,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
673708
{
674709
RFILE rf;
675710
rf.fp = fp;
711+
rf.strings = NULL;
676712
return r_short(&rf);
677713
}
678714

@@ -681,6 +717,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
681717
{
682718
RFILE rf;
683719
rf.fp = fp;
720+
rf.strings = NULL;
684721
return r_long(&rf);
685722
}
686723

@@ -747,22 +784,30 @@ PyObject *
747784
PyMarshal_ReadObjectFromFile(FILE *fp)
748785
{
749786
RFILE rf;
787+
PyObject *result;
750788
rf.fp = fp;
751-
return read_object(&rf);
789+
rf.strings = PyList_New(0);
790+
result = r_object(&rf);
791+
Py_DECREF(rf.strings);
792+
return result;
752793
}
753794

754795
PyObject *
755796
PyMarshal_ReadObjectFromString(char *str, int len)
756797
{
757798
RFILE rf;
799+
PyObject *result;
758800
rf.fp = NULL;
759801
rf.ptr = str;
760802
rf.end = str + len;
761-
return read_object(&rf);
803+
rf.strings = PyList_New(0);
804+
result = r_object(&rf);
805+
Py_DECREF(rf.strings);
806+
return result;
762807
}
763808

764809
PyObject *
765-
PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
810+
PyMarshal_WriteObjectToString(PyObject *x, int version)
766811
{
767812
WFILE wf;
768813
wf.fp = NULL;
@@ -773,7 +818,9 @@ PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
773818
wf.end = wf.ptr + PyString_Size(wf.str);
774819
wf.error = 0;
775820
wf.depth = 0;
821+
wf.strings = (version > 0) ? PyDict_New() : NULL;
776822
w_object(x, &wf);
823+
Py_XDECREF(wf.strings);
777824
if (wf.str != NULL)
778825
_PyString_Resize(&wf.str,
779826
(int) (wf.ptr -
@@ -796,7 +843,8 @@ marshal_dump(PyObject *self, PyObject *args)
796843
WFILE wf;
797844
PyObject *x;
798845
PyObject *f;
799-
if (!PyArg_ParseTuple(args, "OO:dump", &x, &f))
846+
int version = Py_MARSHAL_VERSION;
847+
if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
800848
return NULL;
801849
if (!PyFile_Check(f)) {
802850
PyErr_SetString(PyExc_TypeError,
@@ -808,7 +856,9 @@ marshal_dump(PyObject *self, PyObject *args)
808856
wf.ptr = wf.end = NULL;
809857
wf.error = 0;
810858
wf.depth = 0;
859+
wf.strings = (version > 0) ? PyDict_New() : 0;
811860
w_object(x, &wf);
861+
Py_XDECREF(wf.strings);
812862
if (wf.error) {
813863
PyErr_SetString(PyExc_ValueError,
814864
(wf.error==1)?"unmarshallable object"
@@ -823,7 +873,7 @@ static PyObject *
823873
marshal_load(PyObject *self, PyObject *args)
824874
{
825875
RFILE rf;
826-
PyObject *f;
876+
PyObject *f, *result;
827877
if (!PyArg_ParseTuple(args, "O:load", &f))
828878
return NULL;
829879
if (!PyFile_Check(f)) {
@@ -832,16 +882,20 @@ marshal_load(PyObject *self, PyObject *args)
832882
return NULL;
833883
}
834884
rf.fp = PyFile_AsFile(f);
835-
return read_object(&rf);
885+
rf.strings = PyList_New(0);
886+
result = read_object(&rf);
887+
Py_DECREF(rf.strings);
888+
return result;
836889
}
837890

838891
static PyObject *
839892
marshal_dumps(PyObject *self, PyObject *args)
840893
{
841894
PyObject *x;
842-
if (!PyArg_ParseTuple(args, "O:dumps", &x))
895+
int version = Py_MARSHAL_VERSION;
896+
if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
843897
return NULL;
844-
return PyMarshal_WriteObjectToString(x);
898+
return PyMarshal_WriteObjectToString(x, version);
845899
}
846900

847901
static PyObject *
@@ -850,12 +904,16 @@ marshal_loads(PyObject *self, PyObject *args)
850904
RFILE rf;
851905
char *s;
852906
int n;
853-
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
907+
PyObject* result;
908+
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
854909
return NULL;
855910
rf.fp = NULL;
856911
rf.ptr = s;
857912
rf.end = s + n;
858-
return read_object(&rf);
913+
rf.strings = PyList_New(0);
914+
result = read_object(&rf);
915+
Py_DECREF(rf.strings);
916+
return result;
859917
}
860918

861919
static PyMethodDef marshal_methods[] = {
@@ -869,5 +927,6 @@ static PyMethodDef marshal_methods[] = {
869927
PyMODINIT_FUNC
870928
PyMarshal_Init(void)
871929
{
872-
(void) Py_InitModule("marshal", marshal_methods);
930+
PyObject *mod = Py_InitModule("marshal", marshal_methods);
931+
PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
873932
}

0 commit comments

Comments
 (0)