Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit fd8a838

Browse files
committed
Issue #14684: Add support for predefined compression dictionaries to the zlib module.
Original patch by Sam Rushing.
1 parent 50b0a36 commit fd8a838

4 files changed

Lines changed: 153 additions & 29 deletions

File tree

Doc/library/zlib.rst

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,19 @@ The available exception and functions in this module are:
5858
exception if any error occurs.
5959

6060

61-
.. function:: compressobj([level])
61+
.. function:: compressobj([level[, method[, wbits[, memLevel[, strategy[, zdict]]]]]])
6262

6363
Returns a compression object, to be used for compressing data streams that won't
64-
fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling
65-
the level of compression; ``1`` is fastest and produces the least compression,
66-
``9`` is slowest and produces the most. The default value is ``6``.
64+
fit into memory at once.
65+
66+
*level* is an integer from ``1`` to ``9`` controlling the level of
67+
compression; ``1`` is fastest and produces the least compression, ``9`` is
68+
slowest and produces the most. The default value is ``6``.
69+
70+
*zdict* is a predefined compression dictionary. This is a sequence of bytes
71+
(such as a :class:`bytes` object) containing subsequences that are expected
72+
to occur frequently in the data that is to be compressed. Those subsequences
73+
that are expected to be most common should come at the end of the dictionary.
6774

6875

6976
.. function:: crc32(data[, value])
@@ -114,11 +121,21 @@ The available exception and functions in this module are:
114121
to :c:func:`malloc`. The default size is 16384.
115122

116123

117-
.. function:: decompressobj([wbits])
124+
.. function:: decompressobj([wbits[, zdict]])
118125

119126
Returns a decompression object, to be used for decompressing data streams that
120-
won't fit into memory at once. The *wbits* parameter controls the size of the
121-
window buffer.
127+
won't fit into memory at once.
128+
129+
The *wbits* parameter controls the size of the window buffer.
130+
131+
The *zdict* parameter specifies a predefined compression dictionary. If
132+
provided, this must be the same dictionary as was used by the compressor that
133+
produced the data that is to be decompressed.
134+
135+
.. note::
136+
If *zdict* is a mutable object (such as a :class:`bytearray`), you must not
137+
modify its contents between the call to :func:`decompressobj` and the first
138+
call to the decompressor's ``decompress()`` method.
122139

123140

124141
Compression objects support the following methods:

Lib/test/test_zlib.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,36 @@ def test_empty_flush(self):
425425
dco = zlib.decompressobj()
426426
self.assertEqual(dco.flush(), b"") # Returns nothing
427427

428+
def test_dictionary(self):
429+
h = HAMLET_SCENE
430+
# build a simulated dictionary out of the words in HAMLET
431+
words = h.split()
432+
random.shuffle(words)
433+
zdict = b''.join(words)
434+
# use it to compress HAMLET
435+
co = zlib.compressobj(zdict=zdict)
436+
cd = co.compress(h) + co.flush()
437+
# verify that it will decompress with the dictionary
438+
dco = zlib.decompressobj(zdict=zdict)
439+
self.assertEqual(dco.decompress(cd) + dco.flush(), h)
440+
# verify that it fails when not given the dictionary
441+
dco = zlib.decompressobj()
442+
self.assertRaises(zlib.error, dco.decompress, cd)
443+
444+
def test_dictionary_streaming(self):
445+
# this is simulating the needs of SPDY to be able to reuse the same
446+
# stream object (with its compression state) between sets of compressed
447+
# headers.
448+
co = zlib.compressobj(zdict=HAMLET_SCENE)
449+
do = zlib.decompressobj(zdict=HAMLET_SCENE)
450+
piece = HAMLET_SCENE[1000:1500]
451+
d0 = co.compress(piece) + co.flush(zlib.Z_SYNC_FLUSH)
452+
d1 = co.compress(piece[100:]) + co.flush(zlib.Z_SYNC_FLUSH)
453+
d2 = co.compress(piece[:-100]) + co.flush(zlib.Z_SYNC_FLUSH)
454+
self.assertEqual(do.decompress(d0), piece)
455+
self.assertEqual(do.decompress(d1), piece[100:])
456+
self.assertEqual(do.decompress(d2), piece[:-100])
457+
428458
def test_decompress_incomplete_stream(self):
429459
# This is 'foo', deflated
430460
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ Core and Builtins
3434
Library
3535
-------
3636

37+
- Issue #14684: zlib.compressobj() and zlib.decompressobj() now support the use
38+
of predefined compression dictionaries. Original patch by Sam Rushing.
39+
3740
- Fix GzipFile's handling of filenames given as bytes objects.
3841

3942
- Issue #14772: Return destination values from some shutil functions.

Modules/zlibmodule.c

Lines changed: 96 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ typedef struct
4545
PyObject *unconsumed_tail;
4646
char eof;
4747
int is_initialised;
48+
PyObject *zdict;
4849
#ifdef WITH_THREAD
4950
PyThread_type_lock lock;
5051
#endif
@@ -80,14 +81,21 @@ zlib_error(z_stream zst, int err, char *msg)
8081
}
8182

8283
PyDoc_STRVAR(compressobj__doc__,
83-
"compressobj([level]) -- Return a compressor object.\n"
84+
"compressobj([level[, method[, wbits[, memLevel[, strategy[, zdict]]]]]])\n"
85+
" -- Return a compressor object.\n"
8486
"\n"
85-
"Optional arg level is the compression level, in 1-9.");
87+
"Optional arg level is the compression level, in 1-9.\n"
88+
"\n"
89+
"Optional arg zdict is the predefined compression dictionary - a sequence of\n"
90+
"bytes containing subsequences that are likely to occur in the input data.");
8691

8792
PyDoc_STRVAR(decompressobj__doc__,
88-
"decompressobj([wbits]) -- Return a decompressor object.\n"
93+
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
94+
"\n"
95+
"Optional arg wbits is the window buffer size.\n"
8996
"\n"
90-
"Optional arg wbits is the window buffer size.");
97+
"Optional arg zdict is the predefined compression dictionary. This must be\n"
98+
"the same dictionary as used by the compressor that produced the input data.");
9199

92100
static compobject *
93101
newcompobject(PyTypeObject *type)
@@ -98,6 +106,7 @@ newcompobject(PyTypeObject *type)
98106
return NULL;
99107
self->eof = 0;
100108
self->is_initialised = 0;
109+
self->zdict = NULL;
101110
self->unused_data = PyBytes_FromStringAndSize("", 0);
102111
if (self->unused_data == NULL) {
103112
Py_DECREF(self);
@@ -316,19 +325,24 @@ PyZlib_decompress(PyObject *self, PyObject *args)
316325
}
317326

318327
static PyObject *
319-
PyZlib_compressobj(PyObject *selfptr, PyObject *args)
328+
PyZlib_compressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
320329
{
321330
compobject *self;
322331
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
323332
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
324-
325-
if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
326-
&memLevel, &strategy))
333+
Py_buffer zdict;
334+
static char *kwlist[] = {"level", "method", "wbits",
335+
"memLevel", "strategy", "zdict", NULL};
336+
337+
zdict.buf = NULL; /* Sentinel, so we can tell whether zdict was supplied. */
338+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iiiiiy*:compressobj",
339+
kwlist, &level, &method, &wbits,
340+
&memLevel, &strategy, &zdict))
327341
return NULL;
328342

329343
self = newcompobject(&Comptype);
330344
if (self==NULL)
331-
return(NULL);
345+
goto error;
332346
self->zst.zalloc = (alloc_func)NULL;
333347
self->zst.zfree = (free_func)Z_NULL;
334348
self->zst.next_in = NULL;
@@ -337,30 +351,58 @@ PyZlib_compressobj(PyObject *selfptr, PyObject *args)
337351
switch(err) {
338352
case (Z_OK):
339353
self->is_initialised = 1;
340-
return (PyObject*)self;
354+
if (zdict.buf == NULL) {
355+
goto success;
356+
} else {
357+
err = deflateSetDictionary(&self->zst, zdict.buf, zdict.len);
358+
switch (err) {
359+
case (Z_OK):
360+
goto success;
361+
case (Z_STREAM_ERROR):
362+
PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
363+
goto error;
364+
default:
365+
PyErr_SetString(PyExc_ValueError, "deflateSetDictionary()");
366+
goto error;
367+
}
368+
}
341369
case (Z_MEM_ERROR):
342-
Py_DECREF(self);
343370
PyErr_SetString(PyExc_MemoryError,
344371
"Can't allocate memory for compression object");
345-
return NULL;
372+
goto error;
346373
case(Z_STREAM_ERROR):
347-
Py_DECREF(self);
348374
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
349-
return NULL;
375+
goto error;
350376
default:
351377
zlib_error(self->zst, err, "while creating compression object");
352-
Py_DECREF(self);
353-
return NULL;
378+
goto error;
354379
}
380+
381+
error:
382+
Py_XDECREF(self);
383+
self = NULL;
384+
success:
385+
if (zdict.buf != NULL)
386+
PyBuffer_Release(&zdict);
387+
return (PyObject*)self;
355388
}
356389

357390
static PyObject *
358-
PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
391+
PyZlib_decompressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
359392
{
393+
static char *kwlist[] = {"wbits", "zdict", NULL};
360394
int wbits=DEF_WBITS, err;
361395
compobject *self;
362-
if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits))
396+
PyObject *zdict=NULL;
397+
398+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO:decompressobj",
399+
kwlist, &wbits, &zdict))
400+
return NULL;
401+
if (zdict != NULL && !PyObject_CheckBuffer(zdict)) {
402+
PyErr_SetString(PyExc_TypeError,
403+
"zdict argument must support the buffer protocol");
363404
return NULL;
405+
}
364406

365407
self = newcompobject(&Decomptype);
366408
if (self == NULL)
@@ -369,6 +411,10 @@ PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
369411
self->zst.zfree = (free_func)Z_NULL;
370412
self->zst.next_in = NULL;
371413
self->zst.avail_in = 0;
414+
if (zdict != NULL) {
415+
Py_INCREF(zdict);
416+
self->zdict = zdict;
417+
}
372418
err = inflateInit2(&self->zst, wbits);
373419
switch(err) {
374420
case (Z_OK):
@@ -398,6 +444,7 @@ Dealloc(compobject *self)
398444
#endif
399445
Py_XDECREF(self->unused_data);
400446
Py_XDECREF(self->unconsumed_tail);
447+
Py_XDECREF(self->zdict);
401448
PyObject_Del(self);
402449
}
403450

@@ -557,6 +604,27 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
557604
err = inflate(&(self->zst), Z_SYNC_FLUSH);
558605
Py_END_ALLOW_THREADS
559606

607+
if (err == Z_NEED_DICT && self->zdict != NULL) {
608+
Py_buffer zdict_buf;
609+
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
610+
Py_DECREF(RetVal);
611+
RetVal = NULL;
612+
goto error;
613+
}
614+
err = inflateSetDictionary(&(self->zst), zdict_buf.buf, zdict_buf.len);
615+
PyBuffer_Release(&zdict_buf);
616+
if (err != Z_OK) {
617+
zlib_error(self->zst, err, "while decompressing data");
618+
Py_DECREF(RetVal);
619+
RetVal = NULL;
620+
goto error;
621+
}
622+
/* repeat the call to inflate! */
623+
Py_BEGIN_ALLOW_THREADS
624+
err = inflate(&(self->zst), Z_SYNC_FLUSH);
625+
Py_END_ALLOW_THREADS
626+
}
627+
560628
/* While Z_OK and the output buffer is full, there might be more output.
561629
So extend the output buffer and try again.
562630
*/
@@ -770,10 +838,13 @@ PyZlib_copy(compobject *self)
770838
}
771839
Py_INCREF(self->unused_data);
772840
Py_INCREF(self->unconsumed_tail);
841+
Py_XINCREF(self->zdict);
773842
Py_XDECREF(retval->unused_data);
774843
Py_XDECREF(retval->unconsumed_tail);
844+
Py_XDECREF(retval->zdict);
775845
retval->unused_data = self->unused_data;
776846
retval->unconsumed_tail = self->unconsumed_tail;
847+
retval->zdict = self->zdict;
777848
retval->eof = self->eof;
778849

779850
/* Mark it as being initialized */
@@ -822,10 +893,13 @@ PyZlib_uncopy(compobject *self)
822893

823894
Py_INCREF(self->unused_data);
824895
Py_INCREF(self->unconsumed_tail);
896+
Py_XINCREF(self->zdict);
825897
Py_XDECREF(retval->unused_data);
826898
Py_XDECREF(retval->unconsumed_tail);
899+
Py_XDECREF(retval->zdict);
827900
retval->unused_data = self->unused_data;
828901
retval->unconsumed_tail = self->unconsumed_tail;
902+
retval->zdict = self->zdict;
829903
retval->eof = self->eof;
830904

831905
/* Mark it as being initialized */
@@ -1032,13 +1106,13 @@ static PyMethodDef zlib_methods[] =
10321106
adler32__doc__},
10331107
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
10341108
compress__doc__},
1035-
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS,
1109+
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS|METH_KEYWORDS,
10361110
compressobj__doc__},
10371111
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
10381112
crc32__doc__},
10391113
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
10401114
decompress__doc__},
1041-
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS,
1115+
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS|METH_KEYWORDS,
10421116
decompressobj__doc__},
10431117
{NULL, NULL}
10441118
};
@@ -1112,10 +1186,10 @@ PyDoc_STRVAR(zlib_module_documentation,
11121186
"\n"
11131187
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
11141188
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
1115-
"compressobj([level]) -- Return a compressor object.\n"
1189+
"compressobj([level[, ...]]) -- Return a compressor object.\n"
11161190
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
11171191
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
1118-
"decompressobj([wbits]) -- Return a decompressor object.\n"
1192+
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
11191193
"\n"
11201194
"'wbits' is window buffer size.\n"
11211195
"Compressor objects support compress() and flush() methods; decompressor\n"

0 commit comments

Comments
 (0)