Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 153c9e4

Browse files
committed
Patch #553171: Add writeback parameter. Also add protocol parameter.
1 parent cf615b5 commit 153c9e4

4 files changed

Lines changed: 193 additions & 50 deletions

File tree

Doc/lib/libshelve.tex

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,30 @@ \section{\module{shelve} ---
1313
sub-objects. The keys are ordinary strings.
1414
\refstmodindex{pickle}
1515

16-
\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,binary=\code{False}}}}
16+
\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,protocol=\code{None}\optional{,writeback=\code{False}\optional{,binary=\code{None}}}}}}
1717
Open a persistent dictionary. The filename specified is the base filename
1818
for the underlying database. As a side-effect, an extension may be added to
1919
the filename and more than one file may be created. By default, the
2020
underlying database file is opened for reading and writing. The optional
2121
{}\var{flag} parameter has the same interpretation as the \var{flag}
22-
parameter of \function{anydbm.open}. By default, ASCII pickles are used to
23-
serialize values. If the optional \var{binary} parameter is set to
24-
{}\var{True}, binary pickles will be used instead.
22+
parameter of \function{anydbm.open}.
23+
24+
By default, version 0 pickles are used to serialize values.
25+
The version of the pickle protocol can be specified with the
26+
\var{protocol} parameter. \versionchanged[The \var{protocol}
27+
parameter was added. The \var{binary} parameter is deprecated
28+
and provided for backwards compatibility only]{2.3}
29+
30+
By default, mutations to persistent-dictionary mutable entries are not
31+
automatically written back. If the optional \var{writeback} parameter
32+
is set to {}\var{True}, all entries accessed are cached in memory, and
33+
written back at close time; this can make it handier to mutate mutable
34+
entries in the persistent dictionary, but, if many entries are
35+
accessed, it can consume vast amounts of memory for the cache, and it
36+
can make the close operation very slow since all accessed entries are
37+
written back (there is no way to determine which accessed entries are
38+
mutable, nor which ones were actually mutated).
39+
2540
\end{funcdesc}
2641

2742
Shelve objects support all methods supported by dictionaries. This eases
@@ -61,33 +76,47 @@ \subsection{Restrictions}
6176

6277
\end{itemize}
6378

64-
\begin{classdesc}{Shelf}{dict\optional{, binary=False}}
79+
\begin{classdesc}{Shelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}
6580
A subclass of \class{UserDict.DictMixin} which stores pickled values in the
66-
\var{dict} object. If the \var{binary} parameter is \code{True}, binary
67-
pickles will be used. This can provide much more compact storage than plain
68-
text pickles, depending on the nature of the objects stored in the database.
81+
\var{dict} object.
82+
83+
By default, version 0 pickles are used to serialize values. The
84+
version of the pickle protocol can be specified with the
85+
\var{protocol} parameter. See the \module{pickle} documentation for a
86+
discussion of the pickle protocols. \versionchanged[The \var{protocol}
87+
parameter was added. The \var{binary} parameter is deprecated and
88+
provided for backwards compatibility only]{2.3}
89+
90+
If the \var{writeback} parameter is \code{True}, the object will hold a
91+
cache of all entries accessed and write them back to the \var{dict} at
92+
sync and close times. This allows natural operations on mutable entries,
93+
but can consume much more memory and make sync and close take a long time.
6994
\end{classdesc}
7095

71-
\begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False}}
72-
A subclass of \class{Shelf} which exposes \method{first}, \method{next},
73-
\method{previous}, \method{last} and \method{set_location} which are
74-
available in the \module{bsddb} module but not in other database modules.
75-
The \var{dict} object passed to the constructor must support those methods.
76-
This is generally accomplished by calling one of \function{bsddb.hashopen},
96+
\begin{classdesc}{BsdDbShelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}
97+
98+
A subclass of \class{Shelf} which exposes \method{first},
99+
\method{next}, \method{previous}, \method{last} and
100+
\method{set_location} which are available in the \module{bsddb} module
101+
but not in other database modules. The \var{dict} object passed to
102+
the constructor must support those methods. This is generally
103+
accomplished by calling one of \function{bsddb.hashopen},
77104
\function{bsddb.btopen} or \function{bsddb.rnopen}. The optional
78-
\var{binary} parameter has the same interpretation as for the \class{Shelf}
79-
class.
105+
\var{protocol}, \var{writeback}, and \var{binary} parameters have the
106+
same interpretation as for the \class{Shelf} class.
107+
80108
\end{classdesc}
81109

82-
\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, binary=False}}}
110+
\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}}
83111

84-
A subclass of \class{Shelf} which accepts a \var{filename} instead of a
85-
dict-like object. The underlying file will be opened using
86-
{}\function{anydbm.open}. By default, the file will be created and opened
87-
for both read and write. The optional \var{flag} parameter has the same
88-
interpretation as for the \function{open} function. The optional
89-
\var{binary} parameter has the same interpretation as for the
90-
{}\class{Shelf} class.
112+
A subclass of \class{Shelf} which accepts a \var{filename} instead of
113+
a dict-like object. The underlying file will be opened using
114+
{}\function{anydbm.open}. By default, the file will be created and
115+
opened for both read and write. The optional \var{flag} parameter has
116+
the same interpretation as for the \function{open} function. The
117+
optional \var{protocol}, \var{writeback}, and \var{binary} parameters
118+
have the same interpretation as for the \class{Shelf} class.
119+
91120
\end{classdesc}
92121

93122
\subsection{Example}
@@ -103,13 +132,24 @@ \subsection{Example}
103132
104133
d[key] = data # store data at key (overwrites old data if
105134
# using an existing key)
106-
data = d[key] # retrieve data at key (raise KeyError if no
135+
data = d[key] # retrieve a COPY of data at key (raise KeyError if no
107136
# such key)
108137
del d[key] # delete data stored at key (raises KeyError
109138
# if no such key)
110139
flag = d.has_key(key) # true if the key exists
111140
list = d.keys() # a list of all existing keys (slow!)
112141
142+
# as d was opened WITHOUT writeback=True, beware:
143+
d['xx'] = range(4) # this works as expected, but...
144+
d['xx'].append(5) # *this doesn't!* -- d['xx'] is STILL range(4)!!!
145+
# having opened d without writeback=True, you need to code carefully:
146+
temp = d['xx'] # extracts the copy
147+
temp.append(5) # mutates the copy
148+
d['xx'] = temp # stores the copy right back, to persist it
149+
# or, d=shelve.open(filename,writeback=True) would let you just code
150+
# d['xx'].append(5) and have it work as expected, BUT it would also
151+
# consume more memory and make the d.close() operation slower.
152+
113153
d.close() # close it
114154
\end{verbatim}
115155

Lib/shelve.py

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
1616
d[key] = data # store data at key (overwrites old data if
1717
# using an existing key)
18-
data = d[key] # retrieve data at key (raise KeyError if no
19-
# such key)
18+
data = d[key] # retrieve a COPY of the data at key (raise
19+
# KeyError if no such key) -- NOTE that this
20+
# access returns a *copy* of the entry!
2021
del d[key] # delete data stored at key (raises KeyError
2122
# if no such key)
2223
flag = d.has_key(key) # true if the key exists; same as "key in d"
@@ -26,6 +27,33 @@
2627
2728
Dependent on the implementation, closing a persistent dictionary may
2829
or may not be necessary to flush changes to disk.
30+
31+
Normally, d[key] returns a COPY of the entry. This needs care when
32+
mutable entries are mutated: for example, if d[key] is a list,
33+
d[key].append(anitem)
34+
does NOT modify the entry d[key] itself, as stored in the persistent
35+
mapping -- it only modifies the copy, which is then immediately
36+
discarded, so that the append has NO effect whatsoever. To append an
37+
item to d[key] in a way that will affect the persistent mapping, use:
38+
data = d[key]
39+
data.append(anitem)
40+
d[key] = data
41+
42+
To avoid the problem with mutable entries, you may pass the keyword
43+
argument writeback=True in the call to shelve.open. When you use:
44+
d = shelve.open(filename, writeback=True)
45+
then d keeps a cache of all entries you access, and writes them all back
46+
to the persistent mapping when you call d.close(). This ensures that
47+
such usage as d[key].append(anitem) works as intended.
48+
49+
However, using keyword argument writeback=True may consume vast amounts
50+
of memory for the cache, and it may make d.close() very slow, if you
51+
access many of d's entries after opening it in this way: d has no way to
52+
check which of the entries you access are mutable and/or which ones you
53+
actually mutate, so it must cache, and write back at close, all of the
54+
entries that you access. You can call d.sync() to write back all the
55+
entries in the cache, and empty the cache (d.sync() also synchronizes
56+
the persistent dictionary on disk, if feasible).
2957
"""
3058

3159
# Try using cPickle and cStringIO if available.
@@ -41,6 +69,7 @@
4169
from StringIO import StringIO
4270

4371
import UserDict
72+
import warnings
4473

4574
__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
4675

@@ -51,9 +80,19 @@ class Shelf(UserDict.DictMixin):
5180
See the module's __doc__ string for an overview of the interface.
5281
"""
5382

54-
def __init__(self, dict, binary=False):
83+
def __init__(self, dict, protocol=None, writeback=False, binary=None):
5584
self.dict = dict
56-
self._binary = binary
85+
if protocol is not None and binary is not None:
86+
raise ValueError, "can't specify both 'protocol' and 'binary'"
87+
if binary is not None:
88+
warnings.warn("The 'binary' argument to Shelf() is deprecated",
89+
PendingDeprecationWarning)
90+
protocol = int(binary)
91+
if protocol is None:
92+
protocol = 0
93+
self._protocol = protocol
94+
self.writeback = writeback
95+
self.cache = {}
5796

5897
def keys(self):
5998
return self.dict.keys()
@@ -73,19 +112,32 @@ def get(self, key, default=None):
73112
return default
74113

75114
def __getitem__(self, key):
76-
f = StringIO(self.dict[key])
77-
return Unpickler(f).load()
115+
try:
116+
value = self.cache[key]
117+
except KeyError:
118+
f = StringIO(self.dict[key])
119+
value = Unpickler(f).load()
120+
if self.writeback:
121+
self.cache[key] = value
122+
return value
78123

79124
def __setitem__(self, key, value):
125+
if self.writeback:
126+
self.cache[key] = value
80127
f = StringIO()
81-
p = Pickler(f, self._binary)
128+
p = Pickler(f, self._protocol)
82129
p.dump(value)
83130
self.dict[key] = f.getvalue()
84131

85132
def __delitem__(self, key):
86133
del self.dict[key]
134+
try:
135+
del self.cache[key]
136+
except KeyError:
137+
pass
87138

88139
def close(self):
140+
self.sync()
89141
try:
90142
self.dict.close()
91143
except:
@@ -96,6 +148,12 @@ def __del__(self):
96148
self.close()
97149

98150
def sync(self):
151+
if self.writeback and self.cache:
152+
self.writeback = False
153+
for key, entry in self.cache.iteritems():
154+
self[key] = entry
155+
self.writeback = True
156+
self.cache = {}
99157
if hasattr(self.dict, 'sync'):
100158
self.dict.sync()
101159

@@ -113,8 +171,8 @@ class BsdDbShelf(Shelf):
113171
See the module's __doc__ string for an overview of the interface.
114172
"""
115173

116-
def __init__(self, dict, binary=False):
117-
Shelf.__init__(self, dict, binary)
174+
def __init__(self, dict, protocol=None, writeback=False, binary=None):
175+
Shelf.__init__(self, dict, protocol, writeback, binary)
118176

119177
def set_location(self, key):
120178
(key, value) = self.dict.set_location(key)
@@ -149,22 +207,25 @@ class DbfilenameShelf(Shelf):
149207
See the module's __doc__ string for an overview of the interface.
150208
"""
151209

152-
def __init__(self, filename, flag='c', binary=False):
210+
def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
153211
import anydbm
154-
Shelf.__init__(self, anydbm.open(filename, flag), binary)
212+
Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback, binary)
155213

156214

157-
def open(filename, flag='c', binary=False):
215+
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
158216
"""Open a persistent dictionary for reading and writing.
159217
160-
The filename parameter is the base filename for the underlying database.
161-
As a side-effect, an extension may be added to the filename and more
162-
than one file may be created. The optional flag parameter has the
163-
same interpretation as the flag parameter of anydbm.open(). The
164-
optional binary parameter may be set to True to force the use of binary
165-
pickles for serializing data values.
218+
The filename parameter is the base filename for the underlying
219+
database. As a side-effect, an extension may be added to the
220+
filename and more than one file may be created. The optional flag
221+
parameter has the same interpretation as the flag parameter of
222+
anydbm.open(). The optional protocol parameter specifies the
223+
version of the pickle protocol (0, 1, or 2).
224+
225+
The optional binary parameter is deprecated and may be set to True
226+
to force the use of binary pickles for serializing data values.
166227
167228
See the module's __doc__ string for an overview of the interface.
168229
"""
169230

170-
return DbfilenameShelf(filename, flag, binary)
231+
return DbfilenameShelf(filename, flag, protocol, writeback, binary)

Lib/test/test_shelve.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ def test_binary_file_shelf(self):
2828
for f in glob.glob(self.fn+"*"):
2929
os.unlink(f)
3030

31+
def test_proto2_file_shelf(self):
32+
try:
33+
s = shelve.open(self.fn, protocol=2)
34+
s['key1'] = (1,2,3,4)
35+
self.assertEqual(s['key1'], (1,2,3,4))
36+
s.close()
37+
finally:
38+
for f in glob.glob(self.fn+"*"):
39+
os.unlink(f)
40+
3141
def test_in_memory_shelf(self):
3242
d1 = {}
3343
s = shelve.Shelf(d1, binary=False)
@@ -43,6 +53,27 @@ def test_in_memory_shelf(self):
4353
self.assertEqual(len(d1), 1)
4454
self.assertNotEqual(d1, d2)
4555

56+
def test_mutable_entry(self):
57+
d1 = {}
58+
s = shelve.Shelf(d1, protocol=2, writeback=False)
59+
s['key1'] = [1,2,3,4]
60+
self.assertEqual(s['key1'], [1,2,3,4])
61+
s['key1'].append(5)
62+
self.assertEqual(s['key1'], [1,2,3,4])
63+
s.close()
64+
65+
d2 = {}
66+
s = shelve.Shelf(d2, protocol=2, writeback=True)
67+
s['key1'] = [1,2,3,4]
68+
self.assertEqual(s['key1'], [1,2,3,4])
69+
s['key1'].append(5)
70+
self.assertEqual(s['key1'], [1,2,3,4,5])
71+
s.close()
72+
73+
self.assertEqual(len(d1), 1)
74+
self.assertEqual(len(d2), 1)
75+
76+
4677
from test_userdict import TestMappingProtocol
4778

4879
class TestShelveBase(TestMappingProtocol):
@@ -56,10 +87,10 @@ def _reference(self):
5687
return {"key1":"value1", "key2":2, "key3":(1,2,3)}
5788
def _empty_mapping(self):
5889
if self._in_mem:
59-
x= shelve.Shelf({}, binary = self._binary)
90+
x= shelve.Shelf({}, **self._args)
6091
else:
6192
self.counter+=1
62-
x= shelve.open(self.fn+str(self.counter), binary=self._binary)
93+
x= shelve.open(self.fn+str(self.counter), **self._args)
6394
self._db.append(x)
6495
return x
6596
def tearDown(self):
@@ -71,24 +102,32 @@ def tearDown(self):
71102
os.unlink(f)
72103

73104
class TestAsciiFileShelve(TestShelveBase):
74-
_binary = False
105+
_args={'binary':False}
75106
_in_mem = False
76107
class TestBinaryFileShelve(TestShelveBase):
77-
_binary = True
108+
_args={'binary':True}
109+
_in_mem = False
110+
class TestProto2FileShelve(TestShelveBase):
111+
_args={'protocol':2}
78112
_in_mem = False
79113
class TestAsciiMemShelve(TestShelveBase):
80-
_binary = False
114+
_args={'binary':False}
81115
_in_mem = True
82116
class TestBinaryMemShelve(TestShelveBase):
83-
_binary = True
117+
_args={'binary':True}
118+
_in_mem = True
119+
class TestProto2MemShelve(TestShelveBase):
120+
_args={'protocol':2}
84121
_in_mem = True
85122

86123
def test_main():
87124
suite = unittest.TestSuite()
88125
suite.addTest(unittest.makeSuite(TestAsciiFileShelve))
89126
suite.addTest(unittest.makeSuite(TestBinaryFileShelve))
127+
suite.addTest(unittest.makeSuite(TestProto2FileShelve))
90128
suite.addTest(unittest.makeSuite(TestAsciiMemShelve))
91129
suite.addTest(unittest.makeSuite(TestBinaryMemShelve))
130+
suite.addTest(unittest.makeSuite(TestProto2MemShelve))
92131
suite.addTest(unittest.makeSuite(TestCase))
93132
test_support.run_suite(suite)
94133

0 commit comments

Comments
 (0)