1515
1616 d[key] = data # store data at key (overwrites old data if
1717 # using an existing key)
18- data = d[key] # retrieve data at key (raise KeyError if no
19- # such key)
18+ data = d[key] # retrieve a COPY of the data at key (raise
19+ # KeyError if no such key) -- NOTE that this
20+ # access returns a *copy* of the entry!
2021 del d[key] # delete data stored at key (raises KeyError
2122 # if no such key)
2223 flag = d.has_key(key) # true if the key exists; same as "key in d"
2627
2728Dependent on the implementation, closing a persistent dictionary may
2829or may not be necessary to flush changes to disk.
30+
31+ Normally, d[key] returns a COPY of the entry. This needs care when
32+ mutable entries are mutated: for example, if d[key] is a list,
33+ d[key].append(anitem)
34+ does NOT modify the entry d[key] itself, as stored in the persistent
35+ mapping -- it only modifies the copy, which is then immediately
36+ discarded, so that the append has NO effect whatsoever. To append an
37+ item to d[key] in a way that will affect the persistent mapping, use:
38+ data = d[key]
39+ data.append(anitem)
40+ d[key] = data
41+
42+ To avoid the problem with mutable entries, you may pass the keyword
43+ argument writeback=True in the call to shelve.open. When you use:
44+ d = shelve.open(filename, writeback=True)
45+ then d keeps a cache of all entries you access, and writes them all back
46+ to the persistent mapping when you call d.close(). This ensures that
47+ such usage as d[key].append(anitem) works as intended.
48+
49+ However, using keyword argument writeback=True may consume vast amounts
50+ of memory for the cache, and it may make d.close() very slow, if you
51+ access many of d's entries after opening it in this way: d has no way to
52+ check which of the entries you access are mutable and/or which ones you
53+ actually mutate, so it must cache, and write back at close, all of the
54+ entries that you access. You can call d.sync() to write back all the
55+ entries in the cache, and empty the cache (d.sync() also synchronizes
56+ the persistent dictionary on disk, if feasible).
2957"""
3058
3159# Try using cPickle and cStringIO if available.
4169 from StringIO import StringIO
4270
4371import UserDict
72+ import warnings
4473
4574__all__ = ["Shelf" ,"BsdDbShelf" ,"DbfilenameShelf" ,"open" ]
4675
@@ -51,9 +80,19 @@ class Shelf(UserDict.DictMixin):
5180 See the module's __doc__ string for an overview of the interface.
5281 """
5382
def __init__(self, dict, protocol=None, writeback=False, binary=None):
    """Wrap a dict-like object whose values are stored as pickles.

    dict      -- the underlying mapping that holds the pickled values.
    protocol  -- pickle protocol handed to Pickler; defaults to 0 when
                 neither 'protocol' nor 'binary' is given.
    writeback -- if true, entries that are read or assigned are also
                 kept in self.cache.
    binary    -- deprecated; when given, int(binary) is used as the
                 protocol and a PendingDeprecationWarning is issued.

    Raises ValueError if both 'protocol' and 'binary' are specified.
    """
    self.dict = dict
    if protocol is not None and binary is not None:
        # The call form of raise is valid on both Python 2 and Python 3.
        raise ValueError("can't specify both 'protocol' and 'binary'")
    if binary is not None:
        warnings.warn("The 'binary' argument to Shelf() is deprecated",
                      PendingDeprecationWarning)
        protocol = int(binary)
    if protocol is None:
        protocol = 0
    self._protocol = protocol
    self.writeback = writeback
    self.cache = {}
5796
def keys(self):
    """Return the keys of the underlying mapping."""
    backing = self.dict
    return backing.keys()
@@ -73,19 +112,32 @@ def get(self, key, default=None):
73112 return default
74113
def __getitem__(self, key):
    """Return the entry stored under key.

    A cached entry is returned as-is.  Otherwise the pickle held by the
    underlying mapping is loaded, and the result is remembered in
    self.cache only when writeback is enabled.  A missing key propagates
    whatever the underlying mapping raises for it.
    """
    cache = self.cache
    if key in cache:
        return cache[key]
    stream = StringIO(self.dict[key])
    entry = Unpickler(stream).load()
    if self.writeback:
        cache[key] = entry
    return entry
78123
def __setitem__(self, key, value):
    """Pickle value into the underlying mapping under key.

    With writeback enabled the live (unpickled) value is also remembered
    in self.cache.
    """
    if self.writeback:
        self.cache[key] = value
    buf = StringIO()
    Pickler(buf, self._protocol).dump(value)
    self.dict[key] = buf.getvalue()
84131
def __delitem__(self, key):
    """Remove key from the underlying mapping and from the cache.

    Whatever the underlying mapping raises for a missing key propagates;
    a key that is absent only from the cache is not an error.
    """
    del self.dict[key]
    self.cache.pop(key, None)
87138
88139 def close (self ):
140+ self .sync ()
89141 try :
90142 self .dict .close ()
91143 except :
@@ -96,6 +148,12 @@ def __del__(self):
96148 self .close ()
97149
def sync(self):
    """Write cached entries back, then flush the underlying mapping.

    When writeback is enabled and the cache is non-empty, every cached
    entry is stored again through self[key] = entry and the cache is
    emptied.  Afterwards the underlying mapping's own sync() is called
    if it has one.

    If storing an entry raises, the exception propagates, the cache is
    left intact and the writeback flag is restored.
    """
    if self.writeback and self.cache:
        # Write-back is switched off while re-storing so that __setitem__
        # does not insert into the cache that is being iterated.
        self.writeback = False
        try:
            # items() works on both Python 2 and Python 3.
            for key, entry in self.cache.items():
                self[key] = entry
        finally:
            # Restore the flag even on failure; otherwise later mutations
            # would silently stop being cached and written back.
            self.writeback = True
        self.cache = {}
    if hasattr(self.dict, 'sync'):
        self.dict.sync()
101159
@@ -113,8 +171,8 @@ class BsdDbShelf(Shelf):
113171 See the module's __doc__ string for an overview of the interface.
114172 """
115173
def __init__(self, dict, protocol=None, writeback=False, binary=None):
    """Initialise the shelf by handing every argument on to Shelf."""
    Shelf.__init__(self, dict, protocol=protocol, writeback=writeback,
                   binary=binary)
118176
119177 def set_location (self , key ):
120178 (key , value ) = self .dict .set_location (key )
@@ -149,22 +207,25 @@ class DbfilenameShelf(Shelf):
149207 See the module's __doc__ string for an overview of the interface.
150208 """
151209
def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open the anydbm database named by filename and wrap it in a Shelf.

    filename and flag are passed to anydbm.open(); the remaining
    arguments are handed on to Shelf.__init__ unchanged.
    """
    import anydbm
    database = anydbm.open(filename, flag)
    Shelf.__init__(self, database, protocol, writeback, binary)
155213
156214
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  As a side-effect, an extension may be added to the
    filename and more than one file may be created.  The optional flag
    parameter has the same interpretation as the flag parameter of
    anydbm.open().  The optional protocol parameter specifies the
    version of the pickle protocol (0, 1, or 2).

    The optional writeback parameter, when true, makes the shelf keep a
    cache of the entries that are accessed and write them back on
    sync() and close().

    The optional binary parameter is deprecated and may be set to True
    to force the use of binary pickles for serializing data values.

    See the module's __doc__ string for an overview of the interface.
    """
    # Arguments are forwarded in DbfilenameShelf's own parameter order so
    # that 'protocol' is honoured and 'binary' reaches the deprecation path.
    return DbfilenameShelf(filename, flag, protocol, writeback, binary)
0 commit comments