@@ -2169,67 +2169,67 @@ def remove_frames(pickled, keep_frame=None):
21692169 def test_framed_write_sizes_with_delayed_writer (self ):
21702170 class ChunkAccumulator :
21712171 """Accumulate pickler output in a list of raw chunks."""
2172-
21732172 def __init__ (self ):
21742173 self .chunks = []
2175-
21762174 def write (self , chunk ):
21772175 self .chunks .append (chunk )
2178-
21792176 def concatenate_chunks (self ):
2180- # Some chunks can be memoryview instances, we need to convert
2181- # them to bytes to be able to call join
2182- return b"" .join ([c .tobytes () if hasattr (c , 'tobytes' ) else c
2183- for c in self .chunks ])
2184-
2185- small_objects = [(str (i ).encode ('ascii' ), i % 42 , {'i' : str (i )})
2186- for i in range (int (1e4 ))]
2177+ return b"" .join (self .chunks )
21872178
21882179 for proto in range (4 , pickle .HIGHEST_PROTOCOL + 1 ):
2180+ objects = [(str (i ).encode ('ascii' ), i % 42 , {'i' : str (i )})
2181+ for i in range (int (1e4 ))]
2182+ # Add a large unique ASCII string
2183+ objects .append ('0123456789abcdef' *
2184+ (self .FRAME_SIZE_TARGET // 16 + 1 ))
2185+
21892186 # Protocol 4 packs groups of small objects into frames and issues
21902187 # calls to write only once or twice per frame:
21912188 # The C pickler issues one call to write per-frame (header and
21922189 # contents) while Python pickler issues two calls to write: one for
21932190 # the frame header and one for the frame binary contents.
21942191 writer = ChunkAccumulator ()
2195- self .pickler (writer , proto ).dump (small_objects )
2192+ self .pickler (writer , proto ).dump (objects )
21962193
21972194 # Actually read the binary content of the chunks after the end
2198- # of the call to dump: ant memoryview passed to write should not
2195+ # of the call to dump: any memoryview passed to write should not
21992196 # be released otherwise this delayed access would not be possible.
22002197 pickled = writer .concatenate_chunks ()
22012198 reconstructed = self .loads (pickled )
2202- self .assertEqual (reconstructed , small_objects )
2199+ self .assertEqual (reconstructed , objects )
22032200 self .assertGreater (len (writer .chunks ), 1 )
22042201
2205- n_frames , remainder = divmod (len (pickled ), self .FRAME_SIZE_TARGET )
2206- if remainder > 0 :
2207- n_frames += 1
2202+ # memoryviews should own the memory.
2203+ del objects
2204+ support .gc_collect ()
2205+ self .assertEqual (writer .concatenate_chunks (), pickled )
22082206
2207+ n_frames = (len (pickled ) - 1 ) // self .FRAME_SIZE_TARGET + 1
22092208 # There should be at least one call to write per frame
22102209 self .assertGreaterEqual (len (writer .chunks ), n_frames )
22112210
22122211 # but not too many either: there can be one for the proto,
2213- # one per-frame header and one per frame for the actual contents.
2214- self .assertGreaterEqual (2 * n_frames + 1 , len (writer .chunks ))
2212+ # one per-frame header, one per frame for the actual contents,
2213+ # and two for the header.
2214+ self .assertLessEqual (len (writer .chunks ), 2 * n_frames + 3 )
22152215
2216- chunk_sizes = [len (c ) for c in writer .chunks [: - 1 ] ]
2216+ chunk_sizes = [len (c ) for c in writer .chunks ]
22172217 large_sizes = [s for s in chunk_sizes
22182218 if s >= self .FRAME_SIZE_TARGET ]
2219- small_sizes = [s for s in chunk_sizes
2220- if s < self .FRAME_SIZE_TARGET ]
2219+ medium_sizes = [s for s in chunk_sizes
2220+ if 9 < s < self .FRAME_SIZE_TARGET ]
2221+ small_sizes = [s for s in chunk_sizes if s <= 9 ]
22212222
22222223 # Large chunks should not be too large:
22232224 for chunk_size in large_sizes :
2224- self .assertGreater (2 * self .FRAME_SIZE_TARGET , chunk_size )
2225-
2226- last_chunk_size = len (writer .chunks [- 1 ])
2227- self .assertGreater (2 * self .FRAME_SIZE_TARGET , last_chunk_size )
2228-
2229- # Small chunks (if any) should be very small
2230- # (only proto and frame headers)
2231- for chunk_size in small_sizes :
2232- self .assertGreaterEqual (9 , chunk_size )
2225+ self .assertLess (chunk_size , 2 * self .FRAME_SIZE_TARGET ,
2226+ chunk_sizes )
2227+ # There shouldn't be too many small chunks: the protocol header,
2228+ # the frame headers and the large string headers are written
2229+ # in small chunks.
2230+ self .assertLessEqual (len (small_sizes ),
2231+ len (large_sizes ) + len (medium_sizes ) + 3 ,
2232+ chunk_sizes )
22332233
22342234 def test_nested_names (self ):
22352235 global Nested
0 commit comments