Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5d1cfd2

Browse files
fix: Fix BlobReader handling of interleaved reads and seeks (googleapis#721)
* tests (fileio): add tarfile-based test case for reader seek

  Background info: googleapis#462

* fix: Patch blob reader to return correct seek/tell values (googleapis#462)

Co-authored-by: Andrew Gorcester <[email protected]>
1 parent e0b3b35 commit 5d1cfd2

File tree

2 files changed: 57 additions and 16 deletions

2 files changed: 57 additions and 16 deletions

google/cloud/storage/fileio.py

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -123,9 +123,12 @@ def read(self, size=-1):
123123
# If the read request demands more bytes than are buffered, fetch more.
124124
remaining_size = size - len(result)
125125
if remaining_size > 0 or size < 0:
126+
self._pos += self._buffer.tell()
127+
read_size = len(result)
128+
126129
self._buffer.seek(0)
127130
self._buffer.truncate(0) # Clear the buffer to make way for new data.
128-
fetch_start = self._pos + len(result)
131+
fetch_start = self._pos
129132
if size > 0:
130133
# Fetch the larger of self._chunk_size or the remaining_size.
131134
fetch_end = fetch_start + max(remaining_size, self._chunk_size)
@@ -154,9 +157,8 @@ def read(self, size=-1):
154157
self._buffer.write(result[size:])
155158
self._buffer.seek(0)
156159
result = result[:size]
157-
158-
self._pos += len(result)
159-
160+
# Increment relative offset by true amount read.
161+
self._pos += len(result) - read_size
160162
return result
161163

162164
def read1(self, size=-1):
@@ -174,29 +176,33 @@ def seek(self, pos, whence=0):
174176
if self._blob.size is None:
175177
self._blob.reload(**self._download_kwargs)
176178

177-
initial_pos = self._pos
179+
initial_offset = self._pos + self._buffer.tell()
178180

179181
if whence == 0:
180-
self._pos = pos
182+
target_pos = pos
181183
elif whence == 1:
182-
self._pos += pos
184+
target_pos = initial_offset + pos
183185
elif whence == 2:
184-
self._pos = self._blob.size + pos
186+
target_pos = self._blob.size + pos
185187
if whence not in {0, 1, 2}:
186188
raise ValueError("invalid whence value")
187189

188-
if self._pos > self._blob.size:
189-
self._pos = self._blob.size
190+
if target_pos > self._blob.size:
191+
target_pos = self._blob.size
190192

191193
# Seek or invalidate buffer as needed.
192-
difference = self._pos - initial_pos
193-
new_buffer_pos = self._buffer.seek(difference, 1)
194-
if new_buffer_pos != difference: # Buffer does not contain new pos.
195-
# Invalidate buffer.
194+
if target_pos < self._pos:
195+
# Target position < relative offset <= true offset.
196+
# As data is not in buffer, invalidate buffer.
196197
self._buffer.seek(0)
197198
self._buffer.truncate(0)
198-
199-
return self._pos
199+
new_pos = target_pos
200+
self._pos = target_pos
201+
else:
202+
# relative offset <= target position <= size of file.
203+
difference = target_pos - initial_offset
204+
new_pos = self._pos + self._buffer.seek(difference, 1)
205+
return new_pos
200206

201207
def close(self):
202208
self._buffer.close()

tests/unit/test_fileio.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,41 @@ def initialize_size(**_):
247247

248248
reader.close()
249249

250+
def test_advanced_seek(self):
251+
blob = mock.Mock()
252+
253+
def read_from_fake_data(start=0, end=None, **_):
254+
return TEST_BINARY_DATA[start:end] * 1024
255+
256+
blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data)
257+
blob.size = None
258+
download_kwargs = {"if_metageneration_match": 1}
259+
reader = self._make_blob_reader(blob, chunk_size=1024, **download_kwargs)
260+
261+
# Seek needs the blob size to work and should call reload() if the size
262+
# is not known. Set a mock to initialize the size if reload() is called.
263+
def initialize_size(**_):
264+
blob.size = len(TEST_BINARY_DATA) * 1024
265+
266+
blob.reload = mock.Mock(side_effect=initialize_size)
267+
268+
self.assertEqual(reader.tell(), 0)
269+
# Mimic tarfile access pattern. Read tarinfo block.
270+
reader.read(512)
271+
self.assertEqual(reader.tell(), 512)
272+
self.assertEqual(reader.seek(512), 512)
273+
# Mimic read actual tar content.
274+
reader.read(400)
275+
self.assertEqual(reader.tell(), 912)
276+
# Tarfile offsets are rounded up by block size
277+
# A sanity seek/read is used to check for unexpected ends.
278+
reader.seek(1023)
279+
reader.read(1)
280+
self.assertEqual(reader.tell(), 1024)
281+
reader.read(512)
282+
self.assertEqual(reader.tell(), 1536)
283+
reader.close()
284+
250285
def test_close(self):
251286
blob = mock.Mock()
252287
reader = self._make_blob_reader(blob)

0 commit comments

Comments
 (0)