Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit dfdf585

Browse files
authored
fix S3 List* pagination CommonPrefixes (localstack#9608)
1 parent 2bf09be commit dfdf585

File tree

5 files changed

+3759
-2578
lines changed

5 files changed

+3759
-2578
lines changed

‎localstack/services/s3/v3/provider.py

Lines changed: 108 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,14 +1300,11 @@ def list_objects(
13001300

13011301
s3_objects: list[Object] = []
13021302

1303-
# sort by key
1304-
for s3_object in sorted(s3_bucket.objects.values(), key=lambda r: r.key):
1305-
if count >= max_keys:
1306-
is_truncated = True
1307-
if s3_objects:
1308-
next_key_marker = s3_objects[-1]["Key"]
1309-
break
1303+
all_keys = sorted(s3_bucket.objects.values(), key=lambda r: r.key)
1304+
last_key = all_keys[-1] if all_keys else None
13101305

1306+
# sort by key
1307+
for s3_object in all_keys:
13111308
key = urlparse.quote(s3_object.key) if encoding_type else s3_object.key
13121309
# skip all keys that alphabetically come before key_marker
13131310
if marker:
@@ -1318,32 +1315,44 @@ def list_objects(
13181315
if prefix and not key.startswith(prefix):
13191316
continue
13201317

1321-
# separate keys that contain the same string between the prefix and the first occurrence of the delimiter
1318+
# see ListObjectsV2 for the logic comments (shared logic here)
1319+
prefix_including_delimiter = None
13221320
if delimiter and delimiter in (key_no_prefix := key.removeprefix(prefix)):
13231321
pre_delimiter, _, _ = key_no_prefix.partition(delimiter)
13241322
prefix_including_delimiter = f"{prefix}{pre_delimiter}{delimiter}"
13251323

1326-
if prefix_including_delimiter not in common_prefixes:
1327-
count += 1
1328-
common_prefixes.add(prefix_including_delimiter)
1329-
continue
1324+
if prefix_including_delimiter in common_prefixes or (
1325+
marker and marker.startswith(prefix_including_delimiter)
1326+
):
1327+
continue
13301328

1331-
# TODO: add RestoreStatus if present
1332-
object_data = Object(
1333-
Key=key,
1334-
ETag=s3_object.quoted_etag,
1335-
Owner=s3_bucket.owner, # TODO: verify reality
1336-
Size=s3_object.size,
1337-
LastModified=s3_object.last_modified,
1338-
StorageClass=s3_object.storage_class,
1339-
)
1329+
if prefix_including_delimiter:
1330+
common_prefixes.add(prefix_including_delimiter)
1331+
else:
1332+
# TODO: add RestoreStatus if present
1333+
object_data = Object(
1334+
Key=key,
1335+
ETag=s3_object.quoted_etag,
1336+
Owner=s3_bucket.owner, # TODO: verify reality
1337+
Size=s3_object.size,
1338+
LastModified=s3_object.last_modified,
1339+
StorageClass=s3_object.storage_class,
1340+
)
13401341

1341-
if s3_object.checksum_algorithm:
1342-
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
1342+
if s3_object.checksum_algorithm:
1343+
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
13431344

1344-
s3_objects.append(object_data)
1345+
s3_objects.append(object_data)
13451346

1347+
# we just added a CommonPrefix or an Object, increase the counter
13461348
count += 1
1349+
if count >= max_keys and last_key.key != s3_object.key:
1350+
is_truncated = True
1351+
if prefix_including_delimiter:
1352+
next_key_marker = prefix_including_delimiter
1353+
elif s3_objects:
1354+
next_key_marker = s3_objects[-1]["Key"]
1355+
break
13471356

13481357
common_prefixes = [CommonPrefix(Prefix=prefix) for prefix in sorted(common_prefixes)]
13491358

@@ -1413,13 +1422,9 @@ def list_objects_v2(
14131422

14141423
# sort by key
14151424
for s3_object in sorted(s3_bucket.objects.values(), key=lambda r: r.key):
1416-
if count >= max_keys:
1417-
is_truncated = True
1418-
next_continuation_token = to_str(base64.urlsafe_b64encode(s3_object.key.encode()))
1419-
break
1420-
14211425
key = urlparse.quote(s3_object.key) if encoding_type else s3_object.key
1422-
# skip all keys that alphabetically come before key_marker
1426+
1427+
# skip all keys that alphabetically come before continuation_token
14231428
if continuation_token:
14241429
if key < decoded_continuation_token:
14251430
continue
@@ -1433,31 +1438,46 @@ def list_objects_v2(
14331438
continue
14341439

14351440
# separate keys that contain the same string between the prefix and the first occurrence of the delimiter
1441+
prefix_including_delimiter = None
14361442
if delimiter and delimiter in (key_no_prefix := key.removeprefix(prefix)):
14371443
pre_delimiter, _, _ = key_no_prefix.partition(delimiter)
14381444
prefix_including_delimiter = f"{prefix}{pre_delimiter}{delimiter}"
14391445

1440-
if prefix_including_delimiter not in common_prefixes:
1441-
count += 1
1442-
common_prefixes.add(prefix_including_delimiter)
1443-
continue
1446+
# if the CommonPrefix is already in the CommonPrefixes, it doesn't count towards MaxKey, we can skip
1447+
# the entry without increasing the counter. We need to iterate over all of these entries before
1448+
# returning the next continuation marker, to properly start at the next key after this CommonPrefix
1449+
if prefix_including_delimiter in common_prefixes:
1450+
continue
14441451

1445-
# TODO: add RestoreStatus if present
1446-
object_data = Object(
1447-
Key=key,
1448-
ETag=s3_object.quoted_etag,
1449-
Size=s3_object.size,
1450-
LastModified=s3_object.last_modified,
1451-
StorageClass=s3_object.storage_class,
1452-
)
1452+
# After skipping all entries, verify we're not over the MaxKeys before adding a new entry
1453+
if count >= max_keys:
1454+
is_truncated = True
1455+
next_continuation_token = to_str(base64.urlsafe_b64encode(s3_object.key.encode()))
1456+
break
1457+
1458+
# if we found a new CommonPrefix, add it to the CommonPrefixes
1459+
# else, it means it's a new Object, add it to the Contents
1460+
if prefix_including_delimiter:
1461+
common_prefixes.add(prefix_including_delimiter)
1462+
else:
1463+
# TODO: add RestoreStatus if present
1464+
object_data = Object(
1465+
Key=key,
1466+
ETag=s3_object.quoted_etag,
1467+
Size=s3_object.size,
1468+
LastModified=s3_object.last_modified,
1469+
StorageClass=s3_object.storage_class,
1470+
)
14531471

1454-
if fetch_owner:
1455-
object_data["Owner"] = s3_bucket.owner
1472+
if fetch_owner:
1473+
object_data["Owner"] = s3_bucket.owner
14561474

1457-
if s3_object.checksum_algorithm:
1458-
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
1475+
if s3_object.checksum_algorithm:
1476+
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
14591477

1460-
s3_objects.append(object_data)
1478+
s3_objects.append(object_data)
1479+
1480+
# we just added either a CommonPrefix or an Object to the List, increase the counter by one
14611481
count += 1
14621482

14631483
common_prefixes = [CommonPrefix(Prefix=prefix) for prefix in sorted(common_prefixes)]
@@ -1555,17 +1575,21 @@ def list_object_versions(
15551575
if prefix and not key.startswith(prefix):
15561576
continue
15571577

1558-
# separate keys that contain the same string between the prefix and the first occurrence of the delimiter
1578+
# see ListObjectsV2 for the logic comments (shared logic here)
1579+
prefix_including_delimiter = None
15591580
if delimiter and delimiter in (key_no_prefix := key.removeprefix(prefix)):
15601581
pre_delimiter, _, _ = key_no_prefix.partition(delimiter)
15611582
prefix_including_delimiter = f"{prefix}{pre_delimiter}{delimiter}"
15621583

1563-
if prefix_including_delimiter not in common_prefixes:
1564-
count += 1
1565-
common_prefixes.add(prefix_including_delimiter)
1566-
continue
1584+
if prefix_including_delimiter in common_prefixes or (
1585+
key_marker and key_marker.startswith(prefix_including_delimiter)
1586+
):
1587+
continue
1588+
1589+
if prefix_including_delimiter:
1590+
common_prefixes.add(prefix_including_delimiter)
15671591

1568-
if isinstance(version, S3DeleteMarker):
1592+
elif isinstance(version, S3DeleteMarker):
15691593
delete_marker = DeleteMarkerEntry(
15701594
Key=key,
15711595
Owner=s3_bucket.owner,
@@ -1594,11 +1618,15 @@ def list_object_versions(
15941618

15951619
object_versions.append(object_version)
15961620

1621+
# we just added a CommonPrefix, an Object or a DeleteMarker, increase the counter
15971622
count += 1
15981623
if count >= max_keys and last_version.version_id != version.version_id:
15991624
is_truncated = True
1600-
next_key_marker = version.key
1601-
next_version_id_marker = version.version_id
1625+
if prefix_including_delimiter:
1626+
next_key_marker = prefix_including_delimiter
1627+
else:
1628+
next_key_marker = version.key
1629+
next_version_id_marker = version.version_id
16021630
break
16031631

16041632
common_prefixes = [CommonPrefix(Prefix=prefix) for prefix in sorted(common_prefixes)]
@@ -2216,13 +2244,12 @@ def list_multipart_uploads(
22162244

22172245
uploads = []
22182246
# sort by key and initiated
2219-
for multipart in sorted(
2247+
all_multiparts = sorted(
22202248
s3_bucket.multiparts.values(), key=lambda r: (r.object.key, r.initiated.timestamp())
2221-
):
2222-
if count >= max_uploads:
2223-
is_truncated = True
2224-
break
2249+
)
2250+
last_multipart = all_multiparts[-1] if all_multiparts else None
22252251

2252+
for multipart in all_multiparts:
22262253
key = urlparse.quote(multipart.object.key) if encoding_type else multipart.object.key
22272254
# skip all keys that are different than key_marker
22282255
if key_marker:
@@ -2243,27 +2270,34 @@ def list_multipart_uploads(
22432270
if prefix and not key.startswith(prefix):
22442271
continue
22452272

2246-
# separate keys that contain the same string between the prefix and the first occurrence of the delimiter
2273+
# see ListObjectsV2 for the logic comments (shared logic here)
2274+
prefix_including_delimiter = None
22472275
if delimiter and delimiter in (key_no_prefix := key.removeprefix(prefix)):
22482276
pre_delimiter, _, _ = key_no_prefix.partition(delimiter)
22492277
prefix_including_delimiter = f"{prefix}{pre_delimiter}{delimiter}"
22502278

2251-
if prefix_including_delimiter not in common_prefixes:
2252-
count += 1
2253-
common_prefixes.add(prefix_including_delimiter)
2254-
continue
2279+
if prefix_including_delimiter in common_prefixes or (
2280+
key_marker and key_marker.startswith(prefix_including_delimiter)
2281+
):
2282+
continue
22552283

2256-
multipart_upload = MultipartUpload(
2257-
UploadId=multipart.id,
2258-
Key=multipart.object.key,
2259-
Initiated=multipart.initiated,
2260-
StorageClass=multipart.object.storage_class,
2261-
Owner=multipart.initiator, # TODO: check the difference
2262-
Initiator=multipart.initiator,
2263-
)
2264-
uploads.append(multipart_upload)
2284+
if prefix_including_delimiter:
2285+
common_prefixes.add(prefix_including_delimiter)
2286+
else:
2287+
multipart_upload = MultipartUpload(
2288+
UploadId=multipart.id,
2289+
Key=multipart.object.key,
2290+
Initiated=multipart.initiated,
2291+
StorageClass=multipart.object.storage_class,
2292+
Owner=multipart.initiator, # TODO: check the difference
2293+
Initiator=multipart.initiator,
2294+
)
2295+
uploads.append(multipart_upload)
22652296

22662297
count += 1
2298+
if count >= max_uploads and last_multipart.id != multipart.id:
2299+
is_truncated = True
2300+
break
22672301

22682302
common_prefixes = [CommonPrefix(Prefix=prefix) for prefix in sorted(common_prefixes)]
22692303

0 commit comments

Comments
 (0)