From 58f20a621dd8227dd7bc9636de15f04dab50f631 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Tue, 5 Sep 2023 03:13:17 +0200 Subject: [PATCH 1/2] implement Content-MD5 check for PutObject --- localstack/aws/api/s3/__init__.py | 11 ++++--- localstack/aws/spec-patches.json | 15 +++++++++ localstack/services/s3/provider.py | 14 ++++++++- localstack/services/s3/provider_stream.py | 17 ++++++++++ localstack/services/s3/utils.py | 22 ++++++++++++- localstack/services/s3/v3/provider.py | 13 ++++++++ tests/aws/services/s3/test_s3.py | 35 +++++++++++++++++++-- tests/aws/services/s3/test_s3.snapshot.json | 10 +++++- 8 files changed, 127 insertions(+), 10 deletions(-) diff --git a/localstack/aws/api/s3/__init__.py b/localstack/aws/api/s3/__init__.py index 6a9407b7a76ab..6a0b606beebf2 100644 --- a/localstack/aws/api/s3/__init__.py +++ b/localstack/aws/api/s3/__init__.py @@ -232,8 +232,6 @@ class BucketLocationConstraint(str): us_gov_west_1 = "us-gov-west-1" us_west_1 = "us-west-1" us_west_2 = "us-west-2" - ap_south_2 = "ap-south-2" - eu_south_2 = "eu-south-2" class BucketLogsPermission(str): @@ -370,8 +368,6 @@ class InventoryOptionalField(str): IntelligentTieringAccessTier = "IntelligentTieringAccessTier" BucketKeyStatus = "BucketKeyStatus" ChecksumAlgorithm = "ChecksumAlgorithm" - ObjectAccessControlList = "ObjectAccessControlList" - ObjectOwner = "ObjectOwner" class JSONType(str): @@ -882,6 +878,13 @@ class NoSuchBucketPolicy(ServiceException): BucketName: Optional[BucketName] +class InvalidDigest(ServiceException): + code: str = "InvalidDigest" + sender_fault: bool = False + status_code: int = 400 + Content_MD5: Optional[ContentMD5] + + AbortDate = datetime diff --git a/localstack/aws/spec-patches.json b/localstack/aws/spec-patches.json index c6a2198ca07fa..4950a1da24db8 100644 --- a/localstack/aws/spec-patches.json +++ b/localstack/aws/spec-patches.json @@ -1134,6 +1134,21 @@ "value": { "httpStatusCode": 403 } + }, + { + "op": "add", + "path": "/shapes/InvalidDigest", + "value": { + "type": "structure", + "members": { + "Content_MD5": { + "shape": "ContentMD5", + "locationName":"Content-MD5" + } + }, + "documentation": "
The Content-MD5 you specified was invalid.
", + "exception": true + } } ] } diff --git a/localstack/services/s3/provider.py b/localstack/services/s3/provider.py index 44a5a663de6fa..f2d46d68dd23e 100644 --- a/localstack/services/s3/provider.py +++ b/localstack/services/s3/provider.py @@ -73,6 +73,7 @@ IntelligentTieringConfigurationList, IntelligentTieringId, InvalidArgument, + InvalidDigest, InvalidPartOrder, InvalidStorageClass, InvalidTargetBucketForLogging, @@ -156,6 +157,7 @@ from localstack.services.s3.utils import ( capitalize_header_name_from_snake_case, create_redirect_for_post_request, + etag_to_base_64_content_md5, extract_bucket_key_version_id_from_copy_source, get_bucket_from_moto, get_failed_precondition_copy_source, @@ -184,7 +186,7 @@ from localstack.utils.aws.arns import s3_bucket_name from localstack.utils.collections import get_safe from localstack.utils.patch import patch -from localstack.utils.strings import short_uid +from localstack.utils.strings import md5, short_uid from localstack.utils.time import parse_timestamp from localstack.utils.urls import localstack_host @@ -517,6 +519,16 @@ def put_object( if checksum_algorithm := request.get("ChecksumAlgorithm"): verify_checksum(checksum_algorithm, context.request.data, request) + # TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming + # body. We can use the specs to verify which operations needs to have the checksum validated + if content_md5 := request.get("ContentMD5"): + calculated_md5 = etag_to_base_64_content_md5(md5(context.request.data)) + if calculated_md5 != content_md5: + raise InvalidDigest( + "The Content-MD5 you specified was invalid.", + Content_MD5=content_md5, + ) + moto_backend = get_moto_s3_backend(context) moto_bucket = get_bucket_from_moto(moto_backend, bucket=request["Bucket"]) diff --git a/localstack/services/s3/provider_stream.py b/localstack/services/s3/provider_stream.py index 616348e45bf41..4b58110bcf927 100644 --- a/localstack/services/s3/provider_stream.py +++ b/localstack/services/s3/provider_stream.py @@ -30,6 +30,7 @@ CopyObjectOutput, CopyObjectRequest, InvalidArgument, + InvalidDigest, InvalidStorageClass, NoSuchUpload, PreconditionFailed, @@ -44,6 +45,7 @@ from localstack.services.s3.provider import S3Provider from localstack.services.s3.utils import ( InvalidRequest, + etag_to_base_64_content_md5, extract_bucket_key_version_id_from_copy_source, get_bucket_from_moto, get_key_from_moto_bucket, @@ -151,6 +153,21 @@ def put_object( # the etag is recalculated response["ETag"] = key_object.etag + # verify content_md5 + if content_md5 := request.get("ContentMD5"): + calculated_md5 = etag_to_base_64_content_md5(key_object.etag.strip('"')) + if calculated_md5 != content_md5: + moto_backend.delete_object( + bucket_name=request["Bucket"], + key_name=request["Key"], + version_id=key_object.version_id, + bypass=True, + ) + raise InvalidDigest( + "The Content-MD5 you specified was invalid.", + Content_MD5=content_md5, + ) + if expires := request.get("Expires"): key_object.set_expiry(expires) elif "expires" in key_object.metadata: # if it got added from query string parameter diff --git a/localstack/services/s3/utils.py b/localstack/services/s3/utils.py index 206b6f85c13c7..370bbae4230a2 100644 --- a/localstack/services/s3/utils.py +++ b/localstack/services/s3/utils.py @@ -1,3 +1,5 @@ +import base64 +import codecs import datetime import hashlib import logging @@ -64,7 +66,14 @@ from localstack.services.s3.exceptions import InvalidRequest, MalformedXML from localstack.utils.aws import 
arns from localstack.utils.aws.arns import parse_arn -from localstack.utils.strings import checksum_crc32, checksum_crc32c, hash_sha1, hash_sha256 +from localstack.utils.strings import ( + checksum_crc32, + checksum_crc32c, + hash_sha1, + hash_sha256, + to_bytes, + to_str, +) from localstack.utils.urls import localstack_host LOG = logging.getLogger(__name__) @@ -274,6 +283,17 @@ def verify_checksum(checksum_algorithm: str, data: bytes, request: Dict): ) +def etag_to_base_64_content_md5(etag: ETag) -> str: + """ + Convert an ETag, representing an md5 hexdigest (might be quoted), to its base64 encoded representation + :param etag: an ETag, might be quoted + :return: the base64 value + """ + # get the bytes digest from the hexdigest + byte_digest = codecs.decode(to_bytes(etag.strip('"')), "hex") + return to_str(base64.b64encode(byte_digest)) + + def decode_aws_chunked_object( stream: IO[bytes], buffer: IO[bytes], diff --git a/localstack/services/s3/v3/provider.py b/localstack/services/s3/v3/provider.py index 8d80cc56534fb..e8dfdab699d34 100644 --- a/localstack/services/s3/v3/provider.py +++ b/localstack/services/s3/v3/provider.py @@ -99,6 +99,7 @@ IntelligentTieringId, InvalidArgument, InvalidBucketName, + InvalidDigest, InvalidObjectState, InvalidPartNumber, InvalidPartOrder, @@ -222,6 +223,7 @@ add_expiration_days_to_datetime, create_redirect_for_post_request, create_s3_kms_managed_key_for_region, + etag_to_base_64_content_md5, extract_bucket_key_version_id_from_copy_source, get_canned_acl, get_class_attrs_from_spec_class, @@ -634,6 +636,17 @@ def put_object( f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid." ) + # TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming + # body. 
We can use the specs to verify which operations needs to have the checksum validated + if content_md5 := request.get("ContentMD5"): + calculated_md5 = etag_to_base_64_content_md5(s3_stored_object.etag) + if calculated_md5 != content_md5: + self._storage_backend.remove(bucket_name, s3_object) + raise InvalidDigest( + "The Content-MD5 you specified was invalid.", + Content_MD5=content_md5, + ) + s3_bucket.objects.set(key, s3_object) # in case we are overriding an object, delete the tags entry diff --git a/tests/aws/services/s3/test_s3.py b/tests/aws/services/s3/test_s3.py index d445e7e35d77c..93d58d2fc49c5 100644 --- a/tests/aws/services/s3/test_s3.py +++ b/tests/aws/services/s3/test_s3.py @@ -44,7 +44,11 @@ LAMBDA_RUNTIME_PYTHON39, ) from localstack.services.s3 import constants as s3_constants -from localstack.services.s3.utils import parse_expiration_header, rfc_1123_datetime +from localstack.services.s3.utils import ( + etag_to_base_64_content_md5, + parse_expiration_header, + rfc_1123_datetime, +) from localstack.testing.aws.util import is_aws_cloud from localstack.testing.pytest import markers from localstack.testing.snapshots.transformer_utility import TransformerUtility @@ -3225,21 +3229,46 @@ def test_precondition_failed_error(self, s3_create_bucket, snapshot, aws_client) snapshot.match("get-object-if-match", e.value.response) @markers.aws.validated - @pytest.mark.xfail(reason="Error format is wrong and missing keys") + @pytest.mark.xfail( + condition=LEGACY_S3_PROVIDER, reason="Error format is wrong and missing keys" + ) + @markers.snapshot.skip_snapshot_verify( + condition=lambda: not is_native_provider(), + paths=["$..ServerSideEncryption"], + ) def test_s3_invalid_content_md5(self, s3_bucket, snapshot, aws_client): # put object with invalid content MD5 # TODO: implement ContentMD5 in ASF + content = "something" + response = aws_client.s3.put_object( + Bucket=s3_bucket, + Key="test-key", + Body=content, + ) + md = hashlib.md5(content.encode("utf-8")).digest() + content_md5 = base64.b64encode(md).decode("utf-8") + base_64_content_md5 = etag_to_base_64_content_md5(response["ETag"]) + assert content_md5 == base_64_content_md5 + hashes = ["__invalid__", "000", "not base64 encoded checksum", "MTIz"] for index, md5hash in enumerate(hashes): with pytest.raises(ClientError) as e: aws_client.s3.put_object( Bucket=s3_bucket, Key="test-key", - Body="something", + Body=content, ContentMD5=md5hash, ) snapshot.match(f"md5-error-{index}", e.value.response) + response = aws_client.s3.put_object( + Bucket=s3_bucket, + Key="test-key", + Body=content, + ContentMD5=base_64_content_md5, + ) + snapshot.match("success-put-object-md5", response) + @markers.aws.validated @markers.snapshot.skip_snapshot_verify( condition=is_old_provider, paths=["$..VersionId", "$..ContentLanguage"] diff --git a/tests/aws/services/s3/test_s3.snapshot.json b/tests/aws/services/s3/test_s3.snapshot.json index ba4153cbd58a1..ae509cff54a6d 100644 --- a/tests/aws/services/s3/test_s3.snapshot.json +++ b/tests/aws/services/s3/test_s3.snapshot.json @@ -1613,7 +1613,7 @@ } }, "tests/aws/services/s3/test_s3.py::TestS3::test_s3_invalid_content_md5": { - "recorded-date": "03-08-2023, 04:17:53", + "recorded-date": "05-09-2023, 02:58:55", "recorded-content": { "md5-error-0": { "Error": { @@ -1658,6 +1658,14 @@ "HTTPHeaders": {}, "HTTPStatusCode": 400 } + }, + "success-put-object-md5": { + "ETag": "\"437b930db84b8079c2dd804a71936b5f\"", + "ServerSideEncryption": "AES256", + "ResponseMetadata": { + "HTTPHeaders": {}, + "HTTPStatusCode": 200 
+ } } } }, From 64a28a221a56bb696f5c58f906399731869e7164 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Tue, 5 Sep 2023 15:19:59 +0200 Subject: [PATCH 2/2] switch ContentMD5 check for default provider --- localstack/services/s3/provider.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/localstack/services/s3/provider.py b/localstack/services/s3/provider.py index f2d46d68dd23e..c9bd350a9d5b0 100644 --- a/localstack/services/s3/provider.py +++ b/localstack/services/s3/provider.py @@ -186,7 +186,7 @@ from localstack.utils.aws.arns import s3_bucket_name from localstack.utils.collections import get_safe from localstack.utils.patch import patch -from localstack.utils.strings import md5, short_uid +from localstack.utils.strings import short_uid from localstack.utils.time import parse_timestamp from localstack.utils.urls import localstack_host @@ -519,16 +519,6 @@ def put_object( if checksum_algorithm := request.get("ChecksumAlgorithm"): verify_checksum(checksum_algorithm, context.request.data, request) - # TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming - # body. We can use the specs to verify which operations needs to have the checksum validated - if content_md5 := request.get("ContentMD5"): - calculated_md5 = etag_to_base_64_content_md5(md5(context.request.data)) - if calculated_md5 != content_md5: - raise InvalidDigest( - "The Content-MD5 you specified was invalid.", - Content_MD5=content_md5, - ) - moto_backend = get_moto_s3_backend(context) moto_bucket = get_bucket_from_moto(moto_backend, bucket=request["Bucket"]) @@ -546,6 +536,23 @@ def put_object( ) raise + # TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming + # body. We can use the specs to verify which operations needs to have the checksum validated + # verify content_md5 + if content_md5 := request.get("ContentMD5"): + calculated_md5 = etag_to_base_64_content_md5(response["ETag"].strip('"')) + if calculated_md5 != content_md5: + moto_backend.delete_object( + bucket_name=request["Bucket"], + key_name=request["Key"], + version_id=response.get("VersionId"), + bypass=True, + ) + raise InvalidDigest( + "The Content-MD5 you specified was invalid.", + Content_MD5=content_md5, + ) + # moto interprets the Expires in query string for presigned URL as an Expires header and use it for the object # we set it to the correctly parsed value in Request, else we remove it from moto metadata # we are getting the last set key here so no need for versionId when getting the key
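
For reference, a minimal sketch of the validation these two patches add, assuming a plain single-part PutObject where the S3 ETag is the hex MD5 digest of the body. The Content-MD5 header carries the base64-encoded raw 16-byte MD5 digest of the payload; each provider recomputes that value with the new etag_to_base_64_content_md5() helper from localstack/services/s3/utils.py and rejects the request with InvalidDigest (HTTP 400) when it does not match. The body bytes and ETag below are taken from the test and snapshot in this diff; the rest is illustrative.

    import base64
    import hashlib

    body = b"something"

    # What a client (e.g. boto3) sends in the Content-MD5 header:
    # the base64 encoding of the raw 16-byte MD5 digest of the request body.
    content_md5 = base64.b64encode(hashlib.md5(body).digest()).decode()

    # For a single-part upload the ETag is the hex MD5 digest of the body,
    # so the same value can be derived from the (possibly quoted) ETag,
    # which is what etag_to_base_64_content_md5() does.
    etag = '"437b930db84b8079c2dd804a71936b5f"'  # ETag from the snapshot above
    derived_md5 = base64.b64encode(bytes.fromhex(etag.strip('"'))).decode()

    # A mismatch between the header and the recomputed digest raises InvalidDigest (400).
    assert content_md5 == derived_md5

In the streamed and v3 providers the object has already been persisted by the time the digest is compared, which is why the mismatch path deletes the stored key first (moto_backend.delete_object(..., bypass=True) in provider_stream.py, self._storage_backend.remove(...) in the v3 provider) before raising InvalidDigest.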