diff --git a/localstack/aws/api/s3/__init__.py b/localstack/aws/api/s3/__init__.py index 179f86ca8e580..3ddb3332989d6 100644 --- a/localstack/aws/api/s3/__init__.py +++ b/localstack/aws/api/s3/__init__.py @@ -652,6 +652,7 @@ class InvalidArgument(ServiceException): status_code: int = 400 ArgumentName: Optional[ArgumentName] ArgumentValue: Optional[ArgumentValue] + HostId: Optional[HostId] class SignatureDoesNotMatch(ServiceException): @@ -3011,6 +3012,34 @@ class DeleteResult(TypedDict, total=False): Errors: Optional[Errors] +class PostObjectRequest(ServiceRequest): + Body: Optional[IO[Body]] + Bucket: BucketName + + +class PostResponse(TypedDict, total=False): + StatusCode: Optional[GetObjectResponseStatusCode] + Location: Optional[Location] + LocationHeader: Optional[Location] + Bucket: Optional[BucketName] + Key: Optional[ObjectKey] + Expiration: Optional[Expiration] + ETag: Optional[ETag] + ETagHeader: Optional[ETag] + ChecksumCRC32: Optional[ChecksumCRC32] + ChecksumCRC32C: Optional[ChecksumCRC32C] + ChecksumSHA1: Optional[ChecksumSHA1] + ChecksumSHA256: Optional[ChecksumSHA256] + ServerSideEncryption: Optional[ServerSideEncryption] + VersionId: Optional[ObjectVersionId] + SSECustomerAlgorithm: Optional[SSECustomerAlgorithm] + SSECustomerKeyMD5: Optional[SSECustomerKeyMD5] + SSEKMSKeyId: Optional[SSEKMSKeyId] + SSEKMSEncryptionContext: Optional[SSEKMSEncryptionContext] + BucketKeyEnabled: Optional[BucketKeyEnabled] + RequestCharged: Optional[RequestCharged] + + class S3Api: service = "s3" @@ -4202,3 +4231,9 @@ def write_get_object_response( bucket_key_enabled: BucketKeyEnabled = None, ) -> None: raise NotImplementedError + + @handler("PostObject") + def post_object( + self, context: RequestContext, bucket: BucketName, body: IO[Body] = None + ) -> PostResponse: + raise NotImplementedError diff --git a/localstack/aws/protocol/serializer.py b/localstack/aws/protocol/serializer.py index 1154e2099a393..687dba4270047 100644 --- a/localstack/aws/protocol/serializer.py +++ b/localstack/aws/protocol/serializer.py @@ -1355,6 +1355,27 @@ class S3ResponseSerializer(RestXMLResponseSerializer): SUPPORTED_MIME_TYPES = [TEXT_XML, APPLICATION_XML] + def _serialize_response( + self, + parameters: dict, + response: HttpResponse, + shape: Optional[Shape], + shape_members: dict, + operation_model: OperationModel, + mime_type: str, + ) -> None: + header_params, payload_params = self._partition_members(parameters, shape) + self._process_header_members(header_params, response, shape) + # "HEAD" responses are basically "GET" responses without the actual body. 
+ # Do not process the body payload in this case (setting a body could also manipulate the headers) + # If the response is a redirection, the body should be empty as well + if operation_model.http.get("method") != "HEAD" and not 300 <= response.status_code < 400: + self._serialize_payload( + payload_params, response, shape, shape_members, operation_model, mime_type + ) + self._serialize_content_type(response, shape, shape_members, mime_type) + self._prepare_additional_traits_in_response(response, operation_model) + def _serialize_error( self, error: ServiceException, @@ -1381,11 +1402,9 @@ def _prepare_additional_traits_in_response( ): """Adds the request ID to the headers (in contrast to the body - as in the Query protocol).""" response = super()._prepare_additional_traits_in_response(response, operation_model) - request_id = gen_amzn_requestid_long() - response.headers["x-amz-request-id"] = request_id response.headers[ "x-amz-id-2" - ] = f"MzRISOwyjmnup{request_id}7/JypPGXLh0OVFGcJaaO3KW/hRAqKOpIEEp" + ] = f"MzRISOwyjmnup{response.headers['x-amz-request-id']}7/JypPGXLh0OVFGcJaaO3KW/hRAqKOpIEEp" return response def _add_error_tags( diff --git a/localstack/aws/spec-patches.json b/localstack/aws/spec-patches.json index 947500c3dae92..3fb2b6d6c9eda 100644 --- a/localstack/aws/spec-patches.json +++ b/localstack/aws/spec-patches.json @@ -267,6 +267,9 @@ }, "ArgumentValue": { "shape": "ArgumentValue" + }, + "HostId": { + "shape": "HostId" } }, "error": { @@ -439,6 +442,163 @@ "exception": true } }, + { + "op": "add", + "path": "/operations/PostObject", + "value": { + "name":"PostObject", + "http":{ + "method":"POST", + "requestUri":"/{Bucket}" + }, + "input":{"shape":"PostObjectRequest"}, + "output":{"shape":"PostResponse"}, + "documentationUrl":"http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html", + "documentation":"
The POST operation adds an object to a specified bucket by using HTML forms. POST is an alternate form of PUT that enables browser-based uploads as a way of putting objects in buckets. Parameters that are passed to PUT through HTTP Headers are instead passed as form fields to POST in the multipart/form-data encoded message body. To add an object to a bucket, you must have WRITE access on the bucket. Amazon S3 never stores partial objects. If you receive a successful response, you can be confident that the entire object was stored.
" + } + }, + { + "op": "add", + "path": "/shapes/PostObjectRequest", + "value": { + "type":"structure", + "required":[ + "Bucket" + ], + "members":{ + "Body":{ + "shape":"Body", + "documentation":"
Object data.
", + "streaming":true + }, + "Bucket":{ + "shape":"BucketName", + "documentation":"The bucket name to which the PUT action was initiated.
When using this action with an access point, you must direct requests to the access point hostname. The access point hostname takes the form AccessPointName-AccountId.s3-accesspoint.Region.amazonaws.com. When using this action with an access point through the Amazon Web Services SDKs, you provide the access point ARN in place of the bucket name. For more information about access point ARNs, see Using access points in the Amazon S3 User Guide.
When using this action with Amazon S3 on Outposts, you must direct requests to the S3 on Outposts hostname. The S3 on Outposts hostname takes the form AccessPointName-AccountId.outpostID.s3-outposts.Region.amazonaws.com
. When using this action with S3 on Outposts through the Amazon Web Services SDKs, you provide the Outposts bucket ARN in place of the bucket name. For more information about S3 on Outposts ARNs, see Using Amazon S3 on Outposts in the Amazon S3 User Guide.", + "location":"uri", + "locationName":"Bucket" + } + }, + "payload":"Body" + } + }, + { + "op": "add", + "path": "/shapes/PostResponse", + "value": { + "type": "structure", + "members": { + "StatusCode": { + "shape": "GetObjectResponseStatusCode", + "location": "statusCode" + }, + "Location":{ + "shape":"Location", + "documentation":"The URI that identifies the newly created object.
" }, + "LocationHeader":{ + "shape":"Location", + "documentation":"The URI that identifies the newly created object.
", + "location": "header", + "locationName": "Location" + }, + "Bucket":{ + "shape":"BucketName", + "documentation":"The name of the bucket that contains the newly created object. Does not return the access point ARN or access point alias if used.
When using this action with an access point, you must direct requests to the access point hostname. The access point hostname takes the form AccessPointName-AccountId.s3-accesspoint.Region.amazonaws.com. When using this action with an access point through the Amazon Web Services SDKs, you provide the access point ARN in place of the bucket name. For more information about access point ARNs, see Using access points in the Amazon S3 User Guide.
When using this action with Amazon S3 on Outposts, you must direct requests to the S3 on Outposts hostname. The S3 on Outposts hostname takes the form AccessPointName-AccountId.outpostID.s3-outposts.Region.amazonaws.com
. When using this action with S3 on Outposts through the Amazon Web Services SDKs, you provide the Outposts bucket ARN in place of the bucket name. For more information about S3 on Outposts ARNs, see Using Amazon S3 on Outposts in the Amazon S3 User Guide.
" }, + "Key":{ + "shape":"ObjectKey", + "documentation":"The object key of the newly created object.
" + }, + "Expiration": { + "shape": "Expiration", + "documentation": "If the expiration is configured for the object (see PutBucketLifecycleConfiguration), the response includes this header. It includes the expiry-date
and rule-id
key-value pairs that provide information about object expiration. The value of the rule-id
is URL-encoded.", + "location": "header", + "locationName": "x-amz-expiration" + }, + "ETag":{ + "shape":"ETag", + "documentation":"Entity tag that identifies the newly created object's data. Objects with different object data will have different entity tags. The entity tag is an opaque string. The entity tag may or may not be an MD5 digest of the object data. If the entity tag is not an MD5 digest of the object data, it will contain one or more nonhexadecimal characters and/or will consist of less than 32 or more than 32 hexadecimal digits. For more information about how the entity tag is calculated, see Checking object integrity in the Amazon S3 User Guide.
" + }, + "ETagHeader":{ + "shape":"ETag", + "documentation":"Entity tag that identifies the newly created object's data. Objects with different object data will have different entity tags. The entity tag is an opaque string. The entity tag may or may not be an MD5 digest of the object data. If the entity tag is not an MD5 digest of the object data, it will contain one or more nonhexadecimal characters and/or will consist of less than 32 or more than 32 hexadecimal digits. For more information about how the entity tag is calculated, see Checking object integrity in the Amazon S3 User Guide.
", + "location": "header", + "locationName": "ETag" + }, + "ChecksumCRC32": { + "shape": "ChecksumCRC32", + "documentation": "The base64-encoded, 32-bit CRC32 checksum of the object. This will only be present if it was uploaded with the object. With multipart uploads, this may not be a checksum value of the object. For more information about how checksums are calculated with multipart uploads, see Checking object integrity in the Amazon S3 User Guide.
", + "location": "header", + "locationName": "x-amz-checksum-crc32" + }, + "ChecksumCRC32C": { + "shape": "ChecksumCRC32C", + "documentation": "The base64-encoded, 32-bit CRC32C checksum of the object. This will only be present if it was uploaded with the object. With multipart uploads, this may not be a checksum value of the object. For more information about how checksums are calculated with multipart uploads, see Checking object integrity in the Amazon S3 User Guide.
", + "location": "header", + "locationName": "x-amz-checksum-crc32c" + }, + "ChecksumSHA1": { + "shape": "ChecksumSHA1", + "documentation": "The base64-encoded, 160-bit SHA-1 digest of the object. This will only be present if it was uploaded with the object. With multipart uploads, this may not be a checksum value of the object. For more information about how checksums are calculated with multipart uploads, see Checking object integrity in the Amazon S3 User Guide.
", + "location": "header", + "locationName": "x-amz-checksum-sha1" + }, + "ChecksumSHA256": { + "shape": "ChecksumSHA256", + "documentation": "The base64-encoded, 256-bit SHA-256 digest of the object. This will only be present if it was uploaded with the object. With multipart uploads, this may not be a checksum value of the object. For more information about how checksums are calculated with multipart uploads, see Checking object integrity in the Amazon S3 User Guide.
", + "location": "header", + "locationName": "x-amz-checksum-sha256" + }, + "ServerSideEncryption": { + "shape": "ServerSideEncryption", + "documentation": "If you specified server-side encryption either with an Amazon Web Services KMS key or Amazon S3-managed encryption key in your PUT request, the response includes this header. It confirms the encryption algorithm that Amazon S3 used to encrypt the object.
", + "location": "header", + "locationName": "x-amz-server-side-encryption" + }, + "VersionId": { + "shape": "ObjectVersionId", + "documentation": "Version of the object.
", + "location": "header", + "locationName": "x-amz-version-id" + }, + "SSECustomerAlgorithm": { + "shape": "SSECustomerAlgorithm", + "documentation": "If server-side encryption with a customer-provided encryption key was requested, the response will include this header confirming the encryption algorithm used.
", + "location": "header", + "locationName": "x-amz-server-side-encryption-customer-algorithm" + }, + "SSECustomerKeyMD5": { + "shape": "SSECustomerKeyMD5", + "documentation": "If server-side encryption with a customer-provided encryption key was requested, the response will include this header to provide round-trip message integrity verification of the customer-provided encryption key.
", + "location": "header", + "locationName": "x-amz-server-side-encryption-customer-key-MD5" + }, + "SSEKMSKeyId": { + "shape": "SSEKMSKeyId", + "documentation": "If x-amz-server-side-encryption
is present and has the value of aws:kms
, this header specifies the ID of the Amazon Web Services Key Management Service (Amazon Web Services KMS) symmetric customer managed key that was used for the object.", + "location": "header", + "locationName": "x-amz-server-side-encryption-aws-kms-key-id" + }, + "SSEKMSEncryptionContext": { + "shape": "SSEKMSEncryptionContext", + "documentation": "If present, specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.
", + "location": "header", + "locationName": "x-amz-server-side-encryption-context" + }, + "BucketKeyEnabled": { + "shape": "BucketKeyEnabled", + "documentation": "Indicates whether the uploaded object uses an S3 Bucket Key for server-side encryption with Amazon Web Services KMS (SSE-KMS).
", + "location": "header", + "locationName": "x-amz-server-side-encryption-bucket-key-enabled" + }, + "RequestCharged": { + "shape": "RequestCharged", + "location": "header", + "locationName": "x-amz-request-charged" + } + } + } + }, { "op": "add", "path": "/shapes/NoSuchWebsiteConfiguration", diff --git a/localstack/services/s3/presigned_url.py b/localstack/services/s3/presigned_url.py index 3a3d8077cf8f8..3a45356ff8153 100644 --- a/localstack/services/s3/presigned_url.py +++ b/localstack/services/s3/presigned_url.py @@ -1,9 +1,11 @@ +import base64 import copy import datetime +import json import logging import re import time -from typing import Dict, Tuple, TypedDict, Union +from typing import Dict, List, Tuple, TypedDict, Union from urllib import parse as urlparse from botocore.auth import HmacV1QueryAuth, S3SigV4QueryAuth @@ -12,19 +14,25 @@ from botocore.credentials import Credentials, ReadOnlyCredentials from botocore.exceptions import NoCredentialsError from botocore.utils import percent_encode_sequence -from werkzeug.datastructures import Headers +from werkzeug.datastructures import Headers, ImmutableMultiDict from localstack import config from localstack.aws.api import RequestContext from localstack.aws.api.s3 import ( AccessDenied, AuthorizationQueryParametersError, + InvalidArgument, SignatureDoesNotMatch, ) from localstack.aws.chain import HandlerChain from localstack.constants import TEST_AWS_ACCESS_KEY_ID, TEST_AWS_SECRET_ACCESS_KEY from localstack.http import Request, Response -from localstack.services.s3.utils import S3_VIRTUAL_HOST_FORWARDED_HEADER, uses_host_addressing +from localstack.services.s3.utils import ( + S3_VIRTUAL_HOST_FORWARDED_HEADER, + _create_invalid_argument_exc, + capitalize_header_name_from_snake_case, + uses_host_addressing, +) from localstack.utils.strings import to_bytes LOG = logging.getLogger(__name__) @@ -41,6 +49,19 @@ "X-Amz-Signature", ] + +SIGNATURE_V2_POST_FIELDS = [ + "signature", + "AWSAccessKeyId", +] + +SIGNATURE_V4_POST_FIELDS = [ + "x-amz-signature", + "x-amz-algorithm", + "x-amz-credential", + "x-amz-date", +] + # headers to blacklist from request_dict.signed_headers BLACKLISTED_HEADERS = ["X-Amz-Security-Token"] @@ -66,6 +87,10 @@ HOST_COMBINATION_REGEX = r"^(.*)(:[\d]{0,6})" PORT_REPLACEMENT = [":80", ":443", ":%s" % config.EDGE_PORT, ""] +# STS policy expiration date format +POLICY_EXPIRATION_FORMAT1 = "%Y-%m-%dT%H:%M:%SZ" +POLICY_EXPIRATION_FORMAT2 = "%Y-%m-%dT%H:%M:%S.%fZ" + class NotValidSigV4Signature(TypedDict): signature_provided: str @@ -404,9 +429,9 @@ def _prepare_request_for_sig_v4_signature( if header_low in IGNORED_SIGV4_HEADERS: continue if header_low not in signed_headers.lower(): - not_signed_headers.append(header) + not_signed_headers.append(header_low) if header_low in signed_headers: - signature_headers[header] = value + signature_headers[header_low] = value if not_signed_headers: ex: AccessDenied = create_access_denied_headers_not_signed(", ".join(not_signed_headers)) @@ -595,3 +620,101 @@ def _find_valid_signature_through_ports(context: RequestContext) -> FindSigV4Res # Return the last values returned by the loop, not sure which one we should select return None, exception + + +def validate_post_policy(request_form: ImmutableMultiDict) -> None: + """ + Validate the pre-signed POST with its policy contained + For now, only validates its expiration + SigV2: https://docs.aws.amazon.com/AmazonS3/latest/userguide/HTTPPOSTExamples.html + SigV4: 
https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-authentication-HTTPPOST.html + + :param request_form: the form data contained in the pre-signed POST request + :raises InvalidArgument, AccessDenied, SignatureDoesNotMatch + :return: None + """ + if not request_form.get("key"): + ex: InvalidArgument = _create_invalid_argument_exc( + message="Bucket POST must contain a field named 'key'. If it is specified, please check the order of the fields.", + name="key", + value="", + host_id=FAKE_HOST_ID, + ) + raise ex + + if not (policy := request_form.get("policy")): + # A POST request needs a policy unless the bucket is publicly writable + return + + # TODO: this only validates the presence of the signature fields for now + is_v4 = _is_match_with_signature_fields(request_form, SIGNATURE_V4_POST_FIELDS) + is_v2 = _is_match_with_signature_fields(request_form, SIGNATURE_V2_POST_FIELDS) + if not is_v2 and not is_v4: + ex: AccessDenied = AccessDenied("Access Denied") + ex.HostId = FAKE_HOST_ID + raise ex + + try: + policy_decoded = json.loads(base64.b64decode(policy).decode("utf-8")) + except ValueError: + # this means the policy has been tampered with + signature = request_form.get("signature") if is_v2 else request_form.get("x-amz-signature") + ex: SignatureDoesNotMatch = create_signature_does_not_match_sig_v2( + request_signature=signature, + string_to_sign=policy, + ) + raise ex + + if expiration := policy_decoded.get("expiration"): + if is_expired(_parse_policy_expiration_date(expiration)): + ex: AccessDenied = AccessDenied("Invalid according to Policy: Policy expired.") + ex.HostId = FAKE_HOST_ID + raise ex + + # TODO: validate the signature + # TODO: validate the request according to the policy + + +def _parse_policy_expiration_date(expiration_string: str) -> datetime.datetime: + """ + Parses the policy expiration datetime string + :param expiration_string: a policy expiration string; it can be in one of two formats: `2007-12-01T12:00:00.000Z` or + `2007-12-01T12:00:00Z` + :return: a datetime object representing the expiration datetime + """ + try: + dt = datetime.datetime.strptime(expiration_string, POLICY_EXPIRATION_FORMAT1) + except ValueError: + dt = datetime.datetime.strptime(expiration_string, POLICY_EXPIRATION_FORMAT2) + + # both date formats assume a UTC timezone ('Z' suffix), but it's not parsed as tzinfo into the datetime object + dt = dt.replace(tzinfo=datetime.timezone.utc) + return dt + + +def _is_match_with_signature_fields( + request_form: ImmutableMultiDict, signature_fields: List[str] +) -> bool: + """ + Checks if the form contains at least one of the fields passed in `signature_fields`. + If it contains at least one, validates that it contains all of them or raises InvalidArgument. + :param request_form: ImmutableMultiDict: the pre-signed POST request form + :param signature_fields: the fields we want to validate against + :raises InvalidArgument + :return: True if all of the fields are present, False if none of them are + """ + if any(p in request_form for p in signature_fields): + for p in signature_fields: + if p not in request_form: + LOG.info("POST pre-sign missing fields") + argument_name = capitalize_header_name_from_snake_case(p) if "-" in p else p + ex: InvalidArgument = _create_invalid_argument_exc( + message=f"Bucket POST must contain a field named '{argument_name}'. 
If it is specified, please check the order of the fields.", + name=argument_name, + value="", + host_id=FAKE_HOST_ID, + ) + raise ex + + return True + return False diff --git a/localstack/services/s3/provider.py b/localstack/services/s3/provider.py index 49b4a2a08501a..4ad7b46c0401f 100644 --- a/localstack/services/s3/provider.py +++ b/localstack/services/s3/provider.py @@ -1,15 +1,26 @@ import copy import logging import os -from urllib.parse import SplitResult, quote, urlsplit, urlunsplit +from typing import IO +from urllib.parse import ( + SplitResult, + parse_qs, + quote, + urlencode, + urlparse, + urlsplit, + urlunparse, + urlunsplit, +) import moto.s3.responses as moto_s3_responses from localstack.aws.accounts import get_aws_account_id -from localstack.aws.api import CommonServiceException, RequestContext, handler +from localstack.aws.api import CommonServiceException, RequestContext, ServiceException, handler from localstack.aws.api.s3 import ( AccessControlPolicy, AccountId, + Body, BucketName, ChecksumAlgorithm, CompleteMultipartUploadOutput, @@ -23,6 +34,7 @@ DeleteObjectRequest, DeleteObjectTaggingOutput, DeleteObjectTaggingRequest, + ETag, GetBucketAclOutput, GetBucketLifecycleConfigurationOutput, GetBucketLifecycleOutput, @@ -44,6 +56,7 @@ NoSuchWebsiteConfiguration, NotificationConfiguration, ObjectKey, + PostResponse, PutBucketAclRequest, PutBucketLifecycleConfigurationRequest, PutBucketLifecycleRequest, @@ -71,6 +84,7 @@ from localstack.services.s3.presigned_url import ( s3_presigned_url_request_handler, s3_presigned_url_response_handler, + validate_post_policy, ) from localstack.services.s3.utils import ( ALLOWED_HEADER_OVERRIDES, @@ -78,6 +92,7 @@ VALID_ACL_PREDEFINED_GROUPS, VALID_GRANTEE_PERMISSIONS, _create_invalid_argument_exc, + capitalize_header_name_from_snake_case, get_bucket_from_moto, get_header_name, get_key_from_moto_bucket, @@ -572,6 +587,68 @@ def delete_bucket_website( # does not raise error if the bucket did not have a config, will simply return self.get_store().bucket_website_configuration.pop(bucket, None) + def post_object( + self, context: RequestContext, bucket: BucketName, body: IO[Body] = None + ) -> PostResponse: + # see https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html + # TODO: signature validation is not implemented for pre-signed POST + # policy validation is not implemented either, except for expiration and mandatory fields + validate_post_policy(context.request.form) + + # Botocore has trouble parsing responses with a status code in the 3XX range; it interprets them as exceptions + # and then raises a nonsensical one with the wrong code + # We have to create and populate the response manually if that happens + try: + response: PostResponse = call_moto(context=context) + except ServiceException as e: + if e.status_code == 303: + # the parser did not succeed in parsing the moto response, so we start constructing the response ourselves + response = PostResponse(StatusCode=e.status_code) + else: + raise e + + key_name = context.request.form.get("key") + if "${filename}" in key_name: + key_name = key_name.replace("${filename}", context.request.files["file"].filename) + + moto_backend = get_moto_s3_backend(context) + key = get_key_from_moto_bucket( + get_bucket_from_moto(moto_backend, bucket=bucket), key=key_name + ) + # hacky way to set the etag in the headers as well: two locations for one value + response["ETagHeader"] = key.etag + + if response["StatusCode"] == 303: + # we need to create the redirect, as the parser could not return the 
moto-calculated one + try: + redirect = _create_redirect_for_post_request( + base_redirect=context.request.form["success_action_redirect"], + bucket=bucket, + key=key_name, + etag=key.etag, + ) + response["LocationHeader"] = redirect + except ValueError: + # If S3 cannot interpret the URL, it acts as if the field is not present. + response["StatusCode"] = 204 + + response["LocationHeader"] = response.get( + "LocationHeader", f"{get_full_default_bucket_location(bucket)}{key_name}" + ) + + if bucket in self.get_store().bucket_versioning_status: + response["VersionId"] = key.version_id + + if context.request.form.get("success_action_status") != "201": + return response + + response["ETag"] = key.etag + response["Bucket"] = bucket + response["Key"] = key_name + response["Location"] = response["LocationHeader"] + + return response + def add_custom_routes(self): # virtual-host style: https://bucket-name.s3.region-code.amazonaws.com/key-name # host_pattern_vhost_style = f"{bucket}.s3.
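
A note on the `_serialize_response` override above: the comment that "setting a body could also manipulate the headers" describes real werkzeug behavior, which LocalStack's `HttpResponse` builds on. Writing data onto a response silently rewrites its `Content-Length` header, which is exactly what a `HEAD` or redirect response must avoid. A minimal standalone sketch:

```python
from werkzeug.wrappers import Response

# a HEAD response should mirror the GET headers without carrying a body
response = Response(status=200)
response.headers["Content-Length"] = "1024"  # e.g. copied from the stored object's size

# serializing a payload anyway would overwrite the header we just set
response.set_data(b"<PostResponse>...</PostResponse>")
print(response.headers["Content-Length"])  # now 32 (the body length), not 1024
```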
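The policy that `validate_post_policy` checks is the base64-encoded JSON document carried in the form's `policy` field, and `_parse_policy_expiration_date` accepts the two timestamp formats shown in its docstring. A small round-trip sketch (bucket name, key prefix and dates are illustrative only):

```python
import base64
import datetime
import json

POLICY_EXPIRATION_FORMAT1 = "%Y-%m-%dT%H:%M:%SZ"
POLICY_EXPIRATION_FORMAT2 = "%Y-%m-%dT%H:%M:%S.%fZ"

# what the client signs and submits in the form's "policy" field
policy = {
    "expiration": "2007-12-01T12:00:00.000Z",  # FORMAT2; FORMAT1 would omit the millis
    "conditions": [{"bucket": "test-bucket"}, ["starts-with", "$key", "uploads/"]],
}
encoded = base64.b64encode(json.dumps(policy).encode("utf-8"))

# what validate_post_policy does with it (error handling elided)
decoded = json.loads(base64.b64decode(encoded))
try:
    dt = datetime.datetime.strptime(decoded["expiration"], POLICY_EXPIRATION_FORMAT1)
except ValueError:
    dt = datetime.datetime.strptime(decoded["expiration"], POLICY_EXPIRATION_FORMAT2)
dt = dt.replace(tzinfo=datetime.timezone.utc)  # 'Z' means UTC, but strptime drops it
print(dt < datetime.datetime.now(tz=datetime.timezone.utc))  # True: long expired
```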
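`_create_redirect_for_post_request` is called by `post_object` but is not part of this diff. Judging from the newly imported `urlparse`, `parse_qs`, `urlencode` and `urlunparse`, a helper along these lines would fit; the signature and query keys below are assumptions, not the PR's actual implementation:

```python
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse


def _create_redirect_for_post_request(
    base_redirect: str, bucket: str, key: str, etag: str
) -> str:
    """Append bucket/key/etag to the success_action_redirect URL, as S3 does."""
    parts = urlparse(base_redirect)
    if not parts.netloc:
        # assumed contract from the caller: an uninterpretable URL raises
        # ValueError, and the provider falls back to a plain 204 response
        raise ValueError(f"Invalid URL: {base_redirect}")
    query = parse_qs(parts.query)
    query.update({"bucket": bucket, "key": key, "etag": f'"{etag}"'})
    return urlunparse(parts._replace(query=urlencode(query, doseq=True)))
```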
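Finally, a browser-style upload that exercises the new PostObject path end to end. This sketch assumes a LocalStack instance on the default edge port with the usual test credentials:

```python
import boto3
import requests

s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:4566",  # default LocalStack edge port
    aws_access_key_id="test",
    aws_secret_access_key="test",
    region_name="us-east-1",
)
s3.create_bucket(Bucket="test-bucket")

# boto3 builds the policy, signs it (SigV4) and returns the ready-made form fields
post = s3.generate_presigned_post(
    Bucket="test-bucket",
    Key="uploads/${filename}",  # expanded server-side to the submitted file name
    Fields={"success_action_status": "201"},
    Conditions=[{"success_action_status": "201"}],
    ExpiresIn=60,
)

# the browser would submit these fields plus the file as multipart/form-data
resp = requests.post(
    post["url"],
    data=post["fields"],
    files={"file": ("hello.txt", b"hello world")},
)
print(resp.status_code)  # 201, with an XML PostResponse body (Location, Bucket, Key, ETag)
```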