From 182905f07ba38f6561f9653c2b411e9365594fcc Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Tue, 8 Aug 2023 03:49:59 +0200 Subject: [PATCH] implement S3 Website --- localstack/services/s3/v3/models.py | 1 + localstack/services/s3/v3/provider.py | 82 +++- localstack/services/s3/website_hosting.py | 547 +++++++++++----------- tests/aws/s3/test_s3.py | 40 +- 4 files changed, 357 insertions(+), 313 deletions(-) diff --git a/localstack/services/s3/v3/models.py b/localstack/services/s3/v3/models.py index b75cd176fe5d1..0b600ecfcae89 100644 --- a/localstack/services/s3/v3/models.py +++ b/localstack/services/s3/v3/models.py @@ -131,6 +131,7 @@ def __init__( self.notification_configuration = {} self.cors_rules = None self.lifecycle_rules = None + self.website_configuration = None self.intelligent_tiering_configurations = {} self.analytics_configurations = {} self.inventory_configurations = {} diff --git a/localstack/services/s3/v3/provider.py b/localstack/services/s3/v3/provider.py index 47bdb9c1cd01b..99a00c22d7500 100644 --- a/localstack/services/s3/v3/provider.py +++ b/localstack/services/s3/v3/provider.py @@ -61,6 +61,7 @@ GetBucketLocationOutput, GetBucketTaggingOutput, GetBucketVersioningOutput, + GetBucketWebsiteOutput, GetObjectAttributesOutput, GetObjectAttributesParts, GetObjectAttributesRequest, @@ -107,6 +108,7 @@ NoSuchLifecycleConfiguration, NoSuchTagSet, NoSuchUpload, + NoSuchWebsiteConfiguration, NotificationConfiguration, Object, ObjectIdentifier, @@ -142,8 +144,10 @@ UploadPartRequest, VersionIdMarker, VersioningConfiguration, + WebsiteConfiguration, ) from localstack.aws.handlers import preprocess_request, serve_custom_service_request_handlers +from localstack.services.edge import ROUTER from localstack.services.plugins import ServiceLifecycleHook from localstack.services.s3.codec import AwsChunkedDecoder from localstack.services.s3.constants import ARCHIVES_STORAGE_CLASSES, DEFAULT_BUCKET_ENCRYPTION @@ -193,7 +197,9 @@ validate_cors_configuration, validate_inventory_configuration, validate_lifecycle_configuration, + validate_website_configuration, ) +from localstack.services.s3.website_hosting import register_website_hosting_routes from localstack.utils.strings import to_str LOG = logging.getLogger(__name__) @@ -219,6 +225,7 @@ def __init__(self) -> None: def on_after_init(self): preprocess_request.append(self._cors_handler) serve_custom_service_request_handlers.append(s3_cors_request_handler) + register_website_hosting_routes(router=ROUTER) def on_before_stop(self): self._notification_dispatcher.shutdown() @@ -272,7 +279,7 @@ def _get_expiration_header( the case. We're caching it because it can change depending on the set rules on the bucket. We can't use `lru_cache` as the parameters needs to be hashable :param lifecycle_rules: the bucket LifecycleRules - :param moto_object: FakeKey from moto + :param s3_object: S3Object :param object_tags: the object tags :return: the Expiration header if there's a rule matching """ @@ -290,6 +297,20 @@ def _get_expiration_header( self._expiration_cache[bucket][s3_object.key] = expiration_header return expiration_header + def _get_cross_account_bucket( + self, context: RequestContext, bucket_name: BucketName + ) -> tuple[S3Store, S3Bucket]: + store = self.get_store(context.account_id, context.region) + if not (s3_bucket := store.buckets.get(bucket_name)): + if not (account_id := store.global_bucket_map.get(bucket_name)): + raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket_name) + + store = self.get_store(account_id, context.region) + if not (s3_bucket := store.buckets.get(bucket_name)): + raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket_name) + + return store, s3_bucket + @staticmethod def get_store(account_id: str, region_name: str) -> S3Store: # Use default account id for external access? would need an anonymous one @@ -446,10 +467,8 @@ def put_object( # object_lock_mode: ObjectLockMode = None, # object_lock_retain_until_date: ObjectLockRetainUntilDate = None, # object_lock_legal_hold_status: ObjectLockLegalHoldStatus = None, - store = self.get_store(context.account_id, context.region) bucket_name = request["Bucket"] - if not (s3_bucket := store.buckets.get(bucket_name)): - raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket_name) + store, s3_bucket = self._get_cross_account_bucket(context, bucket_name) if ( storage_class := request.get("StorageClass") @@ -579,12 +598,10 @@ def get_object( # part_number: PartNumber = None, # expected_bucket_owner: AccountId = None, - store = self.get_store(context.account_id, context.region) bucket_name = request["Bucket"] object_key = request["Key"] version_id = request.get("VersionId") - if not (s3_bucket := store.buckets.get(bucket_name)): - raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket_name) + store, s3_bucket = self._get_cross_account_bucket(context, bucket_name) # TODO implement PartNumber once multipart is done (being able to select only a Part) @@ -1090,9 +1107,7 @@ def list_objects( expected_bucket_owner: AccountId = None, optional_object_attributes: OptionalObjectAttributesList = None, ) -> ListObjectsOutput: - store = self.get_store(context.account_id, context.region) - if not (s3_bucket := store.buckets.get(bucket)): - raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + store, s3_bucket = self._get_cross_account_bucket(context, bucket) # TODO: URL encode keys (is it done already in serializer?) common_prefixes = set() @@ -1187,9 +1202,7 @@ def list_objects_v2( expected_bucket_owner: AccountId = None, optional_object_attributes: OptionalObjectAttributesList = None, ) -> ListObjectsV2Output: - store = self.get_store(context.account_id, context.region) - if not (s3_bucket := store.buckets.get(bucket)): - raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + store, s3_bucket = self._get_cross_account_bucket(context, bucket) if continuation_token and continuation_token == "": raise InvalidArgument("The continuation token provided is incorrect") @@ -1302,9 +1315,7 @@ def list_object_versions( request_payer: RequestPayer = None, optional_object_attributes: OptionalObjectAttributesList = None, ) -> ListObjectVersionsOutput: - store = self.get_store(context.account_id, context.region) - if not (s3_bucket := store.buckets.get(bucket)): - raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + store, s3_bucket = self._get_cross_account_bucket(context, bucket) # TODO: URL encode keys (is it done already in serializer?) common_prefixes = set() @@ -2545,6 +2556,45 @@ def delete_bucket_inventory_configuration( if not s3_bucket.inventory_configurations.pop(id, None): raise NoSuchConfiguration("The specified configuration does not exist.") + def get_bucket_website( + self, context: RequestContext, bucket: BucketName, expected_bucket_owner: AccountId = None + ) -> GetBucketWebsiteOutput: + store = self.get_store(context.account_id, context.region) + if not (s3_bucket := store.buckets.get(bucket)): + raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + + if not s3_bucket.website_configuration: + raise NoSuchWebsiteConfiguration( + "The specified bucket does not have a website configuration", + BucketName=bucket, + ) + return s3_bucket.website_configuration + + def put_bucket_website( + self, + context: RequestContext, + bucket: BucketName, + website_configuration: WebsiteConfiguration, + content_md5: ContentMD5 = None, + checksum_algorithm: ChecksumAlgorithm = None, + expected_bucket_owner: AccountId = None, + ) -> None: + store = self.get_store(context.account_id, context.region) + if not (s3_bucket := store.buckets.get(bucket)): + raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + + validate_website_configuration(website_configuration) + s3_bucket.website_configuration = website_configuration + + def delete_bucket_website( + self, context: RequestContext, bucket: BucketName, expected_bucket_owner: AccountId = None + ) -> None: + store = self.get_store(context.account_id, context.region) + if not (s3_bucket := store.buckets.get(bucket)): + raise NoSuchBucket("The specified bucket does not exist", BucketName=bucket) + # does not raise error if the bucket did not have a config, will simply return + s3_bucket.website_configuration = None + # ###### THIS ARE UNIMPLEMENTED METHODS TO ALLOW TESTING, DO NOT COUNT THEM AS DONE ###### # def delete_bucket_ownership_controls( diff --git a/localstack/services/s3/website_hosting.py b/localstack/services/s3/website_hosting.py index 5065a73e4a162..fde15662cc196 100644 --- a/localstack/services/s3/website_hosting.py +++ b/localstack/services/s3/website_hosting.py @@ -4,27 +4,23 @@ from typing import Callable, Dict, Optional, Union from urllib.parse import urlparse -from moto.s3.exceptions import MissingBucket -from moto.s3.models import FakeBucket, FakeKey from werkzeug.datastructures import Headers -from localstack.aws.accounts import get_aws_account_id from localstack.aws.api.s3 import ( BucketName, - NoSuchBucket, + ErrorDocument, + GetObjectOutput, NoSuchKey, NoSuchWebsiteConfiguration, ObjectKey, RoutingRule, RoutingRules, - WebsiteConfiguration, ) +from localstack.aws.connect import connect_to from localstack.aws.protocol.serializer import gen_amzn_requestid from localstack.constants import S3_STATIC_WEBSITE_HOSTNAME from localstack.http import Request, Response, Router from localstack.http.dispatcher import Handler -from localstack.services.s3.models import S3Store, get_moto_s3_backend, s3_stores -from localstack.utils.aws import aws_stack LOG = logging.getLogger(__name__) @@ -41,305 +37,302 @@ class NoSuchKeyFromErrorDocument(NoSuchKey): ErrorDocumentKey: Optional[ObjectKey] -def _get_bucket_from_moto(bucket: BucketName) -> FakeBucket: - # TODO: check authorization for buckets as well? would need to be public-read at least - # not enforced in the current provider - try: - return get_moto_s3_backend().get_bucket(bucket_name=bucket) - except MissingBucket: - ex = NoSuchBucket("The specified bucket does not exist") - ex.BucketName = bucket - raise ex - - -def _get_key_from_moto_bucket(moto_bucket: FakeBucket, key: ObjectKey) -> FakeKey: - return moto_bucket.keys.get(key) - - -def _get_store() -> S3Store: - return s3_stores[get_aws_account_id()][aws_stack.get_region()] - - -def _get_bucket_website_configuration(bucket: BucketName) -> WebsiteConfiguration: - """ - Retrieve the website configuration for the given bucket - :param bucket: the bucket name - :raises NoSuchWebsiteConfiguration if the bucket does not have a website config - :return: the WebsiteConfiguration of the bucket - """ - website_configuration = _get_store().bucket_website_configuration.get(bucket) - if not website_configuration: - ex = NoSuchWebsiteConfiguration( - "The specified bucket does not have a website configuration" - ) - ex.BucketName = bucket - raise ex - return website_configuration - - -def _website_handler( - request: Request, bucket_name: str, path: str = None, port: str = None -) -> Response: - """ - Tries to serve the key, and if an Exception is encountered, returns a generic response - This will allow to easily extend it to 403 exceptions - :param request: router Request object - :param bucket_name: str, bucket name - :param path: the path of the request - :param port: / - :return: Response object - """ - if request.method != "GET": - return Response( - _create_405_error_string(request.method, request_id=gen_amzn_requestid()), status=405 - ) - - try: - return _serve_key(request, bucket_name, path) - - except (NoSuchBucket, NoSuchWebsiteConfiguration, NoSuchKeyFromErrorDocument, NoSuchKey) as e: - resource_name = e.Key if hasattr(e, "Key") else e.BucketName - response_body = _create_404_error_string( - code=e.code, - message=e.message, - resource_name=resource_name, - request_id=gen_amzn_requestid(), - from_error_document=getattr(e, "ErrorDocumentKey", None), - ) - return Response(response_body, status=e.status_code) - - except Exception: - LOG.exception("Exception encountered while trying to serve s3-website at %s", request.url) - return Response(_create_500_error_string(), status=500) - - -def _serve_key(request: Request, bucket_name: BucketName, path: str = None) -> Response: - """ - Serves the S3 key as a website handler. It will match routing rules set in the configuration first, and redirect - the request if necessary. They are specific case for handling configured index, see the docs: - https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html - https://docs.aws.amazon.com/AmazonS3/latest/userguide/CustomErrorDocSupport.html - https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-page-redirect.html - :param request: Request object received by the router - :param bucket_name: bucket name contained in the host name - :param path: path of the request, corresponds to the S3 key - :return: Response object, either the key, a redirection or an error - """ - bucket = _get_bucket_from_moto(bucket=bucket_name) - headers = {} - - website_config = _get_bucket_website_configuration(bucket_name) - - redirection = website_config.get("RedirectAllRequestsTo") - if redirection: - parsed_url = urlparse(request.url) - redirect_to = request.url.replace(parsed_url.netloc, redirection["HostName"]) - if protocol := redirection.get("Protocol"): - redirect_to = redirect_to.replace(parsed_url.scheme, protocol) - - headers["Location"] = redirect_to - return Response("", status=301, headers=headers) - - key_name = path - routing_rules = website_config.get("RoutingRules") - # checks for prefix rules, before trying to get the key - if ( - key_name - and routing_rules - and (rule := _find_matching_rule(routing_rules, key_name=key_name)) - ): - redirect_response = _get_redirect_from_routing_rule(request, rule) - return redirect_response - - # if the URL ends with a trailing slash, try getting the index first - is_folder = request.url[-1] == "/" - if ( - not key_name or is_folder - ): # the path automatically remove the trailing slash, even with strict_slashes=False - index_key = website_config["IndexDocument"]["Suffix"] - key_name = f"{key_name}{index_key}" if key_name else index_key - - key = _get_key_from_moto_bucket(bucket, key_name) - if not key: - if not is_folder: - # try appending the index suffix in case we're accessing a "folder" without a trailing slash - index_key = website_config["IndexDocument"]["Suffix"] - key = _get_key_from_moto_bucket(bucket, f"{key_name}/{index_key}") - if key: - return Response("", status=302, headers={"Location": f"/{key_name}/"}) - - # checks for error code (and prefix) rules, after trying to get the key - if routing_rules and ( - rule := _find_matching_rule(routing_rules, key_name=key_name, error_code=404) +class S3WebsiteHostingHandler: + def __init__(self): + # TODO: once we implement ACLs, maybe revisit the way we use the client/verify the bucket/object's ACL + self.s3_client = connect_to().s3 + + def __call__( + self, request: Request, bucket_name: str, path: str = None, port: str = None + ) -> Response: + """ + Tries to serve the key, and if an Exception is encountered, returns a generic response + This will allow to easily extend it to 403 exceptions + :param request: router Request object + :param bucket_name: str, bucket name + :param path: the path of the request + :param port: / + :return: Response object + """ + if request.method != "GET": + return Response( + _create_405_error_string(request.method, request_id=gen_amzn_requestid()), + status=405, + ) + + try: + return self._serve_object(request, bucket_name, path) + + except (NoSuchKeyFromErrorDocument, NoSuchWebsiteConfiguration) as e: + resource_name = e.Key if hasattr(e, "Key") else e.BucketName + response_body = _create_404_error_string( + code=e.code, + message=e.message, + resource_name=resource_name, + request_id=gen_amzn_requestid(), + from_error_document=getattr(e, "ErrorDocumentKey", None), + ) + return Response(response_body, status=e.status_code) + + except self.s3_client.exceptions.ClientError as e: + error = e.response["Error"] + if error["Code"] not in ("NoSuchKey", "NoSuchBucket", "NoSuchWebsiteConfiguration"): + raise + + resource_name = error.get("Key", error.get("BucketName")) + response_body = _create_404_error_string( + code=error["Code"], + message=error["Message"], + resource_name=resource_name, + request_id=gen_amzn_requestid(), + from_error_document=getattr(e, "ErrorDocumentKey", None), + ) + return Response(response_body, status=e.response["ResponseMetadata"]["HTTPStatusCode"]) + + except Exception: + LOG.exception( + "Exception encountered while trying to serve s3-website at %s", request.url + ) + return Response(_create_500_error_string(), status=500) + + def _serve_object( + self, request: Request, bucket_name: BucketName, path: str = None + ) -> Response: + """ + Serves the S3 Object as a website handler. It will match routing rules set in the configuration first, + and redirect the request if necessary. They are specific case for handling configured index, see the docs: + https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html + https://docs.aws.amazon.com/AmazonS3/latest/userguide/CustomErrorDocSupport.html + https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-page-redirect.html + :param request: Request object received by the router + :param bucket_name: bucket name contained in the host name + :param path: path of the request, corresponds to the S3 Object key + :return: Response object, either the Object, a redirection or an error + """ + + website_config = self.s3_client.get_bucket_website(Bucket=bucket_name) + headers = {} + + redirection = website_config.get("RedirectAllRequestsTo") + if redirection: + parsed_url = urlparse(request.url) + redirect_to = request.url.replace(parsed_url.netloc, redirection["HostName"]) + if protocol := redirection.get("Protocol"): + redirect_to = redirect_to.replace(parsed_url.scheme, protocol) + + headers["Location"] = redirect_to + return Response("", status=301, headers=headers) + + object_key = path + routing_rules = website_config.get("RoutingRules") + # checks for prefix rules, before trying to get the key + if ( + object_key + and routing_rules + and (rule := self._find_matching_rule(routing_rules, object_key=object_key)) ): - redirect_response = _get_redirect_from_routing_rule(request, rule) + redirect_response = self._get_redirect_from_routing_rule(request, rule) return redirect_response - # tries to get the error document, otherwise raises NoSuchKey - response = _get_error_document( - website_config=website_config, - bucket=bucket, - missing_key=key_name, - ) - return response - - if key.website_redirect_location: - headers["Location"] = key.website_redirect_location - return Response("", status=301, headers=headers) - - if _check_if_headers(request.headers, key=key): - return Response("", status=304) - - headers = _get_response_headers_from_key(key) - return Response(key.value, headers=headers) - - -def _get_response_headers_from_key(key: FakeKey) -> Dict[str, str]: - """ - Get some header values from the key - :param key: the key name - :return: headers from the key to be part of the response - """ - response_headers = {} - if content_type := key.metadata.get("Content-Type"): - response_headers["Content-Type"] = content_type - if key.etag: - response_headers["etag"] = key.etag - - return response_headers - - -def _find_matching_rule( - routing_rules: RoutingRules, key_name: ObjectKey, error_code: int = None -) -> Union[RoutingRule, None]: - """ - Iterate over the routing rules set in the configuration, and return the first that match the key name and/or the - error code (in the 4XX range). - :param routing_rules: RoutingRules part of WebsiteConfiguration - :param key_name: - :param error_code: error code of the Response in the 4XX range - :return: a RoutingRule if matched, or None - """ - # TODO: we could separate rules depending in they have the HttpErrorCodeReturnedEquals field - # we would not try to match on them early, no need to iterate on them - # and iterate them over only if an exception is encountered - for rule in routing_rules: - if condition := rule.get("Condition"): - prefix = condition.get("KeyPrefixEquals") - return_http_code = condition.get("HttpErrorCodeReturnedEquals") - # if both prefix matching and http error matching conditions are set - if prefix and return_http_code: - if key_name.startswith(prefix) and error_code == int(return_http_code): - return rule - else: - # it must either match both or it does not apply - continue - # only prefix is set, but this should have been matched before the error - elif prefix and key_name.startswith(prefix): - return rule - elif return_http_code and error_code == int(return_http_code): - return rule - - else: - # if no Condition is set, the redirect is applied to all requests - return rule - - -def _get_redirect_from_routing_rule(request: Request, routing_rule: RoutingRule) -> Response: - """ - Return a redirect Response object created with the different parameters set in the RoutingRule - :param request: the original Request object received from the router - :param routing_rule: a RoutingRule from the WebsiteConfiguration - :return: a redirect Response - """ - parsed_url = urlparse(request.url) - redirect_to = request.url - redirect = routing_rule["Redirect"] - if host_name := redirect.get("HostName"): - redirect_to = redirect_to.replace(parsed_url.netloc, host_name) - if protocol := redirect.get("Protocol"): - redirect_to = redirect_to.replace(parsed_url.scheme, protocol) - if redirect_to_key := redirect.get("ReplaceKeyWith"): - redirect_to = redirect_to.replace(parsed_url.path, f"/{redirect_to_key}") - elif "ReplaceKeyPrefixWith" in redirect: # the value might be empty and it's a valid config - matched_prefix = routing_rule["Condition"].get("KeyPrefixEquals", "") - redirect_to = redirect_to.replace(matched_prefix, redirect.get("ReplaceKeyPrefixWith"), 1) - - return Response( - "", headers={"Location": redirect_to}, status=redirect.get("HttpRedirectCode", 301) - ) - - -def _get_error_document( - website_config: WebsiteConfiguration, bucket: FakeBucket, missing_key: ObjectKey -) -> Response: - """ - Either tries to get the - https://docs.aws.amazon.com/AmazonS3/latest/userguide/CustomErrorDocSupport.html - :param website_config: the bucket WebsiteConfiguration - :param bucket: the bucket object from moto - :param missing_key: the missing key not found in the bucket - :return: - """ - headers = {} - if error_document := website_config.get("ErrorDocument"): - # if an error document is configured, try to fetch the key + # if the URL ends with a trailing slash, try getting the index first + is_folder = request.url[-1] == "/" + if ( + not object_key or is_folder + ): # the path automatically remove the trailing slash, even with strict_slashes=False + index_key = website_config["IndexDocument"]["Suffix"] + object_key = f"{object_key}{index_key}" if object_key else index_key + + try: + s3_object = self.s3_client.get_object(Bucket=bucket_name, Key=object_key) + except self.s3_client.exceptions.NoSuchKey: + if not is_folder: + # try appending the index suffix in case we're accessing a "folder" without a trailing slash + index_key = website_config["IndexDocument"]["Suffix"] + try: + self.s3_client.head_object(Bucket=bucket_name, Key=f"{object_key}/{index_key}") + return Response("", status=302, headers={"Location": f"/{object_key}/"}) + except self.s3_client.exceptions.ClientError: + pass + + # checks for error code (and prefix) rules, after trying to get the key + if routing_rules and ( + rule := self._find_matching_rule( + routing_rules, object_key=object_key, error_code=404 + ) + ): + redirect_response = self._get_redirect_from_routing_rule(request, rule) + return redirect_response + + # tries to get the error document, otherwise raises NoSuchKey + if error_document := website_config.get("ErrorDocument"): + return self._return_error_document( + error_document=error_document, + bucket=bucket_name, + missing_key=object_key, + ) + else: + # If not ErrorDocument is configured, raise NoSuchKey + raise + + if website_redirect_location := s3_object.get("WebsiteRedirectLocation"): + headers["Location"] = website_redirect_location + return Response("", status=301, headers=headers) + + if self._check_if_headers(request.headers, s3_object=s3_object): + return Response("", status=304) + + headers = self._get_response_headers_from_object(s3_object) + return Response(s3_object["Body"], headers=headers) + + def _return_error_document( + self, + error_document: ErrorDocument, + bucket: BucketName, + missing_key: ObjectKey, + ) -> Response: + """ + Try to retrieve the configured ErrorDocument and return the response with its body + https://docs.aws.amazon.com/AmazonS3/latest/userguide/CustomErrorDocSupport.html + :param error_document: the ErrorDocument from the bucket WebsiteConfiguration + :param bucket: the bucket name + :param missing_key: the missing key not found in the bucket + :return: a Response, either a redirection or containing the Body of the ErrorDocument + :raises NoSuchKeyFromErrorDocument if the ErrorDocument is not found + """ + headers = {} error_key = error_document["Key"] - key = _get_key_from_moto_bucket(bucket, error_key) - if key: + try: + s3_object = self.s3_client.get_object(Bucket=bucket, Key=error_key) # if the key is found, return the key, or if that key has a redirect, return a redirect - error_body = key.value - if key.website_redirect_location: - headers["Location"] = key.website_redirect_location + + if website_redirect_location := s3_object.get("WebsiteRedirectLocation"): + headers["Location"] = website_redirect_location return Response("", status=301, headers=headers) - headers = _get_response_headers_from_key(key) - return Response(error_body, status=404, headers=headers) - else: - ex = NoSuchKeyFromErrorDocument("The specified key does not exist.") - ex.Key = missing_key - ex.ErrorDocumentKey = error_key - raise ex + headers = self._get_response_headers_from_object(s3_object) + return Response(s3_object["Body"], status=404, headers=headers) + + except self.s3_client.exceptions.NoSuchKey: + raise NoSuchKeyFromErrorDocument( + "The specified key does not exist.", + Key=missing_key, + ErrorDocumentKey=error_key, + ) + + @staticmethod + def _get_response_headers_from_object(get_object_response: GetObjectOutput) -> Dict[str, str]: + """ + Only return some headers from the S3 Object + :param get_object_response: the response from S3.GetObject + :return: headers from the object to be part of the response + """ + response_headers = {} + if content_type := get_object_response.get("ContentType"): + response_headers["Content-Type"] = content_type + if etag := get_object_response.get("ETag"): + response_headers["etag"] = etag + + return response_headers + + @staticmethod + def _check_if_headers(headers: Headers, s3_object: GetObjectOutput) -> bool: + # TODO: add other conditions here If-Modified-Since, etc etc + etag = s3_object.get("ETag") + # last_modified = s3_object.get("LastModified") # TODO + if "if-none-match" in headers and etag and etag in headers["if-none-match"]: + return True + + @staticmethod + def _find_matching_rule( + routing_rules: RoutingRules, object_key: ObjectKey, error_code: int = None + ) -> Union[RoutingRule, None]: + """ + Iterate over the routing rules set in the configuration, and return the first that match the key name and/or the + error code (in the 4XX range). + :param routing_rules: RoutingRules part of WebsiteConfiguration + :param object_key: ObjectKey + :param error_code: error code of the Response in the 4XX range + :return: a RoutingRule if matched, or None + """ + # TODO: we could separate rules depending in they have the HttpErrorCodeReturnedEquals field + # we would not try to match on them early, no need to iterate on them + # and iterate them over only if an exception is encountered + for rule in routing_rules: + if condition := rule.get("Condition"): + prefix = condition.get("KeyPrefixEquals") + return_http_code = condition.get("HttpErrorCodeReturnedEquals") + # if both prefix matching and http error matching conditions are set + if prefix and return_http_code: + if object_key.startswith(prefix) and error_code == int(return_http_code): + return rule + else: + # it must either match both or it does not apply + continue + # only prefix is set, but this should have been matched before the error + elif prefix and object_key.startswith(prefix): + return rule + elif return_http_code and error_code == int(return_http_code): + return rule - else: - ex = NoSuchKey("The specified key does not exist.") - ex.Key = missing_key - raise ex + else: + # if no Condition is set, the redirect is applied to all requests + return rule + @staticmethod + def _get_redirect_from_routing_rule(request: Request, routing_rule: RoutingRule) -> Response: + """ + Return a redirect Response object created with the different parameters set in the RoutingRule + :param request: the original Request object received from the router + :param routing_rule: a RoutingRule from the WebsiteConfiguration + :return: a redirect Response + """ + parsed_url = urlparse(request.url) + redirect_to = request.url + redirect = routing_rule["Redirect"] + if host_name := redirect.get("HostName"): + redirect_to = redirect_to.replace(parsed_url.netloc, host_name) + if protocol := redirect.get("Protocol"): + redirect_to = redirect_to.replace(parsed_url.scheme, protocol) + if redirect_to_key := redirect.get("ReplaceKeyWith"): + redirect_to = redirect_to.replace(parsed_url.path, f"/{redirect_to_key}") + elif "ReplaceKeyPrefixWith" in redirect: # the value might be empty and it's a valid config + matched_prefix = routing_rule["Condition"].get("KeyPrefixEquals", "") + redirect_to = redirect_to.replace( + matched_prefix, redirect.get("ReplaceKeyPrefixWith"), 1 + ) -def _check_if_headers(headers: Headers, key: FakeKey) -> bool: - # TODO: add other conditions here If-Modified-Since, etc etc - if "if-none-match" in headers and key.etag and key.etag in headers["if-none-match"]: - return True + return Response( + "", headers={"Location": redirect_to}, status=redirect.get("HttpRedirectCode", 301) + ) -def register_website_hosting_routes(router: Router[Handler]): +def register_website_hosting_routes( + router: Router[Handler], handler: S3WebsiteHostingHandler = None +): """ - Registers the S3 website hosting handlers into the given router. - + Registers the S3 website hosting handler into the given router. + :param handler: an S3WebsiteHosting handler :param router: the router to add the handlers into. """ + handler = handler or S3WebsiteHostingHandler() router.add( path="/", host=STATIC_WEBSITE_HOST_REGEX, - endpoint=_website_handler, + endpoint=handler, ) router.add( path="/", host=STATIC_WEBSITE_HOST_REGEX, - endpoint=_website_handler, + endpoint=handler, ) -def _remove_leading_whitespace(response: str) -> str: - return re.sub(_leading_whitespace_re, "", response) - - def _flatten_html_response(fn: Callable[[...], str]): @wraps(fn) def wrapper(*args, **kwargs) -> str: r = fn(*args, **kwargs) - return _remove_leading_whitespace(r) + # remove leading whitespace + return re.sub(_leading_whitespace_re, "", r) return wrapper diff --git a/tests/aws/s3/test_s3.py b/tests/aws/s3/test_s3.py index 808fff551f595..cc3454e597607 100644 --- a/tests/aws/s3/test_s3.py +++ b/tests/aws/s3/test_s3.py @@ -3330,7 +3330,7 @@ def test_s3_lambda_integration( presigned_url = json.loads(to_str(presigned_url))["body"].strip('"') response = requests.put(presigned_url, verify=False) - assert 200 == response.status_code + assert response.status_code == 200 response = aws_client.s3.head_object(Bucket=function_name, Key="key.png") snapshot.match("head_object", response) @@ -3878,7 +3878,7 @@ def test_s3_batch_delete_objects_using_requests_with_acl( snapshot.match("multi-delete-with-requests", response) response = aws_client.s3.list_objects(Bucket=s3_bucket) - assert 200 == response["ResponseMetadata"]["HTTPStatusCode"] + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 assert len(response["Contents"]) == 1 snapshot.match("list-remaining-objects", response) @@ -7397,10 +7397,10 @@ def test_s3_static_website_hosting(self, s3_bucket, aws_client, allow_bucket_acl # actual key url = f"{website_url}/actual/key.html" response = requests.get(url, verify=False) - assert 200 == response.status_code - assert "key" == response.text + assert response.status_code == 200 + assert response.text == "key" assert "content-type" in response.headers - assert "text/html" == response.headers["content-type"] + assert response.headers["content-type"] == "text/html" assert "etag" in response.headers assert actual_key_obj["ETag"] in response.headers["etag"] @@ -7408,52 +7408,52 @@ def test_s3_static_website_hosting(self, s3_bucket, aws_client, allow_bucket_acl response = requests.get( url, headers={"If-None-Match": actual_key_obj["ETag"]}, verify=False ) - assert 304 == response.status_code + assert response.status_code == 304 # key with specified content-type url = f"{website_url}/with-content-type/key.js" response = requests.get(url, verify=False) - assert 200 == response.status_code - assert "some js" == response.text + assert response.status_code == 200 + assert response.text == "some js" assert "content-type" in response.headers - assert "application/javascript; charset=utf-8" == response.headers["content-type"] + assert response.headers["content-type"] == "application/javascript; charset=utf-8" assert "etag" in response.headers - assert with_content_type_obj["ETag"] == response.headers["etag"] + assert response.headers["etag"] == with_content_type_obj["ETag"] # index document url = f"{website_url}/test" response = requests.get(url, verify=False) - assert 200 == response.status_code - assert "index" == response.text + assert response.status_code == 200 + assert response.text == "index" assert "content-type" in response.headers assert "text/html" in response.headers["content-type"] assert "etag" in response.headers - assert index_obj["ETag"] == response.headers["etag"] + assert response.headers["etag"] == index_obj["ETag"] # root path test url = f"{website_url}/" response = requests.get(url, verify=False) - assert 404 == response.status_code - assert "error" == response.text + assert response.status_code == 404 + assert response.text == "error" assert "content-type" in response.headers assert "text/html" in response.headers["content-type"] assert "etag" in response.headers - assert error_obj["ETag"] == response.headers["etag"] + assert response.headers["etag"] == error_obj["ETag"] # error document url = f"{website_url}/something" response = requests.get(url, verify=False) - assert 404 == response.status_code - assert "error" == response.text + assert response.status_code == 404 + assert response.text == "error" assert "content-type" in response.headers assert "text/html" in response.headers["content-type"] assert "etag" in response.headers - assert error_obj["ETag"] == response.headers["etag"] + assert response.headers["etag"] == error_obj["ETag"] # redirect object url = f"{website_url}/to-be-redirected.html" response = requests.get(url, verify=False, allow_redirects=False) - assert 301 == response.status_code + assert response.status_code == 301 assert "location" in response.headers assert "actual/key.html" in response.headers["location"]