From f17bba199af6badbf7451d42b37b85410329f2b6 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Mon, 19 May 2025 10:25:16 +0200 Subject: [PATCH 01/17] wip --- .../services/dynamodbstreams/provider.py | 78 +++++++++++++++++-- .../testing/snapshots/transformer_utility.py | 3 + tests/aws/services/dynamodb/test_dynamodb.py | 12 +-- .../dynamodb/test_dynamodb.snapshot.json | 10 +-- .../dynamodb/test_dynamodb.validation.json | 2 +- 5 files changed, 85 insertions(+), 20 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index fc8d0050c4ea6..5b125990ef5e0 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -24,6 +24,7 @@ TableName, ) from localstack.aws.connect import connect_to +from localstack.services.dynamodb.v2.provider import DynamoDBProvider from localstack.services.dynamodbstreams.dynamodbstreams_api import ( get_dynamodbstreams_store, get_kinesis_client, @@ -46,7 +47,41 @@ } +def get_original_region( + context: RequestContext, stream_arn: str | None = None, table_name: str | None = None +) -> str: + """ + In DDB Global tables, we forward all the requests to the original region, instead of really replicating the data. + Since each table has a separate stream associated, we need to have a similar forwarding logic for DDB Streams. + To determine the original region, we need the table name, that can be either provided here or determined from the + ARN of the stream. + """ + if not stream_arn and not table_name: + LOG.debug( + "No Stream ARN or table name provided. Returning region '%s' from the request", + context.region, + ) + return context.region + + table_name = table_name or table_name_from_stream_arn(stream_arn) + return DynamoDBProvider.get_global_table_region(context=context, table_name=table_name) + + +def replace_region_in_stream_arn(stream_arn: str | None, old_region: str, new_region: str) -> str: + if not stream_arn: + return + if old_region == new_region: + return stream_arn + s = stream_arn.replace(old_region, new_region) + return s + + class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): + shard_iterator_to_region: dict[str, str] + + def __init__(self) -> None: + self.shard_iterator_to_region = {} + def describe_stream( self, context: RequestContext, @@ -55,13 +90,14 @@ def describe_stream( exclusive_start_shard_id: ShardId = None, **kwargs, ) -> DescribeStreamOutput: - store = get_dynamodbstreams_store(context.account_id, context.region) - kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) + og_region = get_original_region(context=context, stream_arn=stream_arn) + store = get_dynamodbstreams_store(context.account_id, og_region) + kinesis = get_kinesis_client(account_id=context.account_id, region_name=og_region) for stream in store.ddb_streams.values(): if stream["StreamArn"] == stream_arn: # get stream details dynamodb = connect_to( - aws_access_key_id=context.account_id, region_name=context.region + aws_access_key_id=context.account_id, region_name=og_region ).dynamodb table_name = table_name_from_stream_arn(stream["StreamArn"]) stream_name = get_kinesis_stream_name(table_name) @@ -90,6 +126,10 @@ def describe_stream( stream["Shards"] = stream_shards stream_description = select_from_typed_dict(StreamDescription, stream) + if context.region != og_region: + stream_description["StreamArn"] = replace_region_in_stream_arn( + stream_description["StreamArn"], og_region, context.region + ) return DescribeStreamOutput(StreamDescription=stream_description) raise ResourceNotFoundException( @@ -98,8 +138,13 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) - prefix, _, payload["ShardIterator"] = payload["ShardIterator"].rpartition("|") + shard_iterator = payload["ShardIterator"] + region = context.region + if shard_iterator in self.shard_iterator_to_region: + region = self.shard_iterator_to_region.pop(shard_iterator) + + kinesis = get_kinesis_client(account_id=context.account_id, region_name=region) + prefix, _, payload["ShardIterator"] = shard_iterator.rpartition("|") try: kinesis_records = kinesis.get_records(**payload) except kinesis.exceptions.ExpiredIteratorException: @@ -125,8 +170,9 @@ def get_shard_iterator( **kwargs, ) -> GetShardIteratorOutput: stream_name = stream_name_from_stream_arn(stream_arn) + og_region = get_original_region(context=context, stream_arn=stream_arn) stream_shard_id = kinesis_shard_id(shard_id) - kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) + kinesis = get_kinesis_client(account_id=context.account_id, region_name=og_region) kwargs = {"StartingSequenceNumber": sequence_number} if sequence_number else {} result = kinesis.get_shard_iterator( @@ -138,6 +184,12 @@ def get_shard_iterator( del result["ResponseMetadata"] # TODO not quite clear what the |1| exactly denotes, because at AWS it's sometimes other numbers result["ShardIterator"] = f"{stream_arn}|1|{result['ShardIterator']}" + + # In case we are dealing with a stream for a replicated table, we need to keep track of the real region + # of the shard iterator, in order to be able to retrieve the record later. + if context.region != og_region: + self.shard_iterator_to_region[result["ShardIterator"]] = og_region + return GetShardIteratorOutput(**result) def list_streams( @@ -148,8 +200,18 @@ def list_streams( exclusive_start_stream_arn: StreamArn = None, **kwargs, ) -> ListStreamsOutput: - store = get_dynamodbstreams_store(context.account_id, context.region) + og_region = get_original_region(context=context, table_name=table_name) + store = get_dynamodbstreams_store(context.account_id, og_region) result = [select_from_typed_dict(Stream, res) for res in store.ddb_streams.values()] if table_name: - result = [res for res in result if res["TableName"] == table_name] + result: list[Stream] = [res for res in result if res["TableName"] == table_name] + # If this is a stream from a table replica, we need to change the region in the Stream ARN + if context.region != og_region: + for stream in result: + stream["StreamArn"] = replace_region_in_stream_arn( + stream_arn=stream["StreamArn"], + old_region=og_region, + new_region=context.region, + ) + return ListStreamsOutput(Streams=result) diff --git a/localstack-core/localstack/testing/snapshots/transformer_utility.py b/localstack-core/localstack/testing/snapshots/transformer_utility.py index 7d2d73c844dbb..562cc9e097646 100644 --- a/localstack-core/localstack/testing/snapshots/transformer_utility.py +++ b/localstack-core/localstack/testing/snapshots/transformer_utility.py @@ -327,6 +327,9 @@ def dynamodb_api(): @staticmethod def dynamodb_streams_api(): return [ + RegexTransformer( + r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}$", replacement="" + ), TransformerUtility.key_value("TableName"), TransformerUtility.key_value("TableStatus"), TransformerUtility.key_value("LatestStreamLabel"), diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index c4a2efc227618..1caf8d6972f6d 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1138,8 +1138,9 @@ def test_global_tables_version_2019( assert "Replicas" not in response["Table"] @markers.aws.validated - @pytest.mark.skipif( - condition=not is_aws_cloud(), reason="Streams do not work on the regional replica" + # The stream label on the replica and replicated stream are the same. The region changes accordingly in the ARN + @markers.snapshot.skip_snapshot_verify( + paths=["$..Streams..StreamArn", "$..Streams..StreamLabel"] ) def test_streams_on_global_tables( self, @@ -1149,6 +1150,7 @@ def test_streams_on_global_tables( snapshot, region_name, secondary_region_name, + dynamodbstreams_snapshot_transformers, ): """ This test exposes an issue in LocalStack with Global tables and streams. In AWS, each regional replica should @@ -1158,9 +1160,6 @@ def test_streams_on_global_tables( region_1_factory = aws_client_factory(region_name=region_name) region_2_factory = aws_client_factory(region_name=secondary_region_name) snapshot.add_transformer(snapshot.transform.regex(secondary_region_name, "")) - snapshot.add_transformer( - snapshot.transform.jsonpath("$..Streams..StreamLabel", "stream-label") - ) # Create table in the original region table_name = f"table-{short_uid()}" @@ -1195,10 +1194,11 @@ def test_streams_on_global_tables( us_streams = region_1_factory.dynamodbstreams.list_streams(TableName=table_name) snapshot.match("region-streams", us_streams) - # FIXME: LS doesn't have a stream on the replica region eu_streams = region_2_factory.dynamodbstreams.list_streams(TableName=table_name) snapshot.match("secondary-region-streams", eu_streams) + # TODO: use the stream + @markers.aws.only_localstack def test_global_tables(self, aws_client, ddb_test_table): dynamodb = aws_client.dynamodb diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index ad40bf18e7c05..32ba8230fc8d4 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,14 +1730,14 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "15-05-2025, 13:42:48", + "recorded-date": "19-05-2025, 08:18:49", "recorded-content": { "region-streams": { "Streams": [ { - "StreamArn": "arn::dynamodb::111111111111:table//stream/", + "StreamArn": "arn::dynamodb::111111111111:table//stream/", "StreamLabel": "", - "TableName": "" + "TableName": "" } ], "ResponseMetadata": { @@ -1748,9 +1748,9 @@ "secondary-region-streams": { "Streams": [ { - "StreamArn": "arn::dynamodb::111111111111:table//stream/", + "StreamArn": "arn::dynamodb::111111111111:table//stream/", "StreamLabel": "", - "TableName": "" + "TableName": "" } ], "ResponseMetadata": { diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index d56f13b218112..3ea242d15a91f 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-15T13:42:45+00:00" + "last_validated_date": "2025-05-19T08:18:47+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From 30da251b1d0aa1e86afcecdd889a89e4e6e48796 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Mon, 19 May 2025 17:47:27 +0200 Subject: [PATCH 02/17] wip --- .../localstack/services/dynamodb/provider.py | 23 ++++++++++ .../services/dynamodb/v2/provider.py | 23 ++++++++++ .../services/dynamodbstreams/provider.py | 45 ++++--------------- tests/aws/services/dynamodb/test_dynamodb.py | 33 +++++++++++--- .../dynamodb/test_dynamodb.snapshot.json | 2 +- .../dynamodb/test_dynamodb.validation.json | 2 +- 6 files changed, 83 insertions(+), 45 deletions(-) diff --git a/localstack-core/localstack/services/dynamodb/provider.py b/localstack-core/localstack/services/dynamodb/provider.py index 407e6400414ca..afdc49fac26d0 100644 --- a/localstack-core/localstack/services/dynamodb/provider.py +++ b/localstack-core/localstack/services/dynamodb/provider.py @@ -147,9 +147,11 @@ from localstack.state import AssetDirectory, StateVisitor from localstack.utils.aws import arns from localstack.utils.aws.arns import ( + dynamodb_stream_arn, extract_account_id_from_arn, extract_region_from_arn, get_partition, + parse_arn, ) from localstack.utils.aws.aws_stack import get_valid_regions_for_service from localstack.utils.aws.request_context import ( @@ -803,6 +805,11 @@ def describe_table( # Terraform depends on this parity for update operations gsi["ProvisionedThroughput"] = default_values | gsi.get("ProvisionedThroughput", {}) + if global_table_region != context.region: + table_description["LatestStreamArn"] = self.get_stream_for_region( + table_description["LatestStreamArn"], context.region + ) + return DescribeTableOutput( Table=select_from_typed_dict(TableDescription, table_description) ) @@ -1761,6 +1768,22 @@ def get_global_table_region(context: RequestContext, table_name: str) -> str: return context.region + @staticmethod + def get_stream_for_region(stream_arn: str, region: str) -> str: + """ + Return the ARN of a DynamoDB Stream with a modified region. This is needed when we are dealing with global + tables, as the stream is kept in a single copy in the originating region. + """ + arn_data = parse_arn(stream_arn) + # Note: a resource has the following format in a DynamoDB Stream ARN: table//stream/ + resource_splits = arn_data["resource"].split("/") + return dynamodb_stream_arn( + table_name=resource_splits[1], + latest_stream_label=resource_splits[-1], + account_id=arn_data["account"], + region_name=region, + ) + @staticmethod def prepare_request_headers(headers: Dict, account_id: str, region_name: str): """ diff --git a/localstack-core/localstack/services/dynamodb/v2/provider.py b/localstack-core/localstack/services/dynamodb/v2/provider.py index f6dee3a68e854..8efc94892bf67 100644 --- a/localstack-core/localstack/services/dynamodb/v2/provider.py +++ b/localstack-core/localstack/services/dynamodb/v2/provider.py @@ -126,9 +126,11 @@ from localstack.state import AssetDirectory, StateVisitor from localstack.utils.aws import arns from localstack.utils.aws.arns import ( + dynamodb_stream_arn, extract_account_id_from_arn, extract_region_from_arn, get_partition, + parse_arn, ) from localstack.utils.aws.aws_stack import get_valid_regions_for_service from localstack.utils.aws.request_context import ( @@ -643,6 +645,11 @@ def describe_table( # Terraform depends on this parity for update operations gsi["ProvisionedThroughput"] = default_values | gsi.get("ProvisionedThroughput", {}) + if global_table_region != context.region: + table_description["LatestStreamArn"] = self.get_stream_for_region( + table_description["LatestStreamArn"], context.region + ) + return DescribeTableOutput( Table=select_from_typed_dict(TableDescription, table_description) ) @@ -1311,6 +1318,22 @@ def get_global_table_region(context: RequestContext, table_name: str) -> str: return context.region + @staticmethod + def get_stream_for_region(stream_arn: str, region: str) -> str: + """ + Return the ARN of a DynamoDB Stream with a modified region. This is needed when we are dealing with global + tables, as the stream is kept in a single copy in the originating region. + """ + arn_data = parse_arn(stream_arn) + # Note: a resource has the following format in a DynamoDB Stream ARN: table//stream/ + resource_splits = arn_data["resource"].split("/") + return dynamodb_stream_arn( + table_name=resource_splits[1], + latest_stream_label=resource_splits[-1], + account_id=arn_data["account"], + region_name=region, + ) + @staticmethod def prepare_request_headers(headers: Dict, account_id: str, region_name: str): """ diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index 5b125990ef5e0..b598df7e35daf 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -67,21 +67,7 @@ def get_original_region( return DynamoDBProvider.get_global_table_region(context=context, table_name=table_name) -def replace_region_in_stream_arn(stream_arn: str | None, old_region: str, new_region: str) -> str: - if not stream_arn: - return - if old_region == new_region: - return stream_arn - s = stream_arn.replace(old_region, new_region) - return s - - class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): - shard_iterator_to_region: dict[str, str] - - def __init__(self) -> None: - self.shard_iterator_to_region = {} - def describe_stream( self, context: RequestContext, @@ -94,7 +80,10 @@ def describe_stream( store = get_dynamodbstreams_store(context.account_id, og_region) kinesis = get_kinesis_client(account_id=context.account_id, region_name=og_region) for stream in store.ddb_streams.values(): - if stream["StreamArn"] == stream_arn: + _stream_arn = stream_arn + if context.region != og_region: + _stream_arn = DynamoDBProvider.get_stream_for_region(_stream_arn, og_region) + if stream["StreamArn"] == _stream_arn: # get stream details dynamodb = connect_to( aws_access_key_id=context.account_id, region_name=og_region @@ -126,10 +115,7 @@ def describe_stream( stream["Shards"] = stream_shards stream_description = select_from_typed_dict(StreamDescription, stream) - if context.region != og_region: - stream_description["StreamArn"] = replace_region_in_stream_arn( - stream_description["StreamArn"], og_region, context.region - ) + stream_description["StreamArn"] = _stream_arn return DescribeStreamOutput(StreamDescription=stream_description) raise ResourceNotFoundException( @@ -138,13 +124,8 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - shard_iterator = payload["ShardIterator"] - region = context.region - if shard_iterator in self.shard_iterator_to_region: - region = self.shard_iterator_to_region.pop(shard_iterator) - - kinesis = get_kinesis_client(account_id=context.account_id, region_name=region) - prefix, _, payload["ShardIterator"] = shard_iterator.rpartition("|") + kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) + prefix, _, payload["ShardIterator"] = payload["ShardIterator"].rpartition("|") try: kinesis_records = kinesis.get_records(**payload) except kinesis.exceptions.ExpiredIteratorException: @@ -184,12 +165,6 @@ def get_shard_iterator( del result["ResponseMetadata"] # TODO not quite clear what the |1| exactly denotes, because at AWS it's sometimes other numbers result["ShardIterator"] = f"{stream_arn}|1|{result['ShardIterator']}" - - # In case we are dealing with a stream for a replicated table, we need to keep track of the real region - # of the shard iterator, in order to be able to retrieve the record later. - if context.region != og_region: - self.shard_iterator_to_region[result["ShardIterator"]] = og_region - return GetShardIteratorOutput(**result) def list_streams( @@ -208,10 +183,8 @@ def list_streams( # If this is a stream from a table replica, we need to change the region in the Stream ARN if context.region != og_region: for stream in result: - stream["StreamArn"] = replace_region_in_stream_arn( - stream_arn=stream["StreamArn"], - old_region=og_region, - new_region=context.region, + stream["StreamArn"] = DynamoDBProvider.get_stream_for_region( + stream["StreamArn"], context.region ) return ListStreamsOutput(Streams=result) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index 1caf8d6972f6d..fa473d6ebbd27 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1145,7 +1145,7 @@ def test_global_tables_version_2019( def test_streams_on_global_tables( self, aws_client_factory, - dynamodb_wait_for_table_active, + wait_for_dynamodb_stream_ready, cleanups, snapshot, region_name, @@ -1164,7 +1164,7 @@ def test_streams_on_global_tables( # Create table in the original region table_name = f"table-{short_uid()}" snapshot.add_transformer(snapshot.transform.regex(table_name, "")) - region_1_factory.dynamodb.create_table( + table = region_1_factory.dynamodb.create_table( TableName=table_name, KeySchema=[ {"AttributeName": "Artist", "KeyType": "HASH"}, @@ -1192,12 +1192,31 @@ def test_streams_on_global_tables( waiter = region_2_factory.dynamodb.get_waiter("table_exists") waiter.wait(TableName=table_name, WaiterConfig={"Delay": WAIT_SEC, "MaxAttempts": 20}) - us_streams = region_1_factory.dynamodbstreams.list_streams(TableName=table_name) - snapshot.match("region-streams", us_streams) - eu_streams = region_2_factory.dynamodbstreams.list_streams(TableName=table_name) - snapshot.match("secondary-region-streams", eu_streams) + with pytest.raises(ClientError): + region_2_factory.dynamodb.update_table( + TableName=table_name, + StreamSpecification=StreamSpecification( + StreamEnabled=True, StreamViewType=StreamViewType.NEW_AND_OLD_IMAGES + ), + ) - # TODO: use the stream + stream_arn = table["TableDescription"]["LatestStreamArn"] + wait_for_dynamodb_stream_ready(stream_arn=stream_arn) + + stream_arn_region = region_1_factory.dynamodb.describe_table(TableName=table_name)["Table"][ + "LatestStreamArn" + ] + assert region_name in stream_arn_region + stream_arn_secondary_region = region_2_factory.dynamodb.describe_table( + TableName=table_name + )["Table"]["LatestStreamArn"] + assert secondary_region_name in stream_arn_secondary_region + + # Verify that we can list streams on both regions + streams_region_1 = region_1_factory.dynamodbstreams.list_streams(TableName=table_name) + snapshot.match("region-streams", streams_region_1) + streams_region_2 = region_2_factory.dynamodbstreams.list_streams(TableName=table_name) + snapshot.match("secondary-region-streams", streams_region_2) @markers.aws.only_localstack def test_global_tables(self, aws_client, ddb_test_table): diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index 32ba8230fc8d4..f90cac9c74f82 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,7 +1730,7 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "19-05-2025, 08:18:49", + "recorded-date": "19-05-2025, 15:46:27", "recorded-content": { "region-streams": { "Streams": [ diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index 3ea242d15a91f..4cf226e062b4e 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-19T08:18:47+00:00" + "last_validated_date": "2025-05-19T15:46:25+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From 5b773b31481c0c1832bb5da96d8e48533bd019c4 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Mon, 19 May 2025 18:02:01 +0200 Subject: [PATCH 03/17] minor --- tests/aws/services/dynamodb/test_dynamodb.py | 7 +++++-- tests/aws/services/dynamodb/test_dynamodb.snapshot.json | 2 +- tests/aws/services/dynamodb/test_dynamodb.validation.json | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index fa473d6ebbd27..8de8359d83b7a 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1138,7 +1138,8 @@ def test_global_tables_version_2019( assert "Replicas" not in response["Table"] @markers.aws.validated - # The stream label on the replica and replicated stream are the same. The region changes accordingly in the ARN + # The stream label on the replica and replicated stream are the same (while they differ on AWS). + # The region changes accordingly in the ARN. We test this with assertions. @markers.snapshot.skip_snapshot_verify( paths=["$..Streams..StreamArn", "$..Streams..StreamLabel"] ) @@ -1150,7 +1151,7 @@ def test_streams_on_global_tables( snapshot, region_name, secondary_region_name, - dynamodbstreams_snapshot_transformers, + dynamodbstreams_snapshot_transformers ): """ This test exposes an issue in LocalStack with Global tables and streams. In AWS, each regional replica should @@ -1215,8 +1216,10 @@ def test_streams_on_global_tables( # Verify that we can list streams on both regions streams_region_1 = region_1_factory.dynamodbstreams.list_streams(TableName=table_name) snapshot.match("region-streams", streams_region_1) + assert region_name in streams_region_1["Streams"][0]["StreamArn"] streams_region_2 = region_2_factory.dynamodbstreams.list_streams(TableName=table_name) snapshot.match("secondary-region-streams", streams_region_2) + assert secondary_region_name in streams_region_2["Streams"][0]["StreamArn"] @markers.aws.only_localstack def test_global_tables(self, aws_client, ddb_test_table): diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index f90cac9c74f82..718e4614d7ca3 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,7 +1730,7 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "19-05-2025, 15:46:27", + "recorded-date": "19-05-2025, 16:01:40", "recorded-content": { "region-streams": { "Streams": [ diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index 4cf226e062b4e..0c9e085c476a4 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-19T15:46:25+00:00" + "last_validated_date": "2025-05-19T16:01:38+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From 1eb4524b3ab7379ba9c77cbb662b4b00e2e4e1cf Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Mon, 19 May 2025 18:10:25 +0200 Subject: [PATCH 04/17] lint --- tests/aws/services/dynamodb/test_dynamodb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index 8de8359d83b7a..de8b219f533da 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1151,7 +1151,7 @@ def test_streams_on_global_tables( snapshot, region_name, secondary_region_name, - dynamodbstreams_snapshot_transformers + dynamodbstreams_snapshot_transformers, ): """ This test exposes an issue in LocalStack with Global tables and streams. In AWS, each regional replica should From b3309cc23b08317c96bad5349792493ff6094c78 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Tue, 20 May 2025 13:34:41 +0200 Subject: [PATCH 05/17] remove not-needed code --- localstack-core/localstack/services/dynamodb/provider.py | 5 ----- localstack-core/localstack/services/dynamodb/v2/provider.py | 5 ----- 2 files changed, 10 deletions(-) diff --git a/localstack-core/localstack/services/dynamodb/provider.py b/localstack-core/localstack/services/dynamodb/provider.py index afdc49fac26d0..a02d5b86e7991 100644 --- a/localstack-core/localstack/services/dynamodb/provider.py +++ b/localstack-core/localstack/services/dynamodb/provider.py @@ -805,11 +805,6 @@ def describe_table( # Terraform depends on this parity for update operations gsi["ProvisionedThroughput"] = default_values | gsi.get("ProvisionedThroughput", {}) - if global_table_region != context.region: - table_description["LatestStreamArn"] = self.get_stream_for_region( - table_description["LatestStreamArn"], context.region - ) - return DescribeTableOutput( Table=select_from_typed_dict(TableDescription, table_description) ) diff --git a/localstack-core/localstack/services/dynamodb/v2/provider.py b/localstack-core/localstack/services/dynamodb/v2/provider.py index 8efc94892bf67..75a2269098e8d 100644 --- a/localstack-core/localstack/services/dynamodb/v2/provider.py +++ b/localstack-core/localstack/services/dynamodb/v2/provider.py @@ -645,11 +645,6 @@ def describe_table( # Terraform depends on this parity for update operations gsi["ProvisionedThroughput"] = default_values | gsi.get("ProvisionedThroughput", {}) - if global_table_region != context.region: - table_description["LatestStreamArn"] = self.get_stream_for_region( - table_description["LatestStreamArn"], context.region - ) - return DescribeTableOutput( Table=select_from_typed_dict(TableDescription, table_description) ) From 0b2013f6f2a514b5b131f469b489c4902737ab24 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Tue, 20 May 2025 18:21:09 +0200 Subject: [PATCH 06/17] v2 stream provider --- .../dynamodbstreams/dynamodbstreams_api.py | 22 ++++++++++++++++ .../services/dynamodbstreams/provider.py | 21 +--------------- .../services/dynamodbstreams/v2/provider.py | 25 ++++++++++++++++--- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/dynamodbstreams_api.py b/localstack-core/localstack/services/dynamodbstreams/dynamodbstreams_api.py index 84079dbbf3d6f..e9164465fdd57 100644 --- a/localstack-core/localstack/services/dynamodbstreams/dynamodbstreams_api.py +++ b/localstack-core/localstack/services/dynamodbstreams/dynamodbstreams_api.py @@ -5,8 +5,10 @@ from bson.json_util import dumps from localstack import config +from localstack.aws.api import RequestContext from localstack.aws.api.dynamodbstreams import StreamStatus, StreamViewType, TableName from localstack.aws.connect import connect_to +from localstack.services.dynamodb.v2.provider import DynamoDBProvider from localstack.services.dynamodbstreams.models import DynamoDbStreamsStore, dynamodbstreams_stores from localstack.utils.aws import arns, resources from localstack.utils.common import now_utc @@ -211,3 +213,23 @@ def get_shard_id(stream: Dict, kinesis_shard_id: str) -> str: stream["shards_id_map"][kinesis_shard_id] = ddb_stream_shard_id return ddb_stream_shard_id + + +def get_original_region( + context: RequestContext, stream_arn: str | None = None, table_name: str | None = None +) -> str: + """ + In DDB Global tables, we forward all the requests to the original region, instead of really replicating the data. + Since each table has a separate stream associated, we need to have a similar forwarding logic for DDB Streams. + To determine the original region, we need the table name, that can be either provided here or determined from the + ARN of the stream. + """ + if not stream_arn and not table_name: + LOG.debug( + "No Stream ARN or table name provided. Returning region '%s' from the request", + context.region, + ) + return context.region + + table_name = table_name or table_name_from_stream_arn(stream_arn) + return DynamoDBProvider.get_global_table_region(context=context, table_name=table_name) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index b598df7e35daf..9a47db34f1715 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -29,6 +29,7 @@ get_dynamodbstreams_store, get_kinesis_client, get_kinesis_stream_name, + get_original_region, get_shard_id, kinesis_shard_id, stream_name_from_stream_arn, @@ -47,26 +48,6 @@ } -def get_original_region( - context: RequestContext, stream_arn: str | None = None, table_name: str | None = None -) -> str: - """ - In DDB Global tables, we forward all the requests to the original region, instead of really replicating the data. - Since each table has a separate stream associated, we need to have a similar forwarding logic for DDB Streams. - To determine the original region, we need the table name, that can be either provided here or determined from the - ARN of the stream. - """ - if not stream_arn and not table_name: - LOG.debug( - "No Stream ARN or table name provided. Returning region '%s' from the request", - context.region, - ) - return context.region - - table_name = table_name or table_name_from_stream_arn(stream_arn) - return DynamoDBProvider.get_global_table_region(context=context, table_name=table_name) - - class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): def describe_stream( self, diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 5f6a86150b315..dc4ed85d415be 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -15,7 +15,8 @@ ) from localstack.services.dynamodb.server import DynamodbServer from localstack.services.dynamodb.utils import modify_ddblocal_arns -from localstack.services.dynamodb.v2.provider import DynamoDBProvider +from localstack.services.dynamodb.v2.provider import DynamoDBProvider, modify_context_region +from localstack.services.dynamodbstreams.dynamodbstreams_api import get_original_region from localstack.services.plugins import ServiceLifecycleHook from localstack.utils.aws.arns import parse_arn @@ -33,6 +34,14 @@ def on_after_init(self): def on_before_start(self): self.server.start_dynamodb() + def _forward_request( + self, context: RequestContext, region: str | None, service_request: ServiceRequest + ) -> ServiceResponse: + if region: + with modify_context_region(context, region): + return self.forward_request(context, service_request=service_request) + return self.forward_request(context, service_request=service_request) + def forward_request( self, context: RequestContext, service_request: ServiceRequest = None ) -> ServiceResponse: @@ -55,12 +64,19 @@ def describe_stream( context: RequestContext, payload: DescribeStreamInput, ) -> DescribeStreamOutput: + global_table_region = get_original_region(context=context, stream_arn=payload["StreamArn"]) request = payload.copy() request["StreamArn"] = self.modify_stream_arn_for_ddb_local(request.get("StreamArn", "")) - return self.forward_request(context, request) + return self._forward_request( + context=context, service_request=request, region=global_table_region + ) @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: + # Limitation note: With this current implementation, we are not able to get the records from a stream of a + # replicated table. To do so, we would need to kept track of the emitted ShardIterators and the originating + # region in `GetShardIterator`. + request = payload.copy() request["ShardIterator"] = self.modify_stream_arn_for_ddb_local( request.get("ShardIterator", "") @@ -77,5 +93,8 @@ def get_shard_iterator( @handler("ListStreams", expand=False) def list_streams(self, context: RequestContext, payload: ListStreamsInput) -> ListStreamsOutput: + global_table_region = get_original_region(context=context, stream_arn=payload["TableName"]) # TODO: look into `ExclusiveStartStreamArn` param - return self.forward_request(context, payload) + return self._forward_request( + context=context, service_request=payload, region=global_table_region + ) From 950b5ae06ff99bf13ea207ed24031a82c37233e2 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Wed, 21 May 2025 19:03:37 +0200 Subject: [PATCH 07/17] minor --- .../localstack/services/dynamodb/provider.py | 18 ---------- .../localstack/services/dynamodb/utils.py | 34 ++++++++++++++++++- .../services/dynamodb/v2/provider.py | 18 ---------- .../services/dynamodbstreams/provider.py | 13 ++++--- .../services/dynamodbstreams/v2/provider.py | 6 ++-- .../dynamodb/test_dynamodb.snapshot.json | 2 +- .../dynamodb/test_dynamodb.validation.json | 2 +- 7 files changed, 47 insertions(+), 46 deletions(-) diff --git a/localstack-core/localstack/services/dynamodb/provider.py b/localstack-core/localstack/services/dynamodb/provider.py index a02d5b86e7991..407e6400414ca 100644 --- a/localstack-core/localstack/services/dynamodb/provider.py +++ b/localstack-core/localstack/services/dynamodb/provider.py @@ -147,11 +147,9 @@ from localstack.state import AssetDirectory, StateVisitor from localstack.utils.aws import arns from localstack.utils.aws.arns import ( - dynamodb_stream_arn, extract_account_id_from_arn, extract_region_from_arn, get_partition, - parse_arn, ) from localstack.utils.aws.aws_stack import get_valid_regions_for_service from localstack.utils.aws.request_context import ( @@ -1763,22 +1761,6 @@ def get_global_table_region(context: RequestContext, table_name: str) -> str: return context.region - @staticmethod - def get_stream_for_region(stream_arn: str, region: str) -> str: - """ - Return the ARN of a DynamoDB Stream with a modified region. This is needed when we are dealing with global - tables, as the stream is kept in a single copy in the originating region. - """ - arn_data = parse_arn(stream_arn) - # Note: a resource has the following format in a DynamoDB Stream ARN: table//stream/ - resource_splits = arn_data["resource"].split("/") - return dynamodb_stream_arn( - table_name=resource_splits[1], - latest_stream_label=resource_splits[-1], - account_id=arn_data["account"], - region_name=region, - ) - @staticmethod def prepare_request_headers(headers: Dict, account_id: str, region_name: str): """ diff --git a/localstack-core/localstack/services/dynamodb/utils.py b/localstack-core/localstack/services/dynamodb/utils.py index 995458b2deed7..7208d880d2038 100644 --- a/localstack-core/localstack/services/dynamodb/utils.py +++ b/localstack-core/localstack/services/dynamodb/utils.py @@ -23,7 +23,12 @@ from localstack.aws.connect import connect_to from localstack.constants import INTERNAL_AWS_SECRET_ACCESS_KEY from localstack.http import Response -from localstack.utils.aws.arns import dynamodb_table_arn, get_partition +from localstack.utils.aws.arns import ( + dynamodb_stream_arn, + dynamodb_table_arn, + get_partition, + parse_arn, +) from localstack.utils.json import canonical_json from localstack.utils.testutil import list_all_resources @@ -348,3 +353,30 @@ def _convert_arn(matchobj): # update x-amz-crc32 header required by some clients response.headers["x-amz-crc32"] = crc32(response.data) & 0xFFFFFFFF + + +def change_region_in_ddb_stream_arn(arn: str, region: str) -> str: + """ + Modify the ARN or a DynamoDB Stream by changing its region. + We need this logic when dealing with global tables, as we create a stream only in the originating region, and we + need to modify the ARN to mimic the stream of the replica regions. + """ + arn_data = parse_arn(arn) + if arn_data["region"] == region: + return arn + + if arn_data["service"] != "dynamodb": + raise Exception(f"{arn} is not a DynamoDB Streams ARN") + + # Note: a DynamoDB Streams ARN has the following pattern: + # arn:aws:dynamodb:::table//stream/ + resource_splits = arn_data["resource"].split("/") + if len(resource_splits) != 4: + raise Exception(f"The format of the '{arn}' ARN is not valid") + + return dynamodb_stream_arn( + table_name=resource_splits[1], + latest_stream_label=resource_splits[-1], + account_id=arn_data["account"], + region_name=region, + ) diff --git a/localstack-core/localstack/services/dynamodb/v2/provider.py b/localstack-core/localstack/services/dynamodb/v2/provider.py index 75a2269098e8d..f6dee3a68e854 100644 --- a/localstack-core/localstack/services/dynamodb/v2/provider.py +++ b/localstack-core/localstack/services/dynamodb/v2/provider.py @@ -126,11 +126,9 @@ from localstack.state import AssetDirectory, StateVisitor from localstack.utils.aws import arns from localstack.utils.aws.arns import ( - dynamodb_stream_arn, extract_account_id_from_arn, extract_region_from_arn, get_partition, - parse_arn, ) from localstack.utils.aws.aws_stack import get_valid_regions_for_service from localstack.utils.aws.request_context import ( @@ -1313,22 +1311,6 @@ def get_global_table_region(context: RequestContext, table_name: str) -> str: return context.region - @staticmethod - def get_stream_for_region(stream_arn: str, region: str) -> str: - """ - Return the ARN of a DynamoDB Stream with a modified region. This is needed when we are dealing with global - tables, as the stream is kept in a single copy in the originating region. - """ - arn_data = parse_arn(stream_arn) - # Note: a resource has the following format in a DynamoDB Stream ARN: table//stream/ - resource_splits = arn_data["resource"].split("/") - return dynamodb_stream_arn( - table_name=resource_splits[1], - latest_stream_label=resource_splits[-1], - account_id=arn_data["account"], - region_name=region, - ) - @staticmethod def prepare_request_headers(headers: Dict, account_id: str, region_name: str): """ diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index 9a47db34f1715..37d862db5ef6d 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -24,7 +24,7 @@ TableName, ) from localstack.aws.connect import connect_to -from localstack.services.dynamodb.v2.provider import DynamoDBProvider +from localstack.services.dynamodb.utils import change_region_in_ddb_stream_arn from localstack.services.dynamodbstreams.dynamodbstreams_api import ( get_dynamodbstreams_store, get_kinesis_client, @@ -63,7 +63,7 @@ def describe_stream( for stream in store.ddb_streams.values(): _stream_arn = stream_arn if context.region != og_region: - _stream_arn = DynamoDBProvider.get_stream_for_region(_stream_arn, og_region) + _stream_arn = change_region_in_ddb_stream_arn(_stream_arn, og_region) if stream["StreamArn"] == _stream_arn: # get stream details dynamodb = connect_to( @@ -105,6 +105,10 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: + # Limitation note: with this current implementation, we are not able to get the records from a stream of a + # replicated table. To do so, we would need to kept track of the originating region when we emit a ShardIterator + # (see `GetShardIterator`) in order to forward the request to the region actually holding the stream data. + kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) prefix, _, payload["ShardIterator"] = payload["ShardIterator"].rpartition("|") try: @@ -161,10 +165,11 @@ def list_streams( result = [select_from_typed_dict(Stream, res) for res in store.ddb_streams.values()] if table_name: result: list[Stream] = [res for res in result if res["TableName"] == table_name] - # If this is a stream from a table replica, we need to change the region in the Stream ARN + # If this is a stream from a table replica, we need to change the region in the stream ARN, as LocalStack + # keeps a stream only in the originating region. if context.region != og_region: for stream in result: - stream["StreamArn"] = DynamoDBProvider.get_stream_for_region( + stream["StreamArn"] = change_region_in_ddb_stream_arn( stream["StreamArn"], context.region ) diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index dc4ed85d415be..33f7a256117fe 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -73,9 +73,9 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - # Limitation note: With this current implementation, we are not able to get the records from a stream of a - # replicated table. To do so, we would need to kept track of the emitted ShardIterators and the originating - # region in `GetShardIterator`. + # Limitation note: with this current implementation, we are not able to get the records from a stream of a + # replicated table. To do so, we would need to kept track of the originating region when we emit a ShardIterator + # (see `GetShardIterator`) in order to forward the request to the region actually holding the stream data. request = payload.copy() request["ShardIterator"] = self.modify_stream_arn_for_ddb_local( diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index 718e4614d7ca3..722e523382122 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,7 +1730,7 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "19-05-2025, 16:01:40", + "recorded-date": "21-05-2025, 17:02:48", "recorded-content": { "region-streams": { "Streams": [ diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index 0c9e085c476a4..4552dcff70100 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-19T16:01:38+00:00" + "last_validated_date": "2025-05-21T17:02:46+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From e9042f5d4ce1d75a50e76479b165d5dee527fc3a Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 08:02:37 +0200 Subject: [PATCH 08/17] implement shard dict --- .../services/dynamodbstreams/provider.py | 16 +++++++--- .../services/dynamodbstreams/v2/provider.py | 31 ++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index 37d862db5ef6d..ea6165e5fb478 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -49,6 +49,11 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): + shard_to_region: dict[str, str] + + def __init__(self): + self.shard_to_region = {} + def describe_stream( self, context: RequestContext, @@ -105,11 +110,11 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - # Limitation note: with this current implementation, we are not able to get the records from a stream of a - # replicated table. To do so, we would need to kept track of the originating region when we emit a ShardIterator - # (see `GetShardIterator`) in order to forward the request to the region actually holding the stream data. + region_name = context.region + if payload["ShardIterator"] in self.shard_to_region: + region_name = self.shard_to_region.pop(payload["ShardIterator"]) - kinesis = get_kinesis_client(account_id=context.account_id, region_name=context.region) + kinesis = get_kinesis_client(account_id=context.account_id, region_name=region_name) prefix, _, payload["ShardIterator"] = payload["ShardIterator"].rpartition("|") try: kinesis_records = kinesis.get_records(**payload) @@ -150,6 +155,9 @@ def get_shard_iterator( del result["ResponseMetadata"] # TODO not quite clear what the |1| exactly denotes, because at AWS it's sometimes other numbers result["ShardIterator"] = f"{stream_arn}|1|{result['ShardIterator']}" + + if og_region != context.region: + self.shard_to_region[result["ShardIterator"]] = og_region return GetShardIteratorOutput(**result) def list_streams( diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 33f7a256117fe..7e11f6da321c6 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -14,7 +14,7 @@ ListStreamsOutput, ) from localstack.services.dynamodb.server import DynamodbServer -from localstack.services.dynamodb.utils import modify_ddblocal_arns +from localstack.services.dynamodb.utils import change_region_in_ddb_stream_arn, modify_ddblocal_arns from localstack.services.dynamodb.v2.provider import DynamoDBProvider, modify_context_region from localstack.services.dynamodbstreams.dynamodbstreams_api import get_original_region from localstack.services.plugins import ServiceLifecycleHook @@ -24,8 +24,11 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): + shard_to_region: dict[str, str] + def __init__(self): self.server = DynamodbServer.get() + self.shard_to_region = {} def on_after_init(self): # add response processor specific to ddblocal @@ -73,23 +76,37 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - # Limitation note: with this current implementation, we are not able to get the records from a stream of a - # replicated table. To do so, we would need to kept track of the originating region when we emit a ShardIterator - # (see `GetShardIterator`) in order to forward the request to the region actually holding the stream data. - request = payload.copy() request["ShardIterator"] = self.modify_stream_arn_for_ddb_local( request.get("ShardIterator", "") ) + if payload["ShardIterator"] in self.shard_to_region: + original_region = self.shard_to_region.pop(payload["ShardIterator"]) + LOG.debug("Forwarding GetRecord request to region %s", original_region) + return self._forward_request( + context=context, + region=self.shard_to_region.pop(payload["ShardIterator"]), + service_request=request, + ) + return self.forward_request(context, request) @handler("GetShardIterator", expand=False) def get_shard_iterator( self, context: RequestContext, payload: GetShardIteratorInput ) -> GetShardIteratorOutput: + global_table_region = get_original_region(context=context, stream_arn=payload["StreamArn"]) + stream_arn = payload.get("StreamArn") + if global_table_region != context.region and stream_arn: + stream_arn = change_region_in_ddb_stream_arn(stream_arn, global_table_region) request = payload.copy() - request["StreamArn"] = self.modify_stream_arn_for_ddb_local(request.get("StreamArn", "")) - return self.forward_request(context, request) + request["StreamArn"] = self.modify_stream_arn_for_ddb_local(stream_arn) + response = self._forward_request( + context=context, service_request=request, region=global_table_region + ) + if global_table_region != context and (shard_iterator := response.get("ShardIterator")): + self.shard_to_region[shard_iterator] = global_table_region + return response @handler("ListStreams", expand=False) def list_streams(self, context: RequestContext, payload: ListStreamsInput) -> ListStreamsOutput: From 859396b1af8c18d872989d1f1d4956a9c0b71810 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 09:00:38 +0200 Subject: [PATCH 09/17] implement shard dict --- .../services/dynamodbstreams/provider.py | 9 +++++++-- .../services/dynamodbstreams/v2/provider.py | 15 ++++++++++----- .../localstack/testing/pytest/fixtures.py | 7 +++---- tests/aws/services/dynamodb/test_dynamodb.py | 9 +++++---- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index ea6165e5fb478..ff47b603bdfac 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -110,15 +110,17 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: + _shard_iterator = payload["ShardIterator"] region_name = context.region if payload["ShardIterator"] in self.shard_to_region: - region_name = self.shard_to_region.pop(payload["ShardIterator"]) + region_name = self.shard_to_region[_shard_iterator] kinesis = get_kinesis_client(account_id=context.account_id, region_name=region_name) - prefix, _, payload["ShardIterator"] = payload["ShardIterator"].rpartition("|") + prefix, _, payload["ShardIterator"] = _shard_iterator.rpartition("|") try: kinesis_records = kinesis.get_records(**payload) except kinesis.exceptions.ExpiredIteratorException: + self.shard_to_region.pop(_shard_iterator, None) LOG.debug("Shard iterator for underlying kinesis stream expired") raise ExpiredIteratorException("Shard iterator has expired") result = { @@ -129,6 +131,9 @@ def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetR record_data = loads(record["Data"]) record_data["dynamodb"]["SequenceNumber"] = record["SequenceNumber"] result["Records"].append(record_data) + + if region_name != context.region and "NextShardIterator" in result: + self.shard_to_region[result["NextShardIterator"]] = region_name return GetRecordsOutput(**result) def get_shard_iterator( diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 7e11f6da321c6..17eb48629eb23 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -76,20 +76,25 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: + region = context.region + _shard_iterator = payload["ShardIterator"] request = payload.copy() request["ShardIterator"] = self.modify_stream_arn_for_ddb_local( request.get("ShardIterator", "") ) - if payload["ShardIterator"] in self.shard_to_region: - original_region = self.shard_to_region.pop(payload["ShardIterator"]) - LOG.debug("Forwarding GetRecord request to region %s", original_region) + if _shard_iterator in self.shard_to_region: + region = self.shard_to_region.pop(_shard_iterator) + LOG.debug("Forwarding GetRecord request to region %s", region) return self._forward_request( context=context, - region=self.shard_to_region.pop(payload["ShardIterator"]), + region=region, service_request=request, ) - return self.forward_request(context, request) + response = self.forward_request(context, request) + if region != context.region and "NextShardIterator" in response: + self.shard_to_region[response["NextShardIterator"]] = region + return response @handler("GetShardIterator", expand=False) def get_shard_iterator( diff --git a/localstack-core/localstack/testing/pytest/fixtures.py b/localstack-core/localstack/testing/pytest/fixtures.py index b89d5aedf2a87..5c282ea8fcbc5 100644 --- a/localstack-core/localstack/testing/pytest/fixtures.py +++ b/localstack-core/localstack/testing/pytest/fixtures.py @@ -792,11 +792,10 @@ def is_stream_ready(): @pytest.fixture def wait_for_dynamodb_stream_ready(aws_client): - def _wait_for_stream_ready(stream_arn: str): + def _wait_for_stream_ready(stream_arn: str, client=None): def is_stream_ready(): - describe_stream_response = aws_client.dynamodbstreams.describe_stream( - StreamArn=stream_arn - ) + ddb_client = client or aws_client.dynamodbstreams + describe_stream_response = ddb_client.describe_stream(StreamArn=stream_arn) return describe_stream_response["StreamDescription"]["StreamStatus"] == "ENABLED" return poll_condition(is_stream_ready) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index de8b219f533da..2f858c3153c5e 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1165,7 +1165,7 @@ def test_streams_on_global_tables( # Create table in the original region table_name = f"table-{short_uid()}" snapshot.add_transformer(snapshot.transform.regex(table_name, "")) - table = region_1_factory.dynamodb.create_table( + region_1_factory.dynamodb.create_table( TableName=table_name, KeySchema=[ {"AttributeName": "Artist", "KeyType": "HASH"}, @@ -1201,17 +1201,18 @@ def test_streams_on_global_tables( ), ) - stream_arn = table["TableDescription"]["LatestStreamArn"] - wait_for_dynamodb_stream_ready(stream_arn=stream_arn) - stream_arn_region = region_1_factory.dynamodb.describe_table(TableName=table_name)["Table"][ "LatestStreamArn" ] assert region_name in stream_arn_region + wait_for_dynamodb_stream_ready(stream_arn_region) stream_arn_secondary_region = region_2_factory.dynamodb.describe_table( TableName=table_name )["Table"]["LatestStreamArn"] assert secondary_region_name in stream_arn_secondary_region + wait_for_dynamodb_stream_ready( + stream_arn_secondary_region, region_2_factory.dynamodbstreams + ) # Verify that we can list streams on both regions streams_region_1 = region_1_factory.dynamodbstreams.list_streams(TableName=table_name) From 1f1a7fe0d92186b101e4a3fd93cc85c5d1896e59 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 09:07:52 +0200 Subject: [PATCH 10/17] wip test - to snapshot --- tests/aws/services/dynamodb/test_dynamodb.py | 54 ++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index 2f858c3153c5e..efbf26a6f8e14 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1222,6 +1222,60 @@ def test_streams_on_global_tables( snapshot.match("secondary-region-streams", streams_region_2) assert secondary_region_name in streams_region_2["Streams"][0]["StreamArn"] + # TODO: run the part below against AWS + + region_1_factory.dynamodb.put_item( + TableName=table_name, + Item={"Artist": {"S": "The Queen"}, "SongTitle": {"S": "Bohemian Rhapsody"}}, + ) + region_1_factory.dynamodb.put_item( + TableName=table_name, + Item={"Artist": {"S": "The Oasis"}, "SongTitle": {"S": "Live Forever"}}, + ) + + def _get_records_amount(record_amount, client) -> None: + nonlocal shard_iterator + if len(records) < record_amount: + _resp = client.get_records(ShardIterator=shard_iterator) + records.extend(_resp["Records"]) + if next_shard_iterator := _resp.get("NextShardIterator"): + shard_iterator = next_shard_iterator + assert len(records) >= record_amount + + # Read from stream on region 1 + describe_stream_result = region_1_factory.dynamodbstreams.describe_stream( + StreamArn=stream_arn_region + ) + shard_id = describe_stream_result["StreamDescription"]["Shards"][0]["ShardId"] + shard_iterator = region_1_factory.dynamodbstreams.get_shard_iterator( + StreamArn=stream_arn_region, ShardId=shard_id, ShardIteratorType="TRIM_HORIZON" + )["ShardIterator"] + + records = [] + retry( + lambda: _get_records_amount(2, region_1_factory.dynamodbstreams), + sleep=WAIT_SEC, + retries=50, + ) + + # Read from stream on region 2 + describe_stream_result = region_2_factory.dynamodbstreams.describe_stream( + StreamArn=stream_arn_secondary_region + ) + shard_id = describe_stream_result["StreamDescription"]["Shards"][0]["ShardId"] + shard_iterator = region_2_factory.dynamodbstreams.get_shard_iterator( + StreamArn=stream_arn_secondary_region, + ShardId=shard_id, + ShardIteratorType="TRIM_HORIZON", + )["ShardIterator"] + + records = [] + retry( + lambda: _get_records_amount(2, region_2_factory.dynamodbstreams), + sleep=WAIT_SEC, + retries=50, + ) + @markers.aws.only_localstack def test_global_tables(self, aws_client, ddb_test_table): dynamodb = aws_client.dynamodb From 176acd9e88f12616048c90c2106e469a34555ba5 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 11:59:15 +0200 Subject: [PATCH 11/17] working test for reading shards --- tests/aws/services/dynamodb/test_dynamodb.py | 101 +++++++++++------- .../dynamodb/test_dynamodb.snapshot.json | 2 +- .../dynamodb/test_dynamodb.validation.json | 2 +- 3 files changed, 63 insertions(+), 42 deletions(-) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index efbf26a6f8e14..64f8f8684b2ca 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1224,56 +1224,77 @@ def test_streams_on_global_tables( # TODO: run the part below against AWS - region_1_factory.dynamodb.put_item( - TableName=table_name, - Item={"Artist": {"S": "The Queen"}, "SongTitle": {"S": "Bohemian Rhapsody"}}, - ) - region_1_factory.dynamodb.put_item( - TableName=table_name, - Item={"Artist": {"S": "The Oasis"}, "SongTitle": {"S": "Live Forever"}}, + region_1_factory.dynamodb.batch_write_item( + RequestItems={ + table_name: [ + { + "PutRequest": { + "Item": { + "Artist": {"S": "The Queen"}, + "SongTitle": {"S": "Bohemian Rhapsody"}, + } + } + }, + { + "PutRequest": { + "Item": {"Artist": {"S": "Oasis"}, "SongTitle": {"S": "Live Forever"}} + } + }, + ] + } ) - def _get_records_amount(record_amount, client) -> None: - nonlocal shard_iterator - if len(records) < record_amount: - _resp = client.get_records(ShardIterator=shard_iterator) - records.extend(_resp["Records"]) - if next_shard_iterator := _resp.get("NextShardIterator"): - shard_iterator = next_shard_iterator - assert len(records) >= record_amount - - # Read from stream on region 1 - describe_stream_result = region_1_factory.dynamodbstreams.describe_stream( - StreamArn=stream_arn_region - ) - shard_id = describe_stream_result["StreamDescription"]["Shards"][0]["ShardId"] - shard_iterator = region_1_factory.dynamodbstreams.get_shard_iterator( - StreamArn=stream_arn_region, ShardId=shard_id, ShardIteratorType="TRIM_HORIZON" - )["ShardIterator"] + def _read_records_from_shards(_stream_arn, _expected_record_count, _client) -> int: + describe_stream_result = _client.describe_stream(StreamArn=_stream_arn) + shard_id_to_iterator: dict[str, str] = {} + fetched_records = [] + # Records can be spread over multiple shards. We need to read all over them + for stream_info in describe_stream_result["StreamDescription"]["Shards"]: + _shard_id = stream_info["ShardId"] + shard_iterator = _client.get_shard_iterator( + StreamArn=_stream_arn, ShardId=_shard_id, ShardIteratorType="TRIM_HORIZON" + )["ShardIterator"] + shard_id_to_iterator[_shard_id] = shard_iterator + + while len(fetched_records) < _expected_record_count and shard_id_to_iterator: + for _shard_id, _shard_iterator in list(shard_id_to_iterator.items()): + _resp = _client.get_records(ShardIterator=_shard_iterator) + fetched_records.extend(_resp["Records"]) + if next_shard_iterator := _resp.get("NextShardIterator"): + shard_id_to_iterator[_shard_id] = next_shard_iterator + continue + shard_id_to_iterator.pop(_shard_id, None) + return fetched_records records = [] + + def _get_records_from_all_shards(_stream_arn, _expected_count, _client): + nonlocal records + records = _read_records_from_shards( + _stream_arn, + _expected_count, + _client, + ) + assert len(records) == _expected_count, ( + f"Expected {_expected_count} records, got {len(records)}" + ) + retry( - lambda: _get_records_amount(2, region_1_factory.dynamodbstreams), + _get_records_from_all_shards, sleep=WAIT_SEC, - retries=50, - ) - - # Read from stream on region 2 - describe_stream_result = region_2_factory.dynamodbstreams.describe_stream( - StreamArn=stream_arn_secondary_region + retries=20, + _stream_arn=stream_arn_region, + _expected_count=2, + _client=region_1_factory.dynamodbstreams, ) - shard_id = describe_stream_result["StreamDescription"]["Shards"][0]["ShardId"] - shard_iterator = region_2_factory.dynamodbstreams.get_shard_iterator( - StreamArn=stream_arn_secondary_region, - ShardId=shard_id, - ShardIteratorType="TRIM_HORIZON", - )["ShardIterator"] - records = [] retry( - lambda: _get_records_amount(2, region_2_factory.dynamodbstreams), + _get_records_from_all_shards, sleep=WAIT_SEC, - retries=50, + retries=20, + _stream_arn=stream_arn_secondary_region, + _expected_count=2, + _client=region_2_factory.dynamodbstreams, ) @markers.aws.only_localstack diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index 722e523382122..24b9298976298 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,7 +1730,7 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "21-05-2025, 17:02:48", + "recorded-date": "22-05-2025, 09:57:00", "recorded-content": { "region-streams": { "Streams": [ diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index 4552dcff70100..d1c6592ba9713 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-21T17:02:46+00:00" + "last_validated_date": "2025-05-22T09:56:58+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From 046376dc922f88308ed1799dca2c537be97aff83 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 14:52:41 +0200 Subject: [PATCH 12/17] Small fix for v2 --- .../services/dynamodbstreams/provider.py | 6 +++ .../services/dynamodbstreams/v2/provider.py | 40 +++++++++---------- tests/aws/services/dynamodb/test_dynamodb.py | 11 ++--- .../dynamodb/test_dynamodb.snapshot.json | 2 +- .../dynamodb/test_dynamodb.validation.json | 2 +- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index ff47b603bdfac..689db050fcb33 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -50,6 +50,8 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): shard_to_region: dict[str, str] + """Map a shard iterator to the originating region. This is used in case of replica tables, as LocalStack keeps the + data in one region only, redirecting all the requests to replica regions.""" def __init__(self): self.shard_to_region = {} @@ -132,6 +134,8 @@ def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetR record_data["dynamodb"]["SequenceNumber"] = record["SequenceNumber"] result["Records"].append(record_data) + # Similar as the logic in GetShardIterator, we need to track the originating region when we get the + # NextShardIterator in the results. if region_name != context.region and "NextShardIterator" in result: self.shard_to_region[result["NextShardIterator"]] = region_name return GetRecordsOutput(**result) @@ -161,6 +165,8 @@ def get_shard_iterator( # TODO not quite clear what the |1| exactly denotes, because at AWS it's sometimes other numbers result["ShardIterator"] = f"{stream_arn}|1|{result['ShardIterator']}" + # In case of a replica table, we need to keep track of the real region originating the shard iterator. + # This region will be later used in GetRecords to redirect to the originating region, holding the data. if og_region != context.region: self.shard_to_region[result["ShardIterator"]] = og_region return GetShardIteratorOutput(**result) diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 17eb48629eb23..7a0f3f04b24bb 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -14,7 +14,7 @@ ListStreamsOutput, ) from localstack.services.dynamodb.server import DynamodbServer -from localstack.services.dynamodb.utils import change_region_in_ddb_stream_arn, modify_ddblocal_arns +from localstack.services.dynamodb.utils import modify_ddblocal_arns from localstack.services.dynamodb.v2.provider import DynamoDBProvider, modify_context_region from localstack.services.dynamodbstreams.dynamodbstreams_api import get_original_region from localstack.services.plugins import ServiceLifecycleHook @@ -25,6 +25,8 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): shard_to_region: dict[str, str] + """Map a shard iterator to the originating region. This is used in case of replica tables, as LocalStack keeps the + data in one region only, redirecting all the requests to replica regions.""" def __init__(self): self.server = DynamodbServer.get() @@ -76,24 +78,20 @@ def describe_stream( @handler("GetRecords", expand=False) def get_records(self, context: RequestContext, payload: GetRecordsInput) -> GetRecordsOutput: - region = context.region - _shard_iterator = payload["ShardIterator"] request = payload.copy() request["ShardIterator"] = self.modify_stream_arn_for_ddb_local( request.get("ShardIterator", "") ) - if _shard_iterator in self.shard_to_region: - region = self.shard_to_region.pop(_shard_iterator) - LOG.debug("Forwarding GetRecord request to region %s", region) - return self._forward_request( - context=context, - region=region, - service_request=request, - ) - - response = self.forward_request(context, request) - if region != context.region and "NextShardIterator" in response: - self.shard_to_region[response["NextShardIterator"]] = region + region = self.shard_to_region.pop(request["ShardIterator"], None) + response = self._forward_request(context=context, region=region, service_request=request) + # Similar as the logic in GetShardIterator, we need to track the originating region when we get the + # NextShardIterator in the results. + if ( + region + and region != context.region + and (next_shard := response.get("NextShardIterator")) + ): + self.shard_to_region[next_shard] = region return response @handler("GetShardIterator", expand=False) @@ -101,15 +99,17 @@ def get_shard_iterator( self, context: RequestContext, payload: GetShardIteratorInput ) -> GetShardIteratorOutput: global_table_region = get_original_region(context=context, stream_arn=payload["StreamArn"]) - stream_arn = payload.get("StreamArn") - if global_table_region != context.region and stream_arn: - stream_arn = change_region_in_ddb_stream_arn(stream_arn, global_table_region) request = payload.copy() - request["StreamArn"] = self.modify_stream_arn_for_ddb_local(stream_arn) + request["StreamArn"] = self.modify_stream_arn_for_ddb_local(request.get("StreamArn", "")) response = self._forward_request( context=context, service_request=request, region=global_table_region ) - if global_table_region != context and (shard_iterator := response.get("ShardIterator")): + + # In case of a replica table, we need to keep track of the real region originating the shard iterator. + # This region will be later used in GetRecords to redirect to the originating region, holding the data. + if global_table_region != context.region and ( + shard_iterator := response.get("ShardIterator") + ): self.shard_to_region[shard_iterator] = global_table_region return response diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index 64f8f8684b2ca..c185927d92ba1 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1222,8 +1222,6 @@ def test_streams_on_global_tables( snapshot.match("secondary-region-streams", streams_region_2) assert secondary_region_name in streams_region_2["Streams"][0]["StreamArn"] - # TODO: run the part below against AWS - region_1_factory.dynamodb.batch_write_item( RequestItems={ table_name: [ @@ -1266,10 +1264,7 @@ def _read_records_from_shards(_stream_arn, _expected_record_count, _client) -> i shard_id_to_iterator.pop(_shard_id, None) return fetched_records - records = [] - - def _get_records_from_all_shards(_stream_arn, _expected_count, _client): - nonlocal records + def _assert_records(_stream_arn, _expected_count, _client) -> None: records = _read_records_from_shards( _stream_arn, _expected_count, @@ -1280,7 +1275,7 @@ def _get_records_from_all_shards(_stream_arn, _expected_count, _client): ) retry( - _get_records_from_all_shards, + _assert_records, sleep=WAIT_SEC, retries=20, _stream_arn=stream_arn_region, @@ -1289,7 +1284,7 @@ def _get_records_from_all_shards(_stream_arn, _expected_count, _client): ) retry( - _get_records_from_all_shards, + _assert_records, sleep=WAIT_SEC, retries=20, _stream_arn=stream_arn_secondary_region, diff --git a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json index 24b9298976298..4842ef3f2406b 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.snapshot.json +++ b/tests/aws/services/dynamodb/test_dynamodb.snapshot.json @@ -1730,7 +1730,7 @@ } }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "recorded-date": "22-05-2025, 09:57:00", + "recorded-date": "22-05-2025, 12:44:58", "recorded-content": { "region-streams": { "Streams": [ diff --git a/tests/aws/services/dynamodb/test_dynamodb.validation.json b/tests/aws/services/dynamodb/test_dynamodb.validation.json index d1c6592ba9713..6a2220f1f2937 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.validation.json +++ b/tests/aws/services/dynamodb/test_dynamodb.validation.json @@ -75,7 +75,7 @@ "last_validated_date": "2024-01-03T17:52:19+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_streams_on_global_tables": { - "last_validated_date": "2025-05-22T09:56:58+00:00" + "last_validated_date": "2025-05-22T12:44:55+00:00" }, "tests/aws/services/dynamodb/test_dynamodb.py::TestDynamoDB::test_transact_get_items": { "last_validated_date": "2023-08-23T14:33:37+00:00" From d19ddb541652fd5a4a83f7aa3b42afa2bf339448 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 22 May 2025 17:22:53 +0200 Subject: [PATCH 13/17] some comments --- .../services/dynamodbstreams/provider.py | 2 +- .../services/dynamodbstreams/v2/provider.py | 2 +- tests/aws/services/dynamodb/test_dynamodb.py | 15 +++++---------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/provider.py b/localstack-core/localstack/services/dynamodbstreams/provider.py index 689db050fcb33..6c9548bb81ebf 100644 --- a/localstack-core/localstack/services/dynamodbstreams/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/provider.py @@ -51,7 +51,7 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): shard_to_region: dict[str, str] """Map a shard iterator to the originating region. This is used in case of replica tables, as LocalStack keeps the - data in one region only, redirecting all the requests to replica regions.""" + data in one region only, redirecting all the requests from replica regions.""" def __init__(self): self.shard_to_region = {} diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 7a0f3f04b24bb..7549eaa00b851 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -26,7 +26,7 @@ class DynamoDBStreamsProvider(DynamodbstreamsApi, ServiceLifecycleHook): shard_to_region: dict[str, str] """Map a shard iterator to the originating region. This is used in case of replica tables, as LocalStack keeps the - data in one region only, redirecting all the requests to replica regions.""" + data in one region only, redirecting all the requests from replica regions.""" def __init__(self): self.server = DynamodbServer.get() diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index c185927d92ba1..777b3d58df6ae 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1138,8 +1138,11 @@ def test_global_tables_version_2019( assert "Replicas" not in response["Table"] @markers.aws.validated - # The stream label on the replica and replicated stream are the same (while they differ on AWS). - # The region changes accordingly in the ARN. We test this with assertions. + # An ARM stream has a stream label as suffix. In AWS, such a label differs between the stream of the original table + # and the ones of the replicas. In LocalStack, it does not differ. The only difference in the stream ARNs is the + # region. Therefore, we skip the following paths from the snapshots. + # However, we run plain assertions to make sure that the region changes in the ARNs, i.e., the replica have their + # own stream. @markers.snapshot.skip_snapshot_verify( paths=["$..Streams..StreamArn", "$..Streams..StreamLabel"] ) @@ -1193,14 +1196,6 @@ def test_streams_on_global_tables( waiter = region_2_factory.dynamodb.get_waiter("table_exists") waiter.wait(TableName=table_name, WaiterConfig={"Delay": WAIT_SEC, "MaxAttempts": 20}) - with pytest.raises(ClientError): - region_2_factory.dynamodb.update_table( - TableName=table_name, - StreamSpecification=StreamSpecification( - StreamEnabled=True, StreamViewType=StreamViewType.NEW_AND_OLD_IMAGES - ), - ) - stream_arn_region = region_1_factory.dynamodb.describe_table(TableName=table_name)["Table"][ "LatestStreamArn" ] From 37dcffb90a90bb4c1d9a5c14495645510c1648f1 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Mon, 26 May 2025 08:51:22 +0200 Subject: [PATCH 14/17] fix None value --- .../localstack/services/dynamodbstreams/v2/provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 7549eaa00b851..d95fdbe3ea49e 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -115,7 +115,7 @@ def get_shard_iterator( @handler("ListStreams", expand=False) def list_streams(self, context: RequestContext, payload: ListStreamsInput) -> ListStreamsOutput: - global_table_region = get_original_region(context=context, stream_arn=payload["TableName"]) + global_table_region = get_original_region(context=context, stream_arn=payload.get("TableName")) # TODO: look into `ExclusiveStartStreamArn` param return self._forward_request( context=context, service_request=payload, region=global_table_region From 85f2231383f8f4ec3b46f04408a2998300253f23 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Tue, 27 May 2025 08:04:21 +0200 Subject: [PATCH 15/17] lint --- .../localstack/services/dynamodbstreams/v2/provider.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index d95fdbe3ea49e..3ea20374df6a3 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -115,7 +115,9 @@ def get_shard_iterator( @handler("ListStreams", expand=False) def list_streams(self, context: RequestContext, payload: ListStreamsInput) -> ListStreamsOutput: - global_table_region = get_original_region(context=context, stream_arn=payload.get("TableName")) + global_table_region = get_original_region( + context=context, stream_arn=payload.get("TableName") + ) # TODO: look into `ExclusiveStartStreamArn` param return self._forward_request( context=context, service_request=payload, region=global_table_region From 08631415475822a39fdb1180092c0059409fa131 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Tue, 27 May 2025 08:04:34 +0200 Subject: [PATCH 16/17] Update tests/aws/services/dynamodb/test_dynamodb.py Co-authored-by: Viren Nadkarni --- tests/aws/services/dynamodb/test_dynamodb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/aws/services/dynamodb/test_dynamodb.py b/tests/aws/services/dynamodb/test_dynamodb.py index 777b3d58df6ae..2c0ab3e50b42f 100644 --- a/tests/aws/services/dynamodb/test_dynamodb.py +++ b/tests/aws/services/dynamodb/test_dynamodb.py @@ -1138,7 +1138,7 @@ def test_global_tables_version_2019( assert "Replicas" not in response["Table"] @markers.aws.validated - # An ARM stream has a stream label as suffix. In AWS, such a label differs between the stream of the original table + # An ARN stream has a stream label as suffix. In AWS, such a label differs between the stream of the original table # and the ones of the replicas. In LocalStack, it does not differ. The only difference in the stream ARNs is the # region. Therefore, we skip the following paths from the snapshots. # However, we run plain assertions to make sure that the region changes in the ARNs, i.e., the replica have their From b1c08185fb0d97c4fefdb11e6a3d30861ce19917 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Tue, 27 May 2025 08:12:17 +0200 Subject: [PATCH 17/17] PR comments --- localstack-core/localstack/services/dynamodb/utils.py | 7 ++++++- .../localstack/services/dynamodbstreams/v2/provider.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/localstack-core/localstack/services/dynamodb/utils.py b/localstack-core/localstack/services/dynamodb/utils.py index 7208d880d2038..4ff065440abec 100644 --- a/localstack-core/localstack/services/dynamodb/utils.py +++ b/localstack-core/localstack/services/dynamodb/utils.py @@ -20,6 +20,9 @@ TableName, Update, ) +from localstack.aws.api.dynamodbstreams import ( + ResourceNotFoundException as DynamoDBStreamsResourceNotFoundException, +) from localstack.aws.connect import connect_to from localstack.constants import INTERNAL_AWS_SECRET_ACCESS_KEY from localstack.http import Response @@ -372,7 +375,9 @@ def change_region_in_ddb_stream_arn(arn: str, region: str) -> str: # arn:aws:dynamodb:::table//stream/ resource_splits = arn_data["resource"].split("/") if len(resource_splits) != 4: - raise Exception(f"The format of the '{arn}' ARN is not valid") + raise DynamoDBStreamsResourceNotFoundException( + f"The format of the '{arn}' ARN is not valid" + ) return dynamodb_stream_arn( table_name=resource_splits[1], diff --git a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py index 3ea20374df6a3..a91fbc592a992 100644 --- a/localstack-core/localstack/services/dynamodbstreams/v2/provider.py +++ b/localstack-core/localstack/services/dynamodbstreams/v2/provider.py @@ -42,6 +42,12 @@ def on_before_start(self): def _forward_request( self, context: RequestContext, region: str | None, service_request: ServiceRequest ) -> ServiceResponse: + """ + Modify the context region and then forward request to DynamoDB Local. + + This is used for operations impacted by global tables. In LocalStack, a single copy of global table + is kept, and any requests to replicated tables are forwarded to this original table. + """ if region: with modify_context_region(context, region): return self.forward_request(context, service_request=service_request)