Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e491aac

Browse files
authored
fix(providers/amazon): S3DagBundle does not delete stale dag recursively (#63104)
* Refactor S3Hook's local file synchronization logic to match GCSHook. Update tests to cover nested directories and ensure proper logging of deleted files and directories. * Update S3Hook logging level for deleted files and directories from info to debug to reduce log verbosity.
1 parent 30524f8 commit e491aac

2 files changed

Lines changed: 27 additions & 21 deletions

File tree

  • providers/amazon
    • src/airflow/providers/amazon/aws/hooks
    • tests/unit/amazon/aws/hooks

providers/amazon/src/airflow/providers/amazon/aws/hooks/s3.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1735,27 +1735,19 @@ def delete_bucket_tagging(self, bucket_name: str | None = None) -> None:
17351735
s3_client.delete_bucket_tagging(Bucket=bucket_name)
17361736

17371737
def _sync_to_local_dir_delete_stale_local_files(self, current_s3_objects: list[Path], local_dir: Path):
1738-
current_s3_keys = {key for key in current_s3_objects}
1739-
1740-
for item in local_dir.iterdir():
1741-
item: Path # type: ignore[no-redef]
1742-
absolute_item_path = item.resolve()
1743-
1744-
if absolute_item_path not in current_s3_keys:
1745-
try:
1746-
if item.is_file():
1747-
item.unlink(missing_ok=True)
1748-
self.log.debug("Deleted stale local file: %s", item)
1749-
elif item.is_dir():
1750-
# delete only when the folder is empty
1751-
if not os.listdir(item):
1752-
item.rmdir()
1753-
self.log.debug("Deleted stale empty directory: %s", item)
1754-
else:
1755-
self.log.debug("Skipping stale item of unknown type: %s", item)
1756-
except OSError as e:
1757-
self.log.error("Error deleting stale item %s: %s", item, e)
1758-
raise e
1738+
current_s3_keys = {key.resolve() for key in current_s3_objects}
1739+
1740+
for item in local_dir.rglob("*"):
1741+
if item.is_file() and item.resolve() not in current_s3_keys:
1742+
self.log.debug("Deleted stale local file: %s", item)
1743+
item.unlink()
1744+
# Clean up empty directories
1745+
for root, dirs, _ in os.walk(local_dir, topdown=False):
1746+
for d in dirs:
1747+
dir_path = os.path.join(root, d)
1748+
if not os.listdir(dir_path):
1749+
self.log.debug("Deleted stale empty directory: %s", dir_path)
1750+
os.rmdir(dir_path)
17591751

17601752
def _sync_to_local_dir_if_changed(self, s3_bucket, s3_object, local_target_path: Path):
17611753
should_download = False

providers/amazon/tests/unit/amazon/aws/hooks/test_s3.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,14 +1870,28 @@ def get_logs_string(call_args_list):
18701870
local_file_that_should_be_deleted.write_text("test dag")
18711871
local_folder_should_be_deleted = Path(sync_local_dir).joinpath("local_folder_should_be_deleted")
18721872
local_folder_should_be_deleted.mkdir(exist_ok=True)
1873+
nested_stale_file = Path(sync_local_dir).joinpath("subproject1", "stale_nested.py")
1874+
nested_stale_file.write_text("stale nested file")
1875+
deep_nested_dir = Path(sync_local_dir).joinpath("subproject1", "deep")
1876+
deep_nested_dir.mkdir()
1877+
deep_stale_file = deep_nested_dir.joinpath("stale_deep.py")
1878+
deep_stale_file.write_text("stale deep file")
18731879
hook.log.debug = MagicMock()
18741880
hook.sync_to_local_dir(
18751881
bucket_name=s3_bucket, local_dir=sync_local_dir, s3_prefix="", delete_stale=True
18761882
)
18771883
logs_string = get_logs_string(hook.log.debug.call_args_list)
18781884
assert f"Deleted stale local file: {local_file_that_should_be_deleted.as_posix()}" in logs_string
1885+
assert f"Deleted stale local file: {nested_stale_file.as_posix()}" in logs_string
1886+
assert f"Deleted stale local file: {deep_stale_file.as_posix()}" in logs_string
18791887

18801888
assert f"Deleted stale empty directory: {local_folder_should_be_deleted.as_posix()}" in logs_string
1889+
assert f"Deleted stale empty directory: {deep_nested_dir.as_posix()}" in logs_string
1890+
assert not nested_stale_file.exists()
1891+
assert not deep_stale_file.exists()
1892+
assert not deep_nested_dir.exists()
1893+
assert Path(sync_local_dir).joinpath("dag_01.py").exists()
1894+
assert Path(sync_local_dir).joinpath("subproject1", "dag_a.py").exists()
18811895

18821896
s3_client.put_object(Bucket=s3_bucket, Key="dag_03.py", Body=b"test data-changed")
18831897
hook.log.debug = MagicMock()

0 commit comments

Comments
 (0)