From d95991dfd7c0e73f3ca7ba298093e8b9aaf30a75 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 14:48:25 +0300 Subject: [PATCH 01/10] add a test case to replicate the issue --- Lib/test/test_tarfile.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2018a20afd1b18..8c2c865f5cab76 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1679,6 +1679,33 @@ def test_missing_fileobj(self): with self.assertRaises(ValueError): tar.addfile(tarinfo) + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.utime') + @unittest.mock.patch('os.chmod') + def test_deferred_directory_attributes_update(self, mock_chmod, mock_utime, mock_chown): + # Regression test for gh-127987: setting attributes on arbitrary files + tempdir = os.path.join(TEMPDIR, 'test127987') + def mock_chmod_side_effect(path, mode, **kwargs): + target_path = os.path.realpath(path) + if os.path.commonpath([target_path, tempdir]) != tempdir: + raise Exception("should not try to chmod anything outside the destination", target_path) + mock_chmod.side_effect = mock_chmod_side_effect + + outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') + with ArchiveMaker() as arc: + arc.add('x', symlink_to='.') + arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') + arc.add('x', symlink_to=('y/' * 99 + '../' * 99 + outside_tree_dir)) + arc.add('y/' * 99, symlink_to=('../' * 98)) + + os.makedirs(outside_tree_dir) + try: + arc.open().extractall(path=tempdir, filter='tar') + finally: + os_helper.rmtree(outside_tree_dir) + os_helper.rmtree(tempdir) + class GzipWriteTest(GzipTest, WriteTest): pass From 2463a85290d114ffb025227a1d3d5f44f37c95bf Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 14:48:45 +0300 Subject: [PATCH 02/10] extract `_transform_destination_path()` --- Lib/tarfile.py | 12 +++++++----- 1 
file changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 212b71f6509740..962d7767e02c0d 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2516,16 +2516,18 @@ def extractfile(self, member): # blkdev, etc.), return None instead of a file object. return None + def _transform_destination_path(self, targetpath): + # Build the destination pathname, replacing + # forward slashes to platform specific separators. + targetpath = targetpath.rstrip("/") + return targetpath.replace("/", os.sep) + def _extract_member(self, tarinfo, targetpath, set_attrs=True, numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) + targetpath = self._transform_destination_path(targetpath) # Create all upper directories. upperdirs = os.path.dirname(targetpath) From d97e83e8d287895139c0ef3482322b2a13f8486b Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 15:01:03 +0300 Subject: [PATCH 03/10] make sure the directory is not renamed during `extractall()` --- Lib/tarfile.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 962d7767e02c0d..36185865721108 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2389,20 +2389,32 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, tarinfo = self._get_extract_tarinfo(member, filter_function, path) if tarinfo is None: continue + self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) if tarinfo.isdir(): # For directories, delay setting attributes until later, # since permissions can interfere with extraction and # extracting contents can reset mtime. 
- directories.append(tarinfo) - self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), - numeric_owner=numeric_owner) + # We also keep the original inode and device, to detect + # if it was changed during extraction. + dirpath = os.path.join(path, tarinfo.name) + dirpath = self._transform_destination_path(dirpath) + targetstat = os.stat(dirpath) + directories.append((tarinfo, dirpath, targetstat.st_ino, + targetstat.st_dev)) # Reverse sort directories. - directories.sort(key=lambda a: a.name, reverse=True) + directories.sort(key=lambda a: a[0].name, reverse=True) # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) + for tarinfo, dirpath, original_ino, original_dev in directories: + dirstat = os.stat(dirpath) + if (dirstat.st_ino != original_ino or + dirstat.st_dev != original_dev): + self._dbg(1, "tarfile: Directory renamed before its " \ + "attributes could be extracted %r" % dirpath) + continue + try: self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) From a4a481cefc132ad3ca9c8838d74c1e92aaa155df Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 16:38:29 +0300 Subject: [PATCH 04/10] do not follow symlinks --- Lib/tarfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 36185865721108..4b5d1b94778f54 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2399,7 +2399,7 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, # if it was changed during extraction. 
dirpath = os.path.join(path, tarinfo.name) dirpath = self._transform_destination_path(dirpath) - targetstat = os.stat(dirpath) + targetstat = os.stat(dirpath, follow_symlinks=False) directories.append((tarinfo, dirpath, targetstat.st_ino, targetstat.st_dev)) @@ -2408,7 +2408,7 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, # Set correct owner, mtime and filemode on directories. for tarinfo, dirpath, original_ino, original_dev in directories: - dirstat = os.stat(dirpath) + dirstat = os.stat(dirpath, follow_symlinks=False) if (dirstat.st_ino != original_ino or dirstat.st_dev != original_dev): self._dbg(1, "tarfile: Directory renamed before its " \ From 2140b19977c80b4129394a7d66676719b8485965 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 16:41:58 +0300 Subject: [PATCH 05/10] remove unneeded mocks --- Lib/test/test_tarfile.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 8c2c865f5cab76..ea126790e3f596 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1680,10 +1680,8 @@ def test_missing_fileobj(self): tar.addfile(tarinfo) @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') - @unittest.mock.patch('os.chown') - @unittest.mock.patch('os.utime') @unittest.mock.patch('os.chmod') - def test_deferred_directory_attributes_update(self, mock_chmod, mock_utime, mock_chown): + def test_deferred_directory_attributes_update(self, mock_chmod): # Regression test for gh-127987: setting attributes on arbitrary files tempdir = os.path.join(TEMPDIR, 'test127987') def mock_chmod_side_effect(path, mode, **kwargs): From 6b20f0df888015c5a9b2767b4c172273a7cf1e9e Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 16:43:47 +0300 Subject: [PATCH 06/10] do not run on systems without `os.chmod()` --- Lib/test/test_tarfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_tarfile.py 
b/Lib/test/test_tarfile.py index ea126790e3f596..e4db0f27156a62 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1680,6 +1680,7 @@ def test_missing_fileobj(self): tar.addfile(tarinfo) @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") @unittest.mock.patch('os.chmod') def test_deferred_directory_attributes_update(self, mock_chmod): # Regression test for gh-127987: setting attributes on arbitrary files From 491b40ffc811e6cfac5bd8d9347e5aaa81075009 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 17:25:58 +0300 Subject: [PATCH 07/10] simplify test --- Lib/test/test_tarfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index e4db0f27156a62..21f16b640450dc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1695,8 +1695,8 @@ def mock_chmod_side_effect(path, mode, **kwargs): with ArchiveMaker() as arc: arc.add('x', symlink_to='.') arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') - arc.add('x', symlink_to=('y/' * 99 + '../' * 99 + outside_tree_dir)) - arc.add('y/' * 99, symlink_to=('../' * 98)) + arc.add('x', symlink_to=('y/' + '../' + outside_tree_dir)) + arc.add('y/', symlink_to=('../' * len(tempdir.split(os.path.sep)))) os.makedirs(outside_tree_dir) try: From 0e2d15796ac5586a47ae7d7665daf72d2cb98416 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 17:41:34 +0300 Subject: [PATCH 08/10] move test to `MiscTest` --- Lib/test/test_tarfile.py | 52 ++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 21f16b640450dc..c4a5b67a101368 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1679,32 +1679,6 @@ def test_missing_fileobj(self): with self.assertRaises(ValueError): tar.addfile(tarinfo) - 
@unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') - @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") - @unittest.mock.patch('os.chmod') - def test_deferred_directory_attributes_update(self, mock_chmod): - # Regression test for gh-127987: setting attributes on arbitrary files - tempdir = os.path.join(TEMPDIR, 'test127987') - def mock_chmod_side_effect(path, mode, **kwargs): - target_path = os.path.realpath(path) - if os.path.commonpath([target_path, tempdir]) != tempdir: - raise Exception("should not try to chmod anything outside the destination", target_path) - mock_chmod.side_effect = mock_chmod_side_effect - - outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') - with ArchiveMaker() as arc: - arc.add('x', symlink_to='.') - arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') - arc.add('x', symlink_to=('y/' + '../' + outside_tree_dir)) - arc.add('y/', symlink_to=('../' * len(tempdir.split(os.path.sep)))) - - os.makedirs(outside_tree_dir) - try: - arc.open().extractall(path=tempdir, filter='tar') - finally: - os_helper.rmtree(outside_tree_dir) - os_helper.rmtree(tempdir) - class GzipWriteTest(GzipTest, WriteTest): pass @@ -2741,6 +2715,32 @@ def test_useful_error_message_when_modules_missing(self): str(excinfo.exception), ) + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") + @unittest.mock.patch('os.chmod') + def test_deferred_directory_attributes_update(self, mock_chmod): + # Regression test for gh-127987: setting attributes on arbitrary files + tempdir = os.path.join(TEMPDIR, 'test127987') + def mock_chmod_side_effect(path, mode, **kwargs): + target_path = os.path.realpath(path) + if os.path.commonpath([target_path, tempdir]) != tempdir: + raise Exception("should not try to chmod anything outside the destination", target_path) + mock_chmod.side_effect = mock_chmod_side_effect + + outside_tree_dir = os.path.join(TEMPDIR, 
'outside_tree_dir') + with ArchiveMaker() as arc: + arc.add('x', symlink_to='.') + arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') + arc.add('x', symlink_to=('y/' + '../' + outside_tree_dir)) + arc.add('y/', symlink_to=('../' * len(tempdir.split(os.path.sep)))) + + os.makedirs(outside_tree_dir) + try: + arc.open().extractall(path=tempdir, filter='tar') + finally: + os_helper.rmtree(outside_tree_dir) + os_helper.rmtree(tempdir) + class CommandLineTest(unittest.TestCase): From 7bd7b850913ff854fe1f083f3dff3ea5f32bc8c9 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 19:22:09 +0300 Subject: [PATCH 09/10] remove part of poc that used to bypass data filter it doesn't have to do with this bug --- Lib/test/test_tarfile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index c4a5b67a101368..31a7fac3ad5123 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2731,8 +2731,7 @@ def mock_chmod_side_effect(path, mode, **kwargs): with ArchiveMaker() as arc: arc.add('x', symlink_to='.') arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') - arc.add('x', symlink_to=('y/' + '../' + outside_tree_dir)) - arc.add('y/', symlink_to=('../' * len(tempdir.split(os.path.sep)))) + arc.add('x', symlink_to=outside_tree_dir) os.makedirs(outside_tree_dir) try: From c7e4e0541544aa699716e4b85b826691d20ae40d Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Sat, 24 May 2025 20:02:40 +0300 Subject: [PATCH 10/10] inode re-use guard --- Lib/tarfile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 4b5d1b94778f54..1056341b6839d3 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2410,7 +2410,9 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, for tarinfo, dirpath, original_ino, original_dev in directories: dirstat = os.stat(dirpath, follow_symlinks=False) if (dirstat.st_ino != original_ino or - dirstat.st_dev 
!= original_dev): + dirstat.st_dev != original_dev or + not stat.S_ISDIR(dirstat.st_mode) # just in case the inode was reused + ): self._dbg(1, "tarfile: Directory renamed before its " \ "attributes could be extracted %r" % dirpath) continue