diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 212b71f6509740..1056341b6839d3 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2389,20 +2389,34 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, tarinfo = self._get_extract_tarinfo(member, filter_function, path) if tarinfo is None: continue + self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) if tarinfo.isdir(): # For directories, delay setting attributes until later, # since permissions can interfere with extraction and # extracting contents can reset mtime. - directories.append(tarinfo) - self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), - numeric_owner=numeric_owner) + # We also keep the original inode and device, to detect + # if it was changed during extraction. + dirpath = os.path.join(path, tarinfo.name) + dirpath = self._transform_destination_path(dirpath) + targetstat = os.stat(dirpath, follow_symlinks=False) + directories.append((tarinfo, dirpath, targetstat.st_ino, + targetstat.st_dev)) # Reverse sort directories. - directories.sort(key=lambda a: a.name, reverse=True) + directories.sort(key=lambda a: a[0].name, reverse=True) # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) + for tarinfo, dirpath, original_ino, original_dev in directories: + dirstat = os.stat(dirpath, follow_symlinks=False) + if (dirstat.st_ino != original_ino or + dirstat.st_dev != original_dev or + not stat.S_ISDIR(dirstat.st_mode) # just in case the inode was reused + ): + self._dbg(1, "tarfile: Directory renamed before its " \ + "attributes could be extracted %r" % dirpath) + continue + try: self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) @@ -2516,16 +2530,18 @@ def extractfile(self, member): # blkdev, etc.), return None instead of a file object. 
return None + def _transform_destination_path(self, targetpath): + # Build the destination pathname, replacing + # forward slashes with platform-specific separators. + targetpath = targetpath.rstrip("/") + return targetpath.replace("/", os.sep) + def _extract_member(self, tarinfo, targetpath, set_attrs=True, numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) + targetpath = self._transform_destination_path(targetpath) # Create all upper directories. upperdirs = os.path.dirname(targetpath) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2018a20afd1b18..31a7fac3ad5123 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2715,6 +2715,31 @@ def test_useful_error_message_when_modules_missing(self): str(excinfo.exception), ) + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") + @unittest.mock.patch('os.chmod') + def test_deferred_directory_attributes_update(self, mock_chmod): + # Regression test for gh-127987: setting attributes on arbitrary files + tempdir = os.path.join(TEMPDIR, 'test127987') + def mock_chmod_side_effect(path, mode, **kwargs): + target_path = os.path.realpath(path) + if os.path.commonpath([target_path, tempdir]) != tempdir: + raise Exception("should not try to chmod anything outside the destination", target_path) + mock_chmod.side_effect = mock_chmod_side_effect + + outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') + with ArchiveMaker() as arc: + arc.add('x', symlink_to='.') + arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') + arc.add('x', symlink_to=outside_tree_dir) + + os.makedirs(outside_tree_dir) + try: + 
arc.open().extractall(path=tempdir, filter='tar') + finally: + os_helper.rmtree(outside_tree_dir) + os_helper.rmtree(tempdir) + class CommandLineTest(unittest.TestCase):