Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e5bd736

Browse files
bpo-39595: Improve zipfile.Path performance (#18406)
* Improve zipfile.Path performance on zipfiles with a large number of entries. * 📜🤖 Added by blurb_it. * Add bpo to blurb * Sync with importlib_metadata 1.5 (6fe70ca) * Update blurb. * Remove compatibility code * Add stubs module, omitted from earlier commit Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
1 parent e6be9b5 commit e5bd736

7 files changed

Lines changed: 254 additions & 68 deletions

File tree

Lib/importlib/metadata.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ class FastPath:
391391

392392
def __init__(self, root):
393393
self.root = root
394+
self.base = os.path.basename(root).lower()
394395

395396
def joinpath(self, child):
396397
return pathlib.Path(self.root, child)
@@ -413,12 +414,11 @@ def zip_children(self):
413414
)
414415

415416
def is_egg(self, search):
416-
root_n_low = os.path.split(self.root)[1].lower()
417-
417+
base = self.base
418418
return (
419-
root_n_low == search.normalized + '.egg'
420-
or root_n_low.startswith(search.prefix)
421-
and root_n_low.endswith('.egg'))
419+
base == search.versionless_egg_name
420+
or base.startswith(search.prefix)
421+
and base.endswith('.egg'))
422422

423423
def search(self, name):
424424
for child in self.children():
@@ -439,6 +439,7 @@ class Prepared:
439439
prefix = ''
440440
suffixes = '.dist-info', '.egg-info'
441441
exact_matches = [''][:0]
442+
versionless_egg_name = ''
442443

443444
def __init__(self, name):
444445
self.name = name
@@ -448,6 +449,7 @@ def __init__(self, name):
448449
self.prefix = self.normalized + '-'
449450
self.exact_matches = [
450451
self.normalized + suffix for suffix in self.suffixes]
452+
self.versionless_egg_name = self.normalized + '.egg'
451453

452454

453455
class MetadataPathFinder(DistributionFinder):

Lib/test/test_importlib/fixtures.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,28 @@ def tempdir_as_cwd():
4747
yield tmp
4848

4949

50-
class SiteDir:
50+
@contextlib.contextmanager
51+
def install_finder(finder):
52+
sys.meta_path.append(finder)
53+
try:
54+
yield
55+
finally:
56+
sys.meta_path.remove(finder)
57+
58+
59+
class Fixtures:
5160
def setUp(self):
5261
self.fixtures = ExitStack()
5362
self.addCleanup(self.fixtures.close)
63+
64+
65+
class SiteDir(Fixtures):
66+
def setUp(self):
67+
super(SiteDir, self).setUp()
5468
self.site_dir = self.fixtures.enter_context(tempdir())
5569

5670

57-
class OnSysPath:
71+
class OnSysPath(Fixtures):
5872
@staticmethod
5973
@contextlib.contextmanager
6074
def add_sys_path(dir):
@@ -198,3 +212,8 @@ def build_files(file_defs, prefix=pathlib.Path()):
198212
def DALS(str):
199213
"Dedent and left-strip"
200214
return textwrap.dedent(str).lstrip()
215+
216+
217+
class NullFinder:
218+
def find_module(self, name):
219+
pass
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import unittest
2+
3+
4+
class fake_filesystem_unittest:
5+
"""
6+
Stubbed version of the pyfakefs module
7+
"""
8+
class TestCase(unittest.TestCase):
9+
def setUpPyfakefs(self):
10+
self.skipTest("pyfakefs not available")

Lib/test/test_importlib/test_main.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
import unittest
88
import importlib.metadata
99

10+
try:
11+
import pyfakefs.fake_filesystem_unittest as ffs
12+
except ImportError:
13+
from .stubs import fake_filesystem_unittest as ffs
14+
1015
from . import fixtures
1116
from importlib.metadata import (
1217
Distribution, EntryPoint,
@@ -185,6 +190,33 @@ def test_egg(self):
185190
version('foo')
186191

187192

193+
class MissingSysPath(fixtures.OnSysPath, unittest.TestCase):
194+
site_dir = '/does-not-exist'
195+
196+
def test_discovery(self):
197+
"""
198+
Discovering distributions should succeed even if
199+
there is an invalid path on sys.path.
200+
"""
201+
importlib.metadata.distributions()
202+
203+
204+
class InaccessibleSysPath(fixtures.OnSysPath, ffs.TestCase):
205+
site_dir = '/access-denied'
206+
207+
def setUp(self):
208+
super(InaccessibleSysPath, self).setUp()
209+
self.setUpPyfakefs()
210+
self.fs.create_dir(self.site_dir, perm_bits=000)
211+
212+
def test_discovery(self):
213+
"""
214+
Discovering distributions should succeed even if
215+
there is an invalid path on sys.path.
216+
"""
217+
list(importlib.metadata.distributions())
218+
219+
188220
class TestEntryPoints(unittest.TestCase):
189221
def __init__(self, *args):
190222
super(TestEntryPoints, self).__init__(*args)

Lib/test/test_zipfile.py

Lines changed: 103 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2724,16 +2724,71 @@ def test_extract_command(self):
27242724
self.assertEqual(f.read(), zf.read(zi))
27252725

27262726

2727+
class TestExecutablePrependedZip(unittest.TestCase):
2728+
"""Test our ability to open zip files with an executable prepended."""
2729+
2730+
def setUp(self):
2731+
self.exe_zip = findfile('exe_with_zip', subdir='ziptestdata')
2732+
self.exe_zip64 = findfile('exe_with_z64', subdir='ziptestdata')
2733+
2734+
def _test_zip_works(self, name):
2735+
# bpo28494 sanity check: ensure is_zipfile works on these.
2736+
self.assertTrue(zipfile.is_zipfile(name),
2737+
f'is_zipfile failed on {name}')
2738+
# Ensure we can operate on these via ZipFile.
2739+
with zipfile.ZipFile(name) as zipfp:
2740+
for n in zipfp.namelist():
2741+
data = zipfp.read(n)
2742+
self.assertIn(b'FAVORITE_NUMBER', data)
2743+
2744+
def test_read_zip_with_exe_prepended(self):
2745+
self._test_zip_works(self.exe_zip)
2746+
2747+
def test_read_zip64_with_exe_prepended(self):
2748+
self._test_zip_works(self.exe_zip64)
2749+
2750+
@unittest.skipUnless(sys.executable, 'sys.executable required.')
2751+
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
2752+
'Test relies on #!/bin/bash working.')
2753+
def test_execute_zip2(self):
2754+
output = subprocess.check_output([self.exe_zip, sys.executable])
2755+
self.assertIn(b'number in executable: 5', output)
2756+
2757+
@unittest.skipUnless(sys.executable, 'sys.executable required.')
2758+
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
2759+
'Test relies on #!/bin/bash working.')
2760+
def test_execute_zip64(self):
2761+
output = subprocess.check_output([self.exe_zip64, sys.executable])
2762+
self.assertIn(b'number in executable: 5', output)
2763+
2764+
27272765
# Poor man's technique to consume a (smallish) iterable.
27282766
consume = tuple
27292767

27302768

2769+
# from jaraco.itertools 5.0
2770+
class jaraco:
2771+
class itertools:
2772+
class Counter:
2773+
def __init__(self, i):
2774+
self.count = 0
2775+
self._orig_iter = iter(i)
2776+
2777+
def __iter__(self):
2778+
return self
2779+
2780+
def __next__(self):
2781+
result = next(self._orig_iter)
2782+
self.count += 1
2783+
return result
2784+
2785+
27312786
def add_dirs(zf):
27322787
"""
27332788
Given a writable zip file zf, inject directory entries for
27342789
any directories implied by the presence of children.
27352790
"""
2736-
for name in zipfile.Path._implied_dirs(zf.namelist()):
2791+
for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()):
27372792
zf.writestr(name, b"")
27382793
return zf
27392794

@@ -2774,44 +2829,6 @@ def build_alpharep_fixture():
27742829
return zf
27752830

27762831

2777-
class TestExecutablePrependedZip(unittest.TestCase):
2778-
"""Test our ability to open zip files with an executable prepended."""
2779-
2780-
def setUp(self):
2781-
self.exe_zip = findfile('exe_with_zip', subdir='ziptestdata')
2782-
self.exe_zip64 = findfile('exe_with_z64', subdir='ziptestdata')
2783-
2784-
def _test_zip_works(self, name):
2785-
# bpo-28494 sanity check: ensure is_zipfile works on these.
2786-
self.assertTrue(zipfile.is_zipfile(name),
2787-
f'is_zipfile failed on {name}')
2788-
# Ensure we can operate on these via ZipFile.
2789-
with zipfile.ZipFile(name) as zipfp:
2790-
for n in zipfp.namelist():
2791-
data = zipfp.read(n)
2792-
self.assertIn(b'FAVORITE_NUMBER', data)
2793-
2794-
def test_read_zip_with_exe_prepended(self):
2795-
self._test_zip_works(self.exe_zip)
2796-
2797-
def test_read_zip64_with_exe_prepended(self):
2798-
self._test_zip_works(self.exe_zip64)
2799-
2800-
@unittest.skipUnless(sys.executable, 'sys.executable required.')
2801-
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
2802-
'Test relies on #!/bin/bash working.')
2803-
def test_execute_zip2(self):
2804-
output = subprocess.check_output([self.exe_zip, sys.executable])
2805-
self.assertIn(b'number in executable: 5', output)
2806-
2807-
@unittest.skipUnless(sys.executable, 'sys.executable required.')
2808-
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
2809-
'Test relies on #!/bin/bash working.')
2810-
def test_execute_zip64(self):
2811-
output = subprocess.check_output([self.exe_zip64, sys.executable])
2812-
self.assertIn(b'number in executable: 5', output)
2813-
2814-
28152832
class TestPath(unittest.TestCase):
28162833
def setUp(self):
28172834
self.fixtures = contextlib.ExitStack()
@@ -2849,6 +2866,14 @@ def test_iterdir_and_types(self):
28492866
i, = h.iterdir()
28502867
assert i.is_file()
28512868

2869+
def test_subdir_is_dir(self):
2870+
for alpharep in self.zipfile_alpharep():
2871+
root = zipfile.Path(alpharep)
2872+
assert (root / 'b').is_dir()
2873+
assert (root / 'b/').is_dir()
2874+
assert (root / 'g').is_dir()
2875+
assert (root / 'g/').is_dir()
2876+
28522877
def test_open(self):
28532878
for alpharep in self.zipfile_alpharep():
28542879
root = zipfile.Path(alpharep)
@@ -2910,6 +2935,45 @@ def test_missing_dir_parent(self):
29102935
root = zipfile.Path(alpharep)
29112936
assert (root / 'missing dir/').parent.at == ''
29122937

2938+
def test_mutability(self):
2939+
"""
2940+
If the underlying zipfile is changed, the Path object should
2941+
reflect that change.
2942+
"""
2943+
for alpharep in self.zipfile_alpharep():
2944+
root = zipfile.Path(alpharep)
2945+
a, b, g = root.iterdir()
2946+
alpharep.writestr('foo.txt', 'foo')
2947+
alpharep.writestr('bar/baz.txt', 'baz')
2948+
assert any(
2949+
child.name == 'foo.txt'
2950+
for child in root.iterdir())
2951+
assert (root / 'foo.txt').read_text() == 'foo'
2952+
baz, = (root / 'bar').iterdir()
2953+
assert baz.read_text() == 'baz'
2954+
2955+
HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13
2956+
2957+
def huge_zipfile(self):
2958+
"""Create a read-only zipfile with a huge number of entries entries."""
2959+
strm = io.BytesIO()
2960+
zf = zipfile.ZipFile(strm, "w")
2961+
for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)):
2962+
zf.writestr(entry, entry)
2963+
zf.mode = 'r'
2964+
return zf
2965+
2966+
def test_joinpath_constant_time(self):
2967+
"""
2968+
Ensure joinpath on items in zipfile is linear time.
2969+
"""
2970+
root = zipfile.Path(self.huge_zipfile())
2971+
entries = jaraco.itertools.Counter(root.iterdir())
2972+
for entry in entries:
2973+
entry.joinpath('suffix')
2974+
# Check the file iterated all items
2975+
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
2976+
29132977

29142978
if __name__ == "__main__":
29152979
unittest.main()

0 commit comments

Comments
(0)