Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Simplify setupext.download_or_cache. #15430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 7, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 29 additions & 72 deletions setupext.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,10 @@ def _get_xdg_cache_dir():
return pathlib.Path(cache_dir, 'matplotlib')


def get_fd_hash(fd):
"""
Compute the sha256 hash of the bytes in a file-like
"""
BLOCKSIZE = 1 << 16
def _get_hash(data):
"""Compute the sha256 hash of *data*."""
hasher = hashlib.sha256()
old_pos = fd.tell()
fd.seek(0)
buf = fd.read(BLOCKSIZE)
while buf:
hasher.update(buf)
buf = fd.read(BLOCKSIZE)
fd.seek(old_pos)
hasher.update(data)
return hasher.hexdigest()


Expand All @@ -58,10 +49,9 @@ def download_or_cache(url, sha):
Parameters
----------
url : str
The url to download

The url to download.
sha : str
The sha256 of the file
The sha256 of the file.

Returns
-------
Expand All @@ -70,52 +60,37 @@ def download_or_cache(url, sha):
"""
cache_dir = _get_xdg_cache_dir()

def get_from_cache(local_fn):
if cache_dir is None:
raise Exception("no cache dir")
buf = BytesIO((cache_dir / local_fn).read_bytes())
if get_fd_hash(buf) != sha:
return None
buf.seek(0)
return buf

def write_cache(local_fn, data):
if cache_dir is None:
raise Exception("no cache dir")
cache_dir.mkdir(parents=True, exist_ok=True)
old_pos = data.tell()
data.seek(0)
with open(cache_dir / local_fn, "xb") as fout:
fout.write(data.read())
data.seek(old_pos)

try:
return get_from_cache(sha)
except Exception:
pass
if cache_dir is not None: # Try to read from cache.
try:
data = (cache_dir / sha).read_bytes()
except IOError:
pass
else:
if _get_hash(data) == sha:
return BytesIO(data)

# jQueryUI's website blocks direct downloads from urllib.request's
# default User-Agent, but not (for example) wget; so I don't feel too
# bad passing in an empty User-Agent.
with urllib.request.urlopen(
urllib.request.Request(url, headers={"User-Agent": ""})) as req:
file_contents = BytesIO(req.read())
file_contents.seek(0)

file_sha = get_fd_hash(file_contents)
data = req.read()

file_sha = _get_hash(data)
if file_sha != sha:
raise Exception(
f"The download file does not match the expected sha. {url} was "
f"expected to have {sha} but it had {file_sha}")

try:
write_cache(sha, file_contents)
except Exception:
pass
if cache_dir is not None: # Try to cache the downloaded file.
try:
cache_dir.mkdir(parents=True, exist_ok=True)
with open(cache_dir / sha, "xb") as fout:
fout.write(data)
Comment on lines +88 to +89
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
with open(cache_dir / sha, "xb") as fout:
fout.write(data)
(cache_dir / sha).write_bytes(data)

would match how the cache is read (via `read_bytes`), I believe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But then this would not fail if a file with the wrong hash already exists: opening with "xb" raises if the file is already there, whereas `write_bytes` would silently overwrite it. (I don't know what we want to do in that case, which is mostly theoretical, but I guess "don't overwrite a file we didn't expect to be there" is a reasonable approach?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is an unexpected file in the cache, that could cause trouble later when reading from the cache. I think it's a valid position to claim that under this hash there should be exactly `data`. We can gracefully fix that by overwriting the file. But both ways are OK.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At worst, what will happen is that the "wrong" file will stay stuck in the cache (we didn't create it, so we can't afford to overwrite it), and later builds will redownload the file and fail to cache it, but will otherwise proceed correctly. I think this is the same behavior as before this patch?

except IOError:
pass

file_contents.seek(0)
return file_contents
return BytesIO(data)


# SHA256 hashes of the FreeType tarballs
Expand Down Expand Up @@ -183,16 +158,6 @@ def print_status(package, status):
subsequent_indent=indent))


def get_buffer_hash(fd):
BLOCKSIZE = 1 << 16
hasher = hashlib.sha256()
buf = fd.read(BLOCKSIZE)
while buf:
hasher.update(buf)
buf = fd.read(BLOCKSIZE)
return hasher.hexdigest()


@functools.lru_cache(1) # We only need to compute this once.
def get_pkg_config():
"""
Expand Down Expand Up @@ -506,18 +471,13 @@ def do_custom_build(self):
if not src_path.exists():
os.makedirs('build', exist_ok=True)

url_fmts = [
('https://downloads.sourceforge.net/project/freetype'
'/freetype2/{version}/{tarball}'),
('https://download.savannah.gnu.org/releases/freetype'
'/{tarball}')
]
tarball = f'freetype-{LOCAL_FREETYPE_VERSION}.tar.gz'

target_urls = [
url_fmt.format(version=LOCAL_FREETYPE_VERSION,
tarball=tarball)
for url_fmt in url_fmts]
(f'https://downloads.sourceforge.net/project/freetype'
f'/freetype2/{LOCAL_FREETYPE_VERSION}/{tarball}'),
(f'https://download.savannah.gnu.org/releases/freetype'
f'/{tarball}')
]

for tarball_url in target_urls:
try:
Expand All @@ -533,10 +493,7 @@ def do_custom_build(self):
f"top-level of the source repository.")

print(f"Extracting {tarball}")
# just to be sure
tar_contents.seek(0)
with tarfile.open(tarball, mode="r:gz",
fileobj=tar_contents) as tgz:
with tarfile.open(fileobj=tar_contents, mode="r:gz") as tgz:
tgz.extractall("build")

print(f"Building freetype in {src_path}")
Expand Down