Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions ocrd/ocrd/resource_manager.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from pathlib import Path
from os.path import join
from os import environ, listdir, getcwd, path, unlink
from shutil import copytree, rmtree
from json import loads
from os import environ, listdir, getcwd, path
from shutil import copytree, rmtree, copy
from fnmatch import filter as apply_glob
from datetime import datetime
from tarfile import open as open_tarfile
from urllib.parse import urlparse, unquote
from subprocess import run, PIPE
from zipfile import ZipFile

import requests
from yaml import safe_load, safe_dump
import magic

# https://github.com/OCR-D/core/issues/867
# https://stackoverflow.com/questions/50900727/skip-converting-entities-while-loading-a-yaml-string-using-pyyaml
Expand Down Expand Up @@ -310,18 +309,27 @@ def download(
else:
self._copy_impl(url, fpath, progress_cb)
elif resource_type == 'archive':
archive_fname = 'download.tar.xx'
with pushd_popd(tempdir=True) as tempdir:
if is_url:
self._download_impl(url, 'download.tar.xx', progress_cb)
self._download_impl(url, archive_fname, progress_cb)
else:
self._copy_impl(url, 'download.tar.xx', progress_cb)
self._copy_impl(url, archive_fname, progress_cb)
Path('out').mkdir()
with pushd_popd('out'):
log.info("Extracting archive to %s/out" % tempdir)
with open_tarfile('../download.tar.xx', 'r:*') as tar:
tar.extractall()
log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
copytree(path_in_archive, str(fpath))
mimetype = magic.from_file(f'../{archive_fname}', mime=True)
log.info("Extracting %s archive to %s/out" % (mimetype, tempdir))
if mimetype == 'application/zip':
with ZipFile(f'../{archive_fname}', 'r') as zipf:
zipf.extractall()
else:
with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
tar.extractall()
log.info("Copying '%s' from archive to %s" % (path_in_archive, fpath))
if Path(path_in_archive).is_dir():
copytree(path_in_archive, str(fpath))
else:
copy(path_in_archive, str(fpath))
return fpath

def _dedup_database(self, database=None, dedup_key='name'):
Expand Down
1 change: 1 addition & 0 deletions ocrd/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ pyyaml
Deprecated == 1.2.0
memory-profiler >= 0.58.0
sparklines >= 0.4.2
python-magic