Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 20 additions & 17 deletions ocrd/ocrd/cli/resmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,23 +151,26 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
if not basedir:
basedir = resmgr.location_to_resource_dir('data')

with click.progressbar(length=resdict['size']) as bar:
fpath = resmgr.download(
this_executable,
resdict['url'],
name=resdict['name'],
resource_type=resdict.get('type', resource_type),
path_in_archive=resdict.get('path_in_archive', path_in_archive),
overwrite=overwrite,
no_subdir=location in ['cwd', 'module'],
basedir=basedir,
progress_cb=lambda delta: bar.update(delta)
)
if registered == 'unregistered':
log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", this_executable, name, any_url, resmgr.user_list)
resmgr.add_to_user_database(this_executable, fpath, url=any_url)
resmgr.save_user_list()
log.info("Installed resource %s under %s", resdict['url'], fpath)
try:
with click.progressbar(length=resdict['size']) as bar:
fpath = resmgr.download(
this_executable,
resdict['url'],
name=resdict['name'],
resource_type=resdict.get('type', resource_type),
path_in_archive=resdict.get('path_in_archive', path_in_archive),
overwrite=overwrite,
no_subdir=location in ['cwd', 'module'],
basedir=basedir,
progress_cb=lambda delta: bar.update(delta)
)
if registered == 'unregistered':
log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", this_executable, name, any_url, resmgr.user_list)
resmgr.add_to_user_database(this_executable, fpath, url=any_url)
resmgr.save_user_list()
log.info("Installed resource %s under %s", resdict['url'], fpath)
except FileExistsError as exc:
log.info(str(exc))
log.info("Use in parameters as '%s'", resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is')))

@resmgr_cli.command('migrate')
Expand Down
19 changes: 13 additions & 6 deletions ocrd/ocrd/resource_manager.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from pathlib import Path
from os.path import join
from os import environ, listdir, getcwd, path, unlink
from shutil import copytree, rmtree
from json import loads
from os import environ, listdir, getcwd, path
from fnmatch import filter as apply_glob
from shutil import copytree
from datetime import datetime
from tarfile import open as open_tarfile
from urllib.parse import urlparse, unquote
Expand Down Expand Up @@ -232,7 +233,7 @@ def parameter_usage(self, name, usage='as-is'):
return Path(name).stem
raise ValueError("No such usage '%s'" % usage)

def _download_impl(self, url, filename, progress_cb=None):
def _download_impl(self, url, filename, progress_cb=None, size=None):
log = getLogger('ocrd.resource_manager._download_impl')
log.info("Downloading %s to %s" % (url, filename))
with open(filename, 'wb') as f:
Expand Down Expand Up @@ -293,9 +294,15 @@ def download(
name = Path(unquote(url_parsed.path)).name
fpath = Path(destdir, name)
is_url = url.startswith('https://') or url.startswith('http://')
if fpath.exists() and not overwrite:
log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
return fpath
if fpath.exists():
if not overwrite:
raise FileExistsError("%s %s already exists but --overwrite is not set" % ('Directory' if fpath.is_dir() else 'File', fpath))
if fpath.is_dir():
log.info("Removing existing target directory {fpath}")
rmtree(str(fpath))
else:
log.info("Removing existing target file {fpath}")
unlink(str(fpath))
destdir.mkdir(parents=True, exist_ok=True)
if resource_type in ('file', 'directory'):
if is_url:
Expand All @@ -313,7 +320,7 @@ def download(
log.info("Extracting archive to %s/out" % tempdir)
with open_tarfile('../download.tar.xx', 'r:*') as tar:
tar.extractall()
log.info("Copying '%s' from archive to %s" % (path_in_archive, fpath))
log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
copytree(path_in_archive, str(fpath))
return fpath

Expand Down
31 changes: 24 additions & 7 deletions tests/cli/test_resmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from ocrd.resource_manager import OcrdResourceManager

runner = CliRunner()
executable = 'ocrd-tesserocr-recognize'
executable = 'ocrd-dummy'

@fixture
def mgr_with_tmp_path(tmp_path):
    """Build an ``OcrdResourceManager`` rooted in pytest's ``tmp_path``.

    The manager's data home, user directory and config home are all set to
    ``tmp_path``.

    Returns:
        A ``(tmp_path, manager, env)`` triple, where ``env`` maps
        ``XDG_DATA_HOME`` and ``XDG_CONFIG_HOME`` to ``str(tmp_path)``.
    """
    print(tmp_path)
    home = str(tmp_path)
    manager = OcrdResourceManager(
        xdg_data_home=tmp_path,
        userdir=tmp_path,
        xdg_config_home=tmp_path,
    )
    cli_env = dict(XDG_DATA_HOME=home, XDG_CONFIG_HOME=home)
    return tmp_path, manager, cli_env
Expand All @@ -21,29 +22,30 @@ def test_url_tool_name_unregistered(mgr_with_tmp_path):
We should add a test for the -n URL TOOL NAME use-case as well (both as an unregistered resource and as URL-override).
"""
tmp_path, mgr, env = mgr_with_tmp_path
print(mgr.list_installed('ocrd-tesserocr-recognize')[0][1])
rsrcs_before = len(mgr.list_installed('ocrd-tesserocr-recognize')[0][1])
print(mgr.list_installed(executable)[0][1])
rsrcs_before = len(mgr.list_installed(executable)[0][1])

# add an unregistered resource
url = 'https://github.com/tesseract-ocr/tessdata_best/raw/main/dzo.traineddata'
name = 'dzo.traineddata'
r = runner.invoke(resmgr_cli, ['download', '--allow-uninstalled', '--any-url', url, executable, name], env=env)
mgr.load_resource_list(mgr.user_list)

rsrcs = mgr.list_installed('ocrd-tesserocr-recognize')[0][1]
rsrcs = mgr.list_installed(executable)[0][1]
assert len(rsrcs) == rsrcs_before + 1
assert rsrcs[0]['name'] == name
assert rsrcs[0]['url'] == url

# add resource with different URL but same name
url2 = url.replace('dzo', 'bos')
r = runner.invoke(resmgr_cli, ['download', '--allow-uninstalled', '--any-url', url2, executable, name], env=env)
assert 'already exists and overwrite is False' in r.output
assert 'already exists but --overwrite is not set' in r.output
r = runner.invoke(resmgr_cli, ['download', '--overwrite', '--allow-uninstalled', '--any-url', url2, executable, name], env=env)
assert 'already exists and overwrite is False' not in r.output
assert 'already exists but --overwrite is not set' not in r.output

mgr.load_resource_list(mgr.user_list)

rsrcs = mgr.list_installed('ocrd-tesserocr-recognize')[0][1]
rsrcs = mgr.list_installed(executable)[0][1]
print(rsrcs)
assert len(rsrcs) == rsrcs_before + 1
assert rsrcs[0]['name'] == name
Expand Down Expand Up @@ -73,3 +75,18 @@ def test_directory_copy(mgr_with_tmp_path):
assert not r.exception
assert Path(mgr_path / 'ocrd-resources' / proc).exists()
assert directory_size(mgr_path / 'ocrd-resources' / proc / res_name) == 30

r = runner.invoke(
resmgr_cli,
['download', '--allow-uninstalled', '--any-url', tmp_path, proc, res_name],
env=env,
catch_exceptions=False
)
assert 'already exists but --overwrite is not set' in r.output
r = runner.invoke(
resmgr_cli,
['download', '--overwrite', '--allow-uninstalled', '--any-url', tmp_path, proc, res_name],
env=env,
catch_exceptions=False
)
assert 'already exists but --overwrite is not set' not in r.output
10 changes: 5 additions & 5 deletions tests/test_resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):
assert mgr.add_to_user_database('ocrd-foo', f)
# pdb.set_trace()

mgr.list_installed()
mgr.list_installed('ocrd-foo')
proc = 'ocrd-tesserocr-recognize'
# TODO mock request
fpath = mgr.download(proc, CONST_RESOURCE_URL_LAYOUT, mgr.location_to_resource_dir('data'))
Expand All @@ -55,7 +55,7 @@ def test_resources_manager_from_environment(tmp_path, monkeypatch):
assert f.exists()
assert f == mgr.user_list
assert mgr.add_to_user_database('ocrd-foo', f)
mgr.list_installed()
mgr.list_installed('ocrd-foo')
proc = 'ocrd-tesserocr-recognize'
fpath = mgr.download(proc, CONST_RESOURCE_URL_LAYOUT, mgr.location_to_resource_dir('data'))
assert fpath.exists()
Expand All @@ -66,14 +66,14 @@ def test_resources_manager_from_environment(tmp_path, monkeypatch):
def test_resources_manager_config_explicite(tmp_path):

# act
mgr = OcrdResourceManager(xdg_config_home=str(tmp_path))
mgr = OcrdResourceManager(xdg_config_home=str(tmp_path / 'config'), xdg_data_home=str(tmp_path / 'data'))

# assert
f = tmp_path / 'ocrd' / CONST_RESOURCE_YML
f = tmp_path / 'config' / 'ocrd' / CONST_RESOURCE_YML
assert f.exists()
assert f == mgr.user_list
assert mgr.add_to_user_database('ocrd-foo', f)
mgr.list_installed()
mgr.list_installed(executable='ocrd-foo')
proc = 'ocrd-tesserocr-recognize'
fpath = mgr.download(proc, CONST_RESOURCE_URL_LAYOUT, mgr.location_to_resource_dir('data'))
assert fpath.exists()
Expand Down