Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions ocrd/ocrd/cli/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,12 @@ def zip_cli():
help='Basename of the METS file.',
show_default=True)
@click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True)
@click.option('-I', '--in-place', help="Replace workspace with bag (like bagit.py does)", is_flag=True)
@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default="mets.xml")
@click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum")
@click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True)
@click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False)
@click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
def bag(directory, mets_basename, dest, identifier, in_place, mets, base_version_checksum, tag_file, skip_zip, processes):
def bag(directory, mets_basename, dest, identifier, mets, base_version_checksum, tag_file, skip_zip, processes):
"""
Bag workspace as OCRD-ZIP at DEST
"""
Expand All @@ -60,8 +59,7 @@ def bag(directory, mets_basename, dest, identifier, in_place, mets, base_version
ocrd_base_version_checksum=base_version_checksum,
processes=processes,
tag_files=tag_file,
skip_zip=skip_zip,
in_place=in_place
skip_zip=skip_zip
)

# ----------------------------------------------------------------------
Expand Down
22 changes: 4 additions & 18 deletions ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,7 @@ def __init__(self, resolver, strict=False):
self.resolver = resolver
self.strict = strict

def _serialize_bag(self, workspace, bagdir, dest, in_place, skip_zip):
if in_place:
if not exists(BACKUPDIR):
makedirs(BACKUPDIR)
backupdir = mkdtemp(dir=BACKUPDIR)
move(workspace.directory, backupdir)
def _serialize_bag(self, workspace, bagdir, dest, skip_zip):
if skip_zip:
move(bagdir, dest)
else:
Expand Down Expand Up @@ -139,7 +134,6 @@ def bag(self,
ocrd_base_version_checksum=None,
processes=1,
skip_zip=False,
in_place=False,
tag_files=None
):
"""
Expand All @@ -155,30 +149,22 @@ def bag(self,
ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt
processes (integer): Number of parallel processes checksumming
skip_zip (boolean): Whether to leave directory unzipped
in_place (boolean): Whether to **replace** the workspace with its BagIt variant
tag_files (list<string>): Path names of additional tag files to be bagged at the root of the bag
"""
if in_place and (dest is not None):
raise Exception("Setting 'dest' and 'in_place' is a contradiction")
if in_place and not skip_zip:
raise Exception("Setting 'skip_zip' and not 'in_place' is a contradiction")

if tag_files is None:
tag_files = []

# create bagdir
bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

if dest is None:
if in_place:
dest = workspace.directory
elif not skip_zip:
if not skip_zip:
dest = '%s.ocrd.zip' % workspace.directory
else:
dest = '%s.ocrd' % workspace.directory

log = getLogger('ocrd.workspace_bagger')
log.info("Bagging %s to %s (temp dir %s)", workspace.directory, '(in-place)' if in_place else dest, bagdir)
log.info("Bagging %s to %s (temp dir %s)", workspace.directory, dest, bagdir)

# create data dir
makedirs(join(bagdir, 'data'))
Expand All @@ -201,7 +187,7 @@ def bag(self,
bag.save()

# ZIP it
self._serialize_bag(workspace, bagdir, dest, in_place, skip_zip)
self._serialize_bag(workspace, bagdir, dest, skip_zip)

log.info('Created bag at %s', dest)
return dest
Expand Down
37 changes: 9 additions & 28 deletions tests/validator/test_workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,49 +30,29 @@ def setUp(self):
def tearDown(self):
rmtree(self.tempdir)

def test_bad_inplace_and_dest(self):
with self.assertRaisesRegex(Exception, "Setting 'dest' and 'in_place' is a contradiction"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, dest='/x/y/z')

def test_bad_skip_zip_and_dest(self):
with self.assertRaisesRegex(Exception, "Setting 'skip_zip' and not 'in_place' is a contradiction"):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, skip_zip=False)

def test_bag_inplace(self):
self.bagger.bag(
self.workspace,
'kant_aufklaerung_1784',
skip_zip=True,
in_place=True,
ocrd_base_version_checksum='123',
tag_files=[
README_FILE
],
)

def test_bag_zip_and_spill(self):
self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo'
self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), join(self.tempdir, 'out'))

def test_bag_zip_and_spill_wo_dest(self):
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), self.tempdir)

def test_bag_wo_dest(self):
makedirs(BACKUPDIR)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, skip_zip=True)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=True)

def test_bag_wo_dest_zip(self):
makedirs(BACKUPDIR)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=True)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=True)

def test_bag_partial_http_nostrict(self):
self.bagger.strict = False
makedirs(BACKUPDIR)
self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784')

def test_bag_full(self):
self.bagger.strict = True
Expand All @@ -96,13 +76,14 @@ def test_spill_derived_dest_exists(self):
def test_spill_derived_dest(self):
bag_dest = join(self.bagdir, 'foo.ocrd.zip')
spill_dest = join(self.bagdir, 'foo')
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=False, skip_zip=False, dest=bag_dest)
self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=bag_dest)
self.bagger.spill(bag_dest, self.bagdir)
self.assertTrue(exists(spill_dest))

def test_bag_with_changed_metsname(self):
# arrange
workspace_dir = join(self.bagdir, "changed-mets-test")
bag_dest = join(self.bagdir, 'bagged-workspace')
copytree(join(assets.path_to('kant_aufklaerung_1784'), "data"), workspace_dir)
new_metsname = "other-metsname.xml"
old_metspath = join(workspace_dir, "mets.xml")
Expand All @@ -111,13 +92,13 @@ def test_bag_with_changed_metsname(self):
workspace = Workspace(self.resolver, directory=workspace_dir, mets_basename=new_metsname)

# act
self.bagger.bag(workspace, "changed-mets-test", ocrd_mets=new_metsname, in_place=True, skip_zip=True)
self.bagger.bag(workspace, "changed-mets-test", dest=bag_dest, ocrd_mets=new_metsname, skip_zip=True)

# assert
bag_metspath = join(workspace_dir, "data", new_metsname)
bag_metspath = join(bag_dest, "data", new_metsname)
self.assertTrue(exists(bag_metspath), f"Mets not existing. Expected: {bag_metspath}")

bag_info_path = join(workspace_dir, "bag-info.txt")
bag_info_path = join(bag_dest, "bag-info.txt")
tags = _load_tag_file(bag_info_path)
self.assertTrue("Ocrd-Mets" in tags, "expect 'Ocrd-Mets'-key in bag-info.txt")
self.assertEqual(tags["Ocrd-Mets"], new_metsname, "Ocrd-Mets key present but wrong value")
Expand Down