Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions ocrd/ocrd/cli/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,22 @@ def validate(src, **kwargs):
print(report)
if not report.is_valid:
sys.exit(1)

# ----------------------------------------------------------------------
# ocrd zip update
# ----------------------------------------------------------------------

@zip_cli.command('update')
@click.argument(
    'src',
    type=click.Path(dir_okay=True, readable=True, resolve_path=True),
    required=True,
)
@click.argument(
    'dest',
    type=click.Path(dir_okay=True, readable=True, writable=True, resolve_path=True),
    required=False,
)
@click.option('-o', '--overwrite', help="overwrite bag in SRC", is_flag=True)
def update(src, dest=None, overwrite=False):
    """
    Recreate files containing checksums (manifest-sha512.txt, tagmanifest-sha512.txt and
    'Payload-Oxum' contained in bag-info.txt) of an OCRD-ZIP.

    Open the bag (zip file or directory) ``src``, create or update its manifests/checksums and
    output to (zip file or directory) ``dest``. It is also possible to output to ``src`` / overwrite
    ``src`` in place when ``--overwrite``-flag is given.
    """
    # NOTE: the docstring above doubles as the command's help text, so its
    # wording is user-facing.
    # All argument checks (dest/overwrite exclusivity, existence of src) are
    # delegated to WorkspaceBagger.recreate_checksums.
    bagger = WorkspaceBagger(Resolver())
    bagger.recreate_checksums(src, dest=dest, overwrite=overwrite)
59 changes: 57 additions & 2 deletions ocrd/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from datetime import datetime
from os import makedirs, chdir, walk
from os.path import join, isdir, basename, exists, relpath
from pathlib import Path
from shutil import make_archive, rmtree, copyfile, move
from tempfile import mkdtemp
from tempfile import mkdtemp, TemporaryDirectory
import re
import tempfile
import sys
from bagit import Bag, make_manifests, _load_tag_file # pylint: disable=no-name-in-module
from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
from distutils.dir_util import copy_tree

from ocrd_utils import (
pushd_popd,
Expand Down Expand Up @@ -270,3 +272,56 @@ def validate(self, bag):
- https://ocr-d.github.io/bagit-profile.json
- https://ocr-d.github.io/bagit-profile.yml
"""
pass

def recreate_checksums(self, src, dest=None, overwrite=False):
    """
    (Re)create the files containing the checksums of a bag.

    Regenerates ``manifest-sha512.txt`` and ``tagmanifest-sha512.txt`` with
    the bagit helpers and sets 'Payload-Oxum' in ``bag-info.txt`` to the
    byte/file count reported for the ``data`` directory.

    A zipped bag is unpacked into a temporary directory, updated there,
    re-archived and moved to its target. An unzipped bag is updated in place
    (``overwrite``) or copied to ``dest`` first and updated there.

    Arguments:
        src (string): Path to the bag. May be a zipped or an unzipped bag
        dest (string): Path where the result should be stored. Must not be
            given if ``overwrite`` is set
        overwrite (bool): Replace the bag at ``src`` with the updated bag

    Raises:
        ValueError: if both or neither of ``dest`` and ``overwrite`` are given
        FileNotFoundError: if ``src`` does not exist or the bag has no
            ``data`` directory
    """
    # ValueError/FileNotFoundError are subclasses of Exception, so callers
    # catching the previously raised bare Exception keep working.
    if overwrite and dest:
        raise ValueError("Setting 'dest' and 'overwrite' is a contradiction")
    if not overwrite and not dest:
        raise ValueError("For checksum recreation 'dest' must be provided")
    src_path = Path(src)
    if not src_path.exists():
        raise FileNotFoundError("Path to bag not existing")
    # A regular file is treated as a zipped bag, anything else as a directory
    is_zipped = src_path.is_file()

    with TemporaryDirectory() as tempdir:
        if is_zipped:
            # Unpack into a sub-directory so that the new archive can later be
            # written next to (not inside) the bag within the same tempdir.
            path_to_bag = Path(tempdir, "bag")
            path_to_bag.mkdir()
            unzip_file_to_dir(src, str(path_to_bag))
            if not path_to_bag.joinpath("data").exists():
                raise FileNotFoundError("data directory of bag not found")
        else:
            path_to_bag = src_path if overwrite else Path(dest)
            if not src_path.joinpath("data").exists():
                raise FileNotFoundError(f"data directory of bag not found at {src}")
            if not overwrite:
                path_to_bag.mkdir(parents=True, exist_ok=True)
                copy_tree(src, dest)

        # The bagit helpers below work on paths relative to the bag root
        with pushd_popd(path_to_bag):
            n_bytes, n_files = make_manifests("data", 1, ["sha512"])

            bag_infos = _load_tag_file("bag-info.txt")
            bag_infos["Payload-Oxum"] = f"{n_bytes}.{n_files}"
            _make_tag_file("bag-info.txt", bag_infos)
            # Must run last: the tag manifest covers the files written above
            _make_tagmanifest_file("sha512", ".")

        if is_zipped:
            name = src_path.name
            if name.endswith(".zip"):
                name = name[:-4]
            # Build the archive inside the temporary directory instead of the
            # current working directory: the cwd may be read-only or already
            # contain an unrelated '<name>.zip', and a failed move would
            # otherwise leave a stray file behind.
            archive_dir = Path(tempdir, "archive")
            archive_dir.mkdir()
            zip_path = make_archive(str(archive_dir / name), "zip", str(path_to_bag))
            move(zip_path, src if overwrite else dest)
63 changes: 61 additions & 2 deletions tests/validator/test_workspace_bagger.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from os import makedirs
from os.path import join, abspath, exists
from shutil import copytree, rmtree, move
from shutil import copytree, rmtree, move, make_archive
from tempfile import mkdtemp
from bagit import _load_tag_file
from bagit import _load_tag_file, Bag

from tests.base import TestCase, main, assets # pylint: disable=import-error,no-name-in-module

from ocrd.workspace import Workspace
from ocrd.workspace_bagger import WorkspaceBagger, BACKUPDIR
from ocrd.resolver import Resolver
from ocrd_utils import unzip_file_to_dir

README_FILE = abspath('README.md')

Expand Down Expand Up @@ -141,6 +142,64 @@ def test_spill_with_changed_metsname(self):
self.assertTrue(exists(join(spill_dest, new_metsname)),
"expected mets-file to be '{new_metsname}'")

def test_recreate_checksums_param_validation(self):
    """recreate_checksums rejects a missing target as well as a contradictory one."""
    # neither 'dest' nor 'overwrite' given
    missing_target = "For checksum recreation 'dest' must be provided"
    with self.assertRaisesRegex(Exception, missing_target):
        self.bagger.recreate_checksums("src/path")

    # both 'dest' and 'overwrite' given
    contradiction = "Setting 'dest' and 'overwrite' is a contradiction"
    with self.assertRaisesRegex(Exception, contradiction):
        self.bagger.recreate_checksums("src/path", "dest/path", overwrite=True)

def test_recreate_checksums_overwrite_unzipped(self):
    """An unzipped bag invalidated by a renamed payload file is valid again after an in-place update."""
    payload_dir = join(self.bagdir, "data")
    original_mets = join(payload_dir, "mets.xml")
    renamed_mets = join(payload_dir, "mets-neu.xml")

    # arrange: start from a valid bag, then invalidate it by renaming a payload file
    assert Bag(self.bagdir).is_valid(), "tests arrangements for recreate_checksums failed"
    move(original_mets, renamed_mets)
    assert not Bag(self.bagdir).is_valid(), "tests arrangements for recreate_checksums failed"

    # act: rewrite the checksums in place
    self.bagger.recreate_checksums(self.bagdir, overwrite=True)

    # assert: the manifests match the payload again
    assert Bag(self.bagdir).is_valid(), "recreate_checksums unzippd with overwrite failed"

def test_recreate_checksums_unzipped(self):
    """Updating an unzipped bag into a separate destination directory yields a valid bag there."""
    # arrange: rename a payload file so the existing manifests no longer match
    payload_dir = join(self.bagdir, "data")
    move(join(payload_dir, "mets.xml"), join(payload_dir, "mets-neu.xml"))
    target_dir = join(self.tempdir, "new_bag")

    # act: write the updated copy to the destination directory
    self.bagger.recreate_checksums(self.bagdir, target_dir)

    # assert: the copy at the destination is a valid bag
    assert Bag(target_dir).is_valid(), "recreate_checksums unzipped failed"

def test_recreate_checksums_zipped_overwrite(self):
    """A zipped bag with stale manifests is valid after being updated in place."""
    # arrange: rename a payload file so the manifests are stale, then zip the bag
    move(join(self.bagdir, "data", "mets.xml"), join(self.bagdir, "data", "mets-neu.xml"))
    zipped_bag = join(self.tempdir, "foo.ocrd.zip")
    # make_archive appends '.zip' itself, so hand it the path without the
    # extension. Strip only the trailing suffix: str.replace would also drop
    # any '.zip' occurring elsewhere in the (temporary) directory path.
    make_archive(zipped_bag[:-len('.zip')], 'zip', self.bagdir)

    # act
    self.bagger.recreate_checksums(zipped_bag, overwrite=True)

    # assert: unpack the rewritten archive and validate its contents
    bag_dest = join(self.tempdir, "new_bag")
    unzip_file_to_dir(zipped_bag, bag_dest)
    assert Bag(bag_dest).is_valid(), "recreate_checksums zipped with overwrite failed"

def test_recreate_checksums_zipped(self):
    """A zipped bag with stale manifests yields a valid zipped bag at a separate destination."""
    # arrange: rename a payload file so the manifests are stale, then zip the bag
    move(join(self.bagdir, "data", "mets.xml"), join(self.bagdir, "data", "mets-neu.xml"))
    zipped_bag = join(self.tempdir, "foo.ocrd.zip")
    # make_archive appends '.zip' itself, so hand it the path without the
    # extension. Strip only the trailing suffix: str.replace would also drop
    # any '.zip' occurring elsewhere in the (temporary) directory path.
    make_archive(zipped_bag[:-len('.zip')], 'zip', self.bagdir)
    zipped_bag_dest = join(self.tempdir, "foo-new.ocrd.zip")

    # act
    self.bagger.recreate_checksums(zipped_bag, zipped_bag_dest)

    # assert: unpack the newly written archive and validate its contents
    bag_dest = join(self.tempdir, "new_bag")
    unzip_file_to_dir(zipped_bag_dest, bag_dest)
    assert Bag(bag_dest).is_valid(), "recreate_checksums zipped failed"


# Run the test runner imported from tests.base when this module is executed as a script.
if __name__ == '__main__':
    main()