Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ocrd/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,20 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
workspace.mets.unique_identifier = id
workspace.save_mets()

@workspace_cli.command('update-page')
@click.option('--order', help="@ORDER attribute for this mets:div", metavar='ORDER')
@click.option('--orderlabel', help="@ORDERLABEL attribute for this mets:div", metavar='ORDERLABEL')
@click.option('--contentids', help="@CONTENTIDS attribute for this mets:div", metavar='CONTENTIDS')
@click.argument('PAGE_ID')
@pass_workspace
def update_page(ctx, order, orderlabel, contentids, page_id):
    """
    Update the @ORDER, @ORDERLABEL or @CONTENTIDS attributes of the mets:div with @ID=PAGE_ID
    """
    # Open the workspace described by the CLI context.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    # Hand all three option values to the METS model in one call; the model
    # decides which of them actually get written.
    workspace.mets.update_physical_page_attributes(
        page_id,
        order=order,
        orderlabel=orderlabel,
        contentids=contentids,
    )
    # Persist the modified METS.
    workspace.save_mets()

# ----------------------------------------------------------------------
# ocrd workspace merge
# ----------------------------------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions ocrd_models/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import typing
from lxml import etree as ET
from copy import deepcopy
from warnings import warn

from ocrd_utils import (
getLogger,
Expand Down Expand Up @@ -665,6 +666,22 @@ def set_physical_page_for_file(self, pageId, ocrd_file, order=None, orderlabel=N
# Assign the ocrd fileID to the pageId in the cache
self._fptr_cache[el_pagediv.get('ID')].update({ocrd_file.ID : el_fptr})

def update_physical_page_attributes(self, page_id, **kwargs):
    """
    Set attributes on the physical page ``mets:div`` whose ``@ID`` is ``page_id``.

    Arguments:
        page_id (string): ``@ID`` of the page ``mets:div`` to update

    Keyword Args:
        Each keyword is upper-cased and used as the attribute name
        (e.g. ``order`` becomes ``@ORDER``). Keywords with a falsy value
        (``None``, empty string) are skipped, so existing attributes are
        left untouched.

    If no matching ``mets:div`` is found, a warning naming the page ID is
    issued and nothing is changed.
    """
    if self._cache_flag:
        # With caching enabled only the page cache is consulted.
        page_div = self._page_cache.get(page_id)
    else:
        # Pass the ID as an XPath variable rather than interpolating it into
        # the expression, so IDs containing quotes cannot break the query.
        matches = self._tree.getroot().xpath(
            'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]'
            '/mets:div[@TYPE="page"][@ID=$page_id]',
            namespaces=NS,
            page_id=page_id)
        page_div = matches[0] if matches else None

    # Compare against None explicitly: an element without children is falsy.
    if page_div is None:
        warn(f"Could not find mets:div[@ID={page_id}]")
        return

    for attr_name, attr_value in kwargs.items():
        if attr_value:
            page_div.set(attr_name.upper(), attr_value)

def get_physical_page_for_file(self, ocrd_file):
"""
Get the physical page ID (``@ID`` of the physical ``mets:structMap`` ``mets:div`` entry)
Expand Down
16 changes: 16 additions & 0 deletions tests/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,5 +379,21 @@ def test_envvar():
assert not OcrdMets(filename=assets.url_of('SBB0000F29300010000/data/mets.xml'), cache_flag=True)._cache_flag
assert not OcrdMets(filename=assets.url_of('SBB0000F29300010000/data/mets.xml'), cache_flag=False)._cache_flag

def test_update_physical_page_attributes(sbb_directory_ocrd_mets):
    """
    Setting ``order``/``orderlabel`` on a page must write ``@ORDER`` and
    ``@ORDERLABEL`` with the given values into the serialized METS.
    """
    m = sbb_directory_ocrd_mets
    # Start from a METS without any files, and hence without physical pages.
    m.remove_file()
    assert len(m.physical_pages) == 0
    # Several files on the same page must yield exactly one physical page.
    for file_id in ('foo1', 'foo2', 'foo3', 'foo4'):
        m.add_file('FOO', pageId='new page', ID=file_id, mimetype='foo/bar')
    assert len(m.physical_pages) == 1
    assert b'ORDER' not in m.to_xml()
    assert b'ORDERLABEL' not in m.to_xml()
    m.update_physical_page_attributes('new page', order='foo', orderlabel='bar')
    xml_after = m.to_xml()
    # Match the full attribute/value pairs: a bare b'ORDER' check would also be
    # satisfied by ORDERLABEL alone and could not detect a missing @ORDER.
    assert b' ORDER="foo"' in xml_after
    assert b' ORDERLABEL="bar"' in xml_after


# Script entry point: when this module is executed directly, pass its own path
# to main() (defined outside this excerpt; presumably the test runner helper -- confirm).
if __name__ == '__main__':
main(__file__)