Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 36 additions & 19 deletions ocrd/ocrd/processor/helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Helper methods for running and documenting processors
"""
from os import environ
from os import chdir, environ, getcwd
from time import perf_counter, process_time
from functools import lru_cache
import json
Expand All @@ -14,7 +14,7 @@

from click import wrap_text
from ocrd.workspace import Workspace
from ocrd_utils import freeze_args, getLogger
from ocrd_utils import freeze_args, getLogger, pushd_popd


__all__ = [
Expand Down Expand Up @@ -82,6 +82,7 @@ def run_processor(
log = getLogger('ocrd.processor.helpers.run_processor')
log.debug("Running processor %s", processorClass)

old_cwd = getcwd()
processor = get_processor(
processor_class=processorClass,
parameter=parameter,
Expand All @@ -91,6 +92,8 @@ def run_processor(
output_file_grp=output_file_grp,
instance_caching=instance_caching
)
processor.workspace = workspace
chdir(processor.workspace.directory)

ocrd_tool = processor.ocrd_tool
name = '%s v%s' % (ocrd_tool['executable'], processor.version)
Expand All @@ -101,46 +104,60 @@ def run_processor(
t0_cpu = process_time()
if any(x in environ.get('OCRD_PROFILE', '') for x in ['RSS', 'PSS']):
backend = 'psutil_pss' if 'PSS' in environ['OCRD_PROFILE'] else 'psutil'
mem_usage = memory_usage(proc=processor.process,
# only run process once
max_iterations=1,
interval=.1, timeout=None, timestamps=True,
# include sub-processes
multiprocess=True, include_children=True,
# get proportional set size instead of RSS
backend=backend)
try:
mem_usage = memory_usage(proc=processor.process,
# only run process once
max_iterations=1,
interval=.1, timeout=None, timestamps=True,
# include sub-processes
multiprocess=True, include_children=True,
# get proportional set size instead of RSS
backend=backend)
except Exception as err:
log.exception("Failure in processor '%s'" % ocrd_tool['executable'])
raise err
finally:
chdir(old_cwd)
mem_usage_values = [mem for mem, _ in mem_usage]
mem_output = 'memory consumption: '
mem_output += ''.join(sparklines(mem_usage_values))
mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values))
logProfile.info(mem_output)
else:
processor.process()
try:
processor.process()
except Exception as err:
log.exception("Failure in processor '%s'" % ocrd_tool['executable'])
raise err
finally:
chdir(old_cwd)

t1_wall = perf_counter() - t0_wall
t1_cpu = process_time() - t0_cpu
logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % (
ocrd_tool['executable'],
t1_wall,
t1_cpu,
input_file_grp or '',
output_file_grp or '',
json.dumps(parameter) or '',
page_id or ''
processor.input_file_grp or '',
processor.output_file_grp or '',
json.dumps(processor.parameter) or '',
processor.page_id or ''
))
workspace.mets.add_agent(
name=name,
_type='OTHER',
othertype='SOFTWARE',
role='OTHER',
otherrole=otherrole,
notes=[({'option': 'input-file-grp'}, input_file_grp or ''),
({'option': 'output-file-grp'}, output_file_grp or ''),
({'option': 'parameter'}, json.dumps(parameter or '')),
({'option': 'page-id'}, page_id or '')]
notes=[({'option': 'input-file-grp'}, processor.input_file_grp or ''),
({'option': 'output-file-grp'}, processor.output_file_grp or ''),
({'option': 'parameter'}, json.dumps(processor.parameter or '')),
({'option': 'page-id'}, processor.page_id or '')]
)
workspace.save_mets()
return processor


def run_cli(
executable,
mets_url=None,
Expand Down
3 changes: 3 additions & 0 deletions tests/processor/test_ocrd_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=invalid-name,line-too-long

from io import BytesIO
import os
from pathlib import Path

from PIL import Image
Expand All @@ -18,6 +19,7 @@ class TestDummyProcessor(TestCase):
def test_copies_ok(self):
with copy_of_directory(assets.url_of('SBB0000F29300010000/data')) as wsdir:
workspace = Workspace(Resolver(), wsdir)
os.chdir(workspace.directory)
input_files = workspace.mets.find_all_files(fileGrp='OCR-D-IMG')
self.assertEqual(len(input_files), 3)
output_files = workspace.mets.find_all_files(fileGrp='OUTPUT')
Expand Down Expand Up @@ -53,6 +55,7 @@ def test_copies_ok(self):

def test_copy_file_false(tmpdir):
workspace = Resolver().workspace_from_nothing(directory=tmpdir)
os.chdir(workspace.directory)
for i in range(10):
pil_image = Image.new('RGB', (100, 100))
bhandle = BytesIO()
Expand Down