From cfab064be4bae1ef68e1c25744805a65e1fcdf71 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 17 Feb 2023 16:52:33 +0100 Subject: [PATCH 1/8] Fix the instance caching --- ocrd/ocrd/processor/helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index 3450b51a0b..8d47a0766c 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -14,7 +14,7 @@ from click import wrap_text from ocrd.workspace import Workspace -from ocrd_utils import freeze_args, getLogger +from ocrd_utils import freeze_args, getLogger, pushd_popd __all__ = [ @@ -118,7 +118,8 @@ def run_processor( mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values)) logProfile.info(mem_output) else: - processor.process() + with pushd_popd(workspace.directory): + processor.process() t1_wall = perf_counter() - t0_wall t1_cpu = process_time() - t0_cpu logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % ( From 9e3c229719ab5d92a7352e879cd233be827281a0 Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Mon, 20 Feb 2023 08:40:50 +0100 Subject: [PATCH 2/8] Chdir before processor.process() calls --- ocrd/ocrd/processor/helpers.py | 52 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index 8d47a0766c..b937a95b32 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -1,7 +1,7 @@ """ Helper methods for running and documenting processors """ -from os import environ +from os import chdir, environ, getcwd from time import perf_counter, process_time from functools import lru_cache import json @@ -95,6 +95,9 @@ def run_processor( instance_caching=instance_caching ) + old_cwd = getcwd() + chdir(processor.workspace.directory) + ocrd_tool = processor.ocrd_tool name = '%s v%s' % (ocrd_tool['executable'], processor.version) otherrole = ocrd_tool['steps'][0] @@ -104,32 +107,44 @@ def run_processor( t0_cpu = process_time() if any(x in environ.get('OCRD_PROFILE', '') for x in ['RSS', 'PSS']): backend = 'psutil_pss' if 'PSS' in environ['OCRD_PROFILE'] else 'psutil' - mem_usage = memory_usage(proc=processor.process, - # only run process once - max_iterations=1, - interval=.1, timeout=None, timestamps=True, - # include sub-processes - multiprocess=True, include_children=True, - # get proportional set size instead of RSS - backend=backend) + try: + mem_usage = memory_usage(proc=processor.process, + # only run process once + max_iterations=1, + interval=.1, timeout=None, timestamps=True, + # include sub-processes + multiprocess=True, include_children=True, + # get proportional set size instead of RSS + backend=backend) + except Exception as err: + log.exception("Failure in processor '%s'" % ocrd_tool['executable']) + return err + finally: + chdir(old_cwd) mem_usage_values = [mem for mem, _ in mem_usage] mem_output = 'memory consumption: ' mem_output += ''.join(sparklines(mem_usage_values)) mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values)) logProfile.info(mem_output) else: - with pushd_popd(workspace.directory): + try: processor.process() + except Exception as err: + log.exception("Failure in processor '%s'" % ocrd_tool['executable']) + return err + finally: + chdir(old_cwd) + t1_wall = perf_counter() - t0_wall t1_cpu = process_time() - t0_cpu logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % ( ocrd_tool['executable'], t1_wall, t1_cpu, - input_file_grp or '', - output_file_grp or '', - json.dumps(parameter) or '', - page_id or '' + processor.input_file_grp or '', + processor.output_file_grp or '', + json.dumps(processor.parameter) or '', + processor.page_id or '' )) workspace.mets.add_agent( name=name, @@ -137,14 +152,15 @@ def run_processor( othertype='SOFTWARE', role='OTHER', otherrole=otherrole, - notes=[({'option': 'input-file-grp'}, input_file_grp or ''), - ({'option': 'output-file-grp'}, output_file_grp or ''), - ({'option': 'parameter'}, json.dumps(parameter or '')), - ({'option': 'page-id'}, page_id or '')] + notes=[({'option': 'input-file-grp'}, processor.input_file_grp or ''), + ({'option': 'output-file-grp'}, processor.output_file_grp or ''), + ({'option': 'parameter'}, json.dumps(processor.parameter or '')), + ({'option': 'page-id'}, processor.page_id or '')] ) workspace.save_mets() return processor + def run_cli( executable, mets_url=None, From ee3f9d31d50bd954ed5b63a2b075ece4b57d54c8 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 2 Mar 2023 13:24:24 +0100 Subject: [PATCH 3/8] Change getcwd() call location --- ocrd/ocrd/processor/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index b937a95b32..ae8f36ba71 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -84,6 +84,8 @@ def run_processor( log = getLogger('ocrd.processor.helpers.run_processor') log.debug("Running processor %s", processorClass) + old_cwd = getcwd() + processor = get_processor( processor_class=processorClass, parameter=parameter, @@ -95,7 +97,6 @@ def run_processor( instance_caching=instance_caching ) - old_cwd = getcwd() chdir(processor.workspace.directory) ocrd_tool = processor.ocrd_tool From 518fb21f884e52b4ea584c55d30498a8009bb5d3 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 2 Mar 2023 14:21:05 +0100 Subject: [PATCH 4/8] Raise the error instead of returning it --- ocrd/ocrd/processor/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index ae8f36ba71..92297e789c 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -119,7 +119,7 @@ def run_processor( backend=backend) except Exception as err: log.exception("Failure in processor '%s'" % ocrd_tool['executable']) - return err + raise err finally: chdir(old_cwd) mem_usage_values = [mem for mem, _ in mem_usage] @@ -132,7 +132,7 @@ def run_processor( processor.process() except Exception as err: log.exception("Failure in processor '%s'" % ocrd_tool['executable']) - return err + raise err finally: chdir(old_cwd) From 2af641e48820401f373da13cb273d3565cb7a23c Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 2 Mar 2023 14:32:31 +0100 Subject: [PATCH 5/8] Revert getcwd() location - failing tests --- ocrd/ocrd/processor/helpers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index 92297e789c..b7f25b6657 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -84,8 +84,6 @@ def run_processor( log = getLogger('ocrd.processor.helpers.run_processor') log.debug("Running processor %s", processorClass) - old_cwd = getcwd() - processor = get_processor( processor_class=processorClass, parameter=parameter, @@ -96,7 +94,8 @@ def run_processor( output_file_grp=output_file_grp, instance_caching=instance_caching ) - + + old_cwd = getcwd() chdir(processor.workspace.directory) ocrd_tool = processor.ocrd_tool From 8090dfcda0b07e211864ff12cf938023aca64252 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 17 Mar 2023 10:32:10 +0100 Subject: [PATCH 6/8] Undo the revert of getcwd() --- ocrd/ocrd/processor/helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index b7f25b6657..23bc84ae20 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -84,6 +84,7 @@ def run_processor( log = getLogger('ocrd.processor.helpers.run_processor') log.debug("Running processor %s", processorClass) + old_cwd = getcwd() processor = get_processor( processor_class=processorClass, parameter=parameter, @@ -94,8 +95,6 @@ def run_processor( output_file_grp=output_file_grp, instance_caching=instance_caching ) - - old_cwd = getcwd() chdir(processor.workspace.directory) ocrd_tool = processor.ocrd_tool From f8390ca2bd8eca2d3453170098324f5f552bd49f Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Mon, 20 Mar 2023 10:13:24 +0100 Subject: [PATCH 7/8] chdir to ws in the beginning --- tests/processor/test_ocrd_dummy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/processor/test_ocrd_dummy.py b/tests/processor/test_ocrd_dummy.py index 532f6e6687..c98a6d481a 100644 --- a/tests/processor/test_ocrd_dummy.py +++ b/tests/processor/test_ocrd_dummy.py @@ -2,6 +2,7 @@ # pylint: disable=invalid-name,line-too-long from io import BytesIO +import os from pathlib import Path from PIL import Image @@ -18,6 +19,7 @@ class TestDummyProcessor(TestCase): def test_copies_ok(self): with copy_of_directory(assets.url_of('SBB0000F29300010000/data')) as wsdir: workspace = Workspace(Resolver(), wsdir) + os.chdir(workspace.directory) input_files = workspace.mets.find_all_files(fileGrp='OCR-D-IMG') self.assertEqual(len(input_files), 3) output_files = workspace.mets.find_all_files(fileGrp='OUTPUT') @@ -53,6 +55,7 @@ def test_copies_ok(self): def test_copy_file_false(tmpdir): workspace = Resolver().workspace_from_nothing(directory=tmpdir) + os.chdir(workspace.directory) for i in range(10): pil_image = Image.new('RGB', (100, 100)) bhandle = BytesIO() From 7d22d3b42b7465ad2fc608f95e7a962e497c9a6e Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Mon, 20 Mar 2023 11:08:14 +0100 Subject: [PATCH 8/8] Set ws outside the constructor --- ocrd/ocrd/processor/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py index 23bc84ae20..3f28453fae 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/ocrd/ocrd/processor/helpers.py @@ -88,13 +88,14 @@ def run_processor( processor = get_processor( processor_class=processorClass, parameter=parameter, - workspace=workspace, + workspace=None, ocrd_tool=ocrd_tool, page_id=page_id, input_file_grp=input_file_grp, output_file_grp=output_file_grp, instance_caching=instance_caching ) + processor.workspace = workspace chdir(processor.workspace.directory) ocrd_tool = processor.ocrd_tool