Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 70 additions & 4 deletions ocrd/ocrd/processor/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,27 @@
"""
from os import environ
from time import perf_counter, process_time
from functools import lru_cache
import json
import inspect
from subprocess import run, PIPE
from typing import List, Type

from memory_profiler import memory_usage
from sparklines import sparklines

from click import wrap_text
from ocrd_utils import getLogger
from ocrd.workspace import Workspace
from ocrd_utils import freeze_args, getLogger


__all__ = [
'generate_processor_help',
'run_cli',
'run_processor'
]


def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None):
if workspace is None:
if resolver is None:
Expand All @@ -42,6 +48,7 @@ def run_processor(
parameter=None,
parameter_override=None,
working_dir=None,
instance_caching=False # TODO don't set this yet!
): # pylint: disable=too-many-locals
"""
Instantiate a Pythonic processor, open a workspace, run the processor and save the workspace.
Expand All @@ -58,6 +65,9 @@ def run_processor(
- :py:attr:`output_file_grp`
- :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)

Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
This flag is used for an experimental feature we would like to adopt in future.

Run the processor on the workspace (creating output files in the filesystem).

Finally, write back the workspace (updating the METS in the filesystem).
Expand All @@ -73,14 +83,18 @@ def run_processor(
)
log = getLogger('ocrd.processor.helpers.run_processor')
log.debug("Running processor %s", processorClass)
processor = processorClass(
workspace,

processor = get_processor(
processor_class=processorClass,
parameter=parameter,
workspace=workspace,
ocrd_tool=ocrd_tool,
page_id=page_id,
input_file_grp=input_file_grp,
output_file_grp=output_file_grp,
parameter=parameter
instance_caching=instance_caching
)

ocrd_tool = processor.ocrd_tool
name = '%s v%s' % (ocrd_tool['executable'], processor.version)
otherrole = ocrd_tool['steps'][0]
Expand Down Expand Up @@ -263,3 +277,55 @@ def wrap(s):
ocrd_tool.get('input_file_grp', 'NONE'),
ocrd_tool.get('output_file_grp', 'NONE')
)


# Taken from https://github.com/OCR-D/core/pull/884
@freeze_args
# Environment variables are always strings, but ``lru_cache`` only accepts an
# int (or None) as ``maxsize``; convert explicitly so that setting
# OCRD_MAX_PROCESSOR_CACHE does not raise a TypeError at import time.
@lru_cache(maxsize=int(environ.get('OCRD_MAX_PROCESSOR_CACHE', 128)))
def get_cached_processor(parameter: dict, processor_class):
    """
    Call this function to get back an instance of a processor.
    The results are cached based on the parameters.

    The ``freeze_args`` decorator turns a ``dict`` passed as ``parameter``
    into a hashable ``frozendict`` before the cache lookup happens. The cache
    size is read once, at import time, from the ``OCRD_MAX_PROCESSOR_CACHE``
    environment variable (default: 128 entries).

    Args:
        parameter (dict): a dictionary of parameters. An empty or missing
            value is handed to the processor as ``None``.
        processor_class: the concrete `:py:class:~ocrd.Processor` class.
    Returns:
        When the concrete class of the processor is unknown, `None` is returned.
        Otherwise, an instance of the `:py:class:~ocrd.Processor` is returned.
        The instance is created without a workspace (``workspace=None``).
    """
    if processor_class:
        # Hand the processor a plain, mutable dict again rather than the
        # frozen mapping that was only needed as a cache key.
        dict_params = dict(parameter) if parameter else None
        return processor_class(workspace=None, parameter=dict_params)
    return None


def get_processor(
        processor_class,
        parameter: dict,
        workspace: Workspace = None,
        ocrd_tool: dict = None,
        page_id: str = None,
        input_file_grp: List[str] = None,
        output_file_grp: List[str] = None,
        instance_caching: bool = False,
):
    """
    Return a processor instance, either freshly built or taken from the cache.

    Args:
        processor_class: the concrete processor class to instantiate.
        parameter (dict): parameters handed to the processor.
        workspace: workspace the processor should operate on.
        ocrd_tool (dict): tool description; only forwarded when a fresh
            instance is constructed (the cached path does not use it).
        page_id (str): page selection for the processor.
        input_file_grp: input file group(s).
        output_file_grp: output file group(s).
        instance_caching (bool): when true, obtain the instance through
            ``get_cached_processor`` and re-bind the per-run attributes on it.
    Returns:
        A processor instance configured for this run.
    Raises:
        ValueError: if ``processor_class`` is falsy.
    """
    if not processor_class:
        raise ValueError("Processor class is not known")

    if not instance_caching:
        # Regular path: build a brand-new instance with everything at once.
        return processor_class(
            workspace=workspace,
            ocrd_tool=ocrd_tool,
            page_id=page_id,
            input_file_grp=input_file_grp,
            output_file_grp=output_file_grp,
            parameter=parameter
        )

    processor = get_cached_processor(
        parameter=parameter,
        processor_class=processor_class
    )
    # The cached instance was created without run-specific state, so attach
    # it now (same attributes, same order as a direct assignment sequence).
    per_run_state = (
        ('workspace', workspace),
        ('page_id', page_id),
        ('input_file_grp', input_file_grp),
        ('output_file_grp', output_file_grp),
    )
    for attribute, value in per_run_state:
        setattr(processor, attribute, value)
    return processor
4 changes: 3 additions & 1 deletion ocrd_utils/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,10 @@
xywh_from_polygon)

from .introspect import (
freeze_args,
set_json_key_value_overrides,
membername)
membername
)

from .logging import (
disableLogging,
Expand Down
17 changes: 17 additions & 0 deletions ocrd_utils/ocrd_utils/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@
Utility functions to simplify access to data structures.
"""
import json
from functools import wraps
from frozendict import frozendict


# Taken from https://github.com/OCR-D/core/pull/884
def freeze_args(func):
    """
    Decorator replacing every ``dict`` argument with an immutable ``frozendict``.

    Both positional and keyword arguments are inspected; anything that is not
    a ``dict`` instance is passed through untouched. This makes the decorated
    function usable with caches that need hashable arguments.
    Code taken from `this post <https://stackoverflow.com/a/53394430/1814420>`_
    """
    def _frozen(value):
        # Only plain dict instances are converted; everything else is kept.
        return frozendict(value) if isinstance(value, dict) else value

    @wraps(func)
    def wrapped(*args, **kwargs):
        positional = tuple(_frozen(item) for item in args)
        named = {key: _frozen(item) for key, item in kwargs.items()}
        return func(*positional, **named)

    return wrapped


def membername(class_, val):
"""Convert a member variable/constant into a member name string."""
Expand Down
1 change: 1 addition & 0 deletions ocrd_utils/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ numpy
atomicwrites >= 1.3.0
importlib_metadata;python_version<'3.8'
importlib_resources;python_version<'3.8'
frozendict>=2.3.4