From 022eb26b9193b253d8891f9036eb24ec552d4011 Mon Sep 17 00:00:00 2001 From: Chad Retz Date: Fri, 17 Jan 2025 10:08:54 -0600 Subject: [PATCH 1/2] Support passing through all modules Fixes #691 --- README.md | 15 +++++++++++++++ temporalio/worker/workflow_sandbox/_importer.py | 5 +++-- .../worker/workflow_sandbox/_restrictions.py | 17 +++++++++++++++++ tests/worker/workflow_sandbox/test_importer.py | 16 ++++++++++++++++ 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f1a74780a..d04e2b878 100644 --- a/README.md +++ b/README.md @@ -1029,6 +1029,21 @@ my_worker = Worker( In both of these cases, now the `pydantic` module will be passed through from outside of the sandbox instead of being reloaded for every workflow run. +If users are sure that no imports they use in workflow files will ever need to be sandboxed (meaning all calls within +are deterministic and never mutate shared, global state), the `passthrough_all_modules` option can be set on the +restrictions or the `with_passthrough_all_modules` helper can be used, for example: + +```python +my_worker = Worker( + ..., + workflow_runner=SandboxedWorkflowRunner( + restrictions=SandboxRestrictions.default.with_passthrough_all_modules() + ) +) +``` + +Note, some calls from the module may still be checked for invalid calls at runtime for certain builtins. + ###### Invalid Module Members `SandboxRestrictions.invalid_module_members` contains a root matcher that applies to all module members. 
This already diff --git a/temporalio/worker/workflow_sandbox/_importer.py b/temporalio/worker/workflow_sandbox/_importer.py index 62f314c6f..10cc6189b 100644 --- a/temporalio/worker/workflow_sandbox/_importer.py +++ b/temporalio/worker/workflow_sandbox/_importer.py @@ -252,10 +252,11 @@ def _assert_valid_module(self, name: str) -> None: raise RestrictedWorkflowAccessError(name) def _maybe_passthrough_module(self, name: str) -> Optional[types.ModuleType]: - # If imports not passed through and name not in passthrough modules, - # check parents + # If imports not passed through and all modules are not passed through + # and name not in passthrough modules, check parents if ( not temporalio.workflow.unsafe.is_imports_passed_through() + and not self.restrictions.passthrough_all_modules and name not in self.restrictions.passthrough_modules ): end_dot = -1 diff --git a/temporalio/worker/workflow_sandbox/_restrictions.py b/temporalio/worker/workflow_sandbox/_restrictions.py index c2be3a92f..df968ca3e 100644 --- a/temporalio/worker/workflow_sandbox/_restrictions.py +++ b/temporalio/worker/workflow_sandbox/_restrictions.py @@ -99,6 +99,17 @@ class methods (including __init__, etc). The check compares the against the fully qualified path to the item. """ + passthrough_all_modules: bool = False + """ + Pass through all modules, do not sandbox any modules. This is the equivalent + of setting :py:attr:`passthrough_modules` as every module ever imported into + the workflow. This is unsafe. This means modules are never reloaded per + workflow run which means workflow authors have to be careful that they don't + import modules that do non-deterministic things. Note, just because a module + is passed through from outside the sandbox doesn't mean runtime restrictions + on invalid calls are not still applied. 
+ """ + passthrough_modules_minimum: ClassVar[Set[str]] """Set of modules that must be passed through at the minimum.""" @@ -133,6 +144,12 @@ def with_passthrough_modules(self, *modules: str) -> SandboxRestrictions: self, passthrough_modules=self.passthrough_modules | set(modules) ) + def with_passthrough_all_modules(self) -> SandboxRestrictions: + """Create a new restriction set with :py:attr:`passthrough_all_modules` + as true. + """ + return dataclasses.replace(self, passthrough_all_modules=True) + # We intentionally use specific fields instead of generic "matcher" callbacks # for optimization reasons. diff --git a/tests/worker/workflow_sandbox/test_importer.py b/tests/worker/workflow_sandbox/test_importer.py index 26e076e46..ee29d642d 100644 --- a/tests/worker/workflow_sandbox/test_importer.py +++ b/tests/worker/workflow_sandbox/test_importer.py @@ -64,6 +64,22 @@ def test_workflow_sandbox_importer_passthough_context_manager(): assert id(outside) == id(inside) +def test_workflow_sandbox_importer_passthrough_all_modules(): + import tests.worker.workflow_sandbox.testmodules.stateful_module as outside + + # Confirm regular restrictions does re-import + with Importer(restrictions, RestrictionContext()).applied(): + import tests.worker.workflow_sandbox.testmodules.stateful_module as inside1 + assert id(outside) != id(inside1) + + # But that one with all modules passed through does not + with Importer( + restrictions.with_passthrough_all_modules(), RestrictionContext() + ).applied(): + import tests.worker.workflow_sandbox.testmodules.stateful_module as inside2 + assert id(outside) == id(inside2) + + def test_workflow_sandbox_importer_invalid_module_members(): importer = Importer(restrictions, RestrictionContext()) # Can access the function, no problem From 6ab07cb4e69682d48e61a6c3385b0bfdbd0646fb Mon Sep 17 00:00:00 2001 From: Chad Retz Date: Tue, 21 Jan 2025 08:05:16 -0600 Subject: [PATCH 2/2] Doc string fix --- temporalio/worker/workflow_sandbox/_restrictions.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temporalio/worker/workflow_sandbox/_restrictions.py b/temporalio/worker/workflow_sandbox/_restrictions.py index df968ca3e..407e51b27 100644 --- a/temporalio/worker/workflow_sandbox/_restrictions.py +++ b/temporalio/worker/workflow_sandbox/_restrictions.py @@ -102,8 +102,8 @@ class methods (including __init__, etc). The check compares the against the passthrough_all_modules: bool = False """ Pass through all modules, do not sandbox any modules. This is the equivalent - of setting :py:attr:`passthrough_modules` as every module ever imported into - the workflow. This is unsafe. This means modules are never reloaded per + of setting :py:attr:`passthrough_modules` to a list of all modules imported + by the workflow. This is unsafe. This means modules are never reloaded per workflow run which means workflow authors have to be careful that they don't import modules that do non-deterministic things. Note, just because a module is passed through from outside the sandbox doesn't mean runtime restrictions