diff --git a/.doecode.yml b/.doecode.yml new file mode 100644 index 000000000..c7130a83e --- /dev/null +++ b/.doecode.yml @@ -0,0 +1,81 @@ +--- +code_id: 18101 +site_ownership_code: "BNL" +open_source: true +repository_link: "https://github.com/bluesky/bluesky.git" +project_type: "OS" +software_type: "S" +official_use_only: {} +developers: +- email: "scampbell@bnl.gov" + orcid: "0000-0001-7079-0878" + first_name: "Stuart" + last_name: "Campbell" + middle_name: "" + affiliations: + - "Brookhaven National Lab. (BNL), Upton, NY (United States). National Synchrotron\ + \ Light Source II (NSLS-II)" +- email: "dallan@bnl.gov" + orcid: "0000-0002-5947-6017" + first_name: "Daniel" + last_name: "Allan" + middle_name: "" + affiliations: + - "Brookhaven National Lab. (BNL), Upton, NY (United States). National Synchrotron\ + \ Light Source II (NSLS-II)" +- email: "tcaswell@bnl.gov" + orcid: "0000-0003-4692-608X" + first_name: "Thomas" + last_name: "Caswell" + middle_name: "" + affiliations: + - "Brookhaven National Lab. (BNL), Upton, NY (United States). National Synchrotron\ + \ Light Source II (NSLS-II)" +- email: "mrakitin@bnl.gov" + orcid: "0000-0003-3685-852X" + first_name: "Max" + last_name: "Rakitin" + middle_name: "" + affiliations: + - "Brookhaven National Laboratory (BNL), Upton, NY (United States). National Synchrotron\ + \ Light Source II (NSLS-II)" +contributors: [] +sponsoring_organizations: [] +contributing_organizations: +- organization_name: "Brookhaven National Laboratory" + contributor_type: "HostingInstitution" + DOE: true +research_organizations: +- organization_name: "Brookhaven National Laboratory (BNL), Upton, NY (United States)" + DOE: true +related_identifiers: [] +award_dois: [] +software_title: "Bluesky Run Engine" +acronym: "bluesky" +description: "Bluesky is a library for experiment control and collection of scientific\ + \ data and metadata. It emphasizes the following virtues:\n\n Live, Streaming\ + \ Data: Available for inline visualization and processing.\n\n Rich Metadata:\ + \ Captured and organized to facilitate reproducibility and searchability.\n\n \ + \ Experiment Generality: Seamlessly reuse a procedure on completely different hardware.\n\ + \n Interruption Recovery: Experiments are “rewindable,” recovering cleanly from\ + \ interruptions.\n\n Automated Suspend/Resume: Experiments can be run unattended,\ + \ automatically suspending and resuming if needed.\n\n Pluggable I/O: Export\ + \ data (live) into any desired format or database.\n\n Customizability: Integrate\ + \ custom experimental procedures and commands, and get the I/O and interruption\ + \ features for free.\n\n Integration with Scientific Python: Interface naturally\ + \ with numpy and Python scientific stack." +programming_languages: +- "Python" +documentation_url: "https://blueskyproject.io/bluesky" +country_of_origin: "United States" +project_keywords: [] +licenses: +- "BSD 3-clause \"New\" or \"Revised\" License" +date_record_added: "2018-09-06" +date_record_updated: "2025-04-17" +is_file_certified: false +last_editor: "scampbell@bnl.gov" +is_limited: false +links: +- rel: "citation" + href: "https://www.osti.gov/doecode/biblio/18101" diff --git a/LICENSE_README b/LICENSE_README new file mode 100644 index 000000000..02e4e6d30 --- /dev/null +++ b/LICENSE_README @@ -0,0 +1,22 @@ +The Software resulted from work developed under a U.S. Government +Contract No. DE-SC0012704 and are subject to the following terms: +the U.S. 
Government is granted for itself and others acting on its +behalf a paid-up, nonexclusive, irrevocable worldwide license in +this computer software and data to reproduce, prepare derivative works, +and perform publicly and display publicly. + +THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. +THE UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND THEIR +EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT, (2) DO NOT ASSUME +ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, +OR USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF THE +SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) DO NOT WARRANT +THAT THE SOFTWARE WILL FUNCTION UNINTERRUPTED, THAT IT IS ERROR-FREE OR +THAT ANY ERRORS WILL BE CORRECTED. + +IN NO EVENT SHALL THE UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, +OR THEIR EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF ANY KIND OR NATURE RESULTING +FROM EXERCISE OF THIS LICENSE AGREEMENT OR THE USE OF THE SOFTWARE. diff --git a/docs/api_changes.rst b/docs/api_changes.rst index 86b77f587..a9d340aae 100644 --- a/docs/api_changes.rst +++ b/docs/api_changes.rst @@ -2,6 +2,50 @@ Release History ================= +v1.14.4 (2025-08-26) +==================== + +Changed +------- + +- Update ``TiledWriter`` to match API changes in Tiled + (demotion of "composite" from structure family to spec) + +v1.14.3 (2025-08-26) +==================== + +Changed +------- + +- RunEngine now supports both sync and async functions as a `scan_id_source` + +Fixed +----- + +- Fix a regression related to external data present in multiple streams + +v1.14.2 (2025-06-10) +==================== + +TO DO + +v1.14.1 (2025-05-21) +==================== + +Added +----- + +- The `mv` and `mvr` plans accept a new argument, `timeout`. + +Changed +------- + +- The `bluesky.callbacks.tiled_writer.TiledWriter` looks for an + optional key `tiled_access_tags` in the 'start' document and, + if found, uses it to set `access_tags` on the nodes created + in Tiled to store the metadata and data from the BlueskyRun. + In addition, some minor refinements were made to the writer. + v1.14.0 (2025-05-06) ==================== diff --git a/docs/conf.py b/docs/conf.py index 686327a98..40e45ffe1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,6 +40,7 @@ "matplotlib.sphinxext.plot_directive", "sphinx.ext.intersphinx", "numpydoc", + "sphinxcontrib.mermaid", ] # Configuration options for plot_directive. See: diff --git a/docs/documents.rst b/docs/documents.rst index d954f499b..6ee1cc89c 100644 --- a/docs/documents.rst +++ b/docs/documents.rst @@ -165,7 +165,7 @@ The most commonly useful fields here are 'time' and 'exit_status'. 'reason': '', # The RunEngine can provide reason for failure here.
'time': 1442521012.1021606, 'uid': '', - 'start': '', + 'run_start': '', 'num_events': {'primary': 16} } diff --git a/docs/plans.rst b/docs/plans.rst index dacf65ea7..3caecc528 100644 --- a/docs/plans.rst +++ b/docs/plans.rst @@ -484,6 +484,7 @@ Plans for interacting with hardware: unstage configure stop + prepare Plans for asynchronous acquisition: @@ -496,6 +497,7 @@ kickoff complete collect + collect_while_completing Plans that control the RunEngine: diff --git a/docs/run_engine.rst b/docs/run_engine.rst index 0b985a180..0403bffb9 100644 --- a/docs/run_engine.rst +++ b/docs/run_engine.rst @@ -163,7 +163,7 @@ reporting that the addition failed due to a ``TypeError`` finally: yield Msg('print', 'thanks for adding') -Compare the behavior of between ``adding_plan`` and ``addingplan`` in cases +Compare the behavior of ``adding_plan`` and ``safe_adding_plan`` in cases where they succeed .. code:: python @@ -776,6 +776,16 @@ API to control the behavior. Stop, Abort, Halt ----------------- +Stop, Abort, and Halt each **stop** a plan, but they differ in whether the plan may be running or paused, +whether cleanup is performed, and how the `exit_status` is set. +The table below summarizes the differences between these methods: + +| Method | Action | Cleanup | Exit Status | +| --------- | --------------------------------- | ------------- | ----------- | +| **Abort** | Stop a running or paused plan | Runs cleanup | `abort` | +| **Stop** | Stop a running or paused plan | Runs cleanup | `success` | +| **Halt** | Stop the running plan immediately | Skips cleanup | `abort` | + Suspending ---------- diff --git a/docs/tiled-writer.rst b/docs/tiled-writer.rst new file mode 100644 index 000000000..df849bb90 --- /dev/null +++ b/docs/tiled-writer.rst @@ -0,0 +1,156 @@ +********************** +Integration with Tiled +********************** + +`Tiled `_ is a data management system that allows for the storage and retrieval of structured data. In the context of Bluesky, it provides a way to store the data and metadata for runs in a structured format that can be easily accessed and queried. + + +Representation of Bluesky Runs in Tiled +======================================= + +The `TiledWriter` callback is designed specifically for converting Bluesky run documents into a format suitable for storage in a Tiled database. + +It implicitly distinguishes between "internal" and "external" data. The internal data are associated with the `Event` documents generated during a run; typically these are scalar measurements from sensors, motor positions, etc., which are stored in the form of a table with columns corresponding to different data keys and each row representing a measurement at a single timestamp. + +On the other hand, the external data are written by detectors directly on disk and usually take the form of images or multidimensional arrays. The references to the external files are provided in `StreamResource` (`Resource` in legacy implementations) documents, which register the corresponding array-like `DataSources` in Tiled. `StreamDatum` (or `Datum`) documents are processed via the mechanism of `Consolidators` and determine the correspondence between the indexing within these external arrays and the physically meaningful sequence of timestamps. + +The time dimension (that is, the sequence of measurements) is usually shared between internal and external data.
Tiled handles this by writing all data from the same Bluesky stream into a container with a dedicated `"composite"` spec, which tells the Tiled client how the data are aligned. Each stream node's metadata includes the specifications for the related data keys as well as the configuration parameters provided in the `EventDescriptor` document. + +Finally, nodes for multiple streams are grouped together and placed into a container for the entire run; its metadata contains the `Start` and `Stop` documents. While the structure of the `streams` container is fixed, the parent Run allows for optional user-controlled namespaces within `views` and `aux` containers. The Run container created by TiledWriter is designated with the `BlueskyRun` version `3.0` spec to enable its back-compatibility with legacy code via bluesky-tiled-plugins. + +An example of the Tiled catalog structure for a Bluesky run might look like this: + +.. code-block:: text + + BlueskyRun_v3 + │ + ├─ streams + │ ├─ baseline + │ │ ├─ internal -- written by Tiled + │ │ ├─ image_1 -- external data from files + │ │ │ ... + │ │ └─ image_n + │ ├─ primary + │ │ ├─ internal
-- written by Tiled + │ │ ├─ image_1 -- external data from files + │ │ │ ... + │ │ └─ image_n + │ └─ third_stream + ├─ views -- optional + └─ aux -- optional + + +.. note:: + + To be able to use TiledWriter, the Tiled server must be configured with an SQL catalog and an SQL-backed storage database for tabular data. + + +Callback Architecture +===================== + +Structurally, TiledWriter consists of two main parts: `RunNormalizer` and `_RunWriter`. + +The former is responsible for converting legacy document schemas to their latest version; this ensures that existing Bluesky code that relies on older versions of the Bluesky Event Model can still function correctly with TiledWriter. For example, while TiledWriter natively works with the modern `StreamResource` and `StreamDatum` documents commonly used in asynchronous plans, the `Resource` and `Datum` documents are automatically converted to their modern counterparts prior to being written to the Tiled catalog. The schema normalization is mostly done by renaming and restructuring certain document fields, but `RunNormalizer` also allows the user to invoke use-case-specific patches for each type of documents and achieve high flexibility. + +The simplified flowchart of the `RunNormalizer` logic is shown below. It illustrates how the input documents (top) are processed and emitted as output documents (bottom) after specific transformations or caching operations. + +.. mermaid:: + + flowchart TD + %% Input documents + subgraph Input [ ] + style Input fill:#ffffff,stroke-width:0 + StartIn["Start"] + DescriptorIn["Descriptor"] + ResourceIn["Resource"] + DatumIn["Datum"] + EventIn["Event"] + StopIn["Stop"] + end + + %% Emitted documents + subgraph Output [ ] + style Output fill:#ffffff,stroke-width:0 + StartOut["Start"] + DescriptorOut["Descriptor"] + EventOut["Event"] + StreamResourceOut["StreamResource"] + StreamDatumOut["StreamDatum"] + StopOut["Stop"] + end + + %% Processing steps + StartIn --> P1["start():
patch → emit"] + P1 --> StartOut + + DescriptorIn --> P2["descriptor():<br>patch → rename fields →<br>track internal/external keys → emit"] + P2 --> DescriptorOut + + ResourceIn --> P3["resource():<br>patch → convert to StreamResource → cache"] + P3 --> SResCache[(SRes Cache)] + + DatumIn --> P4["datum():<br>patch → cache"] + P4 --> DatumCache[(Datum Cache)] + + EventIn --> P5["event():<br>patch → split internal/external keys → emit"] + P5 -->|internal data| EventOut + P5 -->|external data| P6["convert_datum_to_stream_datum()<br>move datum_kwargs to parameters on SRes"] + P6 --> StreamDatumOut + P6 --> |only before first SDatum| StreamResourceOut + + StopIn --> P7["stop():<br>patch → flush cached StreamDatum"] + P7 --> StopOut + P7 --> StreamDatumOut + P7 --> |if not emitted
already| StreamResourceOut + + %% Extra connections + SResCache --> P6 + DatumCache --> P6 + + %% Styling + classDef doc fill:#e0f7fa,stroke:#00796b,stroke-width:1px; + classDef emit fill:#f1f8e9,stroke:#33691e,stroke-width:1px; + classDef proc fill:#fff3e0,stroke:#e65100,stroke-width:1px; + + class StartIn,DescriptorIn,ResourceIn,DatumIn,EventIn,StopIn doc; + class StartOut,DescriptorOut,EventOut,StreamResourceOut,StreamDatumOut,StopOut emit; + class P1,P2,P3,P4,P5,P6,P7 proc; + + +The second component, `_RunWriter`, is the callback that directly communicates with the Tiled server and writes the documents of a single run. `TiledWriter` uses the `RunRouter` to route documents from multiple runs into separate instances of the internal `_RunWriter` callback, ensuring that each Bluesky run is handled separately. + +Furthermore, TiledWriter implements a backup mechanism that saves the documents to the local file system in case the Tiled server is not available or any other error occurs during the writing process. This ensures that no data are lost and that the writes can be retried later. + + +Usage +======== + +A minimal simulated example of using TiledWriter in a Bluesky plan is shown below: + +.. code-block:: python + + from bluesky import RunEngine + import bluesky.plans as bp + from bluesky.callbacks.tiled_writer import TiledWriter + from tiled.server import SimpleTiledServer + from tiled.client import from_uri + from ophyd.sim import det + from ophyd.sim import hw + + # Initialize the Tiled server and client + save_path = "/path/to/save/detector_data" + tiled_server = SimpleTiledServer(readable_storage=[save_path]) + tiled_client = from_uri(tiled_server.uri) + + # Initialize the RunEngine and subscribe TiledWriter + RE = RunEngine() + tw = TiledWriter(tiled_client) + RE.subscribe(tw) + + # Run an experiment collecting internal data + uid, = RE(bp.count([det], 3)) + data = tiled_client[uid]['streams/primary/det'].read() + + # Run an experiment collecting external data + uid, = RE(bp.count([hw(save_path=save_path).img], 2)) + data = tiled_client[uid]['streams/primary/img'].read() diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0d371e4ec..064a7ef71 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -53,7 +53,7 @@ Before You Begin .. code-block:: bash - conda install -c nsls2forge bluesky ophyd databroker matplotlib pyqt=5 ipython + conda install -c conda-forge bluesky ophyd databroker matplotlib pyqt=5 ipython * Start IPython: diff --git a/docs/userindex.rst b/docs/userindex.rst index ff28c8850..e9d97f282 100644 --- a/docs/userindex.rst +++ b/docs/userindex.rst @@ -23,4 +23,5 @@ User Documentation from-pyepics-to-bluesky comparison-with-spec hardware-interfaces + tiled-writer appendix \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f5276dd03..27d8fadfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ description = "Experiment specification & orchestration."
dependencies = [ "cycler", - "event-model>=1.19.8", + "event-model>=1.23.1", "historydict", "msgpack", "msgpack-numpy", @@ -75,6 +75,7 @@ dev = [ "sphinx-autobuild", "sphinx-copybutton", "sphinx-design", + "sphinxcontrib-mermaid", "sphinx_rtd_theme", "streamz", # These suitcases are test deps of databroker which we need to access diff --git a/src/bluesky/bundlers.py b/src/bluesky/bundlers.py index e970708b5..0f72f6b49 100644 --- a/src/bluesky/bundlers.py +++ b/src/bluesky/bundlers.py @@ -559,7 +559,8 @@ async def save(self, msg): # we do not have the descriptor cached, make it if descriptor_doc is None or d_objs is None: - for obj in objs_read: + # use the deque, not the set, to preserve order + for obj in self._objs_read: await self._ensure_cached(obj, collect=isinstance(obj, Collectable)) objs_dks[obj] = self._describe_cache[obj] diff --git a/src/bluesky/callbacks/buffer.py b/src/bluesky/callbacks/buffer.py new file mode 100644 index 000000000..3820aa5c7 --- /dev/null +++ b/src/bluesky/callbacks/buffer.py @@ -0,0 +1,97 @@ +import atexit +import logging +import threading +from queue import Empty, Full, Queue +from typing import Callable + +logger = logging.getLogger(__name__) + + +class BufferingWrapper: + """A wrapper for callbacks that processes documents in a separate thread. + + This class allows a callback to be executed in a background thread, processing + documents as they are received. This prevents the RunEngine from blocking on any + slow I/O operations performed by the callback. It handles graceful shutdown on exit or signal + termination, ensuring that no new documents are accepted after shutdown has been + initiated. + + The wrapped callback should be thread-safe and not subscribed to the RE directly. + If it maintains shared mutable state, it must protect it using internal locking. + + This is mainly a development feature to allow subscribing (potentially many) + experimental callbacks to a `RunEngine` without the risk of blocking the experiment. + The use in production is currently not encouraged (at least not without proper + testing and risk assessment). + + Parameters + ---------- + target : callable + The callback instance that will be called with the documents. + It should accept two parameters: `name` and `doc`. + queue_size : int, optional + The maximum size of the internal queue. Default is 1,000,000. + + Usage + ----- + tw = TiledWriter(client) + buff_tw = BufferingWrapper(tw) + RE.subscribe(buff_tw) + """ + + def __init__(self, target: Callable, queue_size: int = 1_000_000): + self._wrapped_callback = target + self._queue: Queue = Queue(maxsize=queue_size) + self._stop_event = threading.Event() + self._shutdown_lock = threading.Lock() + + self._thread = threading.Thread(target=self._process_queue, daemon=True) + self._thread.start() + + atexit.register(self.shutdown) + + def __call__(self, name, doc): + if self._stop_event.is_set(): + raise RuntimeError("Cannot accept new data after shutdown.") + # TODO: This can be refactored using the upstream functionality (in Python >= 3.13) + # https://docs.python.org/3/library/queue.html#queue.Queue.shutdown + try: + self._queue.put((name, doc)) + except Full as e: + logger.exception( + f"The buffer is full. The {self._wrapped_callback.__class__.__name__} cannot keep up with the incoming data. " # noqa + f"Consider increasing the queue size or optimizing the callback processing: {e}" + ) + raise RuntimeError( + f"The buffer is full. The {self._wrapped_callback.__class__.__name__} cannot keep up with the incoming data. 
" # noqa + "Consider increasing the queue size or optimizing the callback processing." + ) from e + except Exception as e: + logger.exception(f"Failed to put document {name} in queue: {e}") + raise RuntimeError(f"Failed to put document {name} in queue: {e}") from e + + def _process_queue(self): + while True: + try: + if item := self._queue.get(timeout=1): + self._wrapped_callback(*item) # Delegate to wrapped callback + else: + break # Received sentinel value to stop processing + except Empty: + if self._stop_event.is_set(): + break + except Exception as e: + logger.exception(f"Exception in {self._wrapped_callback.__class__.__name__}: {e}") + + def shutdown(self, wait: bool = True): + if self._stop_event.is_set(): + return + self._stop_event.set() + self._queue.put(None) + + atexit.unregister(self.shutdown) + + if wait: + self._thread.join() + + logger.info(f"{self._wrapped_callback.__class__.__name__} shut down gracefully.") diff --git a/src/bluesky/callbacks/core.py b/src/bluesky/callbacks/core.py index 1383f8265..090221fa0 100644 --- a/src/bluesky/callbacks/core.py +++ b/src/bluesky/callbacks/core.py @@ -6,7 +6,7 @@ import os import time as ttime import warnings -from collections import OrderedDict, defaultdict, deque, namedtuple +from collections import OrderedDict, deque, namedtuple from datetime import datetime from enum import Enum from functools import partial as _partial @@ -17,32 +17,6 @@ from ..utils import ensure_uid -MIMETYPE_LOOKUP = defaultdict( - lambda: "application/octet-stream", - { - "hdf5": "application/x-hdf5", - "AD_HDF5_SWMR_STREAM": "application/x-hdf5", - "AD_HDF5_SWMR_SLICE": "application/x-hdf5", - "PIL100k_HDF5": "application/x-hdf5", - "XSP3": "application/x-hdf5", - "XPS3": "application/x-hdf5", - "XSP3_BULK": "application/x-hdf5", - "XSP3_STEP": "application/x-hdf5", - "AD_TIFF": "multipart/related;type=image/tiff", - "AD_HDF5_GERM": "application/x-hdf5", - "PIZZABOX_ENC_FILE_TXT_PD": "text/csv", - "PANDA": "application/x-hdf5", - "ROI_HDF5_FLY": "application/x-hdf5", - "ROI_HDF51_FLY": "application/x-hdf5", - "SIS_HDF51_FLY_STREAM_V1": "application/x-hdf5", - "MERLIN_FLY_STREAM_V2": "application/x-hdf5", - "MERLIN_HDF5_BULK": "application/x-hdf5", - "TPX_HDF5": "application/x-hdf5", - "EIGER2_STREAM": "application/x-hdf5", - "NPY_SEQ": "multipart/related;type=application/x-npy", - }, -) - logger = logging.getLogger(__name__) diff --git a/src/bluesky/callbacks/json_writer.py b/src/bluesky/callbacks/json_writer.py new file mode 100644 index 000000000..22d24d364 --- /dev/null +++ b/src/bluesky/callbacks/json_writer.py @@ -0,0 +1,64 @@ +import json +from datetime import datetime +from pathlib import Path +from typing import Optional + + +class JSONWriter: + """Writer of Bluesky docuemnts of a single run into a JSON file as an array. + + The file is created when a Start doocument is received, each new document is + written immediately, and the JSON array is closed when the "stop" document + is received. 
+ """ + + def __init__( + self, + dirname: str, + filename: Optional[str] = None, + ): + self.dirname = Path(dirname) + self.filename = filename + + def __call__(self, name, doc): + if name == "start": + self.filename = self.filename or f"{doc['uid'].split('-')[0]}.json" + with open(self.dirname / self.filename, "w") as file: + file.write("[\n") + json.dump({"name": name, "doc": doc}, file) + file.write(",\n") + + elif name == "stop": + with open(self.dirname / self.filename, "a") as file: + json.dump({"name": name, "doc": doc}, file) + file.write("\n]") + + else: + with open(self.dirname / self.filename, "a") as file: + json.dump({"name": name, "doc": doc}, file) + file.write(",\n") + + +class JSONLinesWriter: + """Writer of Bluesky docuemnts into a JSON Lines file + + If the file already exists, new documents will be appended to it. + """ + + def __init__(self, dirname: str, filename: Optional[str] = None): + self.dirname = Path(dirname) + self.filename = filename + + def __call__(self, name, doc): + if not self.filename: + if name == "start": + # If the first document is a start document, use the uid to create a filename + self.filename = f"{doc['uid'].split('-')[0]}.jsonl" + else: + # If the first document is not a start document, use the current date + self.filename = f"{datetime.today().strftime('%Y-%m-%d')}.jsonl" + mode = "a" if (self.dirname / self.filename).exists() else "w" + + with open(self.dirname / self.filename, mode) as file: + json.dump({"name": name, "doc": doc}, file) + file.write("\n") diff --git a/src/bluesky/callbacks/tiled_writer.py b/src/bluesky/callbacks/tiled_writer.py index 07602f884..bfedb2406 100644 --- a/src/bluesky/callbacks/tiled_writer.py +++ b/src/bluesky/callbacks/tiled_writer.py @@ -1,15 +1,23 @@ import copy import itertools -from collections import defaultdict +import logging +from collections import defaultdict, deque, namedtuple from pathlib import Path -from typing import Any, Optional, Union, cast +from typing import Any, Callable, Optional, Union, cast from warnings import warn import pyarrow -from event_model import RunRouter, unpack_datum_page, unpack_event_page +from event_model import ( + DocumentNames, + RunRouter, + schema_validators, + unpack_datum_page, + unpack_event_page, +) from event_model.documents import ( Datum, DatumPage, + DocumentType, Event, EventDescriptor, EventPage, @@ -19,10 +27,10 @@ StreamDatum, StreamResource, ) +from event_model.documents.event_descriptor import DataKey from event_model.documents.stream_datum import StreamRange from tiled.client import from_profile, from_uri from tiled.client.base import BaseClient -from tiled.client.composite import Composite from tiled.client.container import Container from tiled.client.dataframe import DataFrameClient from tiled.client.utils import handle_error @@ -30,10 +38,50 @@ from tiled.utils import safe_json_dump from ..consolidators import ConsolidatorBase, DataSource, StructureFamily, consolidator_factory +from ..run_engine import Dispatcher from ..utils import truncate_json_overflow -from .core import MIMETYPE_LOOKUP, CallbackBase +from .core import CallbackBase +from .json_writer import JSONLinesWriter + +# Aggregare the Event table rows and StreamDatums in batches before writing to Tiled +BATCH_SIZE = 10000 + +# Disallow using reserved words as data_keys identifiers +# Related: https://github.com/bluesky/event-model/pull/223 +RESERVED_DATA_KEYS = ["time", "seq_num"] + +# A lookup table for converting broad JSON types to numpy dtypes +JSON_TO_NUMPY_DTYPE = {"number": " 
StreamResource: """Make changes to and return a shallow copy of StreamRsource dictionary adhering to the new structure. @@ -114,7 +205,6 @@ def _convert_resource_to_stream_resource(self, doc: Union[Resource, StreamResour Kept for back-compatibility with old StreamResource schema from event_model<1.20.0 or Resource documents that are converted to StreamResources. """ - doc = copy.copy(doc) stream_resource_doc = cast(StreamResource, doc) if "mimetype" not in doc: @@ -128,7 +218,7 @@ def _convert_resource_to_stream_resource(self, doc: Union[Resource, StreamResour # Convert the Resource (or old StreamResource) document to a StreamResource document resource_dict = cast(dict, doc) - stream_resource_doc["mimetype"] = MIMETYPE_LOOKUP[resource_dict.pop("spec")] + stream_resource_doc["mimetype"] = self.spec_to_mimetype[resource_dict.pop("spec")] stream_resource_doc["parameters"] = resource_dict.pop("resource_kwargs", {}) file_path = Path(resource_dict.pop("root").strip("/")).joinpath( resource_dict.pop("resource_path").strip("/") @@ -141,9 +231,299 @@ def _convert_resource_to_stream_resource(self, doc: Union[Resource, StreamResour "path", stream_resource_doc["parameters"].pop("dataset", "") ) + # Ensure that only the necessary fields are present in the StreamResource document + stream_resource_doc["data_key"] = stream_resource_doc.get("data_key", "") + required_keys = {"data_key", "mimetype", "parameters", "uid", "uri"} + for key in set(stream_resource_doc.keys()).difference(required_keys): + stream_resource_doc.pop(key) # type: ignore + return stream_resource_doc - def _write_internal_data(self, data_cache: list[dict[str, Any]], desc_node: Composite): + def _convert_datum_to_stream_datum( + self, datum_doc: Datum, data_key: str, desc_uid: str, seq_num: int + ) -> tuple[Optional[StreamResource], StreamDatum]: + """Convert the Datum document to the StreamDatum format + + This conversion requires (and is triggered when) the Event document is received. The function also returns + a corresponding StreamResource document, if it hasn't been emitted yet. + + Parameters + ---------- + datum_doc : Datum + The Datum document to convert. + data_key : str + The data_key of the external data in the Event document; this parameter must be included in the new + StreamResource document. + desc_uid : str + The UID of the EventDescriptor document that this datum belongs to. + seq_num : int + The sequence number of the Event document that this datum belongs to; 1-base index. + + Returns + ------- + sres_doc : StreamResource, optional + The corresponding StreamResource document, if it hasn't been emitted yet, otehrwise -- None. + sdat_doc : StreamDatum + The StreamDatum document corresponding to the Datum document. + """ + + # Some Datums contain datum_kwargs and the 'frame' field, which indicates the last index of the + # frame. This should take precedence over the 'seq_num' field in the Event document. Keep the + # last frame index in memory, since next Datums may refer to more than one frame (it is + # assumed that Events always refer to a single frame). + # There are cases when the frame_index is reset during the scan (e.g. if Datums for the same + # data_key belong to different Resources), so the 'carry' field is used to keep track of the + # previous frame index. 
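+        # Illustrative example (hypothetical values): for three consecutive Datums with
+        # frame=0, 1, 2 and no carry, the emitted StreamDatum documents cover indices
+        # [0, 1), [1, 2), [2, 3) and seq_nums [1, 2), [2, 3), [3, 4).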
+ datum_kwargs = datum_doc.get("datum_kwargs", {}) + frame = datum_kwargs.pop("frame", None) + if frame is not None: + desc_name = self._desc_name_by_uid[desc_uid] # Name of the descriptor (stream) + _next_index = self._next_frame_index[(desc_name, data_key)] + index_start = sum(_next_index.values()) + _next_index["index"] = frame + 1 + index_stop = sum(_next_index.values()) + if index_stop < index_start: + # The datum is likely referencing a next Resource, but the indexing must continue + _next_index["carry"] = index_start + index_stop = sum(_next_index.values()) + else: + index_start, index_stop = seq_num - 1, seq_num + indices = StreamRange(start=index_start, stop=index_stop) + seq_nums = StreamRange(start=index_start + 1, stop=index_stop + 1) + + # produce the Resource document, if needed (add data_key to match the StreamResource schema) + # Emit a copy of the StreamResource document with a new uid; this allows to account for cases + # where one Resource is used by several data streams with different data_keys and datum_kwargs. + sres_doc = None + sres_uid = datum_doc["resource"] + new_sres_uid = sres_uid + "-" + data_key + if (sres_uid in self._sres_cache) and (new_sres_uid not in self._emitted): + sres_doc = copy.deepcopy(self._sres_cache[sres_uid]) + sres_doc["data_key"] = data_key + sres_doc["parameters"].update(datum_kwargs) + sres_doc["uid"] = new_sres_uid + + # Produce the StreamDatum document + sdat_doc = StreamDatum( + uid=datum_doc["datum_id"], + stream_resource=new_sres_uid, + descriptor=desc_uid, + indices=indices, + seq_nums=seq_nums, + ) + + return sres_doc, sdat_doc + + def start(self, doc: RunStart): + doc = copy.copy(doc) + if patch := self.patches.get("start"): + doc = patch(doc) + self.emit(DocumentNames.start, doc) + + def stop(self, doc: RunStop): + doc = copy.copy(doc) + if patch := self.patches.get("stop"): + doc = patch(doc) + + # If there are any cached references to external data, emit StreamResources and StreamDatums now + for datum_id, data_key, desc_uid, seq_num in self._ext_ref_cache: + if datum_doc := self._datum_cache.pop(datum_id, None): + sres_doc, sdat_doc = self._convert_datum_to_stream_datum(datum_doc, data_key, desc_uid, seq_num) + if (sres_doc is not None) and (sres_doc["uid"] not in self._emitted): + self.emit(DocumentNames.stream_resource, sres_doc) + self._emitted.add(sres_doc["uid"]) + self.emit(DocumentNames.stream_datum, sdat_doc) + else: + raise RuntimeError( + f"Cannot emit StreamDatum for {data_key} because the corresponding Datum document is missing." 
+ ) + + self.emit(DocumentNames.stop, doc) + + def descriptor(self, doc: EventDescriptor): + doc = copy.deepcopy(doc) + if patch := self.patches.get("descriptor"): + doc = patch(doc) + + # Rename data_keys that use reserved words, "time" and "seq_num" + for name in RESERVED_DATA_KEYS: + if name in doc["data_keys"].keys(): + if f"_{name}" in doc["data_keys"].keys(): + raise ValueError(f"Cannot rename {name} to _{name} because it already exists") + doc["data_keys"][f"_{name}"] = doc["data_keys"].pop(name) + for obj_data_keys_list in doc["object_keys"].values(): + if name in obj_data_keys_list: + obj_data_keys_list.remove(name) + obj_data_keys_list.append(f"_{name}") + + # Rename some fields (in-place) to match the current schema for the descriptor + # Loop over all dictionaries that specify data_keys (both event data_keys or configuration data_keys) + conf_data_keys = (obj["data_keys"].values() for obj in doc["configuration"].values()) + for data_keys_spec in itertools.chain(doc["data_keys"].values(), *conf_data_keys): + # Determine numpy data type. From highest precedent to lowest: + # 1. Try 'dtype_descr', optional, if present -- this is a structural dtype + # 2. Try 'dtype_numpy', optional in the document schema. + # 3. Try 'dtype_str', an old convention predataing 'dtype_numpy', not in the schema. + # 4. Get 'dtype', required by the schema, which is a fuzzy JSON spec like 'number' + # and make a best effort to convert it to a numpy spec like '= TABLE_UPDATE_BATCH_SIZE: + if len(data_cache) >= self._batch_size: self._write_internal_data(data_cache, desc_node=self._desc_nodes[desc_uid]) data_cache.clear() - # Process _external_ data: Loop over all referenced Datums - for data_key in self.data_keys_ext.keys(): - if doc["filled"].get(data_key, False): - continue - - if datum_id := doc["data"].get(data_key): - if datum_id in self._datum_cache.keys(): - # Convert the Datum document to the StreamDatum format - datum_doc = self._datum_cache.pop(datum_id) - uid = datum_doc["datum_id"] - sres_uid = datum_doc["resource"] - - # Some Datums contain datum_kwargs and the 'frame' field, which indicates the last index of the - # frame. This should take precedence over the 'seq_num' field in the Event document. Keep the - # last frame index in memory, since next Datums may refer to more than one frame (it is - # assumed that Events always refer to a single frame). - # There are cases when the frame_index is reset during the scan (e.g. if Datums for the same - # data_key belong to different Resources), so the 'carry' field is used to keep track of the - # previous frame index. 
- datum_kwargs = datum_doc.get("datum_kwargs", {}) - frame = datum_kwargs.pop("frame", None) - if frame is not None: - _next_index = self._next_frame_index[(desc_name, data_key)] - index_start = sum(_next_index.values()) - _next_index["index"] = frame + 1 - index_stop = sum(_next_index.values()) - if index_stop < index_start: - # The datum is likely referencing a next Resource, but the indexing must continue - _next_index["carry"] = index_start - index_stop = sum(_next_index.values()) - else: - index_start, index_stop = doc["seq_num"] - 1, doc["seq_num"] - indices = StreamRange(start=index_start, stop=index_stop) - seq_nums = StreamRange(start=index_start + 1, stop=index_stop + 1) - - # Update the Resource document (add data_key to match the StreamResource schema) - # Save a copy of the StreamResource document; this allows to account for cases where one - # Resource is used by several data streams with different data_keys and datum_kwargs. - sres_uid_key = sres_uid + "-" + data_key - if ( - sres_uid in self._stream_resource_cache.keys() - and sres_uid_key not in self._stream_resource_cache.keys() - ): - sres_doc = copy.deepcopy(self._stream_resource_cache[sres_uid]) - sres_doc["data_key"] = data_key - sres_doc["parameters"].update(datum_kwargs) - self._stream_resource_cache[sres_uid_key] = sres_doc - - # Produce the StreamDatum document - stream_datum_doc = StreamDatum( - uid=uid, - stream_resource=sres_uid_key, - descriptor=desc_uid, - indices=indices, - seq_nums=seq_nums, - ) - - # Try to concatenate and cache the StreamDatum document to process it later - if cached_stream_datum_doc := self._external_data_cache.pop(data_key, None): - try: - _doc = concatenate_stream_datums(cached_stream_datum_doc, stream_datum_doc) - if _doc["indices"]["stop"] - _doc["indices"]["start"] > TABLE_UPDATE_BATCH_SIZE: - # Write the (large) concatenated StreamDatum document immediately - self.stream_datum(_doc) - else: - # Keep it in cache for further concatenation - self._external_data_cache[data_key] = _doc - except ValueError: - # If concatenation fails, write the cached document and the new one separately - self.stream_datum(cached_stream_datum_doc) - self.stream_datum(stream_datum_doc) - else: - self._external_data_cache[data_key] = stream_datum_doc - else: - raise RuntimeError(f"Datum {datum_id} is referenced before being declared.") - def event_page(self, doc: EventPage): for _doc in unpack_event_page(doc): self.event(_doc) - def datum(self, doc: Datum): - self._datum_cache[doc["datum_id"]] = copy.copy(doc) - - def datum_page(self, doc: DatumPage): - for _doc in unpack_datum_page(doc): - self.datum(_doc) - - def resource(self, doc: Resource): - self._stream_resource_cache[doc["uid"]] = self._convert_resource_to_stream_resource(doc) - def stream_resource(self, doc: StreamResource): - # Backwards compatibility: old StreamResource schema is converted to the new one (event-model<1.20.0) - self._stream_resource_cache[doc["uid"]] = self._convert_resource_to_stream_resource(doc) + self._stream_resource_cache[doc["uid"]] = doc def get_sres_node(self, sres_uid: str, desc_uid: Optional[str] = None) -> tuple[BaseClient, ConsolidatorBase]: """Get the Tiled node and the associate Consolidator corresponding to the data_key in StreamResource @@ -395,6 +690,7 @@ def get_sres_node(self, sres_uid: str, desc_uid: Optional[str] = None) -> tuple[ data_sources=[consolidator.get_data_source()], metadata={}, specs=[], + access_tags=self.access_tags, ) self._consolidators[sres_uid] = self._consolidators[full_data_key] = 
consolidator @@ -404,18 +700,145 @@ def get_sres_node(self, sres_uid: str, desc_uid: Optional[str] = None) -> tuple[ return sres_node, consolidator - def _update_data_source_for_node(self, node: BaseClient, data_source: DataSource): - """Update StreamResource node in Tiled""" - data_source.id = node.data_sources()[0].id # ID of the existing DataSource record - handle_error( - node.context.http_client.put( - node.uri.replace("/metadata/", "/data_source/", 1), - content=safe_json_dump({"data_source": data_source}), - ) - ).json() - def stream_datum(self, doc: StreamDatum): - # Get the Stream Resource node and the associtaed Consolidator - sres_node, consolidator = self.get_sres_node(doc["stream_resource"], desc_uid=doc["descriptor"]) - consolidator.consume_stream_datum(doc) - self._update_data_source_for_node(sres_node, consolidator.get_data_source()) + if self._batch_size <= 1: + # If batch size is 1, write the StreamDatum immediately + self._write_external_data(doc) + return + + # Try to concatenate and cache the StreamDatum document to process it later + sres_uid = doc["stream_resource"] + if cached_stream_datum_doc := self._external_data_cache.pop(sres_uid, None): + try: + _doc = concatenate_stream_datums(cached_stream_datum_doc, doc) + if _doc["indices"]["stop"] - _doc["indices"]["start"] >= self._batch_size: + self._write_external_data(_doc) + else: + self._external_data_cache[sres_uid] = _doc + except ValueError: + # If concatenation fails, write the cached document and then the new one immediately + self._write_external_data(cached_stream_datum_doc) + self._write_external_data(doc) + else: + self._external_data_cache[sres_uid] = doc + + +class TiledWriter: + """Callback for write metadata and data from Bluesky documents into Tiled. + + This callback relies on the `RunRouter` to route documents from one or more runs into + independent instances of the `_RunWriter` callback. The `RunRouter` is responsible for + creating a new instance of the `_RunWriter` for each run. + + Parameters + ---------- + client : `tiled.client.BaseClient` + The Tiled client to use for writing data. This client must be initialized with + the appropriate credentials and connection parameters to access the Tiled server. + normalizer : Optional[CallbackBase] + A callback for normalizing Bluesky documents to the latest schema. If not provided, + the default `RunNormalizer` will be used. The supplied normalizer should accept + `patches` and `spec_to_mimetype` (or `**kwargs`) for initialization. + To disable normalization and pass the incoming document directly to _RunWriter, + set this parameter to `None`. + patches : Optional[dict[str, Callable]] + A dictionary of patch functions to apply to specific document types before normalizing + and writing them. The keys should be the document names (e.g., "start", "stop", + "descriptor", etc.), and the values should be functions that take a document and return + a modified document of the same type. + This argument is ignored if `normalizer` is set to `None`. + spec_to_mimetype : Optional[dict[str, str]] + A dictionary mapping spec names to MIME types. This is used to convert `Resource` documents + to the latest `StreamResource` schema. If not provided, the default mapping will be used. + This argument is ignored if `normalizer` is set to `None`. + backup_directory : Optional[str] + If specified, this directory will be used to back up runs that fail to be written + to Tiled. 
All documents for the entire Bluesky Run will be written in JSONLines format, + allowing for recovery in case of errors during the writing process. + batch_size : int + The number of Events or StreamDatums collect before writing them to Tiled. + This is useful for reducing the number of write operations and improving performance when + writing large amounts of data (e.g. database migration). For streaming applications, + it is recommended to set this parameter to <= 1, so that each Event or StreamDatum is written + to Tiled immediately after they are received. + """ + + def __init__( + self, + client: BaseClient, + *, + normalizer: Optional[type[CallbackBase]] = RunNormalizer, + patches: Optional[dict[str, Callable]] = None, + spec_to_mimetype: Optional[dict[str, str]] = None, + backup_directory: Optional[str] = None, + batch_size: int = BATCH_SIZE, + ): + self.client = client.include_data_sources() + self.patches = patches or {} + self.spec_to_mimetype = spec_to_mimetype or {} + self.backup_directory = backup_directory + self._normalizer = normalizer + self._run_router = RunRouter([self._factory]) + self._batch_size = batch_size + + def _factory(self, name, doc): + """Factory method to create a callback for writing a single run into Tiled.""" + cb = run_writer = _RunWriter(self.client, batch_size=self._batch_size) + + if self._normalizer: + # If normalize is True, create a RunNormalizer callback to update documents to the latest schema + cb = self._normalizer(patches=self.patches, spec_to_mimetype=self.spec_to_mimetype) + cb.subscribe(run_writer) + + if self.backup_directory: + # If backup_directory is specified, create a conditional backup callback writing documents to JSONLines + cb = _ConditionalBackup(cb, [JSONLinesWriter(self.backup_directory)]) + + return [cb], [] + + @classmethod + def from_uri( + cls, + uri, + *, + normalizer: Optional[type[CallbackBase]] = RunNormalizer, + patches: Optional[dict[str, Callable]] = None, + spec_to_mimetype: Optional[dict[str, str]] = None, + backup_directory: Optional[str] = None, + batch_size: int = BATCH_SIZE, + **kwargs, + ): + client = from_uri(uri, **kwargs) + return cls( + client, + normalizer=normalizer, + patches=patches, + spec_to_mimetype=spec_to_mimetype, + backup_directory=backup_directory, + batch_size=batch_size, + ) + + @classmethod + def from_profile( + cls, + profile, + *, + normalizer: Optional[type[CallbackBase]] = RunNormalizer, + patches: Optional[dict[str, Callable]] = None, + spec_to_mimetype: Optional[dict[str, str]] = None, + backup_directory: Optional[str] = None, + batch_size: int = BATCH_SIZE, + **kwargs, + ): + client = from_profile(profile, **kwargs) + return cls( + client, + normalizer=normalizer, + patches=patches, + spec_to_mimetype=spec_to_mimetype, + backup_directory=backup_directory, + batch_size=batch_size, + ) + + def __call__(self, name, doc): + self._run_router(name, doc) diff --git a/src/bluesky/consolidators.py b/src/bluesky/consolidators.py index 21934ce3e..b2d33dabe 100644 --- a/src/bluesky/consolidators.py +++ b/src/bluesky/consolidators.py @@ -4,16 +4,13 @@ import os import re import warnings -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union, cast import numpy as np from event_model.documents import EventDescriptor, StreamDatum, StreamResource from tiled.mimetypes import DEFAULT_ADAPTERS_BY_MIMETYPE from tiled.structures.array import ArrayStructure, BuiltinDtype, StructDtype -DTYPE_LOOKUP = {"number": " dict: - return {**self._sres_parameters()} + return 
{**self._sres_parameters} class HDF5Consolidator(ConsolidatorBase): diff --git a/src/bluesky/plan_stubs.py b/src/bluesky/plan_stubs.py index ee9dcd556..28d1fc1b9 100644 --- a/src/bluesky/plan_stubs.py +++ b/src/bluesky/plan_stubs.py @@ -351,6 +351,7 @@ def rel_set( def mv( *args: Union[Movable, Any], group: Optional[Hashable] = None, + timeout: Optional[float] = None, **kwargs, ) -> MsgGenerator[tuple[Status, ...]]: """ @@ -364,6 +365,8 @@ def mv( device1, value1, device2, value2, ... group : string, optional Used to mark these as a unit to be waited on. + timeout : float, optional + Specify a maximum time that the move(s) can be waited for. kwargs : passed to obj.set() @@ -389,7 +392,7 @@ def mv( for obj, val in step.items(): ret = yield Msg("set", obj, val, group=group, **kwargs) status_objects.append(ret) - yield Msg("wait", None, group=group) + yield Msg("wait", None, group=group, timeout=timeout) return tuple(status_objects) @@ -398,7 +401,7 @@ def mv( @plan def mvr( - *args: Union[Movable, Any], group: Optional[Hashable] = None, **kwargs + *args: Union[Movable, Any], group: Optional[Hashable] = None, timeout: Optional[float] = None, **kwargs ) -> MsgGenerator[tuple[Status, ...]]: """ Move one or more devices to a relative setpoint. Wait for all to complete. @@ -411,6 +414,8 @@ def mvr( device1, value1, device2, value2, ... group : string, optional Used to mark these as a unit to be waited on. + timeout : float, optional + Specify a maximum time that the move(s) can be waited for. kwargs : passed to obj.set() @@ -436,7 +441,7 @@ def mvr( @relative_set_decorator(objs) def inner_mvr(): - return (yield from mv(*args, group=group, **kwargs)) + return (yield from mv(*args, group=group, timeout=timeout, **kwargs)) return (yield from inner_mvr()) @@ -743,7 +748,7 @@ def input_plan(prompt: str = "") -> MsgGenerator[str]: @plan def prepare(obj: Preparable, *args, group: Optional[Hashable] = None, wait: bool = False, **kwargs): """ - Prepare a device. + Prepare a device ready for trigger or kickoff. Parameters ---------- diff --git a/src/bluesky/run_engine.py b/src/bluesky/run_engine.py index ad864f91b..c7e5bb404 100644 --- a/src/bluesky/run_engine.py +++ b/src/bluesky/run_engine.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from inspect import Parameter, Signature, iscoroutine +from inspect import iscoroutine from itertools import count from warnings import warn @@ -37,6 +37,7 @@ Stageable, Status, Stoppable, + SyncOrAsync, T, Triggerable, check_supports, @@ -58,6 +59,7 @@ RequestStop, RunEngineInterrupted, SigintHandler, + Subscribers, ensure_generator, normalize_subs_input, single_gen, @@ -195,17 +197,6 @@ def __get__(self, instance, owner): return super().__get__(instance, owner) -# See RunEngine.__call__. -_call_sig = Signature( - [ - Parameter("self", Parameter.POSITIONAL_ONLY), - Parameter("plan", Parameter.POSITIONAL_ONLY), - Parameter("subs", Parameter.POSITIONAL_ONLY, default=None), - Parameter("metadata_kw", Parameter.VAR_KEYWORD), - ] -) - - def default_scan_id_source(md): return md.get("scan_id", 0) + 1 @@ -270,9 +261,9 @@ class RunEngine: Expected return: normalized metadata scan_id_source : callable, optional - a function that will be used to calculate scan_id. Default is to - increment scan_id by 1 each time. However you could pass in a - customized function to get a scan_id from any source. + a (possibly async) function that will be used to calculate scan_id. + Default is to increment scan_id by 1 each time. 
However you could pass + in a customized function to get a scan_id from any source. Expected signature: f(md) Expected return: updated scan_id value @@ -418,7 +409,7 @@ def __init__( context_managers: typing.Optional[list] = None, md_validator: typing.Optional[typing.Callable] = None, md_normalizer: typing.Optional[typing.Callable] = None, - scan_id_source: typing.Optional[typing.Callable] = default_scan_id_source, + scan_id_source: typing.Callable[[dict], SyncOrAsync[int]] = default_scan_id_source, during_task: typing.Optional[DuringTask] = None, call_returns_result: bool = False, ): @@ -872,7 +863,13 @@ def _create_result(self, plan_return): ) return rs - def __call__(self, *args, **metadata_kw): + def __call__( + self, + plan: typing.Iterable[Msg], + subs: typing.Optional[Subscribers] = None, + /, + **metadata_kw: typing.Any, + ) -> typing.Union[RunEngineResult, tuple[str, ...]]: """Execute a plan. Any keyword arguments will be interpreted as metadata and recorded with @@ -905,12 +902,6 @@ def __call__(self, *args, **metadata_kw): """ if self.state == "panicked": raise RuntimeError("The RunEngine is panicked and cannot be recovered. You must restart bluesky.") - # This scheme lets us make 'plan' and 'subs' POSITIONAL ONLY, reserving - # all keyword arguments for user metadata. - arguments = _call_sig.bind(self, *args, **metadata_kw).arguments - plan = arguments["plan"] - subs = arguments.get("subs", None) - metadata_kw = arguments.get("metadata_kw", {}) if "raise_if_interrupted" in metadata_kw: warn( # noqa: B028 "The 'raise_if_interrupted' flag has been removed. The " @@ -993,8 +984,6 @@ def set_blocking_event(future): else: return tuple(self._run_start_uids) - __call__.__signature__ = _call_sig # type: ignore - def resume(self): """Resume a paused plan from the last checkpoint. @@ -1857,7 +1846,7 @@ async def _open_run(self, msg): raise IllegalMessageSequence("A 'close_run' message was not received before the 'open_run' message") # Run scan_id calculation method - self.md["scan_id"] = self.scan_id_source(self.md) + self.md["scan_id"] = await maybe_await(self.scan_id_source(self.md)) # For metadata below, info about plan passed to self.__call__ for. 
plan_type = type(self._plan).__name__ @@ -2902,8 +2891,8 @@ def call_in_bluesky_event_loop(coro: typing.Awaitable[T], timeout: typing.Option if iscoroutine(coro): coro.close() raise RuntimeError("Bluesky event loop not running") - fut = asyncio.run_coroutine_threadsafe( - coro, + fut: concurrent.futures.Future = asyncio.run_coroutine_threadsafe( + coro, # type: ignore loop=_bluesky_event_loop, ) return fut.result(timeout=timeout) diff --git a/src/bluesky/tests/examples/external_assets.json b/src/bluesky/tests/examples/external_assets.json index 5beaa6a91..8d8426159 100644 --- a/src/bluesky/tests/examples/external_assets.json +++ b/src/bluesky/tests/examples/external_assets.json @@ -8,7 +8,8 @@ "plan_type": "generator", "plan_name": "count", "detectors": [ - "det" + "det-obj1", + "det-obj2" ] } }, @@ -16,7 +17,7 @@ "name": "descriptor", "doc": { "configuration": { - "det": { + "det-obj1": { "data": {}, "timestamps": {}, "data_keys": {} @@ -31,7 +32,7 @@ 1 ], "external": "STREAM:", - "object_name": "det" + "object_name": "det-obj1" }, "det-key2": { "source": "file", @@ -43,32 +44,19 @@ 17 ], "external": "STREAM:", - "object_name": "det" - }, - "det-key3": { - "source": "file", - "dtype": "array", - "dtype_numpy": "|u1", - "shape": [ - 1, - 10, - 15 - ], - "external": "STREAM:", - "object_name": "det" + "object_name": "det-obj1" } }, "name": "primary", "object_keys": { - "det": [ + "det-obj1": [ "det-key1", - "det-key2", - "det-key3" + "det-key2" ] }, "run_start": "{{ uuid }}-9724b2201fe7", "time": 1745500521.79327, - "uid": "{{ uuid }}-8c00740d9771", + "uid": "{{ uuid }}-descriptor01", "hints": {} } }, @@ -82,10 +70,7 @@ ] }, "data_key": "det-key1", - "root": "{{ root_path }}", - "resource_path": "/dataset.h5", "uri": "file://localhost/{{ root_path }}/dataset.h5", - "spec": "AD_HDF5_SWMR_STREAM", "mimetype": "application/x-hdf5", "uid": "det-key1-uid", "run_start": "{{ uuid }}-9724b2201fe7" @@ -95,7 +80,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key1-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key1-uid/0", "indices": { "start": 0, @@ -119,10 +104,7 @@ ] }, "data_key": "det-key2", - "root": "{{ root_path }}", - "resource_path": "/dataset.h5", "uri": "file://localhost/{{ root_path }}/dataset.h5", - "spec": "AD_HDF5_SWMR_STREAM", "mimetype": "application/x-hdf5", "uid": "det-key2-uid", "run_start": "{{ uuid }}-9724b2201fe7" @@ -132,7 +114,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key2-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key2-uid/0", "indices": { "start": 0, @@ -144,6 +126,42 @@ } } }, + { + "name": "descriptor", + "doc": { + "configuration": { + "det-obj2": { + "data": {}, + "timestamps": {}, + "data_keys": {} + } + }, + "data_keys": { + "det-key3": { + "source": "file", + "dtype": "array", + "dtype_numpy": "|u1", + "shape": [ + 1, + 10, + 15 + ], + "external": "STREAM:", + "object_name": "det-obj2" + } + }, + "name": "secondary", + "object_keys": { + "det-obj2": [ + "det-key3" + ] + }, + "run_start": "{{ uuid }}-9724b2201fe7", + "time": 1745500521.79337, + "uid": "{{ uuid }}-descriptor02", + "hints": {} + } + }, { "name": "stream_resource", "doc": { @@ -157,9 +175,7 @@ "join_method": "stack" }, "data_key": "det-key3", - "root": "{{ root_path }}/tiff_files", "uri": "file://localhost/{{ root_path }}/tiff_files/", - "spec": "AD_TIFF", "mimetype": "multipart/related;type=image/tiff", "uid": "det-key3-uid", "run_start": "{{ uuid 
}}-9724b2201fe7" @@ -169,7 +185,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key3-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor02", "uid": "det-key3-uid/0", "indices": { "start": 0, @@ -190,14 +206,14 @@ "timestamps": {}, "seq_num": 1, "filled": {}, - "descriptor": "{{ uuid }}-8c00740d9771" + "descriptor": "{{ uuid }}-descriptor01" } }, { "name": "stream_datum", "doc": { "stream_resource": "det-key1-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key1-uid/1", "indices": { "start": 1, @@ -213,7 +229,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key2-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key2-uid/1", "indices": { "start": 1, @@ -229,7 +245,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key3-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor02", "uid": "det-key3-uid/1", "indices": { "start": 1, @@ -250,14 +266,14 @@ "timestamps": {}, "seq_num": 2, "filled": {}, - "descriptor": "{{ uuid }}-8c00740d9771" + "descriptor": "{{ uuid }}-descriptor01" } }, { "name": "stream_datum", "doc": { "stream_resource": "det-key1-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key1-uid/2", "indices": { "start": 2, @@ -273,7 +289,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key2-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor01", "uid": "det-key2-uid/2", "indices": { "start": 2, @@ -289,7 +305,7 @@ "name": "stream_datum", "doc": { "stream_resource": "det-key3-uid", - "descriptor": "{{ uuid }}-8c00740d9771", + "descriptor": "{{ uuid }}-descriptor02", "uid": "det-key3-uid/2", "indices": { "start": 2, @@ -310,7 +326,7 @@ "timestamps": {}, "seq_num": 3, "filled": {}, - "descriptor": "{{ uuid }}-8c00740d9771" + "descriptor": "{{ uuid }}-descriptor01" } }, { @@ -322,7 +338,8 @@ "exit_status": "success", "reason": "", "num_events": { - "primary": 3 + "primary": 3, + "secondary": 0 } } } diff --git a/src/bluesky/tests/examples/external_assets_legacy.json b/src/bluesky/tests/examples/external_assets_legacy.json new file mode 100644 index 000000000..60a775801 --- /dev/null +++ b/src/bluesky/tests/examples/external_assets_legacy.json @@ -0,0 +1,161 @@ +[ + { + "name": "start", + "doc": { + "uid": "{{ uuid }}-9724b2201fe7", + "time": 1745500521.706236, + "scan_id": 3, + "plan_type": "generator", + "plan_name": "count", + "detectors": [ + "det" + ] + } + }, + { + "name": "descriptor", + "doc": { + "configuration": { + "det": { + "data": {}, + "timestamps": {}, + "data_keys": {} + } + }, + "data_keys": { + "det-key2": { + "source": "file", + "dtype": "array", + "dtype_numpy": " y): + pass + """ + start = time.time() + while time.time() - start < timeout: + if condition(): + yield + return + time.sleep(interval) + raise TimeoutError("Condition not met within timeout") + + +@pytest.mark.parametrize("cb", ["fast_cb", "slow_cb"]) +def test_calls_are_delegated(cb, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + buff_cb("start", {"x": 1}) + with wait_for_condition(lambda: ("start", {"x": 1}) in cb.called): + assert len(cb.called) == 1 + + buff_cb("stop", {"x": 2}) + with wait_for_condition(lambda: ("stop", {"x": 2}) in cb.called): + assert len(cb.called) == 2 + + +@pytest.mark.parametrize("cb", ["fast_cb", "slow_cb"]) +def 
test_calls_are_delegated_and_finished(cb, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + assert buff_cb._thread.is_alive() + assert len(cb.called) == 0 + + buff_cb("start", {"x": 1}) + buff_cb("stop", {"x": 2}) + + assert buff_cb._thread.is_alive() + buff_cb.shutdown() + assert not buff_cb._thread.is_alive() + + assert ("start", {"x": 1}) in cb.called + assert ("stop", {"x": 2}) in cb.called + + +@pytest.mark.parametrize("cb", ["fast_cb", "slow_cb"]) +def test_graceful_shutdown_blocks_queue(cb, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + buff_cb("event", {"data": 42}) + buff_cb.shutdown() + + with pytest.raises(RuntimeError): + buff_cb("post-shutdown", {"fail": True}) + + assert ("event", {"data": 42}) in cb.called + assert ("post-shutdown", {"fail": True}) not in cb.called + + +@pytest.mark.parametrize("cb", ["fast_cb", "slow_cb"]) +def test_double_shutdown_does_not_fail(cb, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + buff_cb("one", {}) + buff_cb.shutdown() + buff_cb.shutdown() # Second shutdown should be a no-op + + assert ("one", {}) in cb.called + + +@pytest.mark.parametrize("cb, expected_min_duration", [("fast_cb", 0.0), ("slow_cb", 0.5)]) +def test_shutdown_waits_for_processing(cb, expected_min_duration, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + for i in range(5): + buff_cb("event", {"val": i}) + + t0 = time.time() + time.sleep(0.02) # Let the last document enter the queue before shutdown + buff_cb.shutdown() + duration = time.time() - t0 + assert duration >= expected_min_duration # Ensure shutdown waited for processing + + # After shutdown, all should be processed + assert len(cb.called) == 5 + for i in range(5): + assert ("event", {"val": i}) in cb.called + + +@pytest.mark.parametrize("cb, expected_max_duration", [("fast_cb", 0.1), ("slow_cb", 0.1)]) +def test_shutdown_without_wait(cb, expected_max_duration, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + for i in range(5): + buff_cb("event", {"val": i}) + + # Shutdown without waiting — thread may still be running briefly + t0 = time.time() + buff_cb.shutdown(wait=False) + duration = time.time() - t0 + assert duration < expected_max_duration + assert len(cb.called) <= 5 + + +@pytest.mark.parametrize("cb", ["fast_cb", "slow_cb"]) +def test_shutdown_stops_processing_new_items(cb, request): + cb = request.getfixturevalue(cb) + buff_cb = BufferingWrapper(cb) + + buff_cb("one", {}) + buff_cb.shutdown() + + with pytest.raises(RuntimeError): + buff_cb("two", {}) + + assert ("one", {}) in cb.called + assert ("two", {}) not in cb.called + + +def test_execption_in_callback(): + """Test that exceptions in the callback are handled gracefully.""" + cb = CallbackWithException() + buff_cb = BufferingWrapper(cb) + + with pytest.raises(RuntimeError, match="This callback always raises an exception."): + cb("test", {"data": 123}) + + # Ensure that the exception does not crash the buffering wrapper + buff_cb("test", {"data": 123}) + assert buff_cb._thread.is_alive() # The thread should still be running + buff_cb("test", {"data": 123}) + + +def test_callback_logging_exceptions(monkeypatch): + from types import SimpleNamespace + from unittest.mock import MagicMock + + logger = SimpleNamespace(exception=MagicMock()) + monkeypatch.setattr("bluesky.callbacks.buffer.logger", logger) + + cb = CallbackWithException() + buff_cb = BufferingWrapper(cb) + + assert 
logger.exception.call_count == 0 + buff_cb("test", {"data": 123}) + with wait_for_condition(lambda: logger.exception.call_count == 1): + assert True diff --git a/src/bluesky/tests/test_consolidators.py b/src/bluesky/tests/test_consolidators.py index a7c195f30..fe55167e6 100644 --- a/src/bluesky/tests/test_consolidators.py +++ b/src/bluesky/tests/test_consolidators.py @@ -430,6 +430,7 @@ def test_tiff_and_jpeg_chunks( assert len(cons.assets) == 5 * frames_per_datum / expected_chunks[0][0] if join_method == "concat" else 5 +# Tuples of (filename, original_template, expected_template, formatted) template_testdata = [ ("", "img_{:06d}", "img_{:06d}", "img_000042"), ("img", "{:s}_{:06d}", "img_{:06d}", "img_000042"), @@ -441,6 +442,7 @@ def test_tiff_and_jpeg_chunks( ("img", "%s_% 06d", "img_{: 06d}", "img_ 00042"), ("img", "%s_%-+6d", "img_{:<+6d}", "img_+42 "), ("img", "%s_%- 6d", "img_{:< 6d}", "img_ 42 "), + ("img", "%s_%6.6d", "img_{:06d}", "img_000042"), ] diff --git a/src/bluesky/tests/test_json_writer.py b/src/bluesky/tests/test_json_writer.py new file mode 100644 index 000000000..f57da24bf --- /dev/null +++ b/src/bluesky/tests/test_json_writer.py @@ -0,0 +1,43 @@ +import json +import os + +import pytest + +from bluesky.callbacks.json_writer import JSONLinesWriter, JSONWriter + + +def read_json_file(path): + with open(path) as f: + return json.load(f) + + +def read_jsonl_file(path): + with open(path) as f: + return [json.loads(line) for line in f if line.strip()] + + +@pytest.mark.parametrize("writer_class, extension", [(JSONWriter, "json"), (JSONLinesWriter, "jsonl")]) +def test_json_writer(tmpdir, writer_class, extension): + writer = writer_class(tmpdir) + start_doc = {"uid": "abc", "value": 1} + event_doc = {"seq_num": 1, "data": {"x": 1}} + stop_doc = {"exit_status": "success"} + + writer("start", start_doc) + writer("event", event_doc) + writer("stop", stop_doc) + + # Read the file and check its contents + filename = os.path.join(tmpdir, f"abc.{extension}") + data = read_json_file(filename) if (writer_class == JSONWriter) else read_jsonl_file(filename) + assert data[0]["name"] == "start" + assert data[1]["name"] == "event" + assert data[2]["name"] == "stop" + + +@pytest.mark.parametrize("writer_class, extension", [(JSONWriter, "json"), (JSONLinesWriter, "jsonl")]) +def test_custom_filename(tmpdir, writer_class, extension): + writer = writer_class(tmpdir, filename=f"custom.{extension}") + doc = {"uid": "value"} + writer("start", doc) + assert os.path.exists(os.path.join(tmpdir, f"custom.{extension}")) diff --git a/src/bluesky/tests/test_new_examples.py b/src/bluesky/tests/test_new_examples.py index 7167ee753..4590480fe 100644 --- a/src/bluesky/tests/test_new_examples.py +++ b/src/bluesky/tests/test_new_examples.py @@ -109,6 +109,26 @@ {"group": "A", "wait": True}, [Msg("read", "det"), Msg("set", "det", 5, group="A"), Msg("wait", None, group="A")], ), + ( + mv, + ("motor1", 1, "motor2", 2), + {"group": "A"}, + [ + Msg("set", "motor1", 1, group="A"), + Msg("set", "motor2", 2, group="A"), + Msg("wait", None, group="A", timeout=None), + ], + ), + ( + mv, + ("motor1", 1, "motor2", 2), + {"group": "A", "timeout": 42}, + [ + Msg("set", "motor1", 1, group="A"), + Msg("set", "motor2", 2, group="A"), + Msg("wait", None, group="A", timeout=42), + ], + ), (trigger, ("det",), {}, [Msg("trigger", "det", group=None)]), (trigger, ("det",), {"group": "A"}, [Msg("trigger", "det", group="A")]), (sleep, (2,), {}, [Msg("sleep", None, 2)]), @@ -147,28 +167,27 @@ def test_stub_plans(plan, plan_args, 
plan_kwargs, msgs, hw): assert list(plan(*plan_args, **plan_kwargs)) == msgs -def test_mv(hw): - # special-case mv because the group is not configurable - # move motors first to ensure that movement is absolute, not relative - actual = list(mv(hw.motor1, 1, hw.motor2, 2)) - strip_group(actual) - for msg in actual[:2]: - msg.command == "set" # noqa: B015 - assert set([msg.obj for msg in actual[:2]]) == set([hw.motor1, hw.motor2]) # noqa: C403, C405 - assert actual[2] == Msg("wait", None) +@pytest.mark.parametrize(("timeout", "should_fail"), [(0, True), (1, False), (None, False)]) +def test_mv_timeout(RE, hw, timeout, should_fail): + sig = hw.motor + sig.delay = 0.01 + def tester(obj): + try: + yield from mv(obj, 1, timeout=timeout) + except TimeoutError: + assert should_fail + else: + assert not should_fail -def test_mv_with_timeout(hw): - # special-case mv because the group is not configurable - # move motors first to ensure that movement is absolute, not relative - actual = list(mv(hw.motor1, 1, hw.motor2, 2, timeout=42)) - for msg in actual[:2]: - msg.command == "set" # noqa: B015 - msg.kwargs["timeout"] == 42 # noqa: B015 + # This needs to happen so all Futures can be cleared inside RE. + yield from sleep(obj.delay) + + RE(tester(sig)) def test_mvr(RE, hw): - # special-case mv because the group is not configurable + # special-case mvr because the value cannot be pre-defined in test_stub_plans # move motors first to ensure that movement is relative, not absolute hw.motor1.set(10) hw.motor2.set(10) @@ -180,7 +199,7 @@ def test_mvr(RE, hw): for msg in actual[:2]: msg.command == "set" # noqa: B015 assert set([msg.obj for msg in actual[:2]]) == set([hw.motor1, hw.motor2]) # noqa: C403, C405 - assert actual[2] == Msg("wait", None) + assert actual[2] == Msg("wait", None, timeout=None) def test_locatable_message_multiple_objects(RE, hw): @@ -284,8 +303,9 @@ def test_mvr_with_timeout(hw): # move motors first to ensure that movement is absolute, not relative actual = list(mvr(hw.motor1, 1, hw.motor2, 2, timeout=42)) for msg in actual[:2]: - msg.command == "set" # noqa: B015 - msg.kwargs["timeout"] == 42 # noqa: B015 + assert msg.command == "set" # noqa: B015 + + assert actual[2].kwargs["timeout"] == 42 def strip_group(plan): @@ -642,6 +662,7 @@ def accumulator(msg): for msg in msgs: msg.kwargs.pop("group", None) + msg.kwargs.pop("timeout", None) assert msgs == expected diff --git a/src/bluesky/tests/test_run_engine.py b/src/bluesky/tests/test_run_engine.py index d004108ab..8d51622bd 100644 --- a/src/bluesky/tests/test_run_engine.py +++ b/src/bluesky/tests/test_run_engine.py @@ -22,7 +22,7 @@ wait, wait_for, ) -from bluesky.plans import count, grid_scan +from bluesky.plans import count, grid_scan, scan from bluesky.preprocessors import ( SupplementalData, baseline_wrapper, @@ -2128,3 +2128,46 @@ def plan(det): assert len(d.event[desc["uid"]]) == 1 assert stop["num_events"]["primary"] == 2 + + +def test_sync_scan_id_source(RE): + def sync_scan_source(md: dict) -> int: + return 314159 + + RE.scan_id_source = sync_scan_source + RE([Msg("open_run")]) + assert RE.md["scan_id"] == 314159 + + +def test_async_scan_id_source(RE): + async def async_scan_source(md: dict) -> int: + return 42 + + RE.scan_id_source = async_scan_source + RE([Msg("open_run")]) + assert RE.md["scan_id"] == 42 + + +@requires_ophyd +def test_descriptor_order(RE): + from itertools import permutations + + from ophyd import Component, Device, Signal + + class Issue1930(Device): + alpha = Component(Signal, value=1, kind="hinted") + 
bravo = Component(Signal, value=2, kind="hinted") + charlie = Component(Signal, value=3, kind="hinted") + + i1930 = Issue1930(name="i1930") + + for dets in permutations([i1930.alpha, i1930.bravo, i1930.charlie]): + key_order = [d.name for d in dets] + + def check(key_order, name, doc): + if name == "event": + assert list(doc["data"]) == key_order + elif name == "descriptor": + assert list(doc["data_keys"]) == key_order + + RE(scan(dets, i1930.charlie, -1, 1, 2), lambda name, doc, key_order=key_order: check(key_order, name, doc)) diff --git a/src/bluesky/tests/test_tiled_writer.py b/src/bluesky/tests/test_tiled_writer.py index 6c7630292..ffa78eda3 100644 --- a/src/bluesky/tests/test_tiled_writer.py +++ b/src/bluesky/tests/test_tiled_writer.py @@ -3,7 +3,7 @@ import uuid from collections.abc import Iterator from pathlib import Path -from typing import Optional, Union +from typing import Optional, Union, cast import h5py import jinja2 @@ -113,7 +113,7 @@ def _get_hdf5_stream(self, data_key: str, index: int) -> tuple[Optional[StreamRe file_path = os.path.join(self.root, "dataset.h5") uid = f"{data_key}-uid" data_desc = self.describe()[data_key] # Descriptor dictionary for the current data key - data_shape = tuple(data_desc["shape"]) + data_shape = cast(tuple[int, ...], tuple(data_desc["shape"])) hdf5_dataset = f"/{data_key}/VALUE" stream_resource = None @@ -163,7 +163,7 @@ def _get_tiff_stream(self, data_key: str, index: int) -> tuple[Optional[StreamRe for data_key in [f"{self.name}-sd3"]: uid = f"{data_key}-uid" data_desc = self.describe()[data_key] # Descriptor dictionary for the current data key - data_shape = tuple(data_desc["shape"]) + data_shape = cast(tuple[int, ...], tuple(data_desc["shape"])) stream_resource = None if self.counter == 0: # Backward compatibility test, ignore typing errors @@ -290,7 +290,7 @@ def test_stream_datum_readable_counts(RE, client, tmp_path): det = StreamDatumReadableCollectable(name="det", root=str(tmp_path)) RE(bp.count([det], 3), tw) stream = client.values().last()["streams"]["primary"] - keys = sorted(set(stream.parts).difference({"internal"})) + keys = sorted(set(stream.base.keys()).difference({"internal"})) assert stream[keys[0]].shape == (3,) assert stream[keys[1]].shape == (15, 10, 15) @@ -306,7 +306,7 @@ def test_stream_datum_readable_with_two_detectors(RE, client, tmp_path): tw = TiledWriter(client) RE(bp.count([det1, det2], 3), tw) stream = client.values().last()["streams"]["primary"] - keys = sorted(set(stream.parts).difference({"internal"})) + keys = sorted(set(stream.base.keys()).difference({"internal"})) assert stream[keys[0]].shape == (3,) assert stream[keys[1]].shape == (15, 10, 15) @@ -327,7 +327,7 @@ def test_stream_datum_collectable(RE, client, tmp_path): tw = TiledWriter(client) RE(collect_plan(det, name="primary"), tw) stream = client.values().last()["streams"]["primary"] - keys = sorted(set(stream.parts).difference({"internal"})) + keys = sorted(set(stream.base.keys()).difference({"internal"})) assert stream[keys[0]].read() is not None assert stream[keys[1]].read() is not None @@ -348,8 +348,8 @@ def test_handling_non_stream_resource(RE, client, tmp_path, frames_per_event): ) tw = TiledWriter(client) RE(bp.count([det], 3), tw) - extr = client.values().last()["streams"]["primary"].parts["img"] - intr = client.values().last()["streams"]["primary"].parts["internal"] + extr = client.values().last()["streams"]["primary"].base["img"] + intr = client.values().last()["streams"]["primary"].base["internal"] assert extr.shape == (3, 
frames_per_event, 10, 15) assert extr.read() is not None assert set(intr.columns) == {"seq_num", "time"} @@ -364,9 +364,13 @@ def collect_plan(*objs, name="primary"): yield from bps.close_run() -@pytest.mark.parametrize("fname", ["internal_events", "external_assets"]) -def test_with_correct_sample_runs(client, external_assets_folder, fname): - tw = TiledWriter(client) +@pytest.mark.parametrize("fname", ["internal_events", "external_assets", "external_assets_legacy"]) +@pytest.mark.parametrize("batch_size", [0, 1, 1000, None]) +def test_with_correct_sample_runs(client, batch_size, external_assets_folder, fname): + if batch_size is None: + tw = TiledWriter(client) + else: + tw = TiledWriter(client, batch_size=batch_size) for item in render_templated_documents(fname + ".json", external_assets_folder): if item["name"] == "start": uid = item["doc"]["uid"] @@ -383,7 +387,7 @@ def test_with_correct_sample_runs(client, external_assets_folder, fname): def test_validate_external_data(client, external_assets_folder, error_type, validate): tw = TiledWriter(client) - documents = render_templated_documents("external_assets_key2.json", external_assets_folder) + documents = render_templated_documents("external_assets_single_key.json", external_assets_folder) for item in documents: name, doc = item["name"], item["doc"] if name == "start": @@ -422,7 +426,7 @@ def test_validate_external_data(client, external_assets_folder, error_type, vali def test_slice_and_squeeze(client, external_assets_folder, squeeze): tw = TiledWriter(client) - documents = render_templated_documents("external_assets_key2.json", external_assets_folder) + documents = render_templated_documents("external_assets_single_key.json", external_assets_folder) for item in documents: name, doc = item["name"], item["doc"] if name == "start": @@ -445,7 +449,7 @@ def test_slice_and_squeeze(client, external_assets_folder, squeeze): def test_legacy_multiplier_parameter(client, external_assets_folder): tw = TiledWriter(client) - documents = render_templated_documents("external_assets_key2.json", external_assets_folder) + documents = render_templated_documents("external_assets_single_key.json", external_assets_folder) for item in documents: name, doc = item["name"], item["doc"] if name == "start": @@ -466,7 +470,7 @@ def test_legacy_multiplier_parameter(client, external_assets_folder): def test_streams_with_no_events(client, external_assets_folder): tw = TiledWriter(client) - for item in render_templated_documents("external_assets_key2.json", external_assets_folder): + for item in render_templated_documents("external_assets_single_key.json", external_assets_folder): name, doc = item["name"], item["doc"] if name == "start": uid = doc["uid"] @@ -484,11 +488,13 @@ def test_streams_with_no_events(client, external_assets_folder): @pytest.mark.parametrize("include_data_sources", [True, False]) -@pytest.mark.parametrize("fname", ["internal_events", "external_assets"]) +@pytest.mark.parametrize("fname", ["internal_events", "external_assets", "external_assets_legacy"]) def test_zero_gets(client, external_assets_folder, fname, include_data_sources): + pytest.xfail("Broken after Tiled 0.1.0-b38 release") client = client.new_variation(include_data_sources=include_data_sources) - tw = TiledWriter(client) assert client._include_data_sources == include_data_sources + tw = TiledWriter(client) + assert bool(tw.client._include_data_sources) with record_history() as history: for item in render_templated_documents(fname + ".json", external_assets_folder): @@ -496,4 +502,71 
@@ def test_zero_gets(client, external_assets_folder, fname, include_data_sources): # Count the number of GET requests num_gets = sum(1 for req in history.requests if req.method == "GET") - assert num_gets == 0 if include_data_sources else num_gets == 1 + assert num_gets == 0 + + +def test_bad_document_order(client, external_assets_folder): + """Test that the TiledWriter can handle documents in a different order than expected + + Emit datum documents in the end, before the Stop document, but after corresponding Event documents. + """ + tw = TiledWriter(client) + + document_cache = [] + for item in render_templated_documents("external_assets_legacy.json", external_assets_folder): + name, doc = item["name"], item["doc"] + if name == "start": + uid = doc["uid"] + + if name == "datum": + document_cache.append({"name": name, "doc": doc}) + continue + + if name == "stop": + for cached_item in document_cache: + tw(**cached_item) + + tw(**item) + + run = client[uid] + + for stream in run["streams"].values(): + assert stream.read() is not None + assert "time" in stream.keys() + assert "seq_num" in stream.keys() + assert len(stream.keys()) > 2 # There's at least one data key in addition to time and seq_num + + +def test_json_backup(client, tmpdir, monkeypatch): + def patched_event(name, doc): + raise RuntimeError("This is a test error to check the backup functionality") + + monkeypatch.setattr("bluesky.callbacks.tiled_writer._RunWriter.event", patched_event) + + tw = TiledWriter(client, backup_directory=str(tmpdir)) + + for item in render_templated_documents("internal_events.json", ""): + name, doc = item["name"], item["doc"] + if name == "start": + uid = doc["uid"] + print(name) + + tw(**item) + + run = client[uid] + + assert "primary" in run["streams"] # The Descriptor was processed and the primary stream was created + assert run["streams"]["primary"].read() is not None # The stream can be read + assert len(run["streams"]["primary"].read()) == 0 # No events were processed due to the error + assert "stop" in run.metadata # The TiledWriter did not crash + + # Check that the backup file was created + filepath = tmpdir / f"{uid[:8]}.jsonl" + assert filepath.exists() + with open(filepath) as f: + lines = [json.loads(line) for line in f if line.strip()] + assert len(lines) == 7 + assert lines[0]["name"] == "start" + assert lines[1]["name"] == "descriptor" + assert lines[2]["name"].startswith("event") + assert lines[6]["name"] == "stop" diff --git a/src/bluesky/utils/__init__.py b/src/bluesky/utils/__init__.py index 40f982587..a20134e42 100644 --- a/src/bluesky/utils/__init__.py +++ b/src/bluesky/utils/__init__.py @@ -15,7 +15,7 @@ import uuid import warnings from collections import namedtuple -from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Generator, Iterable +from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Generator, Iterable, Sequence from collections.abc import Iterable as TypingIterable from functools import partial, reduce, wraps from inspect import Parameter, Signature @@ -23,6 +23,7 @@ Any, Callable, Optional, + TypedDict, TypeVar, Union, ) @@ -32,6 +33,7 @@ import msgpack_numpy import numpy as np from cycler import Cycler, cycler +from event_model.documents import Document, Event, EventDescriptor, RunStart, RunStop from tqdm import tqdm from tqdm.utils import _screen_shape_wrapper, _term_move_up, _unicode from typing_extensions import TypeIs @@ -93,6 +95,24 @@ def __repr__(self): #: Scalar or iterable of values, one to be applied to each point in a scan 
ScalarOrIterableFloat = Union[float, TypingIterable[float]] +# Single function to be used as an event listener +Subscriber = Callable[[str, P], Any] + +OneOrMany = Union[P, Sequence[P]] + + +# Mapping from event type to listener or list of listeners +class SubscriberMap(TypedDict, total=False): + all: OneOrMany[Subscriber[Document]] + start: OneOrMany[Subscriber[RunStart]] + stop: OneOrMany[Subscriber[RunStop]] + event: OneOrMany[Subscriber[Event]] + descriptor: OneOrMany[Subscriber[EventDescriptor]] + + +# Single listener, multiple listeners or mapping of listeners by event type +Subscribers = Union[OneOrMany[Subscriber[Document]], SubscriberMap] + class RunEngineControlException(Exception): """Exception for signaling within the RunEngine."""
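
A minimal usage sketch for the buffering callback wrapper exercised in the tests above. The import path follows the module monkeypatched in those tests (bluesky.callbacks.buffer); the slow_exporter callback and its timing are illustrative only.

import time

from bluesky.callbacks.buffer import BufferingWrapper


def slow_exporter(name, doc):
    """A document consumer too slow to run on the RunEngine's own thread."""
    time.sleep(0.5)  # stand-in for network or disk I/O
    print(f"exported {name} document")


buffered = BufferingWrapper(slow_exporter)

# Calls return immediately: documents are queued and drained on a worker thread,
# and exceptions raised by the wrapped callback are logged rather than propagated.
buffered("start", {"uid": "abc"})
buffered("stop", {"exit_status": "success"})

# shutdown() blocks until the queue is drained (pass wait=False to skip that);
# a second shutdown is a no-op, and calling the wrapper afterwards raises RuntimeError.
buffered.shutdown()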
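
A short sketch of the JSON writers covered by test_json_writer.py, wired to a RunEngine with simulated hardware from ophyd.sim; the output directory and fixed filename are placeholders.

from bluesky import RunEngine
from bluesky.callbacks.json_writer import JSONLinesWriter, JSONWriter
from bluesky.plans import count
from ophyd.sim import det

RE = RunEngine({})

# By default each run is written to <directory>/<start uid>.json (or .jsonl);
# a fixed name can be supplied with the `filename` keyword instead.
RE.subscribe(JSONWriter("/tmp/runs"))
RE.subscribe(JSONLinesWriter("/tmp/runs", filename="latest.jsonl"))

RE(count([det], num=3))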
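
A sketch of the timeout argument to mv demonstrated by test_mv_timeout and test_mvr_with_timeout, again using ophyd.sim motors; the 5 second bound is arbitrary.

from bluesky import RunEngine
from bluesky.plan_stubs import mv
from ophyd.sim import motor1, motor2

RE = RunEngine({})


def bounded_move():
    # mv emits one "set" message per motor plus a single "wait" message that now
    # carries timeout=...; if the group has not finished within 5 seconds the
    # wait times out and TimeoutError is raised inside the plan.
    try:
        yield from mv(motor1, 1, motor2, 2, timeout=5)
    except TimeoutError:
        print("move did not complete within 5 s")


RE(bounded_move())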
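
A sketch of the sync and async scan_id_source hooks covered by test_sync_scan_id_source and test_async_scan_id_source; the increment logic shown here is just an illustration of a source that derives the id from the metadata dict it receives.

from bluesky import RunEngine
from bluesky.plans import count
from ophyd.sim import det

RE = RunEngine({})


def next_scan_id(md: dict) -> int:
    # Synchronous source: derive the next id from the persisted metadata.
    return md.get("scan_id", 0) + 1


async def next_scan_id_from_service(md: dict) -> int:
    # Async source: could instead await an external id service here.
    return md.get("scan_id", 0) + 1


RE.scan_id_source = next_scan_id  # either flavour is accepted
# RE.scan_id_source = next_scan_id_from_service

RE(count([det]))
print(RE.md["scan_id"])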
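
A sketch of the TiledWriter options exercised in test_with_correct_sample_runs and test_json_backup. The Tiled URI, API key, and backup path are placeholders; the batching semantics are inferred from the parametrized values (0, 1, 1000) and should be checked against the TiledWriter docstring.

from bluesky import RunEngine
from bluesky.callbacks.tiled_writer import TiledWriter
from bluesky.plans import count
from ophyd.sim import det
from tiled.client import from_uri

client = from_uri("http://localhost:8000", api_key="secret")  # placeholder Tiled server

# batch_size controls how documents are batched before being written; if writing
# a run fails, its documents are dumped to
# <backup_directory>/<first 8 chars of the start uid>.jsonl instead of crashing
# the callback.
tw = TiledWriter(client, batch_size=1000, backup_directory="/tmp/tiled_backup")

RE = RunEngine({})
RE(count([det], num=3), tw)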
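
A sketch of how the new Subscriber / SubscriberMap aliases can annotate per-call subscriptions. Passing a mapping of document names to one or many callbacks as the second argument to RE(...) is existing RunEngine behaviour; the callbacks here are illustrative.

from bluesky import RunEngine
from bluesky.plans import count
from bluesky.utils import SubscriberMap
from ophyd.sim import det


def on_start(name, doc):
    print("run started:", doc["uid"])


def on_event(name, doc):
    print("event", doc["seq_num"], doc["data"])


# A single listener or a list of listeners is allowed per document type.
subs: SubscriberMap = {
    "start": on_start,
    "event": [on_event],
}

RE = RunEngine({})
RE(count([det], num=2), subs)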