Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit dad7703

Browse files
jleibs and teh-cmc authored
Make it possible to configure batcher explicitly via python APIs (#10657)
### What

We changed the behavior of the batcher in #10620. However, the only way to override this behavior was to set environment variables, which can be tricky/annoying in some circumstances. This plumbs through the configuration settings so that they can now be specified explicitly.

---------

Co-authored-by: Clement Rey <[email protected]>
1 parent 58a1897 commit dad7703

File tree

6 files changed

+452
-5
lines changed

6 files changed

+452
-5
lines changed

rerun_py/rerun_bindings/rerun_bindings.pyi

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ from __future__ import annotations
22

33
import os
44
from collections.abc import Iterable, Iterator, Sequence
5-
from datetime import datetime
5+
from datetime import datetime, timedelta
66
from enum import Enum
77
from typing import Any, Callable, Optional, Self
88

@@ -614,6 +614,120 @@ class PyRecordingStream:
614614
Calling operations such as flush or set_sink will result in an error.
615615
"""
616616

617+
class ChunkBatcherConfig:
618+
"""Defines the different batching thresholds used within the RecordingStream."""
619+
620+
def __init__(
621+
self,
622+
flush_tick: int | float | timedelta | None = None,
623+
flush_num_bytes: int | None = None,
624+
flush_num_rows: int | None = None,
625+
chunk_max_rows_if_unsorted: int | None = None,
626+
) -> None:
627+
"""
628+
Initialize the chunk batcher configuration.
629+
630+
Parameters
631+
----------
632+
flush_tick : int | float | timedelta | None
633+
Duration of the periodic tick, by default `None`.
634+
Equivalent to setting the `RERUN_FLUSH_TICK_SECS` environment variable.
635+
636+
flush_num_bytes : int | None
637+
Flush if the accumulated payload has a size in bytes equal to or greater than this, by default `None`.
638+
Equivalent to setting the `RERUN_FLUSH_NUM_BYTES` environment variable.
639+
640+
flush_num_rows : int | None
641+
Flush if the accumulated payload has a number of rows equal to or greater than this, by default `None`.
642+
Equivalent to setting the `RERUN_FLUSH_NUM_ROWS` environment variable.
643+
644+
chunk_max_rows_if_unsorted : int | None
645+
Split a chunk if it contains at least this many rows and one or more of its timelines are unsorted,
646+
by default `None`.
647+
Equivalent to setting the `RERUN_CHUNK_MAX_ROWS_IF_UNSORTED` environment variable.
648+
649+
"""
650+
651+
@property
652+
def flush_tick(self) -> timedelta:
653+
"""
654+
Duration of the periodic tick.
655+
656+
Equivalent to setting the `RERUN_FLUSH_TICK_SECS` environment variable.
657+
"""
658+
659+
@flush_tick.setter
660+
def flush_tick(self, value: float | int | timedelta) -> None:
661+
"""
662+
Duration of the periodic tick.
663+
664+
Equivalent to setting the `RERUN_FLUSH_TICK_SECS` environment variable.
665+
"""
666+
667+
@property
668+
def flush_num_bytes(self) -> int:
669+
"""
670+
Flush if the accumulated payload has a size in bytes equal to or greater than this.
671+
672+
Equivalent to setting the `RERUN_FLUSH_NUM_BYTES` environment variable.
673+
"""
674+
675+
@flush_num_bytes.setter
676+
def flush_num_bytes(self, value: int) -> None:
677+
"""
678+
Flush if the accumulated payload has a size in bytes equal to or greater than this.
679+
680+
Equivalent to setting the `RERUN_FLUSH_NUM_BYTES` environment variable.
681+
"""
682+
683+
@property
684+
def flush_num_rows(self) -> int:
685+
"""
686+
Flush if the accumulated payload has a number of rows equal to or greater than this.
687+
688+
Equivalent to setting the `RERUN_FLUSH_NUM_ROWS` environment variable.
689+
"""
690+
691+
@flush_num_rows.setter
692+
def flush_num_rows(self, value: int) -> None:
693+
"""
694+
Flush if the accumulated payload has a number of rows equal to or greater than this.
695+
696+
Equivalent to setting the `RERUN_FLUSH_NUM_ROWS` environment variable.
697+
"""
698+
699+
@property
700+
def chunk_max_rows_if_unsorted(self) -> int:
701+
"""
702+
Split a chunk if it contains at least this many rows and one or more of its timelines are unsorted.
703+
704+
Equivalent to setting the `RERUN_CHUNK_MAX_ROWS_IF_UNSORTED` environment variable.
705+
"""
706+
707+
@chunk_max_rows_if_unsorted.setter
708+
def chunk_max_rows_if_unsorted(self, value: int) -> None:
709+
"""
710+
Split a chunk if it contains at least this many rows and one or more of its timelines are unsorted.
711+
712+
Equivalent to setting the `RERUN_CHUNK_MAX_ROWS_IF_UNSORTED` environment variable.
713+
"""
714+
715+
@staticmethod
716+
def DEFAULT() -> ChunkBatcherConfig:
717+
"""Default configuration, applicable to most use cases."""
718+
719+
@staticmethod
720+
def LOW_LATENCY() -> ChunkBatcherConfig:
721+
"""Low-latency configuration, preferred when streaming directly to a viewer."""
722+
723+
@staticmethod
724+
def ALWAYS() -> ChunkBatcherConfig:
725+
"""Always flushes ASAP."""
726+
727+
@staticmethod
728+
def NEVER() -> ChunkBatcherConfig:
729+
"""Never flushes unless manually told to (or hitting one of the builtin invariants)."""
730+
617731
class PyMemorySinkStorage:
618732
def concat_as_bytes(self, concat: Optional[PyMemorySinkStorage] = None) -> bytes:
619733
"""
@@ -658,6 +772,7 @@ def new_recording(
658772
make_thread_default: bool = True,
659773
default_enabled: bool = True,
660774
send_properties: bool = True,
775+
batcher_config: Optional[ChunkBatcherConfig] = None,
661776
) -> PyRecordingStream:
662777
"""Create a new recording stream."""
663778

rerun_py/rerun_sdk/rerun/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@
153153
)
154154
from .recording_stream import (
155155
BinaryStream as BinaryStream,
156+
ChunkBatcherConfig as ChunkBatcherConfig,
156157
RecordingStream as RecordingStream,
157158
binary_stream as binary_stream,
158159
get_application_id as get_application_id,

rerun_py/rerun_sdk/rerun/recording_stream.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
from typing import TYPE_CHECKING, Any, Callable, TypeVar, overload
1212

1313
import numpy as np
14+
from rerun_bindings import (
15+
ChunkBatcherConfig as ChunkBatcherConfig,
16+
)
1417
from typing_extensions import deprecated
1518

1619
import rerun as rr
@@ -39,6 +42,7 @@ def new_recording(
3942
make_thread_default: bool = False,
4043
spawn: bool = False,
4144
default_enabled: bool = True,
45+
batcher_config: ChunkBatcherConfig | None = None,
4246
) -> RecordingStream:
4347
"""
4448
Creates a new recording with a user-chosen application id (name) that can be used to log data.
@@ -103,6 +107,8 @@ def new_recording(
103107
default_enabled
104108
Should Rerun logging be on by default?
105109
Can be overridden with the RERUN env-var, e.g. `RERUN=on` or `RERUN=off`.
110+
batcher_config
111+
Optional configuration for the chunk batcher.
106112
107113
Returns
108114
-------
@@ -117,6 +123,7 @@ def new_recording(
117123
make_default=make_default,
118124
make_thread_default=make_thread_default,
119125
default_enabled=default_enabled,
126+
batcher_config=batcher_config,
120127
)
121128

122129
if spawn:
@@ -302,7 +309,8 @@ class RecordingStream:
302309
Micro-batching using both space and time triggers (whichever comes first) is done automatically
303310
in a dedicated background thread.
304311
305-
You can configure the frequency of the batches using the following environment variables:
312+
You can configure the frequency of the batches using the `batcher_config` parameter when creating
313+
the RecordingStream, or via the following environment variables:
306314
307315
- `RERUN_FLUSH_TICK_SECS`:
308316
Flush frequency in seconds (default: `0.2` (200ms)).
@@ -322,6 +330,7 @@ def __init__(
322330
make_thread_default: bool = False,
323331
default_enabled: bool = True,
324332
send_properties: bool = True,
333+
batcher_config: ChunkBatcherConfig | None = None,
325334
) -> None:
326335
"""
327336
Creates a new recording stream with a user-chosen application id (name) that can be used to log data.
@@ -383,6 +392,8 @@ def __init__(
383392
Can be overridden with the RERUN env-var, e.g. `RERUN=on` or `RERUN=off`.
384393
send_properties
385394
Immediately send the recording properties to the viewer (default: True)
395+
batcher_config
396+
Optional configuration for the chunk batcher.
386397
387398
Returns
388399
-------
@@ -411,6 +422,7 @@ def __init__(
411422
make_thread_default=make_thread_default,
412423
default_enabled=default_enabled,
413424
send_properties=send_properties,
425+
batcher_config=batcher_config,
414426
)
415427

416428
self._prev: RecordingStream | None = None

0 commit comments

Comments
 (0)