Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ec00728

Browse files
authored
feat: streaming sink (numaproj#193)
Signed-off-by: Sidhant Kohli <[email protected]>
1 parent b1b2d94 commit ec00728

File tree

18 files changed

+622
-259
lines changed

18 files changed

+622
-259
lines changed

examples/sink/async_log/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = ""
55
authors = ["Numaflow developers"]
66

77
[tool.poetry.dependencies]
8-
python = "~3.10"
8+
python = ">=3.10,<3.13"
99
pynumaflow = { path = "../../../"}
1010

1111
[tool.poetry.dev-dependencies]

examples/sink/log/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = ""
55
authors = ["Numaflow developers"]
66

77
[tool.poetry.dependencies]
8-
python = "~3.10"
8+
python = ">=3.10,<3.13"
99
pynumaflow = { path = "../../../"}
1010

1111
[tool.poetry.dev-dependencies]

pynumaflow/proto/sinker/sink.proto

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
syntax = "proto3";
2+
23
import "google/protobuf/empty.proto";
34
import "google/protobuf/timestamp.proto";
45

@@ -7,7 +8,7 @@ package sink.v1;
78

89
service Sink {
910
// SinkFn writes the request to a user defined sink.
10-
rpc SinkFn(stream SinkRequest) returns (SinkResponse);
11+
rpc SinkFn(stream SinkRequest) returns (stream SinkResponse);
1112

1213
// IsReady is the heartbeat endpoint for gRPC.
1314
rpc IsReady(google.protobuf.Empty) returns (ReadyResponse);
@@ -17,12 +18,29 @@ service Sink {
1718
* SinkRequest represents a request element.
1819
*/
1920
message SinkRequest {
20-
repeated string keys = 1;
21-
bytes value = 2;
22-
google.protobuf.Timestamp event_time = 3;
23-
google.protobuf.Timestamp watermark = 4;
24-
string id = 5;
25-
map<string, string> headers = 6;
21+
message Request {
22+
repeated string keys = 1;
23+
bytes value = 2;
24+
google.protobuf.Timestamp event_time = 3;
25+
google.protobuf.Timestamp watermark = 4;
26+
string id = 5;
27+
map<string, string> headers = 6;
28+
}
29+
// Required field indicating the request.
30+
Request request = 1;
31+
// Required field indicating the status of the request.
32+
// If eot is set to true, it indicates the end of transmission.
33+
TransmissionStatus status = 2;
34+
// Optional field indicating the handshake message.
35+
optional Handshake handshake = 3;
36+
}
37+
38+
/*
39+
* Handshake message between client and server to indicate the start of transmission.
40+
*/
41+
message Handshake {
42+
// Required field indicating the start of transmission.
43+
bool sot = 1;
2644
}
2745

2846
/**
@@ -32,6 +50,13 @@ message ReadyResponse {
3250
bool ready = 1;
3351
}
3452

53+
/**
54+
* TransmissionStatus is the status of the transmission.
55+
*/
56+
message TransmissionStatus {
57+
bool eot = 1;
58+
}
59+
3560
/*
3661
* Status is the status of the response.
3762
*/
@@ -53,5 +78,7 @@ message SinkResponse {
5378
// err_msg is the error message, set it if success is set to false.
5479
string err_msg = 3;
5580
}
56-
repeated Result results = 1;
81+
Result result = 1;
82+
optional Handshake handshake = 2;
83+
optional TransmissionStatus status = 3;
5784
}

pynumaflow/proto/sinker/sink_pb2.py

Lines changed: 22 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pynumaflow/proto/sinker/sink_pb2.pyi

Lines changed: 63 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,72 @@ FAILURE: Status
2525
FALLBACK: Status
2626

2727
class SinkRequest(_message.Message):
28-
__slots__ = ("keys", "value", "event_time", "watermark", "id", "headers")
28+
__slots__ = ("request", "status", "handshake")
2929

30-
class HeadersEntry(_message.Message):
31-
__slots__ = ("key", "value")
32-
KEY_FIELD_NUMBER: _ClassVar[int]
30+
class Request(_message.Message):
31+
__slots__ = ("keys", "value", "event_time", "watermark", "id", "headers")
32+
33+
class HeadersEntry(_message.Message):
34+
__slots__ = ("key", "value")
35+
KEY_FIELD_NUMBER: _ClassVar[int]
36+
VALUE_FIELD_NUMBER: _ClassVar[int]
37+
key: str
38+
value: str
39+
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
40+
KEYS_FIELD_NUMBER: _ClassVar[int]
3341
VALUE_FIELD_NUMBER: _ClassVar[int]
34-
key: str
35-
value: str
36-
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
37-
KEYS_FIELD_NUMBER: _ClassVar[int]
38-
VALUE_FIELD_NUMBER: _ClassVar[int]
39-
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
40-
WATERMARK_FIELD_NUMBER: _ClassVar[int]
41-
ID_FIELD_NUMBER: _ClassVar[int]
42-
HEADERS_FIELD_NUMBER: _ClassVar[int]
43-
keys: _containers.RepeatedScalarFieldContainer[str]
44-
value: bytes
45-
event_time: _timestamp_pb2.Timestamp
46-
watermark: _timestamp_pb2.Timestamp
47-
id: str
48-
headers: _containers.ScalarMap[str, str]
42+
EVENT_TIME_FIELD_NUMBER: _ClassVar[int]
43+
WATERMARK_FIELD_NUMBER: _ClassVar[int]
44+
ID_FIELD_NUMBER: _ClassVar[int]
45+
HEADERS_FIELD_NUMBER: _ClassVar[int]
46+
keys: _containers.RepeatedScalarFieldContainer[str]
47+
value: bytes
48+
event_time: _timestamp_pb2.Timestamp
49+
watermark: _timestamp_pb2.Timestamp
50+
id: str
51+
headers: _containers.ScalarMap[str, str]
52+
def __init__(
53+
self,
54+
keys: _Optional[_Iterable[str]] = ...,
55+
value: _Optional[bytes] = ...,
56+
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
57+
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
58+
id: _Optional[str] = ...,
59+
headers: _Optional[_Mapping[str, str]] = ...,
60+
) -> None: ...
61+
REQUEST_FIELD_NUMBER: _ClassVar[int]
62+
STATUS_FIELD_NUMBER: _ClassVar[int]
63+
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
64+
request: SinkRequest.Request
65+
status: TransmissionStatus
66+
handshake: Handshake
4967
def __init__(
5068
self,
51-
keys: _Optional[_Iterable[str]] = ...,
52-
value: _Optional[bytes] = ...,
53-
event_time: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
54-
watermark: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
55-
id: _Optional[str] = ...,
56-
headers: _Optional[_Mapping[str, str]] = ...,
69+
request: _Optional[_Union[SinkRequest.Request, _Mapping]] = ...,
70+
status: _Optional[_Union[TransmissionStatus, _Mapping]] = ...,
71+
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
5772
) -> None: ...
5873

74+
class Handshake(_message.Message):
75+
__slots__ = ("sot",)
76+
SOT_FIELD_NUMBER: _ClassVar[int]
77+
sot: bool
78+
def __init__(self, sot: bool = ...) -> None: ...
79+
5980
class ReadyResponse(_message.Message):
6081
__slots__ = ("ready",)
6182
READY_FIELD_NUMBER: _ClassVar[int]
6283
ready: bool
6384
def __init__(self, ready: bool = ...) -> None: ...
6485

86+
class TransmissionStatus(_message.Message):
87+
__slots__ = ("eot",)
88+
EOT_FIELD_NUMBER: _ClassVar[int]
89+
eot: bool
90+
def __init__(self, eot: bool = ...) -> None: ...
91+
6592
class SinkResponse(_message.Message):
66-
__slots__ = ("results",)
93+
__slots__ = ("result", "handshake", "status")
6794

6895
class Result(_message.Message):
6996
__slots__ = ("id", "status", "err_msg")
@@ -79,8 +106,15 @@ class SinkResponse(_message.Message):
79106
status: _Optional[_Union[Status, str]] = ...,
80107
err_msg: _Optional[str] = ...,
81108
) -> None: ...
82-
RESULTS_FIELD_NUMBER: _ClassVar[int]
83-
results: _containers.RepeatedCompositeFieldContainer[SinkResponse.Result]
109+
RESULT_FIELD_NUMBER: _ClassVar[int]
110+
HANDSHAKE_FIELD_NUMBER: _ClassVar[int]
111+
STATUS_FIELD_NUMBER: _ClassVar[int]
112+
result: SinkResponse.Result
113+
handshake: Handshake
114+
status: TransmissionStatus
84115
def __init__(
85-
self, results: _Optional[_Iterable[_Union[SinkResponse.Result, _Mapping]]] = ...
116+
self,
117+
result: _Optional[_Union[SinkResponse.Result, _Mapping]] = ...,
118+
handshake: _Optional[_Union[Handshake, _Mapping]] = ...,
119+
status: _Optional[_Union[TransmissionStatus, _Mapping]] = ...,
86120
) -> None: ...

pynumaflow/proto/sinker/sink_pb2_grpc.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, channel):
1515
Args:
1616
channel: A grpc.Channel.
1717
"""
18-
self.SinkFn = channel.stream_unary(
18+
self.SinkFn = channel.stream_stream(
1919
"/sink.v1.Sink/SinkFn",
2020
request_serializer=sink__pb2.SinkRequest.SerializeToString,
2121
response_deserializer=sink__pb2.SinkResponse.FromString,
@@ -45,7 +45,7 @@ def IsReady(self, request, context):
4545

4646
def add_SinkServicer_to_server(servicer, server):
4747
rpc_method_handlers = {
48-
"SinkFn": grpc.stream_unary_rpc_method_handler(
48+
"SinkFn": grpc.stream_stream_rpc_method_handler(
4949
servicer.SinkFn,
5050
request_deserializer=sink__pb2.SinkRequest.FromString,
5151
response_serializer=sink__pb2.SinkResponse.SerializeToString,
@@ -77,7 +77,7 @@ def SinkFn(
7777
timeout=None,
7878
metadata=None,
7979
):
80-
return grpc.experimental.stream_unary(
80+
return grpc.experimental.stream_stream(
8181
request_iterator,
8282
target,
8383
"/sink.v1.Sink/SinkFn",

pynumaflow/reducestreamer/servicer/async_servicer.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from collections.abc import AsyncIterable
33
from typing import Union
44

5-
import grpc
65
from google.protobuf import empty_pb2 as _empty_pb2
76

87
from pynumaflow.proto.reducer import reduce_pb2, reduce_pb2_grpc
@@ -13,7 +12,7 @@
1312
ReduceRequest,
1413
)
1514
from pynumaflow.reducestreamer.servicer.task_manager import TaskManager
16-
from pynumaflow.shared.server import exit_on_error, handle_error
15+
from pynumaflow.shared.server import handle_async_error
1716
from pynumaflow.types import NumaflowServicerContext
1817

1918

@@ -95,35 +94,20 @@ async def ReduceFn(
9594
async for msg in consumer:
9695
# If the message is an exception, handle the error and stop streaming
9796
if isinstance(msg, BaseException):
98-
handle_error(context, msg)
99-
await asyncio.gather(
100-
context.abort(grpc.StatusCode.UNKNOWN, details=repr(msg)),
101-
return_exceptions=True,
102-
)
103-
exit_on_error(
104-
err=repr(msg), parent=False, context=context, update_context=False
105-
)
97+
await handle_async_error(context, msg)
10698
return
10799
# Send window EOF response or Window result response
108100
# back to the client
109101
else:
110102
yield msg
111103
except BaseException as e:
112-
handle_error(context, e)
113-
await asyncio.gather(
114-
context.abort(grpc.StatusCode.UNKNOWN, details=repr(e)), return_exceptions=True
115-
)
116-
exit_on_error(err=repr(e), parent=False, context=context, update_context=False)
104+
await handle_async_error(context, e)
117105
return
118106
# Wait for the process_input_stream task to finish for a clean exit
119107
try:
120108
await producer
121109
except BaseException as e:
122-
handle_error(context, e)
123-
await asyncio.gather(
124-
context.abort(grpc.StatusCode.UNKNOWN, details=repr(e)), return_exceptions=True
125-
)
126-
exit_on_error(err=repr(e), parent=False, context=context, update_context=False)
110+
await handle_async_error(context, e)
127111
return
128112

129113
async def IsReady(

pynumaflow/shared/asynciter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ class NonBlockingIterator:
88

99
__slots__ = "_queue"
1010

11-
def __init__(self):
12-
self._queue = asyncio.Queue()
11+
def __init__(self, size=0):
12+
self._queue = asyncio.Queue(maxsize=size)
1313

1414
async def read_iterator(self):
1515
item = await self._queue.get()

pynumaflow/shared/server.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import contextlib
23
import io
34
import multiprocessing
@@ -266,7 +267,10 @@ def exit_on_error(
266267
p.kill()
267268

268269

269-
def handle_error(context: NumaflowServicerContext, e: BaseException):
270+
def update_context_err(context: NumaflowServicerContext, e: BaseException):
271+
"""
272+
Update the context with the error and log the exception.
273+
"""
270274
trace = get_exception_traceback_str(e)
271275
_LOGGER.critical(trace)
272276
_LOGGER.critical(e.__str__())
@@ -278,3 +282,14 @@ def get_exception_traceback_str(exc) -> str:
278282
file = io.StringIO()
279283
traceback.print_exception(exc, value=exc, tb=exc.__traceback__, file=file)
280284
return file.getvalue().rstrip()
285+
286+
287+
async def handle_async_error(context: NumaflowServicerContext, exception: BaseException):
288+
"""
289+
Handle exceptions for async servers by updating the context and exiting.
290+
"""
291+
update_context_err(context, exception)
292+
await asyncio.gather(
293+
context.abort(grpc.StatusCode.UNKNOWN, details=repr(exception)), return_exceptions=True
294+
)
295+
exit_on_error(err=repr(exception), parent=False, context=context, update_context=False)

0 commit comments

Comments
 (0)