Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 79d026e

Browse files
authored
feat: Report table errors from plugins via gRPC (#320)
Fixes cloudquery/cloudquery#15525; this is the Python equivalent of cloudquery/plugin-sdk#2195. Most of the changes are actually to the MemDB plugin ~~(that's why tests are failing, PR in draft while I fix those)~~, as I turned it into a "real" plugin that uses the SDK scheduler so I can test this change.
1 parent cc679a3 commit 79d026e

File tree

7 files changed

+227
-80
lines changed

7 files changed

+227
-80
lines changed

cloudquery/sdk/internal/memdb/memdb.py

Lines changed: 170 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3,116 +3,219 @@
33
from cloudquery.sdk import plugin
44
from cloudquery.sdk import message
55
from cloudquery.sdk import schema
6-
from typing import List, Generator, Dict
6+
from cloudquery.sdk.scheduler import Scheduler, TableResolver
7+
from typing import List, Generator, Dict, Any
78
import pyarrow as pa
89
from cloudquery.sdk.schema.table import Table
10+
from cloudquery.sdk.schema.arrow import METADATA_TABLE_NAME
911
from cloudquery.sdk.types import JSONType
1012
from dataclasses import dataclass, field
1113

1214
NAME = "memdb"
1315
VERSION = "development"
1416

1517

18+
class Client:
    """Placeholder client: the in-memory plugin needs no real connection."""

    def __init__(self) -> None:
        # Nothing to set up for an in-memory backend.
        pass

    def id(self):
        """Identifier reported for this client in logs and errors."""
        return "memdb"
class MemDBResolver(TableResolver):
    """Table resolver that replays a fixed, pre-canned list of records."""

    def __init__(
        self, table: Table, records: List, child_resolvers: list[TableResolver] = None
    ) -> None:
        super().__init__(table=table, child_resolvers=child_resolvers)
        # The records handed back verbatim by resolve().
        self._records = records

    def resolve(self, client: None, parent_resource) -> Generator[Any, None, None]:
        """Yield each canned record in insertion order; client is unused."""
        yield from self._records
class Table1Relation1(Table):
    """Child relation of table_1 used by the in-memory test plugin."""

    def __init__(self) -> None:
        name_column = schema.Column(
            name="name",
            type=pa.string(),
            primary_key=True,
            not_null=True,
            unique=True,
        )
        data_column = schema.Column(name="data", type=JSONType())
        super().__init__(
            name="table_1_relation_1",
            columns=[name_column, data_column],
            title="Table 1 Relation 1",
            description="Test Table 1 Relation 1",
        )

    @property
    def resolver(self):
        """Resolver yielding three fixed rows for this relation."""
        rows = [
            {"name": "a", "data": {"a": 1}},
            {"name": "b", "data": {"b": 2}},
            {"name": "c", "data": {"c": 3}},
        ]
        return MemDBResolver(self, records=rows)
class Table1(Table):
    """Top-level incremental table with one child relation (Table1Relation1)."""

    def __init__(self) -> None:
        columns = [
            schema.Column(
                name="name",
                type=pa.string(),
                primary_key=True,
                not_null=True,
                unique=True,
            ),
            schema.Column(
                name="id",
                type=pa.int64(),
                primary_key=True,
                not_null=True,
                unique=True,
                incremental_key=True,
            ),
        ]
        super().__init__(
            name="table_1",
            columns=columns,
            title="Table 1",
            description="Test Table 1",
            is_incremental=True,
            relations=[Table1Relation1()],
        )

    @property
    def resolver(self):
        """Resolver for table_1, with one nested resolver per child relation."""
        nested = [rel.resolver for rel in self.relations]
        return MemDBResolver(
            self,
            records=[
                {"name": "a", "id": 1},
                {"name": "b", "id": 2},
                {"name": "c", "id": 3},
            ],
            child_resolvers=nested,
        )
class Table2(Table):
    """Flat test table with a string primary key and an int64 id column."""

    def __init__(self) -> None:
        super().__init__(
            name="table_2",
            columns=[
                schema.Column(
                    name="name",
                    type=pa.string(),
                    primary_key=True,
                    not_null=True,
                    unique=True,
                ),
                schema.Column(name="id", type=pa.int64()),
            ],
            title="Table 2",
            description="Test Table 2",
        )

    @property
    def resolver(self):
        """Resolver yielding three fixed rows for table_2."""
        data = [
            {"name": "a", "id": 1},
            {"name": "b", "id": 2},
            {"name": "c", "id": 3},
        ]
        return MemDBResolver(self, records=data)
16142
@dataclass
class Spec:
    """Runtime configuration for the MemDB plugin's scheduler."""

    # Maximum number of concurrently running table resolvers.
    concurrency: int = 1000
    # Upper bound on the scheduler's internal message queue.
    queue_size: int = 1000
19146

20147

21148
class MemDB(plugin.Plugin):
    """In-memory source/destination plugin used for SDK testing.

    Tables are synced through the SDK ``Scheduler``; written records are
    kept as a plain list of Arrow record batches.
    """

    def __init__(self) -> None:
        super().__init__(
            NAME, VERSION, opts=plugin.plugin.Options(team="cloudquery", kind="source")
        )
        table1 = Table1()
        table2 = Table2()
        # Static table catalog, keyed by table name.
        self._tables: Dict[str, schema.Table] = {
            table1.name: table1,
            table2.name: table2,
        }
        # Record batches accumulated by write() and served back by read().
        self._db: List[pa.RecordBatch] = []
        self._client = Client()

    def set_logger(self, logger) -> None:
        """Store the logger; must be called before init(), which passes it on."""
        self._logger = logger

    def init(self, spec, no_connection: bool = False):
        """Parse the JSON spec and construct the scheduler.

        When ``no_connection`` is True the plugin is only being validated,
        so spec parsing and scheduler construction are skipped.
        """
        if no_connection:
            return
        self._spec_json = json.loads(spec)
        self._spec = Spec(**self._spec_json)
        self._scheduler = Scheduler(
            concurrency=self._spec.concurrency,
            queue_size=self._spec.queue_size,
            logger=self._logger,
        )

    def get_tables(self, options: plugin.TableOptions = None) -> List[plugin.Table]:
        """Return the table catalog filtered by the given options.

        NOTE(review): ``options`` defaults to None but is dereferenced
        unconditionally below — callers must always pass TableOptions.
        """
        tables = list(self._tables.values())

        # Wire up parent links so relation tables know their parent table.
        for table in tables:
            for relation in table.relations:
                relation.parent = table

        return schema.filter_dfs(tables, options.tables, options.skip_tables)

    def sync(
        self, options: plugin.SyncOptions
    ) -> Generator[message.SyncMessage, None, None]:
        """Resolve every selected table via the scheduler, yielding sync messages."""
        resolvers: list[TableResolver] = [
            table.resolver
            for table in self.get_tables(
                plugin.TableOptions(
                    tables=options.tables,
                    skip_tables=options.skip_tables,
                    skip_dependent_tables=options.skip_dependent_tables,
                )
            )
        ]
        return self._scheduler.sync(
            self._client, resolvers, options.deterministic_cq_id
        )

    def write(self, writer: Generator[message.WriteMessage, None, None]) -> None:
        """Consume write messages, storing inserted record batches in memory.

        Raises NotImplementedError for unrecognized message types.
        """
        for msg in writer:
            if isinstance(msg, message.WriteMigrateTableMessage):
                # Migrations are a no-op for the in-memory store.
                pass
            elif isinstance(msg, message.WriteInsertMessage):
                self._db.append(msg.record)
            else:
                raise NotImplementedError(f"Unknown message type {type(msg)}")

    def read(self, table: Table) -> Generator[message.ReadMessage, None, None]:
        """Yield previously written record batches belonging to *table*.

        The owning table is identified by the METADATA_TABLE_NAME entry in
        each batch's schema metadata.
        """
        for record in self._db:
            # Guard against batches without schema metadata (or without the
            # table-name key) instead of raising AttributeError.
            metadata = record.schema.metadata or {}
            table_name = metadata.get(METADATA_TABLE_NAME)
            if table_name is None:
                continue
            if table_name.decode("utf-8") == table.name:
                yield message.ReadMessage(record)

    def close(self) -> None:
        """Discard all stored record batches.

        Fixed: reset to an empty list — _db is a List throughout its
        lifetime (the original reset it to a dict).
        """
        self._db = []

cloudquery/sdk/internal/servers/plugin_v3/plugin.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from cloudquery.sdk.message import (
88
SyncInsertMessage,
99
SyncMigrateTableMessage,
10+
SyncErrorMessage,
1011
WriteInsertMessage,
1112
WriteMigrateTableMessage,
1213
WriteMessage,
@@ -77,6 +78,12 @@ def Sync(self, request, context):
7778
yield plugin_pb2.Sync.Response(
7879
migrate_table=plugin_pb2.Sync.MessageMigrateTable(table=buf)
7980
)
81+
elif isinstance(msg, SyncErrorMessage) and request.withErrorMessages:
82+
yield plugin_pb2.Sync.Response(
83+
error=plugin_pb2.Sync.MessageError(
84+
table_name=msg.table_name, error=msg.error
85+
)
86+
)
8087
else:
8188
# unknown sync message type
8289
raise NotImplementedError()

cloudquery/sdk/message/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
from .sync import SyncMessage, SyncInsertMessage, SyncMigrateTableMessage
1+
from .sync import (
2+
SyncMessage,
3+
SyncInsertMessage,
4+
SyncMigrateTableMessage,
5+
SyncErrorMessage,
6+
)
27
from .write import (
38
WriteMessage,
49
WriteInsertMessage,

cloudquery/sdk/message/sync.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,9 @@ def __init__(self, record: pa.RecordBatch):
1313
class SyncMigrateTableMessage(SyncMessage):
1414
def __init__(self, table: pa.Schema):
1515
self.table = table
16+
17+
18+
class SyncErrorMessage(SyncMessage):
    """Sync message reporting an error hit while resolving a single table."""

    def __init__(self, table_name: str, error: str):
        # Name of the table whose resolution failed.
        self.table_name = table_name
        # Human-readable description of the failure.
        self.error = error

cloudquery/sdk/scheduler/scheduler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
SyncMessage,
99
SyncInsertMessage,
1010
SyncMigrateTableMessage,
11+
SyncErrorMessage,
1112
)
1213
from cloudquery.sdk.schema import Resource
1314
from cloudquery.sdk.stateclient.stateclient import StateClient
@@ -162,6 +163,7 @@ def resolve_table(
162163
depth=depth,
163164
exc_info=e,
164165
)
166+
res.put(SyncErrorMessage(resolver.table.name, str(e)))
165167
finally:
166168
res.put(TableResolverFinished())
167169

tests/internal/memdb/memdb.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
11
from cloudquery.sdk.internal import memdb
22
from cloudquery.sdk.internal.servers.plugin_v3 import plugin
33
from cloudquery.sdk.plugin import SyncOptions
4+
from cloudquery.sdk.message import SyncMigrateTableMessage, SyncInsertMessage
5+
import structlog
46

57

68
def test_memdb():
    """End-to-end sync via the scheduler: 3 migrate messages, then inserts."""
    p = memdb.MemDB()
    p.set_logger(structlog.get_logger())
    p.init(plugin.sanitize_spec(b"null"))
    msgs = list(p.sync(SyncOptions(tables=["*"], skip_tables=[])))
    assert len(msgs) == 18

    # One migrate message per table (table_1, table_1_relation_1, table_2).
    for migrate_msg in msgs[:3]:
        assert isinstance(migrate_msg, SyncMigrateTableMessage)

    # Everything after the migrations must be row inserts.
    for insert_msg in msgs[3:]:
        assert isinstance(insert_msg, SyncInsertMessage)

0 commit comments

Comments
 (0)