Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1d0fae8

Browse files
authored
fix(coderd): prevent lost messages in watchWorkspaceAgentMetadata (#7934)
* fix(codersdk): wait for subscription in WatchWorkspaceAgentMetadata
* fix(coderd): subscribe before sending initial metadata event
* test(coderd): add retries to TestWorkspaceAgent_Metadata to avoid flake
1 parent 518300a commit 1d0fae8

File tree

3 files changed

+51
-17
lines changed

3 files changed

+51
-17
lines changed

coderd/workspaceagents.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -1434,17 +1434,15 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
14341434
})
14351435
}
14361436

1437-
// Send initial metadata.
1438-
sendMetadata(true)
1439-
14401437
// We debounce metadata updates to avoid overloading the frontend when
14411438
// an agent is sending a lot of updates.
14421439
pubsubDebounce := debounce.New(time.Second)
14431440
if flag.Lookup("test.v") != nil {
14441441
pubsubDebounce = debounce.New(time.Millisecond * 100)
14451442
}
14461443

1447-
// Send metadata on updates.
1444+
// Send metadata on updates. We must ensure subscription before sending
1445+
// initial metadata to guarantee that events in-between are not missed.
14481446
cancelSub, err := api.Pubsub.Subscribe(watchWorkspaceAgentMetadataChannel(workspaceAgent.ID), func(_ context.Context, _ []byte) {
14491447
pubsubDebounce(func() {
14501448
sendMetadata(true)
@@ -1456,12 +1454,14 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
14561454
}
14571455
defer cancelSub()
14581456

1457+
// Send initial metadata.
1458+
sendMetadata(true)
1459+
14591460
for {
14601461
select {
14611462
case <-senderClosed:
14621463
return
14631464
case <-refreshTicker.C:
1464-
break
14651465
}
14661466

14671467
// Avoid spamming the DB with reads we know there are no updates. We want

coderd/workspaceagents_test.go

+29-10
Original file line numberDiff line numberDiff line change
@@ -1268,11 +1268,6 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
12681268

12691269
var update []codersdk.WorkspaceAgentMetadata
12701270

1271-
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata) {
1272-
require.Equal(t, want.Value, got.Result.Value)
1273-
require.Equal(t, want.Error, got.Result.Error)
1274-
}
1275-
12761271
wantMetadata1 := codersdk.WorkspaceAgentMetadataResult{
12771272
CollectedAt: time.Now(),
12781273
Value: "bar",
@@ -1285,32 +1280,53 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
12851280

12861281
recvUpdate := func() []codersdk.WorkspaceAgentMetadata {
12871282
select {
1283+
case <-ctx.Done():
1284+
t.Fatalf("context done: %v", ctx.Err())
12881285
case err := <-errors:
12891286
t.Fatalf("error watching metadata: %v", err)
1290-
return nil
12911287
case update := <-updates:
12921288
return update
12931289
}
1290+
return nil
1291+
}
1292+
1293+
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata, retry bool) {
1294+
// We can't trust the order of the updates due to timers and debounces,
1295+
// so let's check a few more times.
1296+
for i := 0; retry && i < 2 && (want.Value != got.Result.Value || want.Error != got.Result.Error); i++ {
1297+
update = recvUpdate()
1298+
for _, m := range update {
1299+
if m.Description.Key == got.Description.Key {
1300+
got = m
1301+
break
1302+
}
1303+
}
1304+
}
1305+
ok1 := assert.Equal(t, want.Value, got.Result.Value)
1306+
ok2 := assert.Equal(t, want.Error, got.Result.Error)
1307+
if !ok1 || !ok2 {
1308+
require.FailNow(t, "check failed")
1309+
}
12941310
}
12951311

12961312
update = recvUpdate()
12971313
require.Len(t, update, 3)
1298-
check(wantMetadata1, update[0])
1314+
check(wantMetadata1, update[0], false)
12991315
// The second metadata result is not yet posted.
13001316
require.Zero(t, update[1].Result.CollectedAt)
13011317

13021318
wantMetadata2 := wantMetadata1
13031319
post("foo2", wantMetadata2)
13041320
update = recvUpdate()
13051321
require.Len(t, update, 3)
1306-
check(wantMetadata1, update[0])
1307-
check(wantMetadata2, update[1])
1322+
check(wantMetadata1, update[0], true)
1323+
check(wantMetadata2, update[1], true)
13081324

13091325
wantMetadata1.Error = "error"
13101326
post("foo1", wantMetadata1)
13111327
update = recvUpdate()
13121328
require.Len(t, update, 3)
1313-
check(wantMetadata1, update[0])
1329+
check(wantMetadata1, update[0], true)
13141330

13151331
const maxValueLen = 32 << 10
13161332
tooLongValueMetadata := wantMetadata1
@@ -1319,6 +1335,9 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
13191335
tooLongValueMetadata.CollectedAt = time.Now()
13201336
post("foo3", tooLongValueMetadata)
13211337
got := recvUpdate()[2]
1338+
for i := 0; i < 2 && len(got.Result.Value) != maxValueLen; i++ {
1339+
got = recvUpdate()[2]
1340+
}
13221341
require.Len(t, got.Result.Value, maxValueLen)
13231342
require.NotEmpty(t, got.Result.Error)
13241343

codersdk/workspaceagents.go

+17-2
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
304304

305305
metadataChan := make(chan []WorkspaceAgentMetadata, 256)
306306

307+
ready := make(chan struct{})
307308
watch := func() error {
308309
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/workspaceagents/%s/watch-metadata", id), nil)
309310
if err != nil {
@@ -316,19 +317,24 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
316317
nextEvent := ServerSentEventReader(ctx, res.Body)
317318
defer res.Body.Close()
318319

320+
firstEvent := true
319321
for {
320322
select {
321323
case <-ctx.Done():
322324
return ctx.Err()
323325
default:
324-
break
325326
}
326327

327328
sse, err := nextEvent()
328329
if err != nil {
329330
return err
330331
}
331332

333+
if firstEvent {
334+
close(ready) // Only close ready after the first event is received.
335+
firstEvent = false
336+
}
337+
332338
b, ok := sse.Data.([]byte)
333339
if !ok {
334340
return xerrors.Errorf("unexpected data type: %T", sse.Data)
@@ -358,9 +364,18 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
358364
errorChan := make(chan error, 1)
359365
go func() {
360366
defer close(errorChan)
361-
errorChan <- watch()
367+
err := watch()
368+
select {
369+
case <-ready:
370+
default:
371+
close(ready) // Error before first event.
372+
}
373+
errorChan <- err
362374
}()
363375

376+
// Wait until first event is received and the subscription is registered.
377+
<-ready
378+
364379
return metadataChan, errorChan
365380
}
366381

0 commit comments

Comments
 (0)