Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ce7f13c

Browse files
authored
fix: fix TestPGCoordinatorSingle_MissedHeartbeats flake (coder#13686)
1 parent 089f068 commit ce7f13c

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

enterprise/tailnet/pgcoord.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1613,7 +1613,7 @@ func (h *heartbeats) resetExpiryTimerWithLock() {
16131613
if d < 0 {
16141614
d = 0
16151615
}
1616-
h.timer.Reset(d)
1616+
h.timer.Reset(d, "heartbeats", "resetExpiryTimerWithLock")
16171617
}
16181618

16191619
func (h *heartbeats) checkExpiry() {

enterprise/tailnet/pgcoord_test.go

+11 -10
Original file line number | Diff line number | Diff line change
@@ -336,14 +336,14 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
336336
t.Skip("test only with postgres")
337337
}
338338
store, ps := dbtestutil.NewDB(t)
339-
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong)
339+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
340340
defer cancel()
341341
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
342342
mClock := clock.NewMock(t)
343-
nowTrap := mClock.Trap().Now("heartbeats", "recvBeat")
344-
defer nowTrap.Close()
345343
afTrap := mClock.Trap().AfterFunc("heartbeats", "recvBeat")
346344
defer afTrap.Close()
345+
rstTrap := mClock.Trap().TimerReset("heartbeats", "resetExpiryTimerWithLock")
346+
defer rstTrap.Close()
347347

348348
coordinator, err := tailnet.NewTestPGCoord(ctx, logger, ps, store, mClock)
349349
require.NoError(t, err)
@@ -370,7 +370,6 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
370370
}
371371

372372
fCoord2.heartbeat()
373-
nowTrap.MustWait(ctx).Release()
374373
afTrap.MustWait(ctx).Release() // heartbeat timeout started
375374

376375
fCoord2.agentNode(agent.id, &agpl.Node{PreferredDERP: 12})
@@ -383,30 +382,32 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
383382
id: uuid.New(),
384383
}
385384
fCoord3.heartbeat()
386-
nowTrap.MustWait(ctx).Release()
385+
rstTrap.MustWait(ctx).Release() // timeout gets reset
387386
fCoord3.agentNode(agent.id, &agpl.Node{PreferredDERP: 13})
388387
assertEventuallyHasDERPs(ctx, t, client, 13)
389388

390389
// fCoord2 sends in a second heartbeat, one period later (on time)
391-
fCoord2.heartbeat()
392-
c := nowTrap.MustWait(ctx)
393390
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
394-
c.Release()
391+
fCoord2.heartbeat()
392+
rstTrap.MustWait(ctx).Release() // timeout gets reset
395393

396394
// when the fCoord3 misses enough heartbeats, the real coordinator should send an update with the
397395
// node from fCoord2 for the agent.
398396
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
399-
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
397+
w := mClock.Advance(tailnet.HeartbeatPeriod)
398+
rstTrap.MustWait(ctx).Release()
399+
w.MustWait(ctx)
400400
assertEventuallyHasDERPs(ctx, t, client, 12)
401401

402402
// one more heartbeat period will result in fCoord2 being expired, which should cause us to
403403
// revert to the original agent mapping
404404
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
405+
// note that the timeout doesn't get reset because both fCoord2 and fCoord3 are expired
405406
assertEventuallyHasDERPs(ctx, t, client, 10)
406407

407408
// send fCoord3 heartbeat, which should trigger us to consider that mapping valid again.
408409
fCoord3.heartbeat()
409-
nowTrap.MustWait(ctx).Release()
410+
rstTrap.MustWait(ctx).Release() // timeout gets reset
410411
assertEventuallyHasDERPs(ctx, t, client, 13)
411412

412413
err = agent.close()

0 commit comments

Comments
 (0)