Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 520c3a8

Browse files
authored
fix: use TSMP for pings and checking reachability (coder#11306)
We're seeing some flaky tests related to agent connectivity - https://github.com/coder/coder/actions/runs/7286675441/job/19856270998 I'm pretty sure what happened in this one is that the client opened a connection while the wgengine was in the process of reconfiguring the wireguard device, so the fact that the peer became "active" as a result of traffic being sent was not noticed. The test calls `AwaitReachable()` but this only tests the disco layer, so it doesn't wait for wireguard to come up. I think we should be using TSMP for pinging and reachability, since this operates at the IP layer, and therefore requires that wireguard comes up before being successful. This should also help with the problems we have seen where a TCP connection starts before wireguard is up and the initial round trip has to wait for the 5 second wireguard handshake retry. fixes: coder#11294
1 parent 58e40f6 commit 520c3a8

File tree

2 files changed

+15
-14
lines changed

2 files changed

+15
-14
lines changed

agent/agent_test.go

+13-12
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,10 @@ func TestAgent_Stats_Magic(t *testing.T) {
174174
require.NoError(t, err)
175175
err = session.Shell()
176176
require.NoError(t, err)
177-
var s *agentsdk.Stats
178177
require.Eventuallyf(t, func() bool {
179-
var ok bool
180-
s, ok = <-stats
178+
s, ok := <-stats
179+
t.Logf("got stats: ok=%t, ConnectionCount=%d, RxBytes=%d, TxBytes=%d, SessionCountVSCode=%d, ConnectionMedianLatencyMS=%f",
180+
ok, s.ConnectionCount, s.RxBytes, s.TxBytes, s.SessionCountVSCode, s.ConnectionMedianLatencyMS)
181181
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 &&
182182
// Ensure that the connection didn't count as a "normal" SSH session.
183183
// This was a special one, so it should be labeled specially in the stats!
@@ -186,7 +186,7 @@ func TestAgent_Stats_Magic(t *testing.T) {
186186
// If it isn't, it's set to -1.
187187
s.ConnectionMedianLatencyMS >= 0
188188
}, testutil.WaitLong, testutil.IntervalFast,
189-
"never saw stats: %+v", s,
189+
"never saw stats",
190190
)
191191
// The shell will automatically exit if there is no stdin!
192192
_ = stdin.Close()
@@ -240,14 +240,14 @@ func TestAgent_Stats_Magic(t *testing.T) {
240240
_ = tunneledConn.Close()
241241
})
242242

243-
var s *agentsdk.Stats
244243
require.Eventuallyf(t, func() bool {
245-
var ok bool
246-
s, ok = <-stats
244+
s, ok := <-stats
245+
t.Logf("got stats with conn open: ok=%t, ConnectionCount=%d, SessionCountJetBrains=%d",
246+
ok, s.ConnectionCount, s.SessionCountJetBrains)
247247
return ok && s.ConnectionCount > 0 &&
248248
s.SessionCountJetBrains == 1
249249
}, testutil.WaitLong, testutil.IntervalFast,
250-
"never saw stats with conn open: %+v", s,
250+
"never saw stats with conn open",
251251
)
252252

253253
// Kill the server and connection after checking for the echo.
@@ -256,12 +256,13 @@ func TestAgent_Stats_Magic(t *testing.T) {
256256
_ = tunneledConn.Close()
257257

258258
require.Eventuallyf(t, func() bool {
259-
var ok bool
260-
s, ok = <-stats
261-
return ok && s.ConnectionCount == 0 &&
259+
s, ok := <-stats
260+
t.Logf("got stats after disconnect %t, %d",
261+
ok, s.SessionCountJetBrains)
262+
return ok &&
262263
s.SessionCountJetBrains == 0
263264
}, testutil.WaitLong, testutil.IntervalFast,
264-
"never saw stats after conn closes: %+v", s,
265+
"never saw stats after conn closes",
265266
)
266267
})
267268
}

tailnet/conn.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -670,12 +670,12 @@ func (c *Conn) Status() *ipnstate.Status {
670670
return sb.Status()
671671
}
672672

673-
// Ping sends a Disco ping to the Wireguard engine.
673+
// Ping sends a ping to the Wireguard engine.
674674
// The bool returned is true if the ping was performed P2P.
675675
func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, bool, *ipnstate.PingResult, error) {
676676
errCh := make(chan error, 1)
677677
prChan := make(chan *ipnstate.PingResult, 1)
678-
go c.wireguardEngine.Ping(ip, tailcfg.PingDisco, func(pr *ipnstate.PingResult) {
678+
go c.wireguardEngine.Ping(ip, tailcfg.PingTSMP, func(pr *ipnstate.PingResult) {
679679
if pr.Err != "" {
680680
errCh <- xerrors.New(pr.Err)
681681
return

0 commit comments

Comments
 (0)