@@ -4,10 +4,13 @@ import (
4
4
"context"
"encoding/json"
"errors"
"fmt"
"html"
"io"
"net"
"net/http"
"net/netip"
"sync"
"sync/atomic"
"time"
12
15
13
16
"github.com/google/uuid"
@@ -22,14 +25,17 @@ import (
22
25
// └──────────────────┘ └────────────────────┘ └───────────────────┘ └──────────────────┘
23
26
// Coordinators have different guarantees for HA support.
24
27
type Coordinator interface {
28
+ // ServeHTTPDebug
29
+ ServeHTTPDebug (w http.ResponseWriter , r * http.Request )
25
30
// Node returns an in-memory node by ID.
26
31
Node (id uuid.UUID ) * Node
27
32
// ServeClient accepts a WebSocket connection that wants to connect to an agent
28
33
// with the specified ID.
29
34
ServeClient (conn net.Conn , id uuid.UUID , agent uuid.UUID ) error
30
35
// ServeAgent accepts a WebSocket connection to an agent that listens to
31
36
// incoming connections and publishes node updates.
32
- ServeAgent (conn net.Conn , id uuid.UUID ) error
37
+ // Name is just used for debug information. It can be left blank.
38
+ ServeAgent (conn net.Conn , id uuid.UUID , name string ) error
33
39
// Close closes the coordinator.
34
40
Close () error
35
41
}
@@ -104,8 +110,8 @@ func NewCoordinator() Coordinator {
104
110
return & coordinator {
105
111
closed : false ,
106
112
nodes : map [uuid.UUID ]* Node {},
107
- agentSockets : map [uuid.UUID ]idConn {},
108
- agentToConnectionSockets : map [uuid.UUID ]map [uuid.UUID ]net. Conn {},
113
+ agentSockets : map [uuid.UUID ]* trackedConn {},
114
+ agentToConnectionSockets : map [uuid.UUID ]map [uuid.UUID ]* trackedConn {},
109
115
}
110
116
}
111
117
@@ -117,23 +123,34 @@ func NewCoordinator() Coordinator {
117
123
// This coordinator is incompatible with multiple Coder
118
124
// replicas as all node data is in-memory.
119
125
type coordinator struct {
120
- mutex sync.Mutex
126
+ mutex sync.RWMutex
121
127
closed bool
122
128
123
129
// nodes maps agent and connection IDs their respective node.
124
130
nodes map [uuid.UUID ]* Node
125
131
// agentSockets maps agent IDs to their open websocket.
126
- agentSockets map [uuid.UUID ]idConn
132
+ agentSockets map [uuid.UUID ]* trackedConn
127
133
// agentToConnectionSockets maps agent IDs to connection IDs of conns that
128
134
// are subscribed to updates for that agent.
129
- agentToConnectionSockets map [uuid.UUID ]map [uuid.UUID ]net. Conn
135
+ agentToConnectionSockets map [uuid.UUID ]map [uuid.UUID ]* trackedConn
130
136
}
131
137
132
- type idConn struct {
138
+ type trackedConn struct {
139
+ net.Conn
140
+
133
141
// id is an ephemeral UUID used to uniquely identify the owner of the
134
142
// connection.
135
- id uuid.UUID
136
- conn net.Conn
143
+ id uuid.UUID
144
+
145
+ name string
146
+ start int64
147
+ lastWrite int64
148
+ overwrites int64
149
+ }
150
+
151
+ func (t * trackedConn ) Write (b []byte ) (n int , err error ) {
152
+ atomic .StoreInt64 (& t .lastWrite , time .Now ().Unix ())
153
+ return t .Conn .Write (b )
137
154
}
138
155
139
156
// Node returns an in-memory node by ID.
@@ -182,12 +199,18 @@ func (c *coordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID)
182
199
c .mutex .Lock ()
183
200
connectionSockets , ok := c .agentToConnectionSockets [agent ]
184
201
if ! ok {
185
- connectionSockets = map [uuid.UUID ]net. Conn {}
202
+ connectionSockets = map [uuid.UUID ]* trackedConn {}
186
203
c .agentToConnectionSockets [agent ] = connectionSockets
187
204
}
205
+
206
+ now := time .Now ().Unix ()
188
207
// Insert this connection into a map so the agent
189
208
// can publish node updates.
190
- connectionSockets [id ] = conn
209
+ connectionSockets [id ] = & trackedConn {
210
+ Conn : conn ,
211
+ start : now ,
212
+ lastWrite : now ,
213
+ }
191
214
c .mutex .Unlock ()
192
215
defer func () {
193
216
c .mutex .Lock ()
@@ -243,7 +266,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
243
266
return xerrors .Errorf ("marshal nodes: %w" , err )
244
267
}
245
268
246
- _ , err = agentSocket .conn . Write (data )
269
+ _ , err = agentSocket .Write (data )
247
270
if err != nil {
248
271
if errors .Is (err , io .EOF ) || errors .Is (err , io .ErrClosedPipe ) || errors .Is (err , context .Canceled ) {
249
272
return nil
@@ -256,7 +279,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
256
279
257
280
// ServeAgent accepts a WebSocket connection to an agent that
258
281
// listens to incoming connections and publishes node updates.
259
- func (c * coordinator ) ServeAgent (conn net.Conn , id uuid.UUID ) error {
282
+ func (c * coordinator ) ServeAgent (conn net.Conn , id uuid.UUID , name string ) error {
260
283
c .mutex .Lock ()
261
284
if c .closed {
262
285
c .mutex .Unlock ()
@@ -289,6 +312,8 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
289
312
290
313
// This uniquely identifies a connection that belongs to this goroutine.
291
314
unique := uuid .New ()
315
+ now := time .Now ().Unix ()
316
+ overwrites := int64 (0 )
292
317
293
318
// If an old agent socket is connected, we close it to avoid any leaks. This
294
319
// shouldn't ever occur because we expect one agent to be running, but it's
@@ -297,11 +322,17 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
297
322
// dead.
298
323
oldAgentSocket , ok := c .agentSockets [id ]
299
324
if ok {
300
- _ = oldAgentSocket .conn .Close ()
325
+ overwrites = oldAgentSocket .overwrites + 1
326
+ _ = oldAgentSocket .Close ()
301
327
}
302
- c .agentSockets [id ] = idConn {
328
+ c .agentSockets [id ] = & trackedConn {
303
329
id : unique ,
304
- conn : conn ,
330
+ Conn : conn ,
331
+
332
+ name : name ,
333
+ start : now ,
334
+ lastWrite : now ,
335
+ overwrites : overwrites ,
305
336
}
306
337
307
338
c .mutex .Unlock ()
@@ -382,7 +413,7 @@ func (c *coordinator) Close() error {
382
413
for _ , socket := range c .agentSockets {
383
414
socket := socket
384
415
go func () {
385
- _ = socket .conn . Close ()
416
+ _ = socket .Close ()
386
417
wg .Done ()
387
418
}()
388
419
}
@@ -403,3 +434,71 @@ func (c *coordinator) Close() error {
403
434
wg .Wait ()
404
435
return nil
405
436
}
437
+
438
+ func (c * coordinator ) ServeHTTPDebug (w http.ResponseWriter , _ * http.Request ) {
439
+ w .Header ().Set ("Content-Type" , "text/html; charset=utf-8" )
440
+ now := time .Now ()
441
+
442
+ c .mutex .RLock ()
443
+ defer c .mutex .RUnlock ()
444
+
445
+ fmt .Fprintln (w , "<h1>in-memory wireguard coordinator debug</h1>" )
446
+ fmt .Fprintf (w , "<h2 id=agents><a href=#agents>#</a> agents: total %d</h2>\n " , len (c .agentSockets ))
447
+ fmt .Fprintln (w , "<ul>" )
448
+ for id , conn := range c .agentSockets {
449
+ fmt .Fprintf (w , "<li><b>%s</b> (%s): created %v ago, write %v ago, overwrites %d </li>\n " ,
450
+ conn .name ,
451
+ id .String (),
452
+ now .Sub (time .Unix (conn .start , 0 )).Round (time .Second ),
453
+ now .Sub (time .Unix (conn .lastWrite , 0 )).Round (time .Second ),
454
+ conn .overwrites ,
455
+ )
456
+
457
+ if connCount := len (c .agentToConnectionSockets [id ]); connCount > 0 {
458
+ fmt .Fprintf (w , "<h3>connections: total %d</h3>\n " , connCount )
459
+ fmt .Fprintln (w , "<ul>" )
460
+ for id , conn := range c .agentToConnectionSockets [id ] {
461
+ fmt .Fprintf (w , "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n " ,
462
+ conn .name ,
463
+ id .String (),
464
+ now .Sub (time .Unix (conn .start , 0 )).Round (time .Second ),
465
+ now .Sub (time .Unix (conn .lastWrite , 0 )).Round (time .Second ),
466
+ )
467
+ }
468
+ fmt .Fprintln (w , "</ul>" )
469
+ }
470
+ }
471
+ fmt .Fprintln (w , "</ul>" )
472
+
473
+ missingAgents := map [uuid.UUID ]map [uuid.UUID ]* trackedConn {}
474
+ for agentID , conns := range c .agentToConnectionSockets {
475
+ if len (conns ) == 0 {
476
+ continue
477
+ }
478
+
479
+ if _ , ok := c .agentSockets [agentID ]; ! ok {
480
+ missingAgents [agentID ] = conns
481
+ }
482
+ }
483
+
484
+ fmt .Fprintf (w , "<h2 id=missing-agents><a href=#missing-agents>#</a> missing agents: total %d</h2>\n " , len (missingAgents ))
485
+ fmt .Fprintln (w , "<ul>" )
486
+ for agentID , conns := range missingAgents {
487
+ fmt .Fprintf (w , "<li><b>unknown</b> (%s): created ? ago, write ? ago, overwrites ? </li>\n " ,
488
+ agentID .String (),
489
+ )
490
+
491
+ fmt .Fprintf (w , "<h3>connections: total %d</h3>\n " , len (conns ))
492
+ fmt .Fprintln (w , "<ul>" )
493
+ for id , conn := range conns {
494
+ fmt .Fprintf (w , "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n " ,
495
+ conn .name ,
496
+ id .String (),
497
+ now .Sub (time .Unix (conn .start , 0 )).Round (time .Second ),
498
+ now .Sub (time .Unix (conn .lastWrite , 0 )).Round (time .Second ),
499
+ )
500
+ }
501
+ fmt .Fprintln (w , "</ul>" )
502
+ }
503
+ fmt .Fprintln (w , "</ul>" )
504
+ }
0 commit comments