Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 8529028

Browse files
committed
feat: add debug server for tailnet coordinators
Resolves: #5845
1 parent 08412fd commit 8529028

File tree

7 files changed

+172
-29
lines changed

7 files changed

+172
-29
lines changed

coderd/coderd.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,27 @@ func New(options *Options) *API {
613613
r.Get("/", api.workspaceApplicationAuth)
614614
})
615615
})
616+
617+
r.Route("/debug", func(r chi.Router) {
618+
r.Use(
619+
apiKeyMiddleware,
620+
// Ensure only owners can access debug endpoints.
621+
func(next http.Handler) http.Handler {
622+
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
623+
if !api.Authorize(r, rbac.ActionRead, rbac.ResourceDebugInfo) {
624+
httpapi.ResourceNotFound(rw)
625+
return
626+
}
627+
628+
next.ServeHTTP(rw, r)
629+
})
630+
},
631+
)
632+
633+
r.HandleFunc("/coordinator", func(w http.ResponseWriter, r *http.Request) {
634+
(*api.TailnetCoordinator.Load()).ServeHTTPDebug(w, r)
635+
})
636+
})
616637
})
617638

618639
if options.SwaggerEndpoint {

coderd/rbac/object.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ var (
150150
ResourceReplicas = Object{
151151
Type: "replicas",
152152
}
153+
154+
// ResourceDebugInfo controls access to the debug routes `/api/v2/debug/*`.
155+
ResourceDebugInfo = Object{
156+
Type: "debug_info",
157+
}
153158
)
154159

155160
// Object is used to create objects for authz checks when you have none in

coderd/workspaceagents.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,16 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
521521
})
522522
return
523523
}
524+
525+
workspace, err := api.Database.GetWorkspaceByID(ctx, build.WorkspaceID)
526+
if err != nil {
527+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
528+
Message: "Internal error fetching workspace.",
529+
Detail: err.Error(),
530+
})
531+
return
532+
}
533+
524534
// Ensure the resource is still valid!
525535
// We only accept agents for resources on the latest build.
526536
ensureLatestBuild := func() error {
@@ -618,7 +628,7 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
618628
closeChan := make(chan struct{})
619629
go func() {
620630
defer close(closeChan)
621-
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID)
631+
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID, fmt.Sprintf("%s-%s", workspace.Name, workspaceAgent.Name))
622632
if err != nil {
623633
api.Logger.Warn(ctx, "tailnet coordinator agent error", slog.Error(err))
624634
_ = conn.Close(websocket.StatusInternalError, err.Error())

enterprise/tailnet/coordinator.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ import (
55
"context"
66
"encoding/json"
77
"errors"
8+
"fmt"
89
"io"
910
"net"
11+
"net/http"
1012
"sync"
1113
"time"
1214

@@ -174,7 +176,7 @@ func (c *haCoordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *js
174176

175177
// ServeAgent accepts a WebSocket connection to an agent that listens to
176178
// incoming connections and publishes node updates.
177-
func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
179+
func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID, _ string) error {
178180
// Tell clients on other instances to send a callmemaybe to us.
179181
err := c.publishAgentHello(id)
180182
if err != nil {
@@ -573,3 +575,9 @@ func (c *haCoordinator) formatAgentUpdate(id uuid.UUID, node *agpl.Node) ([]byte
573575

574576
return buf.Bytes(), nil
575577
}
578+
579+
func (*haCoordinator) ServeHTTPDebug(w http.ResponseWriter, _ *http.Request) {
580+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
581+
fmt.Fprintf(w, "<h1>coordinator</h1>")
582+
fmt.Fprintf(w, "<h2>ha debug coming soon</h2>")
583+
}

enterprise/tailnet/coordinator_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func TestCoordinatorSingle(t *testing.T) {
6060
id := uuid.New()
6161
closeChan := make(chan struct{})
6262
go func() {
63-
err := coordinator.ServeAgent(server, id)
63+
err := coordinator.ServeAgent(server, id, "")
6464
assert.NoError(t, err)
6565
close(closeChan)
6666
}()
@@ -91,7 +91,7 @@ func TestCoordinatorSingle(t *testing.T) {
9191
agentID := uuid.New()
9292
closeAgentChan := make(chan struct{})
9393
go func() {
94-
err := coordinator.ServeAgent(agentServerWS, agentID)
94+
err := coordinator.ServeAgent(agentServerWS, agentID, "")
9595
assert.NoError(t, err)
9696
close(closeAgentChan)
9797
}()
@@ -142,7 +142,7 @@ func TestCoordinatorSingle(t *testing.T) {
142142
})
143143
closeAgentChan = make(chan struct{})
144144
go func() {
145-
err := coordinator.ServeAgent(agentServerWS, agentID)
145+
err := coordinator.ServeAgent(agentServerWS, agentID, "")
146146
assert.NoError(t, err)
147147
close(closeAgentChan)
148148
}()
@@ -184,7 +184,7 @@ func TestCoordinatorHA(t *testing.T) {
184184
agentID := uuid.New()
185185
closeAgentChan := make(chan struct{})
186186
go func() {
187-
err := coordinator1.ServeAgent(agentServerWS, agentID)
187+
err := coordinator1.ServeAgent(agentServerWS, agentID, "")
188188
assert.NoError(t, err)
189189
close(closeAgentChan)
190190
}()
@@ -240,7 +240,7 @@ func TestCoordinatorHA(t *testing.T) {
240240
})
241241
closeAgentChan = make(chan struct{})
242242
go func() {
243-
err := coordinator1.ServeAgent(agentServerWS, agentID)
243+
err := coordinator1.ServeAgent(agentServerWS, agentID, "")
244244
assert.NoError(t, err)
245245
close(closeAgentChan)
246246
}()

tailnet/coordinator.go

Lines changed: 116 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ import (
44
"context"
55
"encoding/json"
66
"errors"
7+
"fmt"
78
"io"
89
"net"
10+
"net/http"
911
"net/netip"
1012
"sync"
13+
"sync/atomic"
1114
"time"
1215

1316
"github.com/google/uuid"
@@ -22,14 +25,17 @@ import (
2225
// └──────────────────┘ └────────────────────┘ └───────────────────┘ └──────────────────┘
2326
// Coordinators have different guarantees for HA support.
2427
type Coordinator interface {
28+
// ServeHTTPDebug writes an HTML debug page describing the coordinator.
29+
ServeHTTPDebug(w http.ResponseWriter, r *http.Request)
2530
// Node returns an in-memory node by ID.
2631
Node(id uuid.UUID) *Node
2732
// ServeClient accepts a WebSocket connection that wants to connect to an agent
2833
// with the specified ID.
2934
ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID) error
3035
// ServeAgent accepts a WebSocket connection to an agent that listens to
3136
// incoming connections and publishes node updates.
32-
ServeAgent(conn net.Conn, id uuid.UUID) error
37+
// Name is just used for debug information. It can be left blank.
38+
ServeAgent(conn net.Conn, id uuid.UUID, name string) error
3339
// Close closes the coordinator.
3440
Close() error
3541
}
@@ -104,8 +110,8 @@ func NewCoordinator() Coordinator {
104110
return &coordinator{
105111
closed: false,
106112
nodes: map[uuid.UUID]*Node{},
107-
agentSockets: map[uuid.UUID]idConn{},
108-
agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]net.Conn{},
113+
agentSockets: map[uuid.UUID]*trackedConn{},
114+
agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*trackedConn{},
109115
}
110116
}
111117

@@ -117,23 +123,34 @@ func NewCoordinator() Coordinator {
117123
// This coordinator is incompatible with multiple Coder
118124
// replicas as all node data is in-memory.
119125
type coordinator struct {
120-
mutex sync.Mutex
126+
mutex sync.RWMutex
121127
closed bool
122128

123129
// nodes maps agent and connection IDs to their respective node.
124130
nodes map[uuid.UUID]*Node
125131
// agentSockets maps agent IDs to their open websocket.
126-
agentSockets map[uuid.UUID]idConn
132+
agentSockets map[uuid.UUID]*trackedConn
127133
// agentToConnectionSockets maps agent IDs to connection IDs of conns that
128134
// are subscribed to updates for that agent.
129-
agentToConnectionSockets map[uuid.UUID]map[uuid.UUID]net.Conn
135+
agentToConnectionSockets map[uuid.UUID]map[uuid.UUID]*trackedConn
130136
}
131137

132-
type idConn struct {
138+
type trackedConn struct {
139+
net.Conn
140+
133141
// id is an ephemeral UUID used to uniquely identify the owner of the
134142
// connection.
135-
id uuid.UUID
136-
conn net.Conn
143+
id uuid.UUID
144+
145+
name string
146+
start int64
147+
lastWrite int64
148+
overwrites int64
149+
}
150+
151+
func (t *trackedConn) Write(b []byte) (n int, err error) {
152+
atomic.StoreInt64(&t.lastWrite, time.Now().Unix())
153+
return t.Conn.Write(b)
137154
}
138155

139156
// Node returns an in-memory node by ID.
@@ -182,12 +199,18 @@ func (c *coordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID)
182199
c.mutex.Lock()
183200
connectionSockets, ok := c.agentToConnectionSockets[agent]
184201
if !ok {
185-
connectionSockets = map[uuid.UUID]net.Conn{}
202+
connectionSockets = map[uuid.UUID]*trackedConn{}
186203
c.agentToConnectionSockets[agent] = connectionSockets
187204
}
205+
206+
now := time.Now().Unix()
188207
// Insert this connection into a map so the agent
189208
// can publish node updates.
190-
connectionSockets[id] = conn
209+
connectionSockets[id] = &trackedConn{
210+
Conn: conn,
211+
start: now,
212+
lastWrite: now,
213+
}
191214
c.mutex.Unlock()
192215
defer func() {
193216
c.mutex.Lock()
@@ -243,7 +266,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
243266
return xerrors.Errorf("marshal nodes: %w", err)
244267
}
245268

246-
_, err = agentSocket.conn.Write(data)
269+
_, err = agentSocket.Write(data)
247270
if err != nil {
248271
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || errors.Is(err, context.Canceled) {
249272
return nil
@@ -256,7 +279,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
256279

257280
// ServeAgent accepts a WebSocket connection to an agent that
258281
// listens to incoming connections and publishes node updates.
259-
func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
282+
func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) error {
260283
c.mutex.Lock()
261284
if c.closed {
262285
c.mutex.Unlock()
@@ -289,6 +312,8 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
289312

290313
// This uniquely identifies a connection that belongs to this goroutine.
291314
unique := uuid.New()
315+
now := time.Now().Unix()
316+
overwrites := int64(0)
292317

293318
// If an old agent socket is connected, we close it to avoid any leaks. This
294319
// shouldn't ever occur because we expect one agent to be running, but it's
@@ -297,11 +322,17 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
297322
// dead.
298323
oldAgentSocket, ok := c.agentSockets[id]
299324
if ok {
300-
_ = oldAgentSocket.conn.Close()
325+
overwrites = oldAgentSocket.overwrites + 1
326+
_ = oldAgentSocket.Close()
301327
}
302-
c.agentSockets[id] = idConn{
328+
c.agentSockets[id] = &trackedConn{
303329
id: unique,
304-
conn: conn,
330+
Conn: conn,
331+
332+
name: name,
333+
start: now,
334+
lastWrite: now,
335+
overwrites: overwrites,
305336
}
306337

307338
c.mutex.Unlock()
@@ -382,7 +413,7 @@ func (c *coordinator) Close() error {
382413
for _, socket := range c.agentSockets {
383414
socket := socket
384415
go func() {
385-
_ = socket.conn.Close()
416+
_ = socket.Close()
386417
wg.Done()
387418
}()
388419
}
@@ -403,3 +434,71 @@ func (c *coordinator) Close() error {
403434
wg.Wait()
404435
return nil
405436
}
437+
438+
func (c *coordinator) ServeHTTPDebug(w http.ResponseWriter, _ *http.Request) {
439+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
440+
now := time.Now()
441+
442+
c.mutex.RLock()
443+
defer c.mutex.RUnlock()
444+
445+
fmt.Fprintln(w, "<h1>in-memory wireguard coordinator debug</h1>")
446+
fmt.Fprintf(w, "<h2 id=agents><a href=#agents>#</a> agents: total %d</h2>\n", len(c.agentSockets))
447+
fmt.Fprintln(w, "<ul>")
448+
for id, conn := range c.agentSockets {
449+
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago, overwrites %d </li>\n",
450+
conn.name,
451+
id.String(),
452+
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
453+
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
454+
conn.overwrites,
455+
)
456+
457+
if connCount := len(c.agentToConnectionSockets[id]); connCount > 0 {
458+
fmt.Fprintf(w, "<h3>connections: total %d</h3>\n", connCount)
459+
fmt.Fprintln(w, "<ul>")
460+
for id, conn := range c.agentToConnectionSockets[id] {
461+
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n",
462+
conn.name,
463+
id.String(),
464+
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
465+
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
466+
)
467+
}
468+
fmt.Fprintln(w, "</ul>")
469+
}
470+
}
471+
fmt.Fprintln(w, "</ul>")
472+
473+
missingAgents := map[uuid.UUID]map[uuid.UUID]*trackedConn{}
474+
for agentID, conns := range c.agentToConnectionSockets {
475+
if len(conns) == 0 {
476+
continue
477+
}
478+
479+
if _, ok := c.agentSockets[agentID]; !ok {
480+
missingAgents[agentID] = conns
481+
}
482+
}
483+
484+
fmt.Fprintf(w, "<h2 id=missing-agents><a href=#missing-agents>#</a> missing agents: total %d</h2>\n", len(missingAgents))
485+
fmt.Fprintln(w, "<ul>")
486+
for agentID, conns := range missingAgents {
487+
fmt.Fprintf(w, "<li><b>unknown</b> (%s): created ? ago, write ? ago, overwrites ? </li>\n",
488+
agentID.String(),
489+
)
490+
491+
fmt.Fprintf(w, "<h3>connections: total %d</h3>\n", len(conns))
492+
fmt.Fprintln(w, "<ul>")
493+
for id, conn := range conns {
494+
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n",
495+
conn.name,
496+
id.String(),
497+
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
498+
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
499+
)
500+
}
501+
fmt.Fprintln(w, "</ul>")
502+
}
503+
fmt.Fprintln(w, "</ul>")
504+
}

0 commit comments

Comments
 (0)