Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ae1be27

Browse files
authored
fix: set node callback each time we reinit the coordinator in servertailnet (coder#12140) (coder#12150)
I think this will resolve coder#12136, but let's get a proper test at the system level before closing. Before this change, we only registered the node callback at start of day for the server tailnet. If the coordinator changes, as we know happens when we are licensed for the PGCoordinator, we close the connection to the old coordinator and open a new one to the new coordinator. The callback is designed to direct the updates to the new coordinator, but there is nothing that specifically triggers it to fire after we connect to the new coordinator. If we have STUN, then periodic re-STUNs will generally get it to fire eventually, but without STUN we could go indefinitely without a callback. This PR changes the servertailnet to re-register the callback each time we reconnect to the coordinator. Registering a callback (even if it's the same callback) triggers an immediate call with our node information, so the new coordinator will have it.
1 parent c4a01a4 commit ae1be27

File tree

2 files changed

+19
-22
lines changed

2 files changed

+19
-22
lines changed

coderd/tailnet.go

+17-22
Original file line numberDiff line numberDiff line change
@@ -136,28 +136,8 @@ func NewServerTailnet(
136136
return nil, xerrors.Errorf("get initial multi agent: %w", err)
137137
}
138138
tn.agentConn.Store(&agentConn)
139-
140-
pn, err := tailnet.NodeToProto(conn.Node())
141-
if err != nil {
142-
tn.logger.Critical(context.Background(), "failed to convert node", slog.Error(err))
143-
} else {
144-
err = tn.getAgentConn().UpdateSelf(pn)
145-
if err != nil {
146-
tn.logger.Warn(context.Background(), "server tailnet update self", slog.Error(err))
147-
}
148-
}
149-
150-
conn.SetNodeCallback(func(node *tailnet.Node) {
151-
pn, err := tailnet.NodeToProto(node)
152-
if err != nil {
153-
tn.logger.Critical(context.Background(), "failed to convert node", slog.Error(err))
154-
return
155-
}
156-
err = tn.getAgentConn().UpdateSelf(pn)
157-
if err != nil {
158-
tn.logger.Warn(context.Background(), "broadcast server node to agents", slog.Error(err))
159-
}
160-
})
139+
// registering the callback also triggers send of the initial node
140+
tn.coordinatee.SetNodeCallback(tn.nodeCallback)
161141

162142
// This is set to allow local DERP traffic to be proxied through memory
163143
// instead of needing to hit the external access URL. Don't use the ctx
@@ -183,6 +163,18 @@ func NewServerTailnet(
183163
return tn, nil
184164
}
185165

166+
func (s *ServerTailnet) nodeCallback(node *tailnet.Node) {
167+
pn, err := tailnet.NodeToProto(node)
168+
if err != nil {
169+
s.logger.Critical(context.Background(), "failed to convert node", slog.Error(err))
170+
return
171+
}
172+
err = s.getAgentConn().UpdateSelf(pn)
173+
if err != nil {
174+
s.logger.Warn(context.Background(), "broadcast server node to agents", slog.Error(err))
175+
}
176+
}
177+
186178
func (s *ServerTailnet) Describe(descs chan<- *prometheus.Desc) {
187179
s.connsPerAgent.Describe(descs)
188180
s.totalConns.Describe(descs)
@@ -285,6 +277,9 @@ func (s *ServerTailnet) reinitCoordinator() {
285277
continue
286278
}
287279
s.agentConn.Store(&agentConn)
280+
// reset the Node callback, which triggers the conn to send the node immediately, and also
281+
// register for updates
282+
s.coordinatee.SetNodeCallback(s.nodeCallback)
288283

289284
// Resubscribe to all of the agents we're tracking.
290285
for agentID := range s.agentConnectionTimes {

coderd/tailnet_internal_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ func TestServerTailnet_Reconnect(t *testing.T) {
4848
agentConnectionTimes: make(map[uuid.UUID]time.Time),
4949
}
5050
// reinit the Coordinator once, to load mMultiAgent0
51+
mCoord.EXPECT().SetNodeCallback(gomock.Any()).Times(1)
5152
uut.reinitCoordinator()
5253

5354
mMultiAgent0.EXPECT().NextUpdate(gomock.Any()).
@@ -57,6 +58,7 @@ func TestServerTailnet_Reconnect(t *testing.T) {
5758
Times(1).
5859
Return(true) // this triggers reconnect
5960
setLost := mCoord.EXPECT().SetAllPeersLost().Times(1).After(closed0)
61+
mCoord.EXPECT().SetNodeCallback(gomock.Any()).Times(1).After(closed0)
6062
mMultiAgent1.EXPECT().NextUpdate(gomock.Any()).
6163
Times(1).
6264
After(setLost).

0 commit comments

Comments
 (0)