Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 05b6c81

Browse files
committed
feat: changes codersdk to use tailnet v2 for DERPMap updates
1 parent d6ba0df commit 05b6c81

File tree

1 file changed

+188
-136
lines changed

1 file changed

+188
-136
lines changed

codersdk/workspaceagents.go

+188-136
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"strings"
1515
"time"
1616

17+
"golang.org/x/sync/errgroup"
18+
1719
"github.com/google/uuid"
1820
"golang.org/x/xerrors"
1921
"nhooyr.io/websocket"
@@ -317,142 +319,28 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
317319
q := coordinateURL.Query()
318320
q.Add("version", proto.CurrentVersion.String())
319321
coordinateURL.RawQuery = q.Encode()
320-
closedCoordinator := make(chan struct{})
321-
// Must only ever be used once, send error OR close to avoid
322-
// reassignment race. Buffered so we don't hang in goroutine.
323-
firstCoordinator := make(chan error, 1)
324-
go func() {
325-
defer close(closedCoordinator)
326-
isFirst := true
327-
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
328-
options.Logger.Debug(ctx, "connecting")
329-
// nolint:bodyclose
330-
ws, res, err := websocket.Dial(ctx, coordinateURL.String(), &websocket.DialOptions{
331-
HTTPClient: c.HTTPClient,
332-
HTTPHeader: headers,
333-
// Need to disable compression to avoid a data-race.
334-
CompressionMode: websocket.CompressionDisabled,
335-
})
336-
if isFirst {
337-
if res != nil && res.StatusCode == http.StatusConflict {
338-
firstCoordinator <- ReadBodyAsError(res)
339-
return
340-
}
341-
isFirst = false
342-
close(firstCoordinator)
343-
}
344-
if err != nil {
345-
if errors.Is(err, context.Canceled) {
346-
return
347-
}
348-
options.Logger.Debug(ctx, "failed to dial", slog.Error(err))
349-
continue
350-
}
351-
client, err := tailnet.NewDRPCClient(websocket.NetConn(ctx, ws, websocket.MessageBinary))
352-
if err != nil {
353-
options.Logger.Debug(ctx, "failed to create DRPCClient", slog.Error(err))
354-
_ = ws.Close(websocket.StatusInternalError, "")
355-
continue
356-
}
357-
coordinate, err := client.Coordinate(ctx)
358-
if err != nil {
359-
options.Logger.Debug(ctx, "failed to reach the Coordinate endpoint", slog.Error(err))
360-
_ = ws.Close(websocket.StatusInternalError, "")
361-
continue
362-
}
363-
364-
coordination := tailnet.NewRemoteCoordination(options.Logger, coordinate, conn, agentID)
365-
options.Logger.Debug(ctx, "serving coordinator")
366-
err = <-coordination.Error()
367-
if errors.Is(err, context.Canceled) {
368-
_ = ws.Close(websocket.StatusGoingAway, "")
369-
return
370-
}
371-
if err != nil {
372-
options.Logger.Debug(ctx, "error serving coordinator", slog.Error(err))
373-
_ = ws.Close(websocket.StatusGoingAway, "")
374-
continue
375-
}
376-
_ = ws.Close(websocket.StatusGoingAway, "")
377-
}
378-
}()
379322

380-
derpMapURL, err := c.URL.Parse("/api/v2/derp-map")
381-
if err != nil {
382-
return nil, xerrors.Errorf("parse url: %w", err)
383-
}
384-
closedDerpMap := make(chan struct{})
385-
// Must only ever be used once, send error OR close to avoid
386-
// reassignment race. Buffered so we don't hang in goroutine.
387-
firstDerpMap := make(chan error, 1)
388-
go func() {
389-
defer close(closedDerpMap)
390-
isFirst := true
391-
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
392-
options.Logger.Debug(ctx, "connecting to server for derp map updates")
393-
// nolint:bodyclose
394-
ws, res, err := websocket.Dial(ctx, derpMapURL.String(), &websocket.DialOptions{
395-
HTTPClient: c.HTTPClient,
396-
HTTPHeader: headers,
397-
// Need to disable compression to avoid a data-race.
398-
CompressionMode: websocket.CompressionDisabled,
399-
})
400-
if isFirst {
401-
if res != nil && res.StatusCode == http.StatusConflict {
402-
firstDerpMap <- ReadBodyAsError(res)
403-
return
404-
}
405-
isFirst = false
406-
close(firstDerpMap)
407-
}
408-
if err != nil {
409-
if errors.Is(err, context.Canceled) {
410-
return
411-
}
412-
options.Logger.Debug(ctx, "failed to dial", slog.Error(err))
413-
continue
414-
}
415-
416-
var (
417-
nconn = websocket.NetConn(ctx, ws, websocket.MessageBinary)
418-
dec = json.NewDecoder(nconn)
419-
)
420-
for {
421-
var derpMap tailcfg.DERPMap
422-
err := dec.Decode(&derpMap)
423-
if xerrors.Is(err, context.Canceled) {
424-
_ = ws.Close(websocket.StatusGoingAway, "")
425-
return
426-
}
427-
if err != nil {
428-
options.Logger.Debug(ctx, "failed to decode derp map", slog.Error(err))
429-
_ = ws.Close(websocket.StatusGoingAway, "")
430-
return
431-
}
432-
433-
if !tailnet.CompareDERPMaps(conn.DERPMap(), &derpMap) {
434-
options.Logger.Debug(ctx, "updating derp map due to detected changes")
435-
conn.SetDERPMap(&derpMap)
436-
}
437-
}
438-
}
439-
}()
440-
441-
for firstCoordinator != nil || firstDerpMap != nil {
442-
select {
443-
case <-dialCtx.Done():
444-
return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", dialCtx.Err())
445-
case err = <-firstCoordinator:
446-
if err != nil {
447-
return nil, xerrors.Errorf("start coordinator: %w", err)
448-
}
449-
firstCoordinator = nil
450-
case err = <-firstDerpMap:
451-
if err != nil {
452-
return nil, xerrors.Errorf("receive derp map: %w", err)
453-
}
454-
firstDerpMap = nil
323+
connector := runTailnetAPIConnector(ctx, options.Logger,
324+
agentID, coordinateURL.String(),
325+
&websocket.DialOptions{
326+
HTTPClient: c.HTTPClient,
327+
HTTPHeader: headers,
328+
// Need to disable compression to avoid a data-race.
329+
CompressionMode: websocket.CompressionDisabled,
330+
},
331+
conn,
332+
)
333+
options.Logger.Debug(ctx, "running tailnet API v2+ connector")
334+
335+
select {
336+
case <-dialCtx.Done():
337+
return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", dialCtx.Err())
338+
case err = <-connector.connected:
339+
if err != nil {
340+
options.Logger.Error(ctx, "failed to connect to tailnet v2+ API", slog.Error(err))
341+
return nil, xerrors.Errorf("start connector: %w", err)
455342
}
343+
options.Logger.Debug(ctx, "connected to tailnet v2+ API")
456344
}
457345

458346
agentConn = NewWorkspaceAgentConn(conn, WorkspaceAgentConnOptions{
@@ -464,8 +352,7 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
464352
AgentIP: WorkspaceAgentIP,
465353
CloseFunc: func() error {
466354
cancel()
467-
<-closedCoordinator
468-
<-closedDerpMap
355+
<-connector.closed
469356
return conn.Close()
470357
},
471358
})
@@ -478,6 +365,171 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
478365
return agentConn, nil
479366
}
480367

368+
// tailnetAPIConnector dials the tailnet API (v2+) and then uses the API with a tailnet.Conn to
369+
//
370+
// 1) run the Coordinate API and pass node information back and forth
371+
// 2) stream DERPMap updates and program the Conn
372+
//
373+
// These functions share the same websocket, and so are combined here so that if we hit a problem
374+
// we tear the whole thing down and start over with a new websocket.
375+
//
376+
// @typescript-ignore tailnetAPIConnector
377+
type tailnetAPIConnector struct {
378+
ctx context.Context
379+
logger slog.Logger
380+
381+
agentID uuid.UUID
382+
coordinateURL string
383+
dialOptions *websocket.DialOptions
384+
conn *tailnet.Conn
385+
386+
connected chan error
387+
isFirst bool
388+
closed chan struct{}
389+
}
390+
391+
// runTailnetAPIConnector creates and runs a tailnetAPIConnector
392+
func runTailnetAPIConnector(
393+
ctx context.Context, logger slog.Logger,
394+
agentID uuid.UUID, coordinateURL string, dialOptions *websocket.DialOptions,
395+
conn *tailnet.Conn,
396+
) *tailnetAPIConnector {
397+
tac := &tailnetAPIConnector{
398+
ctx: ctx,
399+
logger: logger,
400+
agentID: agentID,
401+
coordinateURL: coordinateURL,
402+
dialOptions: dialOptions,
403+
conn: conn,
404+
connected: make(chan error, 1),
405+
closed: make(chan struct{}),
406+
}
407+
go tac.run()
408+
return tac
409+
}
410+
411+
func (tac *tailnetAPIConnector) run() {
412+
tac.isFirst = true
413+
defer close(tac.closed)
414+
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(tac.ctx); {
415+
tailnetClient, err := tac.dial()
416+
if err != nil {
417+
continue
418+
}
419+
tac.logger.Debug(tac.ctx, "obtained tailnet API v2+ client")
420+
tac.coordinateAndDERPMap(tailnetClient)
421+
tac.logger.Debug(tac.ctx, "tailnet API v2+ connection lost")
422+
}
423+
}
424+
425+
func (tac *tailnetAPIConnector) dial() (proto.DRPCTailnetClient, error) {
426+
tac.logger.Debug(tac.ctx, "dialing Coder tailnet v2+ API")
427+
// nolint:bodyclose
428+
ws, res, err := websocket.Dial(tac.ctx, tac.coordinateURL, tac.dialOptions)
429+
if tac.isFirst {
430+
if res != nil && res.StatusCode == http.StatusConflict {
431+
err = ReadBodyAsError(res)
432+
tac.connected <- err
433+
return nil, err
434+
}
435+
tac.isFirst = false
436+
close(tac.connected)
437+
}
438+
if err != nil {
439+
if !errors.Is(err, context.Canceled) {
440+
tac.logger.Error(tac.ctx, "failed to dial tailnet v2+ API", slog.Error(err))
441+
}
442+
return nil, err
443+
}
444+
client, err := tailnet.NewDRPCClient(websocket.NetConn(tac.ctx, ws, websocket.MessageBinary))
445+
if err != nil {
446+
tac.logger.Debug(tac.ctx, "failed to create DRPCClient", slog.Error(err))
447+
_ = ws.Close(websocket.StatusInternalError, "")
448+
return nil, err
449+
}
450+
return client, err
451+
}
452+
453+
// coordinateAndDERPMap uses the provided client to coordinate and stream DERP Maps. It is combined
454+
// into one function so that a problem with one tears down the other and triggers a retry (if
455+
// appropriate). We multiplex both RPCs over the same websocket, so we want them to share the same
456+
// fate.
457+
func (tac *tailnetAPIConnector) coordinateAndDERPMap(client proto.DRPCTailnetClient) {
458+
defer func() {
459+
conn := client.DRPCConn()
460+
closeErr := conn.Close()
461+
if closeErr != nil &&
462+
!xerrors.Is(closeErr, io.EOF) &&
463+
!xerrors.Is(closeErr, context.Canceled) &&
464+
!xerrors.Is(closeErr, context.DeadlineExceeded) {
465+
tac.logger.Error(tac.ctx, "error closing DRPC connection", slog.Error(closeErr))
466+
<-conn.Closed()
467+
}
468+
}()
469+
eg, egCtx := errgroup.WithContext(tac.ctx)
470+
eg.Go(func() error {
471+
return tac.coordinate(egCtx, client)
472+
})
473+
eg.Go(func() error {
474+
return tac.derpMap(egCtx, client)
475+
})
476+
err := eg.Wait()
477+
if err != nil &&
478+
!xerrors.Is(err, io.EOF) &&
479+
!xerrors.Is(err, context.Canceled) &&
480+
!xerrors.Is(err, context.DeadlineExceeded) {
481+
tac.logger.Error(tac.ctx, "error while connected to tailnet v2+ API")
482+
}
483+
}
484+
485+
func (tac *tailnetAPIConnector) coordinate(ctx context.Context, client proto.DRPCTailnetClient) error {
486+
coord, err := client.Coordinate(ctx)
487+
if err != nil {
488+
return xerrors.Errorf("failed to connect to Coordinate RPC: %w", err)
489+
}
490+
defer func() {
491+
cErr := coord.Close()
492+
if cErr != nil {
493+
tac.logger.Debug(ctx, "error closing Coordinate RPC", slog.Error(cErr))
494+
}
495+
}()
496+
coordination := tailnet.NewRemoteCoordination(tac.logger, coord, tac.conn, tac.agentID)
497+
tac.logger.Debug(ctx, "serving coordinator")
498+
err = <-coordination.Error()
499+
if err != nil &&
500+
!xerrors.Is(err, io.EOF) &&
501+
!xerrors.Is(err, context.Canceled) &&
502+
!xerrors.Is(err, context.DeadlineExceeded) {
503+
return xerrors.Errorf("remote coordination error: %w", err)
504+
}
505+
return nil
506+
}
507+
508+
func (tac *tailnetAPIConnector) derpMap(ctx context.Context, client proto.DRPCTailnetClient) error {
509+
s, err := client.StreamDERPMaps(ctx, &proto.StreamDERPMapsRequest{})
510+
if err != nil {
511+
return xerrors.Errorf("failed to connect to StreamDERPMaps RPC: %w", err)
512+
}
513+
defer func() {
514+
cErr := s.Close()
515+
if cErr != nil {
516+
tac.logger.Debug(ctx, "error closing StreamDERPMaps RPC", slog.Error(cErr))
517+
}
518+
}()
519+
for {
520+
dmp, err := s.Recv()
521+
if err != nil {
522+
if xerrors.Is(err, io.EOF) || xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
523+
return nil
524+
}
525+
return xerrors.Errorf("error receiving DERP Map: %w", err)
526+
}
527+
tac.logger.Debug(ctx, "got new DERP Map", slog.F("derp_map", dmp))
528+
dm := tailnet.DERPMapFromProto(dmp)
529+
tac.conn.SetDERPMap(dm)
530+
}
531+
}
532+
481533
// WatchWorkspaceAgentMetadata watches the metadata of a workspace agent.
482534
// The returned channel will be closed when the context is canceled. Exactly
483535
// one error will be sent on the error channel. The metadata channel is never closed.

0 commit comments

Comments
 (0)