From c4fa012fc1ea81770785b3d360ba52ea1341e45e Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 24 Nov 2023 19:25:56 +0200 Subject: [PATCH 1/4] fix(codersdk): keep workspace agent connection open after dial context --- coderd/workspaceagents_test.go | 18 ++++++++++++++--- codersdk/workspaceagents.go | 35 +++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 3108b0bac880c..8e9a0d65072a4 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -445,13 +445,19 @@ func TestWorkspaceAgentTailnet(t *testing.T) { _ = agenttest.New(t, client.URL, authToken) resources := coderdtest.AwaitWorkspaceAgents(t, client, ws.ID) - ctx, cancelFunc := context.WithCancel(context.Background()) - defer cancelFunc() + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() conn, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, &codersdk.DialWorkspaceAgentOptions{ Logger: slogtest.Make(t, nil).Named("client").Leveled(slog.LevelDebug), }) require.NoError(t, err) defer conn.Close() + + // Connection should remain open even if the dial context is canceled. + cancel() + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + sshClient, err := conn.SSHClient(ctx) require.NoError(t, err) session, err := sshClient.NewSession() @@ -1416,7 +1422,8 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { agentID := resources[0].Agents[0].ID // Connect from a client. 
- ctx := testutil.Context(t, testutil.WaitLong) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() conn1, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ Logger: logger.Named("client1"), }) @@ -1425,6 +1432,11 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { ok := conn1.AwaitReachable(ctx) require.True(t, ok) + // Connection should remain open even if the dial context is canceled. + cancel() + ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + // Change the DERP map and change the region ID. newDerpMap, _ := tailnettest.RunDERPAndSTUN(t) require.NotNil(t, newDerpMap) diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index e020fd579a417..bb1b39f806e3a 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -258,12 +258,12 @@ type DialWorkspaceAgentOptions struct { BlockEndpoints bool } -func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, options *DialWorkspaceAgentOptions) (agentConn *WorkspaceAgentConn, err error) { +func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID, options *DialWorkspaceAgentOptions) (agentConn *WorkspaceAgentConn, err error) { if options == nil { options = &DialWorkspaceAgentOptions{} } - connInfo, err := c.WorkspaceAgentConnectionInfo(ctx, agentID) + connInfo, err := c.WorkspaceAgentConnectionInfo(dialCtx, agentID) if err != nil { return nil, xerrors.Errorf("get connection info: %w", err) } @@ -302,7 +302,10 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti tokenHeader = c.SessionTokenHeader } headers.Set(tokenHeader, c.SessionToken()) - ctx, cancel := context.WithCancel(ctx) + + // New context, separate from dialCtx. We don't want to cancel the + // connection if dialCtx is canceled. 
+ ctx, cancel := context.WithCancel(context.Background()) defer func() { if err != nil { cancel() @@ -317,6 +320,7 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti firstCoordinator := make(chan error) go func() { defer close(closedCoordinator) + firstCoordinator := firstCoordinator // Shadowed so it can be reassigned outside goroutine. isFirst := true for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { options.Logger.Debug(ctx, "connecting") @@ -369,6 +373,7 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti firstDerpMap := make(chan error) go func() { defer close(closedDerpMap) + firstDerpMap := firstDerpMap // Shadowed so it can be reassigned outside goroutine. isFirst := true for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { options.Logger.Debug(ctx, "connecting to server for derp map updates") @@ -420,13 +425,21 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti } }() - err = <-firstCoordinator - if err != nil { - return nil, err - } - err = <-firstDerpMap - if err != nil { - return nil, err + for firstCoordinator != nil || firstDerpMap != nil { + select { + case <-dialCtx.Done(): + return nil, dialCtx.Err() + case err = <-firstCoordinator: + if err != nil { + return nil, err + } + firstCoordinator = nil + case err = <-firstDerpMap: + if err != nil { + return nil, err + } + firstDerpMap = nil + } } agentConn = NewWorkspaceAgentConn(conn, WorkspaceAgentConnOptions{ @@ -444,7 +457,7 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti }, }) - if !agentConn.AwaitReachable(ctx) { + if !agentConn.AwaitReachable(dialCtx) { _ = agentConn.Close() return nil, xerrors.Errorf("timed out waiting for agent to become reachable: %w", ctx.Err()) } From 65abcfa4a01a5b5f4af978a125ff283806f57dd8 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 27 Nov 2023 12:01:04 +0200 
Subject: [PATCH 2/4] remove unneeded shadow, add buffer and comment --- codersdk/workspaceagents.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index bb1b39f806e3a..d73eed2378fee 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -317,10 +317,11 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID, return nil, xerrors.Errorf("parse url: %w", err) } closedCoordinator := make(chan struct{}) - firstCoordinator := make(chan error) + // Must only ever be used once, send error OR close to avoid + // reassignment race. Buffered so we don't hang in goroutine. + firstCoordinator := make(chan error, 1) go func() { defer close(closedCoordinator) - firstCoordinator := firstCoordinator // Shadowed so it can be reassigned outside goroutine. isFirst := true for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { options.Logger.Debug(ctx, "connecting") @@ -370,10 +371,11 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID, return nil, xerrors.Errorf("parse url: %w", err) } closedDerpMap := make(chan struct{}) - firstDerpMap := make(chan error) + // Must only ever be used once, send error OR close to avoid + // reassignment race. Buffered so we don't hang in goroutine. + firstDerpMap := make(chan error, 1) go func() { defer close(closedDerpMap) - firstDerpMap := firstDerpMap // Shadowed so it can be reassigned outside goroutine. 
isFirst := true for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); { options.Logger.Debug(ctx, "connecting to server for derp map updates") From f32848e14c2331e50bfe0a75f487d50de2c89658 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 27 Nov 2023 12:18:12 +0200 Subject: [PATCH 3/4] improve error message --- codersdk/workspaceagents.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codersdk/workspaceagents.go b/codersdk/workspaceagents.go index d73eed2378fee..ac3f28aa28324 100644 --- a/codersdk/workspaceagents.go +++ b/codersdk/workspaceagents.go @@ -430,15 +430,15 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID, for firstCoordinator != nil || firstDerpMap != nil { select { case <-dialCtx.Done(): - return nil, dialCtx.Err() + return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", dialCtx.Err()) case err = <-firstCoordinator: if err != nil { - return nil, err + return nil, xerrors.Errorf("start coordinator: %w", err) } firstCoordinator = nil case err = <-firstDerpMap: if err != nil { - return nil, err + return nil, xerrors.Errorf("receive derp map: %w", err) } firstDerpMap = nil } @@ -461,7 +461,7 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID, if !agentConn.AwaitReachable(dialCtx) { _ = agentConn.Close() - return nil, xerrors.Errorf("timed out waiting for agent to become reachable: %w", ctx.Err()) + return nil, xerrors.Errorf("timed out waiting for agent to become reachable: %w", dialCtx.Err()) } return agentConn, nil From 7b55671f65a018df03715d5dfe4b90a93a012561 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 27 Nov 2023 14:12:50 +0200 Subject: [PATCH 4/4] refactor test --- coderd/workspaceagents_test.go | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index 
8e9a0d65072a4..3d3e7aaa061d6 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -445,17 +445,18 @@ func TestWorkspaceAgentTailnet(t *testing.T) { _ = agenttest.New(t, client.URL, authToken) resources := coderdtest.AwaitWorkspaceAgents(t, client, ws.ID) - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) - defer cancel() - conn, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, &codersdk.DialWorkspaceAgentOptions{ - Logger: slogtest.Make(t, nil).Named("client").Leveled(slog.LevelDebug), - }) + conn, err := func() (*codersdk.WorkspaceAgentConn, error) { + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() // Connection should remain open even if the dial context is canceled. + + return client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, &codersdk.DialWorkspaceAgentOptions{ + Logger: slogtest.Make(t, nil).Named("client").Leveled(slog.LevelDebug), + }) + }() require.NoError(t, err) defer conn.Close() - // Connection should remain open even if the dial context is canceled. - cancel() - ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitLong) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() sshClient, err := conn.SSHClient(ctx) @@ -1422,21 +1423,23 @@ func TestWorkspaceAgent_UpdatedDERP(t *testing.T) { agentID := resources[0].Agents[0].ID // Connect from a client. - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) - defer cancel() - conn1, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ - Logger: logger.Named("client1"), - }) + conn1, err := func() (*codersdk.WorkspaceAgentConn, error) { + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() // Connection should remain open even if the dial context is canceled. 
+ + return client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ + Logger: logger.Named("client1"), + }) + }() require.NoError(t, err) defer conn1.Close() - ok := conn1.AwaitReachable(ctx) - require.True(t, ok) - // Connection should remain open even if the dial context is canceled. - cancel() - ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitLong) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() + ok := conn1.AwaitReachable(ctx) + require.True(t, ok) + // Change the DERP map and change the region ID. newDerpMap, _ := tailnettest.RunDERPAndSTUN(t) require.NotNil(t, newDerpMap)