Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 90c34b7

Browse files
authored
feat: Add connection_timeout and troubleshooting_url to agent (#4937)
* feat: Add connection_timeout and troubleshooting_url to agent. This commit adds the connection timeout and troubleshooting URL fields to coder agents. If an initial connection cannot be established within `connection_timeout` seconds, the agent status will be marked as `"timeout"`. If a troubleshooting URL is configured in the Terraform template, it can be presented to the user when the agent state is either `"timeout"` or `"disconnected"`. Fixes #4678
1 parent ed7de90 commit 90c34b7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

44 files changed

+857
-423
lines changed

cli/cliui/agent.go

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,11 @@ func Agent(ctx context.Context, writer io.Writer, opts AgentOptions) error {
3535
if err != nil {
3636
return xerrors.Errorf("fetch: %w", err)
3737
}
38+
3839
if agent.Status == codersdk.WorkspaceAgentConnected {
3940
return nil
4041
}
41-
if agent.Status == codersdk.WorkspaceAgentDisconnected {
42-
opts.WarnInterval = 0
43-
}
42+
4443
spin := spinner.New(spinner.CharSets[78], 100*time.Millisecond, spinner.WithColor("fgHiGreen"))
4544
spin.Writer = writer
4645
spin.ForceOutput = true
@@ -65,43 +64,70 @@ func Agent(ctx context.Context, writer io.Writer, opts AgentOptions) error {
6564
os.Exit(1)
6665
}()
6766

68-
ticker := time.NewTicker(opts.FetchInterval)
69-
defer ticker.Stop()
70-
timer := time.NewTimer(opts.WarnInterval)
71-
defer timer.Stop()
72-
go func() {
73-
select {
74-
case <-ctx.Done():
75-
return
76-
case <-timer.C:
77-
}
67+
warningShown := false
68+
warnAfter := time.NewTimer(opts.WarnInterval)
69+
defer warnAfter.Stop()
70+
showWarning := func() {
71+
warnAfter.Stop()
72+
7873
resourceMutex.Lock()
7974
defer resourceMutex.Unlock()
80-
message := "Don't panic, your workspace is booting up!"
81-
if agent.Status == codersdk.WorkspaceAgentDisconnected {
82-
message = "The workspace agent lost connection! Wait for it to reconnect or restart your workspace."
75+
if warningShown {
76+
return
8377
}
78+
warningShown = true
79+
80+
message := waitingMessage(agent)
8481
// This saves the cursor position, then defers clearing from the cursor
8582
// position to the end of the screen.
8683
_, _ = fmt.Fprintf(writer, "\033[s\r\033[2K%s\n\n", Styles.Paragraph.Render(Styles.Prompt.String()+message))
8784
defer fmt.Fprintf(writer, "\033[u\033[J")
85+
}
86+
go func() {
87+
select {
88+
case <-ctx.Done():
89+
case <-warnAfter.C:
90+
showWarning()
91+
}
8892
}()
93+
94+
fetchInterval := time.NewTicker(opts.FetchInterval)
95+
defer fetchInterval.Stop()
8996
for {
9097
select {
9198
case <-ctx.Done():
9299
return ctx.Err()
93-
case <-ticker.C:
100+
case <-fetchInterval.C:
94101
}
95102
resourceMutex.Lock()
96103
agent, err = opts.Fetch(ctx)
97104
if err != nil {
98-
return xerrors.Errorf("fetch: %w", err)
99-
}
100-
if agent.Status != codersdk.WorkspaceAgentConnected {
101105
resourceMutex.Unlock()
102-
continue
106+
return xerrors.Errorf("fetch: %w", err)
103107
}
104108
resourceMutex.Unlock()
105-
return nil
109+
switch agent.Status {
110+
case codersdk.WorkspaceAgentConnected:
111+
return nil
112+
case codersdk.WorkspaceAgentTimeout, codersdk.WorkspaceAgentDisconnected:
113+
showWarning()
114+
}
115+
}
116+
}
117+
118+
func waitingMessage(agent codersdk.WorkspaceAgent) string {
119+
var m string
120+
switch agent.Status {
121+
case codersdk.WorkspaceAgentTimeout:
122+
m = "The workspace agent is having trouble connecting."
123+
case codersdk.WorkspaceAgentDisconnected:
124+
m = "The workspace agent lost connection!"
125+
default:
126+
// Not a failure state, no troubleshooting necessary.
127+
return "Don't panic, your workspace is booting up!"
128+
}
129+
if agent.TroubleshootingURL != "" {
130+
return fmt.Sprintf("%s See troubleshooting instructions at: %s", m, agent.TroubleshootingURL)
106131
}
132+
return fmt.Sprintf("%s Wait for it to (re)connect or restart your workspace.", m)
107133
}

cli/cliui/resources.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@ func renderAgentStatus(agent codersdk.WorkspaceAgent) string {
127127
since := database.Now().Sub(*agent.DisconnectedAt)
128128
return Styles.Error.Render("⦾ disconnected") + " " +
129129
Styles.Placeholder.Render("["+strconv.Itoa(int(since.Seconds()))+"s]")
130+
case codersdk.WorkspaceAgentTimeout:
131+
since := database.Now().Sub(agent.CreatedAt)
132+
return fmt.Sprintf(
133+
"%s %s",
134+
Styles.Warn.Render("⦾ timeout"),
135+
Styles.Placeholder.Render("["+strconv.Itoa(int(since.Seconds()))+"s]"),
136+
)
130137
case codersdk.WorkspaceAgentConnected:
131138
return Styles.Keyword.Render("⦿ connected")
132139
default:

cli/speedtest_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func TestSpeedtest(t *testing.T) {
2020
if testing.Short() {
2121
t.Skip("This test takes a minimum of 5ms per a hardcoded value in Tailscale!")
2222
}
23-
client, workspace, agentToken := setupWorkspaceForAgent(t)
23+
client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
2424
agentClient := codersdk.New(client.URL)
2525
agentClient.SetSessionToken(agentToken)
2626
agentCloser := agent.New(agent.Options{

cli/ssh_test.go

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,13 @@ import (
3131
"github.com/coder/coder/testutil"
3232
)
3333

34-
func setupWorkspaceForAgent(t *testing.T) (*codersdk.Client, codersdk.Workspace, string) {
34+
func setupWorkspaceForAgent(t *testing.T, mutate func([]*proto.Agent) []*proto.Agent) (*codersdk.Client, codersdk.Workspace, string) {
3535
t.Helper()
36+
if mutate == nil {
37+
mutate = func(a []*proto.Agent) []*proto.Agent {
38+
return a
39+
}
40+
}
3641
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
3742
user := coderdtest.CreateFirstUser(t, client)
3843
agentToken := uuid.NewString()
@@ -45,12 +50,12 @@ func setupWorkspaceForAgent(t *testing.T) (*codersdk.Client, codersdk.Workspace,
4550
Resources: []*proto.Resource{{
4651
Name: "dev",
4752
Type: "google_compute_instance",
48-
Agents: []*proto.Agent{{
53+
Agents: mutate([]*proto.Agent{{
4954
Id: uuid.NewString(),
5055
Auth: &proto.Agent_Token{
5156
Token: agentToken,
5257
},
53-
}},
58+
}}),
5459
}},
5560
},
5661
},
@@ -69,7 +74,7 @@ func TestSSH(t *testing.T) {
6974
t.Run("ImmediateExit", func(t *testing.T) {
7075
t.Parallel()
7176

72-
client, workspace, agentToken := setupWorkspaceForAgent(t)
77+
client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
7378
cmd, root := clitest.New(t, "ssh", workspace.Name)
7479
clitest.SetupConfig(t, client, root)
7580
pty := ptytest.New(t)
@@ -100,9 +105,38 @@ func TestSSH(t *testing.T) {
100105
pty.WriteLine("exit")
101106
<-cmdDone
102107
})
108+
t.Run("ShowTroubleshootingURLAfterTimeout", func(t *testing.T) {
109+
t.Parallel()
110+
111+
wantURL := "https://example.com/troubleshoot"
112+
client, workspace, _ := setupWorkspaceForAgent(t, func(a []*proto.Agent) []*proto.Agent {
113+
// Unfortunately, one second is the lowest
114+
// we can go because 0 disables the feature.
115+
a[0].ConnectionTimeoutSeconds = 1
116+
a[0].TroubleshootingUrl = wantURL
117+
return a
118+
})
119+
cmd, root := clitest.New(t, "ssh", workspace.Name)
120+
clitest.SetupConfig(t, client, root)
121+
pty := ptytest.New(t)
122+
cmd.SetIn(pty.Input())
123+
cmd.SetErr(pty.Output())
124+
cmd.SetOut(pty.Output())
125+
126+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
127+
defer cancel()
128+
129+
cmdDone := tGo(t, func() {
130+
err := cmd.ExecuteContext(ctx)
131+
assert.ErrorIs(t, err, context.Canceled)
132+
})
133+
pty.ExpectMatch(wantURL)
134+
cancel()
135+
<-cmdDone
136+
})
103137
t.Run("Stdio", func(t *testing.T) {
104138
t.Parallel()
105-
client, workspace, agentToken := setupWorkspaceForAgent(t)
139+
client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
106140
_, _ = tGoContext(t, func(ctx context.Context) {
107141
// Run this async so the SSH command has to wait for
108142
// the build and agent to connect!
@@ -171,7 +205,7 @@ func TestSSH(t *testing.T) {
171205

172206
t.Parallel()
173207

174-
client, workspace, agentToken := setupWorkspaceForAgent(t)
208+
client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
175209

176210
agentClient := codersdk.New(client.URL)
177211
agentClient.SetSessionToken(agentToken)

coderd/database/databasefake/databasefake.go

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ func (q *fakeQuerier) AcquireProvisionerJob(_ context.Context, arg database.Acqu
154154
}
155155
return database.ProvisionerJob{}, sql.ErrNoRows
156156
}
157+
157158
func (*fakeQuerier) DeleteOldAgentStats(_ context.Context) error {
158159
// no-op
159160
return nil
@@ -2362,20 +2363,22 @@ func (q *fakeQuerier) InsertWorkspaceAgent(_ context.Context, arg database.Inser
23622363
defer q.mutex.Unlock()
23632364

23642365
agent := database.WorkspaceAgent{
2365-
ID: arg.ID,
2366-
CreatedAt: arg.CreatedAt,
2367-
UpdatedAt: arg.UpdatedAt,
2368-
ResourceID: arg.ResourceID,
2369-
AuthToken: arg.AuthToken,
2370-
AuthInstanceID: arg.AuthInstanceID,
2371-
EnvironmentVariables: arg.EnvironmentVariables,
2372-
Name: arg.Name,
2373-
Architecture: arg.Architecture,
2374-
OperatingSystem: arg.OperatingSystem,
2375-
Directory: arg.Directory,
2376-
StartupScript: arg.StartupScript,
2377-
InstanceMetadata: arg.InstanceMetadata,
2378-
ResourceMetadata: arg.ResourceMetadata,
2366+
ID: arg.ID,
2367+
CreatedAt: arg.CreatedAt,
2368+
UpdatedAt: arg.UpdatedAt,
2369+
ResourceID: arg.ResourceID,
2370+
AuthToken: arg.AuthToken,
2371+
AuthInstanceID: arg.AuthInstanceID,
2372+
EnvironmentVariables: arg.EnvironmentVariables,
2373+
Name: arg.Name,
2374+
Architecture: arg.Architecture,
2375+
OperatingSystem: arg.OperatingSystem,
2376+
Directory: arg.Directory,
2377+
StartupScript: arg.StartupScript,
2378+
InstanceMetadata: arg.InstanceMetadata,
2379+
ResourceMetadata: arg.ResourceMetadata,
2380+
ConnectionTimeoutSeconds: arg.ConnectionTimeoutSeconds,
2381+
TroubleshootingURL: arg.TroubleshootingURL,
23792382
}
23802383

23812384
q.provisionerJobAgents = append(q.provisionerJobAgents, agent)

coderd/database/dump.sql

Lines changed: 7 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
BEGIN;
2+
3+
ALTER TABLE workspace_agents
4+
DROP COLUMN connection_timeout_seconds;
5+
6+
ALTER TABLE workspace_agents
7+
DROP COLUMN troubleshooting_url;
8+
9+
COMMIT;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
BEGIN;
2+
3+
ALTER TABLE workspace_agents
4+
ADD COLUMN connection_timeout_seconds integer NOT NULL DEFAULT 0;
5+
6+
COMMENT ON COLUMN workspace_agents.connection_timeout_seconds IS 'Connection timeout in seconds, 0 means disabled.';
7+
8+
ALTER TABLE workspace_agents
9+
ADD COLUMN troubleshooting_url text NOT NULL DEFAULT '';
10+
11+
COMMENT ON COLUMN workspace_agents.troubleshooting_url IS 'URL for troubleshooting the agent.';
12+
13+
COMMIT;

coderd/database/models.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)