Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 60489f9

Browse files
committed
feat: add computed workspace and agent health fields to the api
1 parent 6639c69 commit 60489f9

15 files changed

+459
-25
lines changed

coderd/apidoc/docs.go

Lines changed: 52 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

Lines changed: 52 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/workspaceagents.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,24 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
12621262
workspaceAgent.ReadyAt = &dbAgent.ReadyAt.Time
12631263
}
12641264

1265+
switch {
1266+
case workspaceAgent.Status != codersdk.WorkspaceAgentConnected && workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleOff:
1267+
workspaceAgent.Health.Reason = "agent is not running"
1268+
case workspaceAgent.Status == codersdk.WorkspaceAgentTimeout:
1269+
workspaceAgent.Health.Reason = "agent is taking too long to connect"
1270+
case workspaceAgent.Status == codersdk.WorkspaceAgentDisconnected:
1271+
workspaceAgent.Health.Reason = "agent has lost connection"
1272+
// Note: We could also handle codersdk.WorkspaceAgentLifecycleStartTimeout
1273+
// here, but it's more of a soft issue, so we don't want to mark the agent
1274+
// as unhealthy.
1275+
case workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleStartError:
1276+
workspaceAgent.Health.Reason = "agent startup script exited with an error"
1277+
case workspaceAgent.LifecycleState.ShuttingDown():
1278+
workspaceAgent.Health.Reason = "agent is shutting down"
1279+
default:
1280+
workspaceAgent.Health.Healthy = true
1281+
}
1282+
12651283
return workspaceAgent, nil
12661284
}
12671285

coderd/workspaceagents_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ func TestWorkspaceAgent(t *testing.T) {
7272
require.Equal(t, tmpDir, workspace.LatestBuild.Resources[0].Agents[0].Directory)
7373
_, err = client.WorkspaceAgent(ctx, workspace.LatestBuild.Resources[0].Agents[0].ID)
7474
require.NoError(t, err)
75+
require.True(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy)
7576
})
7677
t.Run("HasFallbackTroubleshootingURL", func(t *testing.T) {
7778
t.Parallel()
@@ -167,6 +168,8 @@ func TestWorkspaceAgent(t *testing.T) {
167168
}, testutil.IntervalMedium, "agent status timeout")
168169

169170
require.Equal(t, wantTroubleshootingURL, workspace.LatestBuild.Resources[0].Agents[0].TroubleshootingURL)
171+
require.False(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy)
172+
require.NotEmpty(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Reason)
170173
})
171174
}
172175

coderd/workspaces.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,16 @@ func convertWorkspace(
11151115
deletingAt = calculateDeletingAt(workspace, template, workspaceBuild)
11161116
)
11171117

1118+
agentHealth := make(map[uuid.UUID]codersdk.WorkspaceAgentHealth)
1119+
for _, r := range workspaceBuild.Resources {
1120+
// For now, we only consider agent healths when the workspace is running.
1121+
if r.Transition == codersdk.WorkspaceTransitionStart {
1122+
for _, a := range r.Agents {
1123+
agentHealth[a.ID] = a.Health
1124+
}
1125+
}
1126+
}
1127+
11181128
return codersdk.Workspace{
11191129
ID: workspace.ID,
11201130
CreatedAt: workspace.CreatedAt,
@@ -1135,6 +1145,7 @@ func convertWorkspace(
11351145
LastUsedAt: workspace.LastUsedAt,
11361146
DeletingAt: deletingAt,
11371147
LockedAt: lockedAt,
1148+
Health: (codersdk.WorkspaceHealth{Agents: agentHealth}).Complete(),
11381149
}
11391150
}
11401151

coderd/workspaces_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ func TestWorkspace(t *testing.T) {
6060
require.NoError(t, err)
6161
require.Equal(t, user.UserID, ws.LatestBuild.InitiatorID)
6262
require.Equal(t, codersdk.BuildReasonInitiator, ws.LatestBuild.Reason)
63+
require.True(t, ws.Health.Healthy)
6364
})
6465

6566
t.Run("Deleted", func(t *testing.T) {

codersdk/workspaceagents.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,16 @@ type WorkspaceAgent struct {
164164
ConnectionTimeoutSeconds int32 `json:"connection_timeout_seconds"`
165165
TroubleshootingURL string `json:"troubleshooting_url"`
166166
// Deprecated: Use StartupScriptBehavior instead.
167-
LoginBeforeReady bool `json:"login_before_ready"`
168-
ShutdownScript string `json:"shutdown_script,omitempty"`
169-
ShutdownScriptTimeoutSeconds int32 `json:"shutdown_script_timeout_seconds"`
170-
Subsystem AgentSubsystem `json:"subsystem"`
167+
LoginBeforeReady bool `json:"login_before_ready"`
168+
ShutdownScript string `json:"shutdown_script,omitempty"`
169+
ShutdownScriptTimeoutSeconds int32 `json:"shutdown_script_timeout_seconds"`
170+
Subsystem AgentSubsystem `json:"subsystem"`
171+
Health WorkspaceAgentHealth `json:"health"` // Health reports the health of the agent.
172+
}
173+
174+
// WorkspaceAgentHealth describes whether a single workspace agent is
// considered healthy and, when it is not, why. The reason is omitted from the
// JSON encoding when it is empty.
type WorkspaceAgentHealth struct {
175+
Healthy bool `json:"healthy"` // Healthy is true if the agent is healthy.
176+
Reason string `json:"reason,omitempty"` // Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true.
171177
}
172178

173179
type DERPRegion struct {

codersdk/workspaces.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,30 @@ type Workspace struct {
4343
// unlocked by an admin. It is subject to deletion if it breaches
4444
// the duration of the locked_ttl field on its template.
4545
LockedAt *time.Time `json:"locked_at" format:"date-time"`
46+
// Health reports the health of the workspace and its agents.
47+
Health WorkspaceHealth `json:"health"`
48+
}
49+
50+
// WorkspaceHealth aggregates the health of a workspace's agents. Callers
// populate Agents; the Complete method then derives Healthy and
// FailingSections from it.
type WorkspaceHealth struct {
51+
Healthy bool `json:"healthy"` // Healthy is true if the workspace and all of its agents are healthy.
52+
FailingSections []string `json:"failing_sections"` // FailingSections is a list of sections that have failed their healthcheck.
53+
Agents map[uuid.UUID]WorkspaceAgentHealth `json:"agents"` // Agents is a map of agent IDs to their health.
54+
}
55+
56+
// Complete returns a new copy with the Healthy flag and FailingSections
57+
// set based on the agent healths.
58+
//
59+
//nolint:revive
60+
func (wh WorkspaceHealth) Complete() WorkspaceHealth {
61+
// wh is a value-receiver copy, so the field assignments below do not alter
// the caller's struct; the Agents map is only read here, never written.
wh.Healthy = true
62+
// Start from an empty, non-nil slice: with no failing agents the field then
// JSON-encodes as [] instead of null.
wh.FailingSections = []string{}
63+
// NOTE(review): Go does not specify map iteration order, so the order of
// the entries appended to FailingSections can differ between calls.
for id, agent := range wh.Agents {
64+
if !agent.Healthy {
65+
// A single unhealthy agent marks the whole workspace unhealthy.
wh.Healthy = false
66+
// Each failing agent is reported as "agents.<agent UUID>".
wh.FailingSections = append(wh.FailingSections, "agents."+id.String())
67+
}
68+
}
69+
return wh
4670
}
4771

4872
type WorkspacesRequest struct {

docs/api/agents.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,10 @@ curl -X GET http://coder-server:8080/api/v2/workspaceagents/{workspaceagent} \
450450
},
451451
"expanded_directory": "string",
452452
"first_connected_at": "2019-08-24T14:15:22Z",
453+
"health": {
454+
"healthy": true,
455+
"reason": "string"
456+
},
453457
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
454458
"instance_id": "string",
455459
"last_connected_at": "2019-08-24T14:15:22Z",

0 commit comments

Comments
 (0)