Thanks to visit codestin.com
Credit goes to github.com

Skip to content

feat: Add workspace agent lifecycle state reporting #5785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jan 24, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: Add agent state reporting
  • Loading branch information
mafredri committed Jan 23, 2023
commit a84e46725456d3e368db121e605374ca49ca5898
79 changes: 70 additions & 9 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type Client interface {
WorkspaceAgentMetadata(ctx context.Context) (codersdk.WorkspaceAgentMetadata, error)
ListenWorkspaceAgent(ctx context.Context) (net.Conn, error)
AgentReportStats(ctx context.Context, log slog.Logger, stats func() *codersdk.AgentStats) (io.Closer, error)
PostWorkspaceAgentState(ctx context.Context, state codersdk.PostWorkspaceAgentStateRequest) error
PostWorkspaceAgentAppHealth(ctx context.Context, req codersdk.PostWorkspaceAppHealthsRequest) error
PostWorkspaceAgentVersion(ctx context.Context, version string) error
}
Expand Down Expand Up @@ -127,6 +128,9 @@ type agent struct {
sessionToken atomic.Pointer[string]
sshServer *ssh.Server

stateMu sync.Mutex // Protects following.
state codersdk.WorkspaceAgentState

network *tailnet.Conn
}

Expand Down Expand Up @@ -156,6 +160,30 @@ func (a *agent) runLoop(ctx context.Context) {
}
}

// setState records the given lifecycle state on the agent and reports it to
// the server through the agent client.
//
// The state mutex is held for the whole call, so concurrent callers are
// serialized. The report is retried with a backoff between one and thirty
// seconds until it succeeds or the retrier stops waiting on ctx. A failed
// report is only logged; it is never returned to the caller.
func (a *agent) setState(ctx context.Context, state codersdk.WorkspaceAgentState) {
	a.stateMu.Lock()
	defer a.stateMu.Unlock()

	a.state = state

	// The request payload is the same for every attempt, build it once.
	report := codersdk.PostWorkspaceAgentStateRequest{
		State: state,
	}

	var lastErr error
	for backoff := retry.New(time.Second, 30*time.Second); backoff.Wait(ctx); {
		lastErr = a.client.PostWorkspaceAgentState(ctx, report)
		if lastErr == nil {
			return
		}
	}

	switch {
	case xerrors.Is(lastErr, context.Canceled), xerrors.Is(lastErr, context.DeadlineExceeded), a.isClosed():
		// The context ended or the agent is closed, nothing worth logging.
		return
	case lastErr != nil:
		// If we fail to report the state we probably shouldn't exit, log only.
		a.logger.Error(ctx, "post state", slog.Error(lastErr))
	}
}

func (a *agent) run(ctx context.Context) error {
// This allows the agent to refresh its token if necessary.
// For instance identity this is required, since the instance
Expand All @@ -180,22 +208,55 @@ func (a *agent) run(ctx context.Context) error {

// The startup script should only execute on the first run!
if oldMetadata == nil {
scriptDone := make(chan error, 1)
scriptStart := time.Now()
go func() {
defer close(scriptDone)
scriptDone <- a.runStartupScript(ctx, metadata.StartupScript)
}()
go func() {
err := a.runStartupScript(ctx, metadata.StartupScript)
var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if metadata.StartupScriptTimeout > 0 {
t := time.NewTimer(metadata.StartupScriptTimeout)
defer t.Stop()
timeout = t.C
}

a.setState(ctx, codersdk.WorkspaceAgentStateStarting)

var err error
select {
case err = <-scriptDone:
case <-timeout:
a.logger.Warn(ctx, "startup script timed out")
a.setState(ctx, codersdk.WorkspaceAgentStateStartTimeout)
err = <-scriptDone // The script can still complete after a timeout.
}
if errors.Is(err, context.Canceled) {
return
}
execTime := time.Since(scriptStart)
if err != nil {
a.logger.Warn(ctx, "agent script failed", slog.Error(err))
a.logger.Warn(ctx, "startup script failed", slog.F("execution_time", execTime), slog.Error(err))
a.setState(ctx, codersdk.WorkspaceAgentStateStartError)
return
}
}()
}
a.logger.Info(ctx, "startup script completed", slog.F("execution_time", execTime))

if metadata.GitAuthConfigs > 0 {
err = gitauth.OverrideVSCodeConfigs(a.filesystem)
if err != nil {
return xerrors.Errorf("override vscode configuration for git auth: %w", err)
}
// Perform overrides after startup script has completed to ensure
// there is no conflict with the user's scripts. We also want to
// ensure this is done before the workspace is marked as ready.
if metadata.GitAuthConfigs > 0 {
err = gitauth.OverrideVSCodeConfigs(a.filesystem)
if err != nil {
a.logger.Warn(ctx, "failed to override vscode git auth configs", slog.Error(err))
}
}

a.setState(ctx, codersdk.WorkspaceAgentStateReady)
}()
}

// This automatically closes when the context ends!
Expand Down
4 changes: 4 additions & 0 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,10 @@ func (c *client) AgentReportStats(ctx context.Context, _ slog.Logger, stats func
}), nil
}

// PostWorkspaceAgentState is a no-op on this test client: it ignores the
// context and the state request and always returns nil.
func (*client) PostWorkspaceAgentState(_ context.Context, _ codersdk.PostWorkspaceAgentStateRequest) error {
return nil
}

// PostWorkspaceAgentAppHealth is a no-op on this test client: it ignores the
// context and the app health request and always returns nil.
func (*client) PostWorkspaceAgentAppHealth(_ context.Context, _ codersdk.PostWorkspaceAppHealthsRequest) error {
return nil
}
Expand Down
1 change: 1 addition & 0 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ func New(options *Options) *API {
r.Get("/gitsshkey", api.agentGitSSHKey)
r.Get("/coordinate", api.workspaceAgentCoordinate)
r.Post("/report-stats", api.workspaceAgentReportStats)
r.Post("/report-state", api.workspaceAgentReportState)
})
r.Route("/{workspaceagent}", func(r chi.Router) {
r.Use(
Expand Down
6 changes: 3 additions & 3 deletions coderd/database/databasefake/databasefake.go
Original file line number Diff line number Diff line change
Expand Up @@ -4295,12 +4295,12 @@ func (q *fakeQuerier) GetQuotaConsumedForUser(_ context.Context, userID uuid.UUI
return sum, nil
}

func (q *fakeQuerier) UpdateWorkspaceAgentStateByID(_ context.Context, id uuid.UUID, state database.AgentState) error {
func (q *fakeQuerier) UpdateWorkspaceAgentStateByID(_ context.Context, arg database.UpdateWorkspaceAgentStateByIDParams) error {
q.mutex.Lock()
defer q.mutex.Unlock()
for i, agent := range q.workspaceAgents {
if agent.ID == id {
agent.State = state
if agent.ID == arg.ID {
agent.State = arg.State
q.workspaceAgents[i] = agent
return nil
}
Expand Down
44 changes: 44 additions & 0 deletions coderd/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,50 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
})
}

// @Summary Submit workspace agent state
// @ID submit-workspace-agent-state
// @Security CoderSessionToken
// @Accept json
// @Tags Agents
// @Param request body codersdk.PostWorkspaceAgentStateRequest true "Workspace agent state request"
// @Success 204 "Success"
// @Router /workspaceagents/me/report-state [post]
func (api *API) workspaceAgentReportState(rw http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	agent := httpmw.WorkspaceAgent(r)

	// Look up the workspace that owns the calling agent; a failed lookup is
	// answered with 400 Bad Request.
	ws, err := api.Database.GetWorkspaceByAgentID(ctx, agent.ID)
	if err != nil {
		httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
			Message: "Failed to get workspace.",
			Detail:  err.Error(),
		})
		return
	}

	// Stop here when the request body could not be read into the payload.
	var payload codersdk.PostWorkspaceAgentStateRequest
	if ok := httpapi.Read(ctx, rw, r, &payload); !ok {
		return
	}

	api.Logger.Debug(ctx, "workspace agent state report",
		slog.F("agent", agent.ID),
		slog.F("workspace", ws.ID),
		slog.F("payload", payload),
	)

	// Persist the reported state for this agent.
	params := database.UpdateWorkspaceAgentStateByIDParams{
		ID:    agent.ID,
		State: database.WorkspaceAgentState(payload.State),
	}
	err = api.Database.UpdateWorkspaceAgentStateByID(ctx, params)
	if err != nil {
		httpapi.InternalServerError(rw, err)
		return
	}

	httpapi.Write(ctx, rw, http.StatusNoContent, nil)
}

// @Summary Submit workspace agent application health
// @ID submit-workspace-agent-application-health
// @Security CoderSessionToken
Expand Down
4 changes: 4 additions & 0 deletions coderd/wsconncache/wsconncache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ func (*client) AgentReportStats(_ context.Context, _ slog.Logger, _ func() *code
return io.NopCloser(strings.NewReader("")), nil
}

// PostWorkspaceAgentState is a no-op on this test client: it ignores the
// context and the state request and always returns nil.
func (*client) PostWorkspaceAgentState(_ context.Context, _ codersdk.PostWorkspaceAgentStateRequest) error {
return nil
}

// PostWorkspaceAgentAppHealth is a no-op on this test client: it ignores the
// context and the app health request and always returns nil.
func (*client) PostWorkspaceAgentAppHealth(_ context.Context, _ codersdk.PostWorkspaceAppHealthsRequest) error {
return nil
}
Expand Down
30 changes: 30 additions & 0 deletions codersdk/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ const (
WorkspaceAgentTimeout WorkspaceAgentStatus = "timeout"
)

// WorkspaceAgentState represents the lifecycle state of a workspace agent.
// The agent reports these values to the server around the execution of its
// startup script.
type WorkspaceAgentState string

const (
// WorkspaceAgentStateStarting is reported by the agent before it waits for
// the startup script to finish.
WorkspaceAgentStateStarting WorkspaceAgentState = "starting"
// WorkspaceAgentStateStartTimeout is reported when the startup script is
// still running after the startup script timeout has elapsed. The script may
// still complete afterwards.
WorkspaceAgentStateStartTimeout WorkspaceAgentState = "start_timeout"
// WorkspaceAgentStateStartError is reported when the startup script returns
// an error.
WorkspaceAgentStateStartError WorkspaceAgentState = "start_error"
// WorkspaceAgentStateReady is reported once the startup script has completed
// without error.
WorkspaceAgentStateReady WorkspaceAgentState = "ready"
)

type WorkspaceAgent struct {
ID uuid.UUID `json:"id" format:"uuid"`
CreatedAt time.Time `json:"created_at" format:"date-time"`
Expand All @@ -42,6 +52,7 @@ type WorkspaceAgent struct {
LastConnectedAt *time.Time `json:"last_connected_at,omitempty" format:"date-time"`
DisconnectedAt *time.Time `json:"disconnected_at,omitempty" format:"date-time"`
Status WorkspaceAgentStatus `json:"status" enums:"connecting,connected,disconnected,timeout"`
State WorkspaceAgentState `json:"state" enums:"starting,start_timeout,start_error,ready"`
Name string `json:"name"`
ResourceID uuid.UUID `json:"resource_id" format:"uuid"`
InstanceID string `json:"instance_id,omitempty"`
Expand Down Expand Up @@ -131,6 +142,7 @@ type WorkspaceAgentMetadata struct {
DERPMap *tailcfg.DERPMap `json:"derpmap"`
EnvironmentVariables map[string]string `json:"environment_variables"`
StartupScript string `json:"startup_script"`
StartupScriptTimeout time.Duration `json:"startup_script_timeout" format:"duration"`
Directory string `json:"directory"`
MOTDFile string `json:"motd_file"`
}
Expand Down Expand Up @@ -681,3 +693,21 @@ func (c *Client) WorkspaceAgentGitAuth(ctx context.Context, gitURL string, liste
var authResp WorkspaceAgentGitAuthResponse
return authResp, json.NewDecoder(res.Body).Decode(&authResp)
}

// PostWorkspaceAgentStateRequest is the request body sent to the
// report-state endpoint; it carries the lifecycle state being reported.
// @typescript-ignore PostWorkspaceAgentStateRequest
type PostWorkspaceAgentStateRequest struct {
// State is the lifecycle state the agent is reporting.
State WorkspaceAgentState `json:"state"`
}

// PostWorkspaceAgentState reports the agent's lifecycle state to the server
// by POSTing req to /api/v2/workspaceagents/me/report-state.
//
// The endpoint answers a successful report with 204 No Content, so that is
// the status treated as success here. Any other status is converted into an
// error with readBodyAsError. A transport-level failure is returned wrapped.
func (c *Client) PostWorkspaceAgentState(ctx context.Context, req PostWorkspaceAgentStateRequest) error {
	res, err := c.Request(ctx, http.MethodPost, "/api/v2/workspaceagents/me/report-state", req)
	if err != nil {
		return xerrors.Errorf("agent state post request: %w", err)
	}
	defer res.Body.Close()

	// The handler replies with http.StatusNoContent on success; checking for
	// http.StatusOK would turn every successful report into an error.
	if res.StatusCode != http.StatusNoContent {
		return readBodyAsError(res)
	}

	return nil
}