Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 100 additions & 9 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type Client interface {
WorkspaceAgentMetadata(ctx context.Context) (codersdk.WorkspaceAgentMetadata, error)
ListenWorkspaceAgent(ctx context.Context) (net.Conn, error)
AgentReportStats(ctx context.Context, log slog.Logger, stats func() *codersdk.AgentStats) (io.Closer, error)
PostWorkspaceAgentLifecycle(ctx context.Context, state codersdk.PostWorkspaceAgentLifecycleRequest) error
PostWorkspaceAgentAppHealth(ctx context.Context, req codersdk.PostWorkspaceAppHealthsRequest) error
PostWorkspaceAgentVersion(ctx context.Context, version string) error
}
Expand Down Expand Up @@ -101,6 +102,7 @@ func New(options Options) io.Closer {
exchangeToken: options.ExchangeToken,
filesystem: options.Filesystem,
tempDir: options.TempDir,
lifecycleUpdate: make(chan struct{}, 1),
}
a.init(ctx)
return a
Expand All @@ -127,6 +129,10 @@ type agent struct {
sessionToken atomic.Pointer[string]
sshServer *ssh.Server

lifecycleUpdate chan struct{}
lifecycleMu sync.Mutex // Protects following.
lifecycleState codersdk.WorkspaceAgentLifecycle

network *tailnet.Conn
}

Expand All @@ -135,6 +141,8 @@ type agent struct {
// may be happening, but regardless after the intermittent
// failure, you'll want the agent to reconnect.
func (a *agent) runLoop(ctx context.Context) {
go a.reportLifecycleLoop(ctx)

for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
a.logger.Info(ctx, "running loop")
err := a.run(ctx)
Expand All @@ -156,6 +164,54 @@ func (a *agent) runLoop(ctx context.Context) {
}
}

// reportLifecycleLoop reports the current lifecycle state once.
// Only the latest state is reported, intermediate states may be
// lost if the agent can't communicate with the API.
func (a *agent) reportLifecycleLoop(ctx context.Context) {
Comment thread
mafredri marked this conversation as resolved.
var lastReported codersdk.WorkspaceAgentLifecycle
for {
select {
case <-a.lifecycleUpdate:
case <-ctx.Done():
return
}

for r := retry.New(time.Second, 15*time.Second); r.Wait(ctx); {
a.lifecycleMu.Lock()
state := a.lifecycleState
a.lifecycleMu.Unlock()

if state == lastReported {
continue
}

err := a.client.PostWorkspaceAgentLifecycle(ctx, codersdk.PostWorkspaceAgentLifecycleRequest{
State: state,
})
if err == nil {
lastReported = state
break
}
if xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
return
}
// If we fail to report the state we probably shouldn't exit, log only.
a.logger.Error(ctx, "post state", slog.Error(err))
}
}
}

func (a *agent) setLifecycle(state codersdk.WorkspaceAgentLifecycle) {
a.lifecycleMu.Lock()
defer a.lifecycleMu.Unlock()

a.lifecycleState = state
select {
case a.lifecycleUpdate <- struct{}{}:
default:
Comment thread
mafredri marked this conversation as resolved.
}
}

func (a *agent) run(ctx context.Context) error {
// This allows the agent to refresh its token if necessary.
// For instance identity this is required, since the instance
Expand All @@ -180,22 +236,57 @@ func (a *agent) run(ctx context.Context) error {

// The startup script should only execute on the first run!
if oldMetadata == nil {
scriptDone := make(chan error, 1)
scriptStart := time.Now()
go func() {
defer close(scriptDone)
scriptDone <- a.runStartupScript(ctx, metadata.StartupScript)
}()
go func() {
err := a.runStartupScript(ctx, metadata.StartupScript)
var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if metadata.StartupScriptTimeout > 0 {
t := time.NewTimer(metadata.StartupScriptTimeout)
defer t.Stop()
timeout = t.C
}

a.setLifecycle(codersdk.WorkspaceAgentLifecycleStarting)

var err error
select {
case err = <-scriptDone:
case <-timeout:
a.logger.Warn(ctx, "startup script timed out")
a.setLifecycle(codersdk.WorkspaceAgentLifecycleStartTimeout)
err = <-scriptDone // The script can still complete after a timeout.
}
if errors.Is(err, context.Canceled) {
return
}
execTime := time.Since(scriptStart)
lifecycleStatus := codersdk.WorkspaceAgentLifecycleReady
if err != nil {
a.logger.Warn(ctx, "agent script failed", slog.Error(err))
a.logger.Warn(ctx, "startup script failed", slog.F("execution_time", execTime), slog.Error(err))
lifecycleStatus = codersdk.WorkspaceAgentLifecycleStartError
} else {
a.logger.Info(ctx, "startup script completed", slog.F("execution_time", execTime))
}
}()
}

if metadata.GitAuthConfigs > 0 {
err = gitauth.OverrideVSCodeConfigs(a.filesystem)
if err != nil {
return xerrors.Errorf("override vscode configuration for git auth: %w", err)
}
// Perform overrides after startup script has completed to ensure
// there is no conflict with the user's scripts. We also want to
// ensure this is done before the workspace is marked as ready.
// Note, this is done even in the event that the startup script failed.
Comment thread
mafredri marked this conversation as resolved.
Outdated
if metadata.GitAuthConfigs > 0 {
err := gitauth.OverrideVSCodeConfigs(a.filesystem)
if err != nil {
a.logger.Warn(ctx, "failed to override vscode git auth configs", slog.Error(err))
}
}

a.setLifecycle(lifecycleStatus)
}()
}

// This automatically closes when the context ends!
Expand Down
Loading