Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

fix(agent): start devcontainers through agentcontainers package #18471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
978c871
fix(agent): start devcontainers through agentcontainers package
DanielleMaywood Jun 20, 2025
fe99bd6
chore: appease formatter
DanielleMaywood Jun 20, 2025
aeae6e2
chore: fix test, appease linter
DanielleMaywood Jun 20, 2025
916f7e8
chore: feedback
DanielleMaywood Jun 24, 2025
53e256b
Merge branch 'main' into dm-devcontainer-log-spam
DanielleMaywood Jun 24, 2025
91bb43a
chore: re-add script timings
DanielleMaywood Jun 24, 2025
81fe11d
fix: change how containerAPI is stored
DanielleMaywood Jun 24, 2025
5c70a8c
Merge branch 'main' into dm-devcontainer-log-spam
DanielleMaywood Jun 24, 2025
8437ca4
chore: appease linter
DanielleMaywood Jun 24, 2025
2c6a2b1
chore: ensure the last log line is printed
DanielleMaywood Jun 24, 2025
a512ad4
chore: fix typo
DanielleMaywood Jun 24, 2025
c50dc6e
chore: OOPS
DanielleMaywood Jun 24, 2025
4d40ef2
chore: 1 -> 2
DanielleMaywood Jun 24, 2025
ce32e2e
chore: add a status to the timings
DanielleMaywood Jun 24, 2025
32ac48a
chore: initialize containerapi even earlier
DanielleMaywood Jun 24, 2025
738b755
chore: only enable when devcontainers are enabled
DanielleMaywood Jun 24, 2025
3714fec
chore: simplify things a little
DanielleMaywood Jun 24, 2025
996d440
chore: recreate -> create with argument
DanielleMaywood Jun 24, 2025
ae5dd1e
chore: ensure we close and init
DanielleMaywood Jun 24, 2025
9d76cf6
chore: appease linter
DanielleMaywood Jun 24, 2025
7285c39
chore: mock ReadConfig any time
DanielleMaywood Jun 24, 2025
3fce51c
chore: feedback
DanielleMaywood Jun 25, 2025
54aa84a
chore: feedback
DanielleMaywood Jun 25, 2025
3d08d0e
chore: only set status if not set, and run create in test
DanielleMaywood Jun 25, 2025
fa479fc
chore: feedback on poor error message
DanielleMaywood Jun 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 77 additions & 26 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type Options struct {
Execer agentexec.Execer
Devcontainers bool
DevcontainerAPIOptions []agentcontainers.Option // Enable Devcontainers for these to be effective.
Clock quartz.Clock
}

type Client interface {
Expand Down Expand Up @@ -144,6 +145,9 @@ func New(options Options) Agent {
if options.PortCacheDuration == 0 {
options.PortCacheDuration = 1 * time.Second
}
if options.Clock == nil {
options.Clock = quartz.NewReal()
}

prometheusRegistry := options.PrometheusRegistry
if prometheusRegistry == nil {
Expand All @@ -157,6 +161,7 @@ func New(options Options) Agent {
hardCtx, hardCancel := context.WithCancel(context.Background())
gracefulCtx, gracefulCancel := context.WithCancel(hardCtx)
a := &agent{
clock: options.Clock,
tailnetListenPort: options.TailnetListenPort,
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
logger: options.Logger,
Expand Down Expand Up @@ -204,6 +209,7 @@ func New(options Options) Agent {
}

type agent struct {
clock quartz.Clock
logger slog.Logger
client Client
exchangeToken func(ctx context.Context) (string, error)
Expand Down Expand Up @@ -273,7 +279,7 @@ type agent struct {

devcontainers bool
containerAPIOptions []agentcontainers.Option
containerAPI atomic.Pointer[agentcontainers.API] // Set by apiHandler.
containerAPI *agentcontainers.API
}

func (a *agent) TailnetConn() *tailnet.Conn {
Expand Down Expand Up @@ -330,6 +336,19 @@ func (a *agent) init() {
// will not report anywhere.
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)

if a.devcontainers {
containerAPIOpts := []agentcontainers.Option{
agentcontainers.WithExecer(a.execer),
agentcontainers.WithCommandEnv(a.sshServer.CommandEnv),
agentcontainers.WithScriptLogger(func(logSourceID uuid.UUID) agentcontainers.ScriptLogger {
return a.logSender.GetScriptLogger(logSourceID)
}),
}
containerAPIOpts = append(containerAPIOpts, a.containerAPIOptions...)

a.containerAPI = agentcontainers.NewAPI(a.logger.Named("containers"), containerAPIOpts...)
}

a.reconnectingPTYServer = reconnectingpty.NewServer(
a.logger.Named("reconnecting-pty"),
a.sshServer,
Expand Down Expand Up @@ -1141,15 +1160,18 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
}

var (
scripts = manifest.Scripts
scriptRunnerOpts []agentscripts.InitOption
scripts = manifest.Scripts
scriptRunnerOpts []agentscripts.InitOption
devcontainerScripts map[uuid.UUID]codersdk.WorkspaceAgentScript
)
if a.devcontainers {
var dcScripts []codersdk.WorkspaceAgentScript
scripts, dcScripts = agentcontainers.ExtractAndInitializeDevcontainerScripts(manifest.Devcontainers, scripts)
// See ExtractAndInitializeDevcontainerScripts for motivation
// behind running dcScripts as post start scripts.
scriptRunnerOpts = append(scriptRunnerOpts, agentscripts.WithPostStartScripts(dcScripts...))
a.containerAPI.Init(
agentcontainers.WithManifestInfo(manifest.OwnerName, manifest.WorkspaceName),
agentcontainers.WithDevcontainers(manifest.Devcontainers, scripts),
agentcontainers.WithSubAgentClient(agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)),
)

scripts, devcontainerScripts = agentcontainers.ExtractDevcontainerScripts(manifest.Devcontainers, scripts)
}
err = a.scriptRunner.Init(scripts, aAPI.ScriptCompleted, scriptRunnerOpts...)
if err != nil {
Expand All @@ -1168,7 +1190,12 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
// finished (both start and post start). For instance, an
// autostarted devcontainer will be included in this time.
err := a.scriptRunner.Execute(a.gracefulCtx, agentscripts.ExecuteStartScripts)
err = errors.Join(err, a.scriptRunner.Execute(a.gracefulCtx, agentscripts.ExecutePostStartScripts))

for _, dc := range manifest.Devcontainers {
cErr := a.createDevcontainer(ctx, aAPI, dc, devcontainerScripts[dc.ID])
err = errors.Join(err, cErr)
}

dur := time.Since(start).Seconds()
if err != nil {
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
Expand All @@ -1187,14 +1214,6 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
}
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
a.scriptRunner.StartCron()

// If the container API is enabled, trigger an immediate refresh
// for quick sub agent injection.
if cAPI := a.containerAPI.Load(); cAPI != nil {
if err := cAPI.RefreshContainers(ctx); err != nil {
a.logger.Error(ctx, "failed to refresh containers", slog.Error(err))
}
}
})
if err != nil {
return xerrors.Errorf("track conn goroutine: %w", err)
Expand All @@ -1204,6 +1223,38 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
}
}

func (a *agent) createDevcontainer(
ctx context.Context,
aAPI proto.DRPCAgentClient26,
dc codersdk.WorkspaceAgentDevcontainer,
script codersdk.WorkspaceAgentScript,
) (err error) {
var (
exitCode = int32(0)
startTime = a.clock.Now()
status = proto.Timing_OK
)
if err = a.containerAPI.CreateDevcontainer(dc.WorkspaceFolder, dc.ConfigPath); err != nil {
exitCode = 1
status = proto.Timing_EXIT_FAILURE
}
endTime := a.clock.Now()

if _, scriptErr := aAPI.ScriptCompleted(ctx, &proto.WorkspaceAgentScriptCompletedRequest{
Timing: &proto.Timing{
ScriptId: script.ID[:],
Start: timestamppb.New(startTime),
End: timestamppb.New(endTime),
ExitCode: exitCode,
Stage: proto.Timing_START,
Status: status,
},
}); scriptErr != nil {
a.logger.Warn(ctx, "reporting script completed failed", slog.Error(scriptErr))
}
return err
}

// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
// the tailnet using the information in the manifest
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error {
Expand All @@ -1227,7 +1278,6 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
// agent API.
network, err = a.createTailnet(
a.gracefulCtx,
aAPI,
manifest.AgentID,
manifest.DERPMap,
manifest.DERPForceWebSockets,
Expand Down Expand Up @@ -1262,9 +1312,9 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
network.SetBlockEndpoints(manifest.DisableDirectConnections)

// Update the subagent client if the container API is available.
if cAPI := a.containerAPI.Load(); cAPI != nil {
if a.containerAPI != nil {
client := agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)
cAPI.UpdateSubAgentClient(client)
a.containerAPI.UpdateSubAgentClient(client)
}
}
return nil
Expand Down Expand Up @@ -1382,7 +1432,6 @@ func (a *agent) trackGoroutine(fn func()) error {

func (a *agent) createTailnet(
ctx context.Context,
aAPI proto.DRPCAgentClient26,
agentID uuid.UUID,
derpMap *tailcfg.DERPMap,
derpForceWebSockets, disableDirectConnections bool,
Expand Down Expand Up @@ -1515,10 +1564,7 @@ func (a *agent) createTailnet(
}()
if err = a.trackGoroutine(func() {
defer apiListener.Close()
apiHandler, closeAPIHAndler := a.apiHandler(aAPI)
defer func() {
_ = closeAPIHAndler()
}()
apiHandler := a.apiHandler()
server := &http.Server{
BaseContext: func(net.Listener) context.Context { return ctx },
Handler: apiHandler,
Expand All @@ -1532,7 +1578,6 @@ func (a *agent) createTailnet(
case <-ctx.Done():
case <-a.hardCtx.Done():
}
_ = closeAPIHAndler()
_ = server.Close()
}()

Expand Down Expand Up @@ -1871,6 +1916,12 @@ func (a *agent) Close() error {
a.logger.Error(a.hardCtx, "script runner close", slog.Error(err))
}

if a.containerAPI != nil {
if err := a.containerAPI.Close(); err != nil {
a.logger.Error(a.hardCtx, "container API close", slog.Error(err))
}
}

// Wait for the graceful shutdown to complete, but don't wait forever so
// that we don't break user expectations.
go func() {
Expand Down
76 changes: 61 additions & 15 deletions agent/agentcontainers/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer, scri
api.devcontainerNames = make(map[string]bool, len(devcontainers))
api.devcontainerLogSourceIDs = make(map[string]uuid.UUID)
for _, dc := range devcontainers {
if dc.Status == "" {
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
}

api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.devcontainerNames[dc.Name] = true
for _, script := range scripts {
Expand Down Expand Up @@ -265,8 +269,6 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
api := &API{
ctx: ctx,
cancel: cancel,
watcherDone: make(chan struct{}),
updaterDone: make(chan struct{}),
initialUpdateDone: make(chan struct{}),
updateTrigger: make(chan chan error),
updateInterval: defaultUpdateInterval,
Expand Down Expand Up @@ -315,10 +317,28 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
api.subAgentClient.Store(&c)
}

return api
}

// Init applies a final set of options to the API and then
// begins the watcherLoop and updaterLoop. This function
// must only be called once.
func (api *API) Init(opts ...Option) {
api.mu.Lock()
defer api.mu.Unlock()
if api.closed {
return
}

for _, opt := range opts {
opt(api)
}

api.watcherDone = make(chan struct{})
api.updaterDone = make(chan struct{})

go api.watcherLoop()
go api.updaterLoop()

return api
}

func (api *API) watcherLoop() {
Expand Down Expand Up @@ -909,8 +929,9 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
dc.Container = nil
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.asyncWg.Add(1)
go api.recreateDevcontainer(dc, configPath)
go func() {
_ = api.CreateDevcontainer(dc.WorkspaceFolder, configPath, WithRemoveExistingContainer())
}()

api.mu.Unlock()

Expand All @@ -920,15 +941,29 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques
})
}

// recreateDevcontainer should run in its own goroutine and is responsible for
// CreateDevcontainer is responsible for creating or recreating a
// devcontainer based on the known devcontainer configuration for the
// given workspace folder.
// It updates the devcontainer status and logs the process. The configPath is
// passed as a parameter for the odd chance that the container being created
// has a different config file than the one stored in the devcontainer state.
// The function registers itself on asyncWg and may be called synchronously
// or from its own goroutine; it is a no-op after the API is closed.
func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, configPath string) {
func (api *API) CreateDevcontainer(workspaceFolder, configPath string, opts ...DevcontainerCLIUpOptions) error {
api.mu.Lock()
if api.closed {
api.mu.Unlock()
return nil
}

dc, found := api.knownDevcontainers[workspaceFolder]
if !found {
api.mu.Unlock()
return xerrors.Errorf("devcontainer not found")
}

api.asyncWg.Add(1)
defer api.asyncWg.Done()
api.mu.Unlock()

var (
err error
Expand Down Expand Up @@ -969,12 +1004,15 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con

logger.Debug(ctx, "starting devcontainer recreation")

_, err = api.dccli.Up(ctx, dc.WorkspaceFolder, configPath, WithUpOutput(infoW, errW), WithRemoveExistingContainer())
upOptions := []DevcontainerCLIUpOptions{WithUpOutput(infoW, errW)}
upOptions = append(upOptions, opts...)

_, err = api.dccli.Up(ctx, dc.WorkspaceFolder, configPath, upOptions...)
if err != nil {
// No need to log if the API is closing (context canceled), as this
// is expected behavior when the API is shutting down.
if !errors.Is(err, context.Canceled) {
logger.Error(ctx, "devcontainer recreation failed", slog.Error(err))
logger.Error(ctx, "devcontainer creation failed", slog.Error(err))
}

api.mu.Lock()
Expand All @@ -983,10 +1021,11 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "errorTimes")
api.mu.Unlock()
return

return xerrors.Errorf("start devcontainer: %w", err)
}

logger.Info(ctx, "devcontainer recreated successfully")
logger.Info(ctx, "devcontainer created successfully")

api.mu.Lock()
dc = api.knownDevcontainers[dc.WorkspaceFolder]
Expand All @@ -1009,8 +1048,11 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con
// Ensure an immediate refresh to accurately reflect the
// devcontainer state after recreation.
if err := api.RefreshContainers(ctx); err != nil {
logger.Error(ctx, "failed to trigger immediate refresh after devcontainer recreation", slog.Error(err))
logger.Error(ctx, "failed to trigger immediate refresh after devcontainer creation", slog.Error(err))
return xerrors.Errorf("refresh containers: %w", err)
}

return nil
}

// markDevcontainerDirty finds the devcontainer with the given config file path
Expand Down Expand Up @@ -1609,8 +1651,12 @@ func (api *API) Close() error {
err := api.watcher.Close()

// Wait for loops to finish.
<-api.watcherDone
<-api.updaterDone
if api.watcherDone != nil {
<-api.watcherDone
}
if api.updaterDone != nil {
<-api.updaterDone
}

// Wait for all async tasks to complete.
api.asyncWg.Wait()
Expand Down
Loading
Loading