From fca99174ad93deb2c1566dfa2df24b1ae759297b Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 10 Jun 2025 12:37:54 +0300 Subject: [PATCH 1/2] feat(agent/agentcontainers): implement sub agent injection (#18245) This change adds support for sub agent creation and injection into dev containers. Updates coder/internal#621 --- agent/agent.go | 6 +- agent/agent_test.go | 187 +++++- agent/agentcontainers/api.go | 539 +++++++++++++++--- agent/agentcontainers/api_test.go | 286 +++++++++- agent/agentcontainers/devcontainer.go | 2 + agent/agentcontainers/devcontainercli.go | 10 +- agent/agentcontainers/devcontainercli_test.go | 8 +- agent/agentcontainers/subagent.go | 128 +++++ agent/agenttest/client.go | 112 +++- agent/api.go | 4 +- cli/agent.go | 4 + cli/exp_rpty_test.go | 4 + cli/open_test.go | 18 +- cli/ssh_test.go | 8 +- coderd/workspaceagents_test.go | 48 +- 15 files changed, 1218 insertions(+), 146 deletions(-) create mode 100644 agent/agentcontainers/subagent.go diff --git a/agent/agent.go b/agent/agent.go index 74cf305c9434a..17298e7aa5772 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1188,7 +1188,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, // createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates // the tailnet using the information in the manifest func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error { - return func(ctx context.Context, _ proto.DRPCAgentClient26) (retErr error) { + return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) { if err := manifestOK.wait(ctx); err != nil { return xerrors.Errorf("no manifest: %w", err) } @@ -1208,6 +1208,7 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co // agent API. network, err = a.createTailnet( a.gracefulCtx, + aAPI, manifest.AgentID, manifest.DERPMap, manifest.DERPForceWebSockets, @@ -1355,6 +1356,7 @@ func (a *agent) trackGoroutine(fn func()) error { func (a *agent) createTailnet( ctx context.Context, + aAPI proto.DRPCAgentClient26, agentID uuid.UUID, derpMap *tailcfg.DERPMap, derpForceWebSockets, disableDirectConnections bool, @@ -1487,7 +1489,7 @@ func (a *agent) createTailnet( }() if err = a.trackGoroutine(func() { defer apiListener.Close() - apiHandler, closeAPIHAndler := a.apiHandler() + apiHandler, closeAPIHAndler := a.apiHandler(aAPI) defer func() { _ = closeAPIHAndler() }() diff --git a/agent/agent_test.go b/agent/agent_test.go index 3a2562237b603..3ef9e4f4c75ba 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -48,6 +48,7 @@ import ( "cdr.dev/slog/sloggers/slogtest" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agentssh" "github.com/coder/coder/v2/agent/agenttest" "github.com/coder/coder/v2/agent/proto" @@ -60,9 +61,16 @@ import ( "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet/tailnettest" "github.com/coder/coder/v2/testutil" + "github.com/coder/quartz" ) func TestMain(m *testing.M) { + if os.Getenv("CODER_TEST_RUN_SUB_AGENT_MAIN") == "1" { + // If we're running as a subagent, we don't want to run the main tests. + // Instead, we just run the subagent tests. + exit := runSubAgentMain() + os.Exit(exit) + } goleak.VerifyTestMain(m, testutil.GoleakOptions...) } @@ -1930,6 +1938,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { if os.Getenv("CODER_TEST_USE_DOCKER") != "1" { t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test") } + if _, err := exec.LookPath("devcontainer"); err != nil { + t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli") + } pool, err := dockertest.NewPool("") require.NoError(t, err, "Could not connect to docker") @@ -1955,6 +1966,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { // nolint: dogsled conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) ctx := testutil.Context(t, testutil.WaitLong) ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) { @@ -1986,6 +2000,60 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) { require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF) } +type subAgentRequestPayload struct { + Token string `json:"token"` + Directory string `json:"directory"` +} + +// runSubAgentMain is the main function for the sub-agent that connects +// to the control plane. It reads the CODER_AGENT_URL and +// CODER_AGENT_TOKEN environment variables, sends the token, and exits +// with a status code based on the response. +func runSubAgentMain() int { + url := os.Getenv("CODER_AGENT_URL") + token := os.Getenv("CODER_AGENT_TOKEN") + if url == "" || token == "" { + _, _ = fmt.Fprintln(os.Stderr, "CODER_AGENT_URL and CODER_AGENT_TOKEN must be set") + return 10 + } + + dir, err := os.Getwd() + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to get current working directory: %v\n", err) + return 1 + } + payload := subAgentRequestPayload{ + Token: token, + Directory: dir, + } + b, err := json.Marshal(payload) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to marshal payload: %v\n", err) + return 1 + } + + req, err := http.NewRequest("POST", url, bytes.NewReader(b)) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "failed to create request: %v\n", err) + return 1 + } + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + req = req.WithContext(ctx) + resp, err := http.DefaultClient.Do(req) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "agent connection failed: %v\n", err) + return 11 + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + _, _ = fmt.Fprintf(os.Stderr, "agent exiting with non-zero exit code %d\n", resp.StatusCode) + return 12 + } + _, _ = fmt.Println("sub-agent connected successfully") + return 0 +} + // This tests end-to-end functionality of auto-starting a devcontainer. // It runs "devcontainer up" which creates a real Docker container. As // such, it does not run by default in CI. @@ -1999,6 +2067,56 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { if os.Getenv("CODER_TEST_USE_DOCKER") != "1" { t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test") } + if _, err := exec.LookPath("devcontainer"); err != nil { + t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli") + } + + // This HTTP handler handles requests from runSubAgentMain which + // acts as a fake sub-agent. We want to verify that the sub-agent + // connects and sends its token. We use a channel to signal + // that the sub-agent has connected successfully and then we wait + // until we receive another signal to return from the handler. This + // keeps the agent "alive" for as long as we want. + subAgentConnected := make(chan subAgentRequestPayload, 1) + subAgentReady := make(chan struct{}, 1) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path) + + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + // Read the token from the request body. + var payload subAgentRequestPayload + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + http.Error(w, "Failed to read token", http.StatusBadRequest) + t.Logf("Failed to read token: %v", err) + return + } + defer r.Body.Close() + + t.Logf("Sub-agent request payload received: %+v", payload) + + // Signal that the sub-agent has connected successfully. + select { + case <-t.Context().Done(): + t.Logf("Test context done, not processing sub-agent request") + return + case subAgentConnected <- payload: + } + + // Wait for the signal to return from the handler. + select { + case <-t.Context().Done(): + t.Logf("Test context done, not waiting for sub-agent ready") + return + case <-subAgentReady: + } + + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() pool, err := dockertest.NewPool("") require.NoError(t, err, "Could not connect to docker") @@ -2016,9 +2134,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { require.NoError(t, err, "create devcontainer directory") devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json") err = os.WriteFile(devcontainerFile, []byte(`{ - "name": "mywork", - "image": "busybox:latest", - "cmd": ["sleep", "infinity"] + "name": "mywork", + "image": "ubuntu:latest", + "cmd": ["sleep", "infinity"], + "runArgs": ["--network=host"] }`), 0o600) require.NoError(t, err, "write devcontainer.json") @@ -2043,9 +2162,24 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { }, }, } + mClock := quartz.NewMock(t) + mClock.Set(time.Now()) + tickerFuncTrap := mClock.Trap().TickerFunc("agentcontainers") + //nolint:dogsled - conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { + _, agentClient, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append( + o.ContainerAPIOptions, + // Only match this specific dev container. + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", tempWorkspaceFolder), + agentcontainers.WithSubAgentURL(srv.URL), + // The agent will copy "itself", but in the case of this test, the + // agent is actually this test binary. So we'll tell the test binary + // to execute the sub-agent main function via this env. + agentcontainers.WithSubAgentEnv("CODER_TEST_RUN_SUB_AGENT_MAIN=1"), + ) }) t.Logf("Waiting for container with label: devcontainer.local_folder=%s", tempWorkspaceFolder) @@ -2089,32 +2223,34 @@ func TestAgent_DevcontainerAutostart(t *testing.T) { ctx := testutil.Context(t, testutil.WaitLong) - ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) { - opts.Container = container.ID - }) - require.NoError(t, err, "failed to create ReconnectingPTY") - defer ac.Close() + // Ensure the container update routine runs. + tickerFuncTrap.MustWait(ctx).MustRelease(ctx) + tickerFuncTrap.Close() + _, next := mClock.AdvanceNext() + next.MustWait(ctx) - // Use terminal reader so we can see output in case somethin goes wrong. - tr := testutil.NewTerminalReader(t, ac) + // Verify that a subagent was created. + subAgents := agentClient.GetSubAgents() + require.Len(t, subAgents, 1, "expected one sub agent") - require.NoError(t, tr.ReadUntil(ctx, func(line string) bool { - return strings.Contains(line, "#") || strings.Contains(line, "$") - }), "find prompt") + subAgent := subAgents[0] + subAgentID, err := uuid.FromBytes(subAgent.GetId()) + require.NoError(t, err, "failed to parse sub-agent ID") + t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID) - wantFileName := "file-from-devcontainer" - wantFile := filepath.Join(tempWorkspaceFolder, wantFileName) + gotDir, err := agentClient.GetSubAgentDirectory(subAgentID) + require.NoError(t, err, "failed to get sub-agent directory") + require.Equal(t, "/workspaces/mywork", gotDir, "sub-agent directory should match") - require.NoError(t, json.NewEncoder(ac).Encode(workspacesdk.ReconnectingPTYRequest{ - // NOTE(mafredri): We must use absolute path here for some reason. - Data: fmt.Sprintf("touch /workspaces/mywork/%s; exit\r", wantFileName), - }), "create file inside devcontainer") + subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken()) + require.NoError(t, err, "failed to parse sub-agent token") - // Wait for the connection to close to ensure the touch was executed. - require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF) + payload := testutil.RequireReceive(ctx, t, subAgentConnected) + require.Equal(t, subAgentToken.String(), payload.Token, "sub-agent token should match") + require.Equal(t, "/workspaces/mywork", payload.Directory, "sub-agent directory should match") - _, err = os.Stat(wantFile) - require.NoError(t, err, "file should exist outside devcontainer") + // Allow the subagent to exit. + close(subAgentReady) } // TestAgent_DevcontainerRecreate tests that RecreateDevcontainer @@ -2173,6 +2309,9 @@ func TestAgent_DevcontainerRecreate(t *testing.T) { //nolint:dogsled conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", workspaceFolder), + ) }) ctx := testutil.Context(t, testutil.WaitLong) diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index d826cb23cbc1f..301553c651048 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -1,11 +1,16 @@ package agentcontainers import ( + "bytes" "context" "errors" "fmt" + "io" "net/http" + "os" "path" + "path/filepath" + "runtime" "slices" "strings" "sync" @@ -26,27 +31,37 @@ import ( ) const ( - defaultUpdateInterval = 10 * time.Second - listContainersTimeout = 15 * time.Second + defaultUpdateInterval = 10 * time.Second + defaultOperationTimeout = 15 * time.Second + + // Destination path inside the container, we store it in a fixed location + // under /.coder-agent/coder to avoid conflicts and avoid being shadowed + // by tmpfs or other mounts. This assumes the container root filesystem is + // read-write, which seems sensible for dev containers. + coderPathInsideContainer = "/.coder-agent/coder" ) // API is responsible for container-related operations in the agent. // It provides methods to list and manage containers. type API struct { - ctx context.Context - cancel context.CancelFunc - watcherDone chan struct{} - updaterDone chan struct{} - initialUpdateDone chan struct{} // Closed after first update in updaterLoop. - updateTrigger chan chan error // Channel to trigger manual refresh. - updateInterval time.Duration // Interval for periodic container updates. - logger slog.Logger - watcher watcher.Watcher - execer agentexec.Execer - ccli ContainerCLI - dccli DevcontainerCLI - clock quartz.Clock - scriptLogger func(logSourceID uuid.UUID) ScriptLogger + ctx context.Context + cancel context.CancelFunc + watcherDone chan struct{} + updaterDone chan struct{} + initialUpdateDone chan struct{} // Closed after first update in updaterLoop. + updateTrigger chan chan error // Channel to trigger manual refresh. + updateInterval time.Duration // Interval for periodic container updates. + logger slog.Logger + watcher watcher.Watcher + execer agentexec.Execer + ccli ContainerCLI + containerLabelIncludeFilter map[string]string // Labels to filter containers by. + dccli DevcontainerCLI + clock quartz.Clock + scriptLogger func(logSourceID uuid.UUID) ScriptLogger + subAgentClient SubAgentClient + subAgentURL string + subAgentEnv []string mu sync.RWMutex closed bool @@ -57,11 +72,18 @@ type API struct { configFileModifiedTimes map[string]time.Time // By config file path. recreateSuccessTimes map[string]time.Time // By workspace folder. recreateErrorTimes map[string]time.Time // By workspace folder. - recreateWg sync.WaitGroup + injectedSubAgentProcs map[string]subAgentProcess // By container ID. + asyncWg sync.WaitGroup devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder. } +type subAgentProcess struct { + agent SubAgent + ctx context.Context + stop context.CancelFunc +} + // Option is a functional option for API. type Option func(*API) @@ -88,6 +110,16 @@ func WithContainerCLI(ccli ContainerCLI) Option { } } +// WithContainerLabelIncludeFilter sets a label filter for containers. +// This option can be given multiple times to filter by multiple labels. +// The behavior is such that only containers matching one or more of the +// provided labels will be included. +func WithContainerLabelIncludeFilter(label, value string) Option { + return func(api *API) { + api.containerLabelIncludeFilter[label] = value + } +} + // WithDevcontainerCLI sets the DevcontainerCLI implementation to use. // This can be used in tests to modify @devcontainer/cli behavior. func WithDevcontainerCLI(dccli DevcontainerCLI) Option { @@ -96,6 +128,29 @@ func WithDevcontainerCLI(dccli DevcontainerCLI) Option { } } +// WithSubAgentClient sets the SubAgentClient implementation to use. +// This is used to list, create and delete Dev Container agents. +func WithSubAgentClient(client SubAgentClient) Option { + return func(api *API) { + api.subAgentClient = client + } +} + +// WithSubAgentURL sets the agent URL for the sub-agent for +// communicating with the control plane. +func WithSubAgentURL(url string) Option { + return func(api *API) { + api.subAgentURL = url + } +} + +// WithSubAgent sets the environment variables for the sub-agent. +func WithSubAgentEnv(env ...string) Option { + return func(api *API) { + api.subAgentEnv = env + } +} + // WithDevcontainers sets the known devcontainers for the API. This // allows the API to be aware of devcontainers defined in the workspace // agent manifest. @@ -164,22 +219,25 @@ func WithScriptLogger(scriptLogger func(logSourceID uuid.UUID) ScriptLogger) Opt func NewAPI(logger slog.Logger, options ...Option) *API { ctx, cancel := context.WithCancel(context.Background()) api := &API{ - ctx: ctx, - cancel: cancel, - watcherDone: make(chan struct{}), - updaterDone: make(chan struct{}), - initialUpdateDone: make(chan struct{}), - updateTrigger: make(chan chan error), - updateInterval: defaultUpdateInterval, - logger: logger, - clock: quartz.NewReal(), - execer: agentexec.DefaultExecer, - devcontainerNames: make(map[string]bool), - knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), - configFileModifiedTimes: make(map[string]time.Time), - recreateSuccessTimes: make(map[string]time.Time), - recreateErrorTimes: make(map[string]time.Time), - scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, + ctx: ctx, + cancel: cancel, + watcherDone: make(chan struct{}), + updaterDone: make(chan struct{}), + initialUpdateDone: make(chan struct{}), + updateTrigger: make(chan chan error), + updateInterval: defaultUpdateInterval, + logger: logger, + clock: quartz.NewReal(), + execer: agentexec.DefaultExecer, + subAgentClient: noopSubAgentClient{}, + containerLabelIncludeFilter: make(map[string]string), + devcontainerNames: make(map[string]bool), + knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), + configFileModifiedTimes: make(map[string]time.Time), + recreateSuccessTimes: make(map[string]time.Time), + recreateErrorTimes: make(map[string]time.Time), + scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, + injectedSubAgentProcs: make(map[string]subAgentProcess), } // The ctx and logger must be set before applying options to avoid // nil pointer dereference. @@ -230,7 +288,7 @@ func (api *API) watcherLoop() { continue } - now := api.clock.Now("watcherLoop") + now := api.clock.Now("agentcontainers", "watcherLoop") switch { case event.Has(fsnotify.Create | fsnotify.Write): api.logger.Debug(api.ctx, "devcontainer config file changed", slog.F("file", event.Name)) @@ -254,6 +312,15 @@ func (api *API) updaterLoop() { defer api.logger.Debug(api.ctx, "updater loop stopped") api.logger.Debug(api.ctx, "updater loop started") + // Make sure we clean up any subagents not tracked by this process + // before starting the update loop and creating new ones. + api.logger.Debug(api.ctx, "cleaning up subagents") + if err := api.cleanupSubAgents(api.ctx); err != nil { + api.logger.Error(api.ctx, "cleanup subagents failed", slog.Error(err)) + } else { + api.logger.Debug(api.ctx, "cleanup subagents complete") + } + // Perform an initial update to populate the container list, this // gives us a guarantee that the API has loaded the initial state // before returning any responses. This is useful for both tests @@ -288,9 +355,9 @@ func (api *API) updaterLoop() { } return nil // Always nil to keep the ticker going. - }, "updaterLoop") + }, "agentcontainers", "updaterLoop") defer func() { - if err := ticker.Wait("updaterLoop"); err != nil && !errors.Is(err, context.Canceled) { + if err := ticker.Wait("agentcontainers", "updaterLoop"); err != nil && !errors.Is(err, context.Canceled) { api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err)) } }() @@ -360,7 +427,7 @@ func (api *API) handleList(rw http.ResponseWriter, r *http.Request) { // updateContainers fetches the latest container list, processes it, and // updates the cache. It performs locking for updating shared API state. func (api *API) updateContainers(ctx context.Context) error { - listCtx, listCancel := context.WithTimeout(ctx, listContainersTimeout) + listCtx, listCancel := context.WithTimeout(ctx, defaultOperationTimeout) defer listCancel() updated, err := api.ccli.List(listCtx) @@ -395,6 +462,20 @@ func (api *API) updateContainers(ctx context.Context) error { // on the latest list of containers. This method assumes that api.mu is // held. func (api *API) processUpdatedContainersLocked(ctx context.Context, updated codersdk.WorkspaceAgentListContainersResponse) { + dcFields := func(dc codersdk.WorkspaceAgentDevcontainer) []slog.Field { + f := []slog.Field{ + slog.F("devcontainer_id", dc.ID), + slog.F("devcontainer_name", dc.Name), + slog.F("workspace_folder", dc.WorkspaceFolder), + slog.F("config_path", dc.ConfigPath), + } + if dc.Container != nil { + f = append(f, slog.F("container_id", dc.Container.ID)) + f = append(f, slog.F("container_name", dc.Container.FriendlyName)) + } + return f + } + // Reset the container links in known devcontainers to detect if // they still exist. for _, dc := range api.knownDevcontainers { @@ -415,6 +496,29 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } + logger := api.logger.With( + slog.F("container_id", updated.Containers[i].ID), + slog.F("container_name", updated.Containers[i].FriendlyName), + slog.F("workspace_folder", workspaceFolder), + slog.F("config_file", configFile), + ) + + if len(api.containerLabelIncludeFilter) > 0 { + var ok bool + for label, value := range api.containerLabelIncludeFilter { + if v, found := container.Labels[label]; found && v == value { + ok = true + } + } + // Verbose debug logging is fine here since typically filters + // are only used in development or testing environments. + if !ok { + logger.Debug(ctx, "container does not match include filter, ignoring dev container", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter)) + continue + } + logger.Debug(ctx, "container matches include filter, processing dev container", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter)) + } + if dc, ok := api.knownDevcontainers[workspaceFolder]; ok { // If no config path is set, this devcontainer was defined // in Terraform without the optional config file. Assume the @@ -423,7 +527,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code if dc.ConfigPath == "" && configFile != "" { dc.ConfigPath = configFile if err := api.watcher.Add(configFile); err != nil { - api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) + logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err)) } } @@ -432,53 +536,47 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } - // NOTE(mafredri): This name impl. may change to accommodate devcontainer agents RFC. - // If not in our known list, add as a runtime detected entry. - name := path.Base(workspaceFolder) - if api.devcontainerNames[name] { - // Try to find a unique name by appending a number. - for i := 2; ; i++ { - newName := fmt.Sprintf("%s-%d", name, i) - if !api.devcontainerNames[newName] { - name = newName - break - } - } - } - api.devcontainerNames[name] = true - if configFile != "" { - if err := api.watcher.Add(configFile); err != nil { - api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) - } - } - - api.knownDevcontainers[workspaceFolder] = codersdk.WorkspaceAgentDevcontainer{ + dc := codersdk.WorkspaceAgentDevcontainer{ ID: uuid.New(), - Name: name, + Name: "", // Updated later based on container state. WorkspaceFolder: workspaceFolder, ConfigPath: configFile, Status: "", // Updated later based on container state. Dirty: false, // Updated later based on config file changes. Container: container, } + + if configFile != "" { + if err := api.watcher.Add(configFile); err != nil { + logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err)) + } + } + + api.knownDevcontainers[workspaceFolder] = dc } // Iterate through all known devcontainers and update their status // based on the current state of the containers. for _, dc := range api.knownDevcontainers { + logger := api.logger.With(dcFields(dc)...) + + if dc.Container != nil { + if !api.devcontainerNames[dc.Name] { + // If the devcontainer name wasn't set via terraform, we + // use the containers friendly name as a fallback which + // will keep changing as the dev container is recreated. + // TODO(mafredri): Parse the container label (i.e. devcontainer.json) for customization. + dc.Name = safeFriendlyName(dc.Container.FriendlyName) + } + dc.Container.DevcontainerStatus = dc.Status + dc.Container.DevcontainerDirty = dc.Dirty + } + switch { case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting: - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // This state is handled by the recreation routine. case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusError && (dc.Container == nil || dc.Container.CreatedAt.Before(api.recreateErrorTimes[dc.WorkspaceFolder])): - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // The devcontainer needs to be recreated. case dc.Container != nil: @@ -494,7 +592,17 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code } dc.Container.DevcontainerDirty = dc.Dirty + if _, injected := api.injectedSubAgentProcs[dc.Container.ID]; !injected && dc.Status == codersdk.WorkspaceAgentDevcontainerStatusRunning { + err := api.injectSubAgentIntoContainerLocked(ctx, dc) + if err != nil { + logger.Error(ctx, "inject subagent into container failed", slog.Error(err)) + } + } + case dc.Container == nil: + if !api.devcontainerNames[dc.Name] { + dc.Name = "" + } dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped dc.Dirty = false } @@ -507,6 +615,18 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code api.containersErr = nil } +// safeFriendlyName returns a API safe version of the container's +// friendly name. +// +// See provisioner/regexes.go for the regex used to validate +// the friendly name on the API side. +func safeFriendlyName(name string) string { + name = strings.ToLower(name) + name = strings.ReplaceAll(name, "_", "-") + + return name +} + // refreshContainers triggers an immediate update of the container list // and waits for it to complete. func (api *API) refreshContainers(ctx context.Context) (err error) { @@ -624,7 +744,7 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques dc.Container.DevcontainerStatus = dc.Status } api.knownDevcontainers[dc.WorkspaceFolder] = dc - api.recreateWg.Add(1) + api.asyncWg.Add(1) go api.recreateDevcontainer(dc, configPath) api.mu.Unlock() @@ -640,10 +760,10 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques // It updates the devcontainer status and logs the process. The configPath is // passed as a parameter for the odd chance that the container being recreated // has a different config file than the one stored in the devcontainer state. -// The devcontainer state must be set to starting and the recreateWg must be +// The devcontainer state must be set to starting and the asyncWg must be // incremented before calling this function. func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, configPath string) { - defer api.recreateWg.Done() + defer api.asyncWg.Done() var ( err error @@ -699,7 +819,7 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con dc.Container.DevcontainerStatus = dc.Status } api.knownDevcontainers[dc.WorkspaceFolder] = dc - api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("recreate", "errorTimes") + api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "errorTimes") api.mu.Unlock() return } @@ -721,7 +841,7 @@ func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, con dc.Container.DevcontainerStatus = dc.Status } dc.Dirty = false - api.recreateSuccessTimes[dc.WorkspaceFolder] = api.clock.Now("recreate", "successTimes") + api.recreateSuccessTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "successTimes") api.knownDevcontainers[dc.WorkspaceFolder] = dc api.mu.Unlock() @@ -803,6 +923,269 @@ func (api *API) markDevcontainerDirty(configPath string, modifiedAt time.Time) { } } +// cleanupSubAgents removes subagents that are no longer managed by +// this agent. This is usually only run at startup to ensure a clean +// slate. This method has an internal timeout to prevent blocking +// indefinitely if something goes wrong with the subagent deletion. +func (api *API) cleanupSubAgents(ctx context.Context) error { + agents, err := api.subAgentClient.List(ctx) + if err != nil { + return xerrors.Errorf("list agents: %w", err) + } + if len(agents) == 0 { + return nil + } + + api.mu.Lock() + defer api.mu.Unlock() + + injected := make(map[uuid.UUID]bool, len(api.injectedSubAgentProcs)) + for _, proc := range api.injectedSubAgentProcs { + injected[proc.agent.ID] = true + } + + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + for _, agent := range agents { + if injected[agent.ID] { + continue + } + err := api.subAgentClient.Delete(ctx, agent.ID) + if err != nil { + api.logger.Error(ctx, "failed to delete agent", + slog.Error(err), + slog.F("agent_id", agent.ID), + slog.F("agent_name", agent.Name), + ) + } + } + + return nil +} + +// injectSubAgentIntoContainerLocked injects a subagent into a dev +// container and starts the subagent process. This method assumes that +// api.mu is held. +// +// This method uses an internal timeout to prevent blocking indefinitely +// if something goes wrong with the injection. +func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer) (err error) { + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + container := dc.Container + if container == nil { + return xerrors.New("container is nil, cannot inject subagent") + } + + // Skip if subagent already exists for this container. + if _, injected := api.injectedSubAgentProcs[container.ID]; injected || api.closed { + return nil + } + + // Mark subagent as being injected immediately with a placeholder. + subAgent := subAgentProcess{ + ctx: context.Background(), + stop: func() {}, + } + api.injectedSubAgentProcs[container.ID] = subAgent + + // This is used to track the goroutine that will run the subagent + // process inside the container. It will be decremented when the + // subagent process completes or if an error occurs before we can + // start the subagent. + api.asyncWg.Add(1) + ranSubAgent := false + + // Clean up if injection fails. + defer func() { + if !ranSubAgent { + api.asyncWg.Done() + } + if err != nil { + // Mutex is held (defer re-lock). + delete(api.injectedSubAgentProcs, container.ID) + } + }() + + // Unlock the mutex to allow other operations while we + // inject the subagent into the container. + api.mu.Unlock() + defer api.mu.Lock() // Re-lock. + + logger := api.logger.With( + slog.F("devcontainer_id", dc.ID), + slog.F("devcontainer_name", dc.Name), + slog.F("workspace_folder", dc.WorkspaceFolder), + slog.F("config_path", dc.ConfigPath), + ) + + arch, err := api.ccli.DetectArchitecture(ctx, container.ID) + if err != nil { + return xerrors.Errorf("detect architecture: %w", err) + } + + logger.Info(ctx, "detected container architecture", slog.F("architecture", arch)) + + // For now, only support injecting if the architecture matches the host. + hostArch := runtime.GOARCH + + // TODO(mafredri): Add support for downloading agents for supported architectures. + if arch != hostArch { + logger.Warn(ctx, "skipping subagent injection for unsupported architecture", + slog.F("container_arch", arch), + slog.F("host_arch", hostArch)) + return nil + } + agentBinaryPath, err := os.Executable() + if err != nil { + return xerrors.Errorf("get agent binary path: %w", err) + } + agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath) + if err != nil { + return xerrors.Errorf("resolve agent binary path: %w", err) + } + + // If we scripted this as a `/bin/sh` script, we could reduce these + // steps to one instruction, speeding up the injection process. + // + // Note: We use `path` instead of `filepath` here because we are + // working with Unix-style paths inside the container. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil { + return xerrors.Errorf("create agent directory in container: %w", err) + } + + if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil { + return xerrors.Errorf("copy agent binary: %w", err) + } + + logger.Info(ctx, "copied agent binary to container") + + // Make sure the agent binary is executable so we can run it. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "0755", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil { + return xerrors.Errorf("set agent binary executable: %w", err) + } + // Set the owner of the agent binary to root:root (UID 0, GID 0). + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chown", "0:0", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil { + return xerrors.Errorf("set agent binary owner: %w", err) + } + + // Attempt to add CAP_NET_ADMIN to the binary to improve network + // performance (optional, allow to fail). See `bootstrap_linux.sh`. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil { + logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err)) + } + + // Detect workspace folder by executing `pwd` in the container. + // NOTE(mafredri): This is a quick and dirty way to detect the + // workspace folder inside the container. In the future we will + // rely more on `devcontainer read-configuration`. + var pwdBuf bytes.Buffer + err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{}, + WithExecOutput(&pwdBuf, io.Discard), + WithExecContainerID(container.ID), + ) + if err != nil { + return xerrors.Errorf("check workspace folder in container: %w", err) + } + directory := strings.TrimSpace(pwdBuf.String()) + if directory == "" { + logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder", + slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder)) + directory = DevcontainerDefaultContainerWorkspaceFolder + } + + // The preparation of the subagent is done, now we can create the + // subagent record in the database to receive the auth token. + createdAgent, err := api.subAgentClient.Create(ctx, SubAgent{ + Name: dc.Name, + Directory: directory, + OperatingSystem: "linux", // Assuming Linux for dev containers. + Architecture: arch, + }) + if err != nil { + return xerrors.Errorf("create agent: %w", err) + } + + logger.Info(ctx, "created subagent record", slog.F("agent_id", createdAgent.ID)) + + // Start the subagent in the container in a new goroutine to avoid + // blocking. Note that we pass the api.ctx to the subagent process + // so that it isn't affected by the timeout. + go api.runSubAgentInContainer(api.ctx, dc, createdAgent, coderPathInsideContainer) + ranSubAgent = true + + return nil +} + +// runSubAgentInContainer runs the subagent process inside a dev +// container. The api.asyncWg must be incremented before calling this +// function, and it will be decremented when the subagent process +// completes or if an error occurs. +func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer, agent SubAgent, agentPath string) { + container := dc.Container // Must not be nil. + logger := api.logger.With( + slog.F("container_name", container.FriendlyName), + slog.F("agent_id", agent.ID), + ) + + agentCtx, agentStop := context.WithCancel(ctx) + defer func() { + agentStop() + + // Best effort cleanup of the agent record after the process + // completes. Note that we use the background context here + // because the api.ctx will be canceled when the API is closed. + // This may delay shutdown of the agent by the given timeout. + deleteCtx, cancel := context.WithTimeout(context.Background(), defaultOperationTimeout) + defer cancel() + err := api.subAgentClient.Delete(deleteCtx, agent.ID) + if err != nil { + logger.Error(deleteCtx, "failed to delete agent record after process completion", slog.Error(err)) + } + + api.mu.Lock() + delete(api.injectedSubAgentProcs, container.ID) + api.mu.Unlock() + + logger.Debug(ctx, "agent process cleanup complete") + api.asyncWg.Done() + }() + + api.mu.Lock() + if api.closed { + api.mu.Unlock() + // If the API is closed, we should not run the agent. + logger.Debug(ctx, "the API is closed, not running subagent in container") + return + } + // Update the placeholder with a valid subagent, context and stop. + api.injectedSubAgentProcs[container.ID] = subAgentProcess{ + agent: agent, + ctx: agentCtx, + stop: agentStop, + } + api.mu.Unlock() + + logger.Info(ctx, "starting subagent in dev container") + + env := []string{ + "CODER_AGENT_URL=" + api.subAgentURL, + "CODER_AGENT_TOKEN=" + agent.AuthToken.String(), + } + env = append(env, api.subAgentEnv...) + err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, + WithExecContainerID(container.ID), + WithRemoteEnv(env...), + ) + if err != nil && !errors.Is(err, context.Canceled) { + logger.Error(ctx, "subagent process failed", slog.Error(err)) + } else { + logger.Info(ctx, "subagent process finished") + } +} + func (api *API) Close() error { api.mu.Lock() if api.closed { @@ -811,6 +1194,12 @@ func (api *API) Close() error { } api.logger.Debug(api.ctx, "closing API") api.closed = true + + for _, proc := range api.injectedSubAgentProcs { + api.logger.Debug(api.ctx, "canceling subagent process", slog.F("agent_name", proc.agent.Name), slog.F("agent_id", proc.agent.ID)) + proc.stop() + } + api.cancel() // Interrupt all routines. api.mu.Unlock() // Release lock before waiting for goroutines. @@ -821,8 +1210,8 @@ func (api *API) Close() error { <-api.watcherDone <-api.updaterDone - // Wait for all devcontainer recreation tasks to complete. - api.recreateWg.Wait() + // Wait for all async tasks to complete. + api.asyncWg.Wait() api.logger.Debug(api.ctx, "closed API") return err diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index 313da6f9f615f..59b0461c7948a 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -6,6 +6,8 @@ import ( "math/rand" "net/http" "net/http/httptest" + "os" + "runtime" "strings" "testing" "time" @@ -62,7 +64,7 @@ type fakeDevcontainerCLI struct { upErr error upErrC chan error // If set, send to return err, close to return upErr. execErr error - execErrC chan error // If set, send to return err, close to return execErr. + execErrC chan func(cmd string, args ...string) error // If set, send fn to return err, nil or close to return execErr. } func (f *fakeDevcontainerCLI) Up(ctx context.Context, _, _ string, _ ...agentcontainers.DevcontainerCLIUpOptions) (string, error) { @@ -79,14 +81,14 @@ func (f *fakeDevcontainerCLI) Up(ctx context.Context, _, _ string, _ ...agentcon return f.upID, f.upErr } -func (f *fakeDevcontainerCLI) Exec(ctx context.Context, _, _ string, _ string, _ []string, _ ...agentcontainers.DevcontainerCLIExecOptions) error { +func (f *fakeDevcontainerCLI) Exec(ctx context.Context, _, _ string, cmd string, args []string, _ ...agentcontainers.DevcontainerCLIExecOptions) error { if f.execErrC != nil { select { case <-ctx.Done(): return ctx.Err() - case err, ok := <-f.execErrC: - if ok { - return err + case fn, ok := <-f.execErrC: + if ok && fn != nil { + return fn(cmd, args...) } } } @@ -190,6 +192,80 @@ func (w *fakeWatcher) sendEventWaitNextCalled(ctx context.Context, event fsnotif w.waitNext(ctx) } +// fakeSubAgentClient implements SubAgentClient for testing purposes. +type fakeSubAgentClient struct { + agents map[uuid.UUID]agentcontainers.SubAgent + nextID int + + listErrC chan error // If set, send to return error, close to return nil. + created []agentcontainers.SubAgent + createErrC chan error // If set, send to return error, close to return nil. + deleted []uuid.UUID + deleteErrC chan error // If set, send to return error, close to return nil. +} + +func (m *fakeSubAgentClient) List(ctx context.Context) ([]agentcontainers.SubAgent, error) { + var listErr error + if m.listErrC != nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case err, ok := <-m.listErrC: + if ok { + listErr = err + } + } + } + var agents []agentcontainers.SubAgent + for _, agent := range m.agents { + agents = append(agents, agent) + } + return agents, listErr +} + +func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.SubAgent) (agentcontainers.SubAgent, error) { + var createErr error + if m.createErrC != nil { + select { + case <-ctx.Done(): + return agentcontainers.SubAgent{}, ctx.Err() + case err, ok := <-m.createErrC: + if ok { + createErr = err + } + } + } + m.nextID++ + agent.ID = uuid.New() + agent.AuthToken = uuid.New() + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + m.agents[agent.ID] = agent + m.created = append(m.created, agent) + return agent, createErr +} + +func (m *fakeSubAgentClient) Delete(ctx context.Context, id uuid.UUID) error { + var deleteErr error + if m.deleteErrC != nil { + select { + case <-ctx.Done(): + return ctx.Err() + case err, ok := <-m.deleteErrC: + if ok { + deleteErr = err + } + } + } + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + delete(m.agents, id) + m.deleted = append(m.deleted, id) + return deleteErr +} + func TestAPI(t *testing.T) { t.Parallel() @@ -286,6 +362,7 @@ func TestAPI(t *testing.T) { api := agentcontainers.NewAPI(logger, agentcontainers.WithClock(mClock), agentcontainers.WithContainerCLI(mLister), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), ) defer api.Close() r.Mount("/", api.Routes()) @@ -347,7 +424,7 @@ func TestAPI(t *testing.T) { FriendlyName: "container-name", Running: true, Labels: map[string]string{ - agentcontainers.DevcontainerLocalFolderLabel: "/workspace", + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", }, } @@ -415,6 +492,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{ upErr: xerrors.New("devcontainer CLI error"), @@ -429,6 +507,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{}, wantStatus: []int{http.StatusAccepted, http.StatusConflict}, @@ -1151,6 +1230,201 @@ func TestAPI(t *testing.T) { assert.False(t, response.Devcontainers[0].Container.DevcontainerDirty, "dirty flag should be cleared on the container after container recreation") }) + + t.Run("SubAgentLifecycle", func(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("Dev Container tests are not supported on Windows (this test uses mocks but fails due to Windows paths)") + } + + var ( + ctx = testutil.Context(t, testutil.WaitMedium) + errTestTermination = xerrors.New("test termination") + logger = slogtest.Make(t, &slogtest.Options{IgnoredErrorIs: []error{errTestTermination}}).Leveled(slog.LevelDebug) + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + createErrC: make(chan error, 1), + deleteErrC: make(chan error, 1), + } + fakeDCCLI = &fakeDevcontainerCLI{ + execErrC: make(chan func(cmd string, args ...string) error, 1), + } + + testContainer = codersdk.WorkspaceAgentContainer{ + ID: "test-container-id", + FriendlyName: "test-container", + Image: "test-image", + Running: true, + CreatedAt: time.Now(), + Labels: map[string]string{ + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", + agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", + }, + } + ) + + coderBin, err := os.Executable() + require.NoError(t, err) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{testContainer}, + }, nil).AnyTimes() + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chown", "0:0", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), + ) + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithWatcher(watcher.NewNoop()), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithSubAgentURL("test-subagent-url"), + agentcontainers.WithDevcontainerCLI(fakeDCCLI), + ) + defer api.Close() + + // Close before api.Close() defer to avoid deadlock after test. + defer close(fakeSAC.createErrC) + defer close(fakeSAC.deleteErrC) + defer close(fakeDCCLI.execErrC) + + // Allow initial agent creation and injection to succeed. + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error { + assert.Equal(t, "pwd", cmd) + assert.Empty(t, args) + return nil + }) // Exec pwd. + + // Make sure the ticker function has been registered + // before advancing the clock. + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + // Ensure we only inject the agent once. + for i := range 3 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created)) + + // Verify agent was created. + require.Len(t, fakeSAC.created, 1) + assert.Equal(t, "test-container", fakeSAC.created[0].Name) + assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory) + assert.Len(t, fakeSAC.deleted, 0) + } + + t.Log("Agent injected successfully, now testing cleanup and reinjection...") + + // Expect the agent to be reinjected. + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chown", "0:0", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), + ) + + // Terminate the agent and verify it is deleted. + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(_ string, args ...string) error { + if len(args) > 0 { + assert.Equal(t, "agent", args[0]) + } else { + assert.Fail(t, `want "agent" command argument`) + } + return errTestTermination + }) + + // Allow cleanup to proceed. + testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, nil) + + t.Log("Waiting for agent recreation...") + + // Allow agent recreation and reinjection to succeed. + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error { + assert.Equal(t, "pwd", cmd) + assert.Empty(t, args) + return nil + }) // Exec pwd. + + // Wait until the agent recreation is started. + for len(fakeSAC.createErrC) > 0 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + } + + t.Log("Agent recreated successfully.") + + // Verify agent was deleted. + require.Len(t, fakeSAC.deleted, 1) + assert.Equal(t, fakeSAC.created[0].ID, fakeSAC.deleted[0]) + + // Verify the agent recreated. + require.Len(t, fakeSAC.created, 2) + }) + + t.Run("SubAgentCleanup", func(t *testing.T) { + t.Parallel() + + var ( + existingAgentID = uuid.New() + existingAgentToken = uuid.New() + existingAgent = agentcontainers.SubAgent{ + ID: existingAgentID, + Name: "stopped-container", + Directory: "/tmp", + AuthToken: existingAgentToken, + } + + ctx = testutil.Context(t, testutil.WaitMedium) + logger = slog.Make() + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + agents: map[uuid.UUID]agentcontainers.SubAgent{ + existingAgentID: existingAgent, + }, + } + ) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{}, + }, nil).AnyTimes() + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithDevcontainerCLI(&fakeDevcontainerCLI{}), + ) + defer api.Close() + + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + // Verify agent was deleted. + assert.Contains(t, fakeSAC.deleted, existingAgentID) + assert.Empty(t, fakeSAC.agents) + }) } // mustFindDevcontainerByPath returns the devcontainer with the given workspace diff --git a/agent/agentcontainers/devcontainer.go b/agent/agentcontainers/devcontainer.go index 09d4837d4b27a..f13963d7b63d7 100644 --- a/agent/agentcontainers/devcontainer.go +++ b/agent/agentcontainers/devcontainer.go @@ -18,6 +18,8 @@ const ( // DevcontainerConfigFileLabel is the label that contains the path to // the devcontainer.json configuration file. DevcontainerConfigFileLabel = "devcontainer.config_file" + // The default workspace folder inside the devcontainer. + DevcontainerDefaultContainerWorkspaceFolder = "/workspaces" ) const devcontainerUpScriptTemplate = ` diff --git a/agent/agentcontainers/devcontainercli.go b/agent/agentcontainers/devcontainercli.go index 94b4de610a93b..4e1ad93a715dc 100644 --- a/agent/agentcontainers/devcontainercli.go +++ b/agent/agentcontainers/devcontainercli.go @@ -66,8 +66,9 @@ func WithExecOutput(stdout, stderr io.Writer) DevcontainerCLIExecOptions { } } -// WithContainerID sets the container ID to target a specific container. -func WithContainerID(id string) DevcontainerCLIExecOptions { +// WithExecContainerID sets the container ID to target a specific +// container. +func WithExecContainerID(id string) DevcontainerCLIExecOptions { return func(o *devcontainerCLIExecConfig) { o.args = append(o.args, "--container-id", id) } @@ -165,6 +166,11 @@ func (d *devcontainerCLI) Exec(ctx context.Context, workspaceFolder, configPath logger := d.logger.With(slog.F("workspace_folder", workspaceFolder), slog.F("config_path", configPath)) args := []string{"exec"} + // For now, always set workspace folder even if --container-id is provided. + // Otherwise the environment of exec will be incomplete, like `pwd` will be + // /home/coder instead of /workspaces/coder. The downside is that the local + // `devcontainer.json` config will overwrite settings serialized in the + // container label. if workspaceFolder != "" { args = append(args, "--workspace-folder", workspaceFolder) } diff --git a/agent/agentcontainers/devcontainercli_test.go b/agent/agentcontainers/devcontainercli_test.go index 48325ab83fb21..b8b4120d2e8ab 100644 --- a/agent/agentcontainers/devcontainercli_test.go +++ b/agent/agentcontainers/devcontainercli_test.go @@ -182,7 +182,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { configPath: "", cmd: "echo", cmdArgs: []string{"hello"}, - opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithContainerID("test-container-123")}, + opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("test-container-123")}, wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-123 echo hello", wantError: false, }, @@ -192,7 +192,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { configPath: "/test/config.json", cmd: "bash", cmdArgs: []string{"-c", "ls -la"}, - opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithContainerID("my-container")}, + opts: []agentcontainers.DevcontainerCLIExecOptions{agentcontainers.WithExecContainerID("my-container")}, wantArgs: "exec --workspace-folder /test/workspace --config /test/config.json --container-id my-container bash -c ls -la", wantError: false, }, @@ -203,7 +203,7 @@ func TestDevcontainerCLI_ArgsAndParsing(t *testing.T) { cmd: "cat", cmdArgs: []string{"/etc/hostname"}, opts: []agentcontainers.DevcontainerCLIExecOptions{ - agentcontainers.WithContainerID("test-container-789"), + agentcontainers.WithExecContainerID("test-container-789"), }, wantArgs: "exec --workspace-folder /test/workspace --container-id test-container-789 cat /etc/hostname", wantError: false, @@ -306,7 +306,7 @@ func TestDevcontainerCLI_WithOutput(t *testing.T) { // Call Exec with WithExecOutput and WithContainerID to capture any command output. ctx := testutil.Context(t, testutil.WaitMedium) err = dccli.Exec(ctx, "/test/workspace", "", "echo", []string{"hello"}, - agentcontainers.WithContainerID("test-container-456"), + agentcontainers.WithExecContainerID("test-container-456"), agentcontainers.WithExecOutput(outBuf, errBuf), ) require.NoError(t, err, "Exec should succeed") diff --git a/agent/agentcontainers/subagent.go b/agent/agentcontainers/subagent.go new file mode 100644 index 0000000000000..70899fb96f70d --- /dev/null +++ b/agent/agentcontainers/subagent.go @@ -0,0 +1,128 @@ +package agentcontainers + +import ( + "context" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog" + + agentproto "github.com/coder/coder/v2/agent/proto" +) + +// SubAgent represents an agent running in a dev container. +type SubAgent struct { + ID uuid.UUID + Name string + AuthToken uuid.UUID + Directory string + Architecture string + OperatingSystem string +} + +// SubAgentClient is an interface for managing sub agents and allows +// changing the implementation without having to deal with the +// agentproto package directly. +type SubAgentClient interface { + // List returns a list of all agents. + List(ctx context.Context) ([]SubAgent, error) + // Create adds a new agent. + Create(ctx context.Context, agent SubAgent) (SubAgent, error) + // Delete removes an agent by its ID. + Delete(ctx context.Context, id uuid.UUID) error +} + +// NewSubAgentClient returns a SubAgentClient that uses the provided +// agent API client. +type subAgentAPIClient struct { + logger slog.Logger + api agentproto.DRPCAgentClient26 +} + +var _ SubAgentClient = (*subAgentAPIClient)(nil) + +func NewSubAgentClientFromAPI(logger slog.Logger, agentAPI agentproto.DRPCAgentClient26) SubAgentClient { + if agentAPI == nil { + panic("developer error: agentAPI cannot be nil") + } + return &subAgentAPIClient{ + logger: logger.Named("subagentclient"), + api: agentAPI, + } +} + +func (a *subAgentAPIClient) List(ctx context.Context) ([]SubAgent, error) { + a.logger.Debug(ctx, "listing sub agents") + resp, err := a.api.ListSubAgents(ctx, &agentproto.ListSubAgentsRequest{}) + if err != nil { + return nil, err + } + + agents := make([]SubAgent, len(resp.Agents)) + for i, agent := range resp.Agents { + id, err := uuid.FromBytes(agent.GetId()) + if err != nil { + return nil, err + } + authToken, err := uuid.FromBytes(agent.GetAuthToken()) + if err != nil { + return nil, err + } + agents[i] = SubAgent{ + ID: id, + Name: agent.GetName(), + AuthToken: authToken, + } + } + return agents, nil +} + +func (a *subAgentAPIClient) Create(ctx context.Context, agent SubAgent) (SubAgent, error) { + a.logger.Debug(ctx, "creating sub agent", slog.F("name", agent.Name), slog.F("directory", agent.Directory)) + resp, err := a.api.CreateSubAgent(ctx, &agentproto.CreateSubAgentRequest{ + Name: agent.Name, + Directory: agent.Directory, + Architecture: agent.Architecture, + OperatingSystem: agent.OperatingSystem, + }) + if err != nil { + return SubAgent{}, err + } + + agent.Name = resp.Agent.Name + agent.ID, err = uuid.FromBytes(resp.Agent.Id) + if err != nil { + return agent, err + } + agent.AuthToken, err = uuid.FromBytes(resp.Agent.AuthToken) + if err != nil { + return agent, err + } + return agent, nil +} + +func (a *subAgentAPIClient) Delete(ctx context.Context, id uuid.UUID) error { + a.logger.Debug(ctx, "deleting sub agent", slog.F("id", id.String())) + _, err := a.api.DeleteSubAgent(ctx, &agentproto.DeleteSubAgentRequest{ + Id: id[:], + }) + return err +} + +// noopSubAgentClient is a SubAgentClient that does nothing. +type noopSubAgentClient struct{} + +var _ SubAgentClient = noopSubAgentClient{} + +func (noopSubAgentClient) List(_ context.Context) ([]SubAgent, error) { + return nil, nil +} + +func (noopSubAgentClient) Create(_ context.Context, _ SubAgent) (SubAgent, error) { + return SubAgent{}, xerrors.New("noopSubAgentClient does not support creating sub agents") +} + +func (noopSubAgentClient) Delete(_ context.Context, _ uuid.UUID) error { + return xerrors.New("noopSubAgentClient does not support deleting sub agents") +} diff --git a/agent/agenttest/client.go b/agent/agenttest/client.go index a957c61000c70..0a2df141ff3d4 100644 --- a/agent/agenttest/client.go +++ b/agent/agenttest/client.go @@ -163,6 +163,14 @@ func (c *Client) GetConnectionReports() []*agentproto.ReportConnectionRequest { return c.fakeAgentAPI.GetConnectionReports() } +func (c *Client) GetSubAgents() []*agentproto.SubAgent { + return c.fakeAgentAPI.GetSubAgents() +} + +func (c *Client) GetSubAgentDirectory(id uuid.UUID) (string, error) { + return c.fakeAgentAPI.GetSubAgentDirectory(id) +} + type FakeAgentAPI struct { sync.Mutex t testing.TB @@ -177,6 +185,8 @@ type FakeAgentAPI struct { metadata map[string]agentsdk.Metadata timings []*agentproto.Timing connectionReports []*agentproto.ReportConnectionRequest + subAgents map[uuid.UUID]*agentproto.SubAgent + subAgentDirs map[uuid.UUID]string getAnnouncementBannersFunc func() ([]codersdk.BannerConfig, error) getResourcesMonitoringConfigurationFunc func() (*agentproto.GetResourcesMonitoringConfigurationResponse, error) @@ -365,16 +375,106 @@ func (f *FakeAgentAPI) GetConnectionReports() []*agentproto.ReportConnectionRequ return slices.Clone(f.connectionReports) } -func (*FakeAgentAPI) CreateSubAgent(_ context.Context, _ *agentproto.CreateSubAgentRequest) (*agentproto.CreateSubAgentResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) CreateSubAgent(ctx context.Context, req *agentproto.CreateSubAgentRequest) (*agentproto.CreateSubAgentResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "create sub agent called", slog.F("req", req)) + + // Generate IDs for the new sub-agent. + subAgentID := uuid.New() + authToken := uuid.New() + + // Create the sub-agent proto object. + subAgent := &agentproto.SubAgent{ + Id: subAgentID[:], + Name: req.Name, + AuthToken: authToken[:], + } + + // Store the sub-agent in our map. + if f.subAgents == nil { + f.subAgents = make(map[uuid.UUID]*agentproto.SubAgent) + } + f.subAgents[subAgentID] = subAgent + if f.subAgentDirs == nil { + f.subAgentDirs = make(map[uuid.UUID]string) + } + f.subAgentDirs[subAgentID] = req.GetDirectory() + + // For a fake implementation, we don't create workspace apps. + // Real implementations would handle req.Apps here. + return &agentproto.CreateSubAgentResponse{ + Agent: subAgent, + AppCreationErrors: nil, + }, nil +} + +func (f *FakeAgentAPI) DeleteSubAgent(ctx context.Context, req *agentproto.DeleteSubAgentRequest) (*agentproto.DeleteSubAgentResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "delete sub agent called", slog.F("req", req)) + + subAgentID, err := uuid.FromBytes(req.Id) + if err != nil { + return nil, err + } + + // Remove the sub-agent from our map. + if f.subAgents != nil { + delete(f.subAgents, subAgentID) + } + + return &agentproto.DeleteSubAgentResponse{}, nil +} + +func (f *FakeAgentAPI) ListSubAgents(ctx context.Context, req *agentproto.ListSubAgentsRequest) (*agentproto.ListSubAgentsResponse, error) { + f.Lock() + defer f.Unlock() + + f.logger.Debug(ctx, "list sub agents called", slog.F("req", req)) + + var agents []*agentproto.SubAgent + if f.subAgents != nil { + agents = make([]*agentproto.SubAgent, 0, len(f.subAgents)) + for _, agent := range f.subAgents { + agents = append(agents, agent) + } + } + + return &agentproto.ListSubAgentsResponse{ + Agents: agents, + }, nil } -func (*FakeAgentAPI) DeleteSubAgent(_ context.Context, _ *agentproto.DeleteSubAgentRequest) (*agentproto.DeleteSubAgentResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) GetSubAgents() []*agentproto.SubAgent { + f.Lock() + defer f.Unlock() + var agents []*agentproto.SubAgent + if f.subAgents != nil { + agents = make([]*agentproto.SubAgent, 0, len(f.subAgents)) + for _, agent := range f.subAgents { + agents = append(agents, agent) + } + } + return agents } -func (*FakeAgentAPI) ListSubAgents(_ context.Context, _ *agentproto.ListSubAgentsRequest) (*agentproto.ListSubAgentsResponse, error) { - panic("unimplemented") +func (f *FakeAgentAPI) GetSubAgentDirectory(id uuid.UUID) (string, error) { + f.Lock() + defer f.Unlock() + + if f.subAgentDirs == nil { + return "", xerrors.New("no sub-agent directories available") + } + + dir, ok := f.subAgentDirs[id] + if !ok { + return "", xerrors.New("sub-agent directory not found") + } + + return dir, nil } func NewFakeAgentAPI(t testing.TB, logger slog.Logger, manifest *agentproto.Manifest, statsCh chan *agentproto.Stats) *FakeAgentAPI { diff --git a/agent/api.go b/agent/api.go index 2e15530adc608..1c9a707fbb338 100644 --- a/agent/api.go +++ b/agent/api.go @@ -10,11 +10,12 @@ import ( "github.com/google/uuid" "github.com/coder/coder/v2/agent/agentcontainers" + "github.com/coder/coder/v2/agent/proto" "github.com/coder/coder/v2/coderd/httpapi" "github.com/coder/coder/v2/codersdk" ) -func (a *agent) apiHandler() (http.Handler, func() error) { +func (a *agent) apiHandler(aAPI proto.DRPCAgentClient26) (http.Handler, func() error) { r := chi.NewRouter() r.Get("/", func(rw http.ResponseWriter, r *http.Request) { httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.Response{ @@ -45,6 +46,7 @@ func (a *agent) apiHandler() (http.Handler, func() error) { agentcontainers.WithScriptLogger(func(logSourceID uuid.UUID) agentcontainers.ScriptLogger { return a.logSender.GetScriptLogger(logSourceID) }), + agentcontainers.WithSubAgentClient(agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)), } manifest := a.manifest.Load() if manifest != nil && len(manifest.Devcontainers) > 0 { diff --git a/cli/agent.go b/cli/agent.go index deca447664337..5d6037f9930ec 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -28,6 +28,7 @@ import ( "github.com/coder/serpent" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agentexec" "github.com/coder/coder/v2/agent/agentssh" "github.com/coder/coder/v2/agent/reaper" @@ -362,6 +363,9 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { BlockFileTransfer: blockFileTransfer, Execer: execer, ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled, + ContainerAPIOptions: []agentcontainers.Option{ + agentcontainers.WithSubAgentURL(r.agentURL.String()), + }, }) promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) diff --git a/cli/exp_rpty_test.go b/cli/exp_rpty_test.go index 355cc1741b5a9..923bf09bb0e15 100644 --- a/cli/exp_rpty_test.go +++ b/cli/exp_rpty_test.go @@ -9,6 +9,7 @@ import ( "github.com/ory/dockertest/v3/docker" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agenttest" "github.com/coder/coder/v2/cli/clitest" "github.com/coder/coder/v2/coderd/coderdtest" @@ -111,6 +112,9 @@ func TestExpRpty(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/cli/open_test.go b/cli/open_test.go index 4441e51e58c4b..f7180ab260fbd 100644 --- a/cli/open_test.go +++ b/cli/open_test.go @@ -306,8 +306,8 @@ func TestOpenVSCodeDevContainer(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -337,7 +337,10 @@ func TestOpenVSCodeDevContainer(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mccli), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -481,8 +484,8 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -511,7 +514,10 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mccli), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/cli/ssh_test.go b/cli/ssh_test.go index 1774d8d131a9d..bee075283c083 100644 --- a/cli/ssh_test.go +++ b/cli/ssh_test.go @@ -2032,6 +2032,9 @@ func TestSSH_Container(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -2069,7 +2072,10 @@ func TestSSH_Container(t *testing.T) { }, nil).AnyTimes() _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mLister)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mLister), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index f32c7b1458ca2..ec0b692886918 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -1252,6 +1252,9 @@ func TestWorkspaceAgentContainers(t *testing.T) { }).Do() _ = agenttest.New(t, client.URL, r.AgentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() require.Len(t, resources, 1, "expected one resource") @@ -1358,7 +1361,10 @@ func TestWorkspaceAgentContainers(t *testing.T) { _ = agenttest.New(t, client.URL, r.AgentToken, func(o *agent.Options) { o.Logger = logger.Named("agent") o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, + agentcontainers.WithContainerCLI(mcl), + agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"), + ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() require.Len(t, resources, 1, "expected one resource") @@ -1397,14 +1403,15 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { agentcontainers.DevcontainerConfigFileLabel: configFile, } devContainer = codersdk.WorkspaceAgentContainer{ - ID: uuid.NewString(), - CreatedAt: dbtime.Now(), - FriendlyName: testutil.GetRandomName(t), - Image: "busybox:latest", - Labels: dcLabels, - Running: true, - Status: "running", - DevcontainerDirty: true, + ID: uuid.NewString(), + CreatedAt: dbtime.Now(), + FriendlyName: testutil.GetRandomName(t), + Image: "busybox:latest", + Labels: dcLabels, + Running: true, + Status: "running", + DevcontainerDirty: true, + DevcontainerStatus: codersdk.WorkspaceAgentDevcontainerStatusRunning, } plainContainer = codersdk.WorkspaceAgentContainer{ ID: uuid.NewString(), @@ -1419,29 +1426,31 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { for _, tc := range []struct { name string - setupMock func(*acmock.MockContainerCLI, *acmock.MockDevcontainerCLI) (status int) + setupMock func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) (status int) }{ { name: "Recreate", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{devContainer}, }, nil).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), devContainer.ID).Return("", nil).AnyTimes() mdccli.EXPECT().Up(gomock.Any(), workspaceFolder, configFile, gomock.Any()).Return("someid", nil).Times(1) return 0 }, }, { name: "Container does not exist", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() return http.StatusNotFound }, }, { name: "Not a devcontainer", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{plainContainer}, }, nil).AnyTimes() return http.StatusNotFound @@ -1452,9 +1461,9 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { t.Parallel() ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) + mccli := acmock.NewMockContainerCLI(ctrl) mdccli := acmock.NewMockDevcontainerCLI(ctrl) - wantStatus := tc.setupMock(mcl, mdccli) + wantStatus := tc.setupMock(mccli, mdccli) logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{ Logger: &logger, @@ -1471,9 +1480,10 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { o.ExperimentalDevcontainersEnabled = true o.ContainerAPIOptions = append( o.ContainerAPIOptions, - agentcontainers.WithContainerCLI(mcl), + agentcontainers.WithContainerCLI(mccli), agentcontainers.WithDevcontainerCLI(mdccli), agentcontainers.WithWatcher(watcher.NewNoop()), + agentcontainers.WithContainerLabelIncludeFilter(agentcontainers.DevcontainerLocalFolderLabel, workspaceFolder), ) }) resources := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID).Wait() From ae0c8701bbf66aa5376634e3a977493711cb08f0 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 10 Jun 2025 13:47:02 +0300 Subject: [PATCH 2/2] feat(agent): disable devcontainers for sub agents (#18303) Updates coder/internal#621 Refs #18245 --- agent/agent.go | 12 ++++++++++++ agent/agent_test.go | 28 ++++++++++++++++++++++++++++ codersdk/agentsdk/agentsdk.go | 1 + codersdk/agentsdk/convert.go | 10 ++++++++++ codersdk/agentsdk/convert_test.go | 2 ++ 5 files changed, 53 insertions(+) diff --git a/agent/agent.go b/agent/agent.go index 17298e7aa5772..9f105ee296f5c 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1080,6 +1080,18 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, if manifest.AgentID == uuid.Nil { return xerrors.New("nil agentID returned by manifest") } + if manifest.ParentID != uuid.Nil { + // This is a sub agent, disable all the features that should not + // be used by sub agents. + a.logger.Debug(ctx, "sub agent detected, disabling features", + slog.F("parent_id", manifest.ParentID), + slog.F("agent_id", manifest.AgentID), + ) + if a.experimentalDevcontainersEnabled { + a.logger.Info(ctx, "devcontainers are not supported on sub agents, disabling feature") + a.experimentalDevcontainersEnabled = false + } + } a.client.RewriteDERPMap(manifest.DERPMap) // Expand the directory and send it back to coderd so external diff --git a/agent/agent_test.go b/agent/agent_test.go index 3ef9e4f4c75ba..9a8073a289b5f 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -2423,6 +2423,34 @@ waitForOutcomeLoop: }(container) } +func TestAgent_DevcontainersDisabledForSubAgent(t *testing.T) { + t.Parallel() + + // Create a manifest with a ParentID to make this a sub agent. + manifest := agentsdk.Manifest{ + AgentID: uuid.New(), + ParentID: uuid.New(), + } + + // Setup the agent with devcontainers enabled initially. + //nolint:dogsled + conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) { + o.ExperimentalDevcontainersEnabled = true + }) + + // Query the containers API endpoint. This should fail because + // devcontainers have been disabled for the sub agent. + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitMedium) + defer cancel() + + _, err := conn.ListContainers(ctx) + require.Error(t, err) + + // Verify the error message contains the expected text. + require.Contains(t, err.Error(), "The agent dev containers feature is experimental and not enabled by default.") + require.Contains(t, err.Error(), "To enable this feature, set CODER_AGENT_DEVCONTAINERS_ENABLE=true in your template.") +} + func TestAgent_Dial(t *testing.T) { t.Parallel() diff --git a/codersdk/agentsdk/agentsdk.go b/codersdk/agentsdk/agentsdk.go index e3b036dcdf00a..f44c19b998e21 100644 --- a/codersdk/agentsdk/agentsdk.go +++ b/codersdk/agentsdk/agentsdk.go @@ -102,6 +102,7 @@ type PostMetadataRequest struct { type PostMetadataRequestDeprecated = codersdk.WorkspaceAgentMetadataResult type Manifest struct { + ParentID uuid.UUID `json:"parent_id"` AgentID uuid.UUID `json:"agent_id"` AgentName string `json:"agent_name"` // OwnerUsername and WorkspaceID are used by an open-source user to identify the workspace. diff --git a/codersdk/agentsdk/convert.go b/codersdk/agentsdk/convert.go index 2b7dff950a3e7..d01c9e527fce9 100644 --- a/codersdk/agentsdk/convert.go +++ b/codersdk/agentsdk/convert.go @@ -15,6 +15,14 @@ import ( ) func ManifestFromProto(manifest *proto.Manifest) (Manifest, error) { + parentID := uuid.Nil + if pid := manifest.GetParentId(); pid != nil { + var err error + parentID, err = uuid.FromBytes(pid) + if err != nil { + return Manifest{}, xerrors.Errorf("error converting workspace agent parent ID: %w", err) + } + } apps, err := AppsFromProto(manifest.Apps) if err != nil { return Manifest{}, xerrors.Errorf("error converting workspace agent apps: %w", err) @@ -36,6 +44,7 @@ func ManifestFromProto(manifest *proto.Manifest) (Manifest, error) { return Manifest{}, xerrors.Errorf("error converting workspace agent devcontainers: %w", err) } return Manifest{ + ParentID: parentID, AgentID: agentID, AgentName: manifest.AgentName, OwnerName: manifest.OwnerUsername, @@ -62,6 +71,7 @@ func ProtoFromManifest(manifest Manifest) (*proto.Manifest, error) { return nil, xerrors.Errorf("convert workspace apps: %w", err) } return &proto.Manifest{ + ParentId: manifest.ParentID[:], AgentId: manifest.AgentID[:], AgentName: manifest.AgentName, OwnerUsername: manifest.OwnerName, diff --git a/codersdk/agentsdk/convert_test.go b/codersdk/agentsdk/convert_test.go index 09482b1694910..f324d504b838a 100644 --- a/codersdk/agentsdk/convert_test.go +++ b/codersdk/agentsdk/convert_test.go @@ -19,6 +19,7 @@ import ( func TestManifest(t *testing.T) { t.Parallel() manifest := agentsdk.Manifest{ + ParentID: uuid.New(), AgentID: uuid.New(), AgentName: "test-agent", OwnerName: "test-owner", @@ -142,6 +143,7 @@ func TestManifest(t *testing.T) { require.NoError(t, err) back, err := agentsdk.ManifestFromProto(p) require.NoError(t, err) + require.Equal(t, manifest.ParentID, back.ParentID) require.Equal(t, manifest.AgentID, back.AgentID) require.Equal(t, manifest.AgentName, back.AgentName) require.Equal(t, manifest.OwnerName, back.OwnerName)