Thanks to visit codestin.com
Credit goes to github.com

Skip to content

feat(agent): send devcontainer CLI logs during recreate #17845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(agent): send devcontainer CLI logs to coderd during recreate
We need a way to surface what's happening to the user; since autostart already sends devcontainer CLI logs here, it's natural we do so during re-create as well.

Updates #16424
  • Loading branch information
mafredri committed May 15, 2025
commit f144a033ee9b36248b1a19c7705428514a3092d4
183 changes: 172 additions & 11 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1935,8 +1935,6 @@
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}

ctx := testutil.Context(t, testutil.WaitLong)

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
ct, err := pool.RunWithOptions(&dockertest.RunOptions{
Expand All @@ -1948,10 +1946,10 @@
config.RestartPolicy = docker.RestartPolicy{Name: "no"}
})
require.NoError(t, err, "Could not start container")
t.Cleanup(func() {
defer func() {
err := pool.Purge(ct)
require.NoError(t, err, "Could not stop container")
})
}()
// Wait for container to start
require.Eventually(t, func() bool {
ct, ok := pool.ContainerByName(ct.Container.Name)
Expand All @@ -1962,6 +1960,7 @@
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})
ctx := testutil.Context(t, testutil.WaitLong)
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) {
arp.Container = ct.Container.ID
})
Expand Down Expand Up @@ -2005,9 +2004,6 @@
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}

ctx := testutil.Context(t, testutil.WaitLong)

// Connect to Docker
pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")

Expand Down Expand Up @@ -2051,7 +2047,7 @@
},
},
}
// nolint: dogsled
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})
Expand Down Expand Up @@ -2079,8 +2075,7 @@

return false
}, testutil.WaitSuperLong, testutil.IntervalMedium, "no container with workspace folder label found")

t.Cleanup(func() {
defer func() {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
Expand All @@ -2089,13 +2084,15 @@
Force: true,
})
assert.NoError(t, err, "remove container")
})
}()

containerInfo, err := pool.Client.InspectContainer(container.ID)
require.NoError(t, err, "inspect container")
t.Logf("Container state: status: %v", containerInfo.State.Status)
require.True(t, containerInfo.State.Running, "container should be running")

ctx := testutil.Context(t, testutil.WaitLong)

ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) {
opts.Container = container.ID
})
Expand Down Expand Up @@ -2124,6 +2121,170 @@
require.NoError(t, err, "file should exist outside devcontainer")
}

// TestAgent_DevcontainerRecreate tests that RecreateDevcontainer
// recreates a devcontainer and emits logs.
//
// This tests end-to-end functionality of auto-starting a devcontainer.
// It runs "devcontainer up" which creates a real Docker container. As
// such, it does not run by default in CI.
//
// You can run it manually as follows:
//
// CODER_TEST_USE_DOCKER=1 go test -count=1 ./agent -run TestAgent_DevcontainerRecreate
func TestAgent_DevcontainerRecreate(t *testing.T) {

Check failure on line 2134 in agent/agent_test.go

View workflow job for this annotation

GitHub Actions / lint

Function TestAgent_DevcontainerRecreate missing the call to method parallel (paralleltest)
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")

// Prepare temporary devcontainer for test (mywork).
devcontainerID := uuid.New()
devcontainerLogSourceID := uuid.New()
workspaceFolder := filepath.Join(t.TempDir(), "mywork")
t.Logf("Workspace folder: %s", workspaceFolder)
devcontainerPath := filepath.Join(workspaceFolder, ".devcontainer")
err = os.MkdirAll(devcontainerPath, 0o755)
require.NoError(t, err, "create devcontainer directory")
devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json")
err = os.WriteFile(devcontainerFile, []byte(`{
"name": "mywork",
"image": "busybox:latest",
"cmd": ["sleep", "infinity"]
}`), 0o600)
require.NoError(t, err, "write devcontainer.json")

manifest := agentsdk.Manifest{
// Set up pre-conditions for auto-starting a devcontainer, the
// script is used to extract the log source ID.
Devcontainers: []codersdk.WorkspaceAgentDevcontainer{
{
ID: devcontainerID,
Name: "test",
WorkspaceFolder: workspaceFolder,
},
},
Scripts: []codersdk.WorkspaceAgentScript{
{
ID: devcontainerID,
LogSourceID: devcontainerLogSourceID,
},
},
}

//nolint:dogsled
conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})

// We enabled autostart for the devcontainer, so ready is a good
// indication that the devcontainer is up and running. Importantly,
// this also means that the devcontainer startup is no longer
// producing logs that may interfere with the recreate logs.
require.Eventually(t, func() bool {
states := client.GetLifecycleStates()
return slices.Contains(states, codersdk.WorkspaceAgentLifecycleReady)
}, testutil.WaitLong, testutil.IntervalMedium, "devcontainer not ready")

t.Logf("Lookging for container with label: devcontainer.local_folder=%s", workspaceFolder)

var container docker.APIContainers
require.Eventually(t, func() bool {
containers, err := pool.Client.ListContainers(docker.ListContainersOptions{All: true})
if err != nil {
t.Logf("Error listing containers: %v", err)
return false
}
for _, c := range containers {
t.Logf("Found container: %s with labels: %v", c.ID[:12], c.Labels)
if v, ok := c.Labels["devcontainer.local_folder"]; ok && v == workspaceFolder {
t.Logf("Found matching container: %s", c.ID[:12])
container = c
return true
}
}
return false
}, testutil.WaitLong, testutil.IntervalMedium, "no container with workspace folder label found")
defer func(container docker.APIContainers) {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
RemoveVolumes: true,
Force: true,
})
assert.Error(t, err, "container should be removed by recreate")
}(container)

ctx := testutil.Context(t, testutil.WaitLong)

// Capture logs via ScriptLogger.
logsCh := make(chan *proto.BatchCreateLogsRequest, 1)
client.SetLogsChannel(logsCh)

// Invoke recreate to trigger the destruction and recreation of the
// devcontainer, we do it in a goroutine so we can process logs
// concurrently.
go func(container docker.APIContainers) {
err := conn.RecreateDevcontainer(ctx, container.ID)
assert.NoError(t, err, "recreate devcontainer should succeed")
}(container)

t.Logf("Checking recreate logs for outcome...")

// Wait for the logs to be emitted, the @devcontainer/cli up command
// will emit a log with the outcome at the end suggesting we did
// receive all the logs.
waitForOutcomeLoop:
for {
batch := testutil.RequireReceive(ctx, t, logsCh)

if bytes.Equal(batch.LogSourceId, devcontainerLogSourceID[:]) {
for _, log := range batch.Logs {
t.Logf("Received log: %s", log.Output)
if strings.Contains(log.Output, "\"outcome\"") {
break waitForOutcomeLoop
}
}
}
}

t.Logf("Checking there's a new container with label: devcontainer.local_folder=%s", workspaceFolder)

// Make sure the container exists and isn't the same as the old one.
require.Eventually(t, func() bool {
containers, err := pool.Client.ListContainers(docker.ListContainersOptions{All: true})
if err != nil {
t.Logf("Error listing containers: %v", err)
return false
}
for _, c := range containers {
t.Logf("Found container: %s with labels: %v", c.ID[:12], c.Labels)
if v, ok := c.Labels["devcontainer.local_folder"]; ok && v == workspaceFolder {
if c.ID == container.ID {
t.Logf("Found same container: %s", c.ID[:12])
return false
}
t.Logf("Found new container: %s", c.ID[:12])
container = c
return true
}
}
return false
}, testutil.WaitLong, testutil.IntervalMedium, "new devcontainer not found")
defer func(container docker.APIContainers) {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
RemoveVolumes: true,
Force: true,
})
assert.NoError(t, err, "remove container")
}(container)
}

func TestAgent_Dial(t *testing.T) {
t.Parallel()

Expand Down
69 changes: 62 additions & 7 deletions agent/agentcontainers/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/quartz"
)

Expand All @@ -43,6 +44,7 @@
cl Lister
dccli DevcontainerCLI
clock quartz.Clock
scriptLogger func(logSourceID uuid.UUID) ScriptLogger

// lockCh protects the below fields. We use a channel instead of a
// mutex so we can handle cancellation properly.
Expand All @@ -52,6 +54,8 @@
devcontainerNames map[string]struct{} // Track devcontainer names to avoid duplicates.
knownDevcontainers []codersdk.WorkspaceAgentDevcontainer // Track predefined and runtime-detected devcontainers.
configFileModifiedTimes map[string]time.Time // Track when config files were last modified.

devcontainerLogSourceIDs map[string]uuid.UUID // Track devcontainer log source IDs.
}

// Option is a functional option for API.
Expand Down Expand Up @@ -91,13 +95,23 @@
// WithDevcontainers sets the known devcontainers for the API. This
// allows the API to be aware of devcontainers defined in the workspace
// agent manifest.
func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer) Option {
func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer, scripts []codersdk.WorkspaceAgentScript) Option {
return func(api *API) {
if len(devcontainers) > 0 {
api.knownDevcontainers = slices.Clone(devcontainers)
api.devcontainerNames = make(map[string]struct{}, len(devcontainers))
for _, devcontainer := range devcontainers {
api.devcontainerNames[devcontainer.Name] = struct{}{}
if len(devcontainers) == 0 {
return
}
api.knownDevcontainers = slices.Clone(devcontainers)
api.devcontainerNames = make(map[string]struct{}, len(devcontainers))
api.devcontainerLogSourceIDs = make(map[string]uuid.UUID)
for _, devcontainer := range devcontainers {
api.devcontainerNames[devcontainer.Name] = struct{}{}
for _, script := range scripts {
// The devcontainer scripts match the devcontainer ID for
// identification.
if script.ID == devcontainer.ID {
api.devcontainerLogSourceIDs[devcontainer.WorkspaceFolder] = script.LogSourceID
break
Comment on lines +108 to +113
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we warn if there are scripts that do not match up with any devcontainer?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it would be an error rather than warn, this is an expectation coming from the provisioner currently.

Added.

}
}
}
}
Expand All @@ -112,6 +126,27 @@
}
}

// ScriptLogger is an interface for sending devcontainer logs to the
// controlplane.
type ScriptLogger interface {
Send(ctx context.Context, log ...agentsdk.Log) error
Flush(context.Context) error
}

// noopScriptLogger is a no-op implementation of the ScriptLogger
// interface.
type noopScriptLogger struct{}

func (noopScriptLogger) Send(ctx context.Context, log ...agentsdk.Log) error { return nil }

Check failure on line 140 in agent/agentcontainers/api.go

View workflow job for this annotation

GitHub Actions / lint

unused-parameter: parameter 'ctx' seems to be unused, consider removing or renaming it as _ (revive)
func (noopScriptLogger) Flush(ctx context.Context) error { return nil }

Check failure on line 141 in agent/agentcontainers/api.go

View workflow job for this annotation

GitHub Actions / lint

unused-parameter: parameter 'ctx' seems to be unused, consider removing or renaming it as _ (revive)

// WithScriptLogger sets the script logger provider for devcontainer operations.
func WithScriptLogger(scriptLogger func(logSourceID uuid.UUID) ScriptLogger) Option {
return func(api *API) {
api.scriptLogger = scriptLogger
}
}

// NewAPI returns a new API with the given options applied.
func NewAPI(logger slog.Logger, options ...Option) *API {
ctx, cancel := context.WithCancel(context.Background())
Expand All @@ -127,6 +162,7 @@
devcontainerNames: make(map[string]struct{}),
knownDevcontainers: []codersdk.WorkspaceAgentDevcontainer{},
configFileModifiedTimes: make(map[string]time.Time),
scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} },
}
for _, opt := range options {
opt(api)
Expand Down Expand Up @@ -426,7 +462,26 @@
return
}

_, err = api.dccli.Up(ctx, workspaceFolder, configPath, WithRemoveExistingContainer())
// Send logs via agent logging facilities.
logSourceID := api.devcontainerLogSourceIDs[workspaceFolder]
if logSourceID == uuid.Nil {
// Fallback to the external log source ID if not found.
logSourceID = agentsdk.ExternalLogSourceID
}
scriptLogger := api.scriptLogger(logSourceID)
defer func() {
flushCtx, cancel := context.WithTimeout(api.ctx, 5*time.Second)
defer cancel()
if err := scriptLogger.Flush(flushCtx); err != nil {
api.logger.Error(flushCtx, "flush devcontainer logs failed", slog.Error(err))
}
}()
infoW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelInfo)
defer infoW.Close()
errW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelError)
defer errW.Close()

_, err = api.dccli.Up(ctx, workspaceFolder, configPath, WithOutput(infoW, errW), WithRemoveExistingContainer())
if err != nil {
httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: "Could not recreate devcontainer",
Expand Down
2 changes: 1 addition & 1 deletion agent/agentcontainers/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ func TestAPI(t *testing.T) {
}

if len(tt.knownDevcontainers) > 0 {
apiOptions = append(apiOptions, agentcontainers.WithDevcontainers(tt.knownDevcontainers))
apiOptions = append(apiOptions, agentcontainers.WithDevcontainers(tt.knownDevcontainers, nil))
}

api := agentcontainers.NewAPI(logger, apiOptions...)
Expand Down
Loading
Loading