Merged
fix(agent/agentcontainers): reduce need to recreate sub agents
Updates #18332
mafredri committed Jun 17, 2025
commit 2d79ecc38154f1886790b1ec49576e2360c31884
23 changes: 19 additions & 4 deletions agent/agent_test.go
@@ -2080,6 +2080,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
subAgentConnected := make(chan subAgentRequestPayload, 1)
subAgentReady := make(chan struct{}, 1)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/api/v2/workspaceagents/me/") {
return
}

t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path)

if r.Method != http.MethodPost {
@@ -2226,11 +2230,22 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
// Ensure the container update routine runs.
tickerFuncTrap.MustWait(ctx).MustRelease(ctx)
tickerFuncTrap.Close()
_, next := mClock.AdvanceNext()
next.MustWait(ctx)

// Verify that a subagent was created.
subAgents := agentClient.GetSubAgents()
// Since the agent does RefreshContainers, and the ticker function
// is set to skip instead of queue, we must advance the clock
// multiple times to ensure that the sub-agent is created.
var subAgents []*proto.SubAgent
for {
_, next := mClock.AdvanceNext()
next.MustWait(ctx)

// Verify that a subagent was created.
subAgents = agentClient.GetSubAgents()
if len(subAgents) > 0 {
t.Logf("Found sub-agents: %d", len(subAgents))
break
}
}
require.Len(t, subAgents, 1, "expected one sub agent")

subAgent := subAgents[0]
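The loop above advances the mock clock one scheduled event at a time and re-checks agentClient.GetSubAgents() because the updater ticker skips missed ticks rather than queueing them, so a single advance is no longer guaranteed to produce the sub-agent. Below is a minimal sketch of that pattern factored into a helper; it is hypothetical (not part of this commit) and assumes the same *quartz.Mock value and AdvanceNext/MustWait calls already used in the test, plus the file's existing context and testing imports.

// advanceUntil is a hypothetical helper, not part of this change. It
// repeats the advance-and-check pattern from the test above: fire the next
// scheduled timer on the mock clock, wait for the triggered work to
// complete, then re-evaluate the condition.
func advanceUntil(ctx context.Context, t *testing.T, mClock *quartz.Mock, cond func() bool) {
	t.Helper()
	for !cond() {
		if ctx.Err() != nil {
			t.Fatal("context done before condition was met")
		}
		_, next := mClock.AdvanceNext()
		next.MustWait(ctx)
	}
}

With such a helper, the loop would read advanceUntil(ctx, t, mClock, func() bool { return len(agentClient.GetSubAgents()) > 0 }), with the final GetSubAgents result reused for the assertions that follow.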
94 changes: 56 additions & 38 deletions agent/agentcontainers/api.go
@@ -977,7 +977,7 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
)

// Check if subagent already exists for this devcontainer.
recreateSubAgent := false
maybeRecreateSubAgent := false
proc, injected := api.injectedSubAgentProcs[dc.WorkspaceFolder]
if injected {
if proc.containerID == container.ID && proc.ctx.Err() == nil {
@@ -992,12 +992,15 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
logger.Debug(ctx, "container ID changed, injecting subagent into new container",
slog.F("old_container_id", proc.containerID),
)
recreateSubAgent = true
maybeRecreateSubAgent = true
}

// Container ID changed or the subagent process is not running,
// stop the existing subagent context to replace it.
proc.stop()
} else {
// Set SubAgent defaults.
proc.agent.OperatingSystem = "linux" // Assuming Linux for devcontainers.
}

// Prepare the subAgentProcess to be used when running the subagent.
@@ -1090,36 +1093,29 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
// logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err))
// }

// Detect workspace folder by executing `pwd` in the container.
// NOTE(mafredri): This is a quick and dirty way to detect the
// workspace folder inside the container. In the future we will
// rely more on `devcontainer read-configuration`.
var pwdBuf bytes.Buffer
err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{},
WithExecOutput(&pwdBuf, io.Discard),
WithExecContainerID(container.ID),
)
if err != nil {
return xerrors.Errorf("check workspace folder in container: %w", err)
}
directory := strings.TrimSpace(pwdBuf.String())
if directory == "" {
logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder",
slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder),
subAgentConfig := proc.agent.CloneConfig(dc)
if proc.agent.ID == uuid.Nil || maybeRecreateSubAgent {
// Detect workspace folder by executing `pwd` in the container.
// NOTE(mafredri): This is a quick and dirty way to detect the
// workspace folder inside the container. In the future we will
// rely more on `devcontainer read-configuration`.
var pwdBuf bytes.Buffer
err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{},
WithExecOutput(&pwdBuf, io.Discard),
WithExecContainerID(container.ID),
)
directory = DevcontainerDefaultContainerWorkspaceFolder
}

if proc.agent.ID != uuid.Nil && recreateSubAgent {
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))
client := *api.subAgentClient.Load()
err = client.Delete(ctx, proc.agent.ID)
if err != nil {
return xerrors.Errorf("delete existing subagent failed: %w", err)
return xerrors.Errorf("check workspace folder in container: %w", err)
}
proc.agent = SubAgent{}
}
if proc.agent.ID == uuid.Nil {
directory := strings.TrimSpace(pwdBuf.String())
if directory == "" {
logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder",
slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder),
)
directory = DevcontainerDefaultContainerWorkspaceFolder
}
subAgentConfig.Directory = directory

displayAppsMap := map[codersdk.DisplayApp]bool{
// NOTE(DanielleMaywood):
// We use the same defaults here as set in terraform-provider-coder.
@@ -1138,6 +1134,13 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c

for _, customization := range coderCustomization {
for app, enabled := range customization.DisplayApps {
if _, ok := displayAppsMap[app]; !ok {
logger.Warn(ctx, "unknown display app in devcontainer customization, ignoring",
slog.F("app", app),
slog.F("enabled", enabled),
)
continue
}
displayAppsMap[app] = enabled
}
}
@@ -1149,26 +1152,41 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
displayApps = append(displayApps, app)
}
}
slices.Sort(displayApps)

subAgentConfig.DisplayApps = displayApps
}

deleteSubAgent := maybeRecreateSubAgent && !proc.agent.EqualConfig(subAgentConfig)
if deleteSubAgent {
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))
client := *api.subAgentClient.Load()
err = client.Delete(ctx, proc.agent.ID)
if err != nil {
return xerrors.Errorf("delete existing subagent failed: %w", err)
}
proc.agent = SubAgent{} // Clear agent to signal that we need to create a new one.
}

if proc.agent.ID == uuid.Nil {
logger.Debug(ctx, "creating new subagent",
slog.F("directory", directory),
slog.F("display_apps", displayApps),
slog.F("directory", subAgentConfig.Directory),
slog.F("display_apps", subAgentConfig.DisplayApps),
)

// Create new subagent record in the database to receive the auth token.
client := *api.subAgentClient.Load()
proc.agent, err = client.Create(ctx, SubAgent{
Name: dc.Name,
Directory: directory,
OperatingSystem: "linux", // Assuming Linux for devcontainers.
Architecture: arch,
DisplayApps: displayApps,
})
newSubAgent, err := client.Create(ctx, subAgentConfig)
if err != nil {
return xerrors.Errorf("create subagent failed: %w", err)
}
proc.agent = newSubAgent

logger.Info(ctx, "created new subagent", slog.F("agent_id", proc.agent.ID))
} else {
logger.Debug(ctx, "subagent already exists, skipping recreation",
slog.F("agent_id", proc.agent.ID),
)
}

api.mu.Lock() // Re-lock to update the agent.
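The hunk above leans on two SubAgent helpers whose definitions are not part of this diff: CloneConfig, which derives the desired configuration from the existing agent and the devcontainer, and EqualConfig, which decides whether the existing agent can be reused instead of being deleted and recreated. A rough sketch of what such helpers could look like follows; it assumes SubAgent exposes the Name, Directory, OperatingSystem, Architecture, and DisplayApps fields referenced elsewhere in this diff and that dc is a codersdk.WorkspaceAgentDevcontainer, so the actual implementations in agent/agentcontainers may differ.

// Hypothetical sketches of the helpers referenced above; not the
// authoritative implementations.

// CloneConfig copies the reusable configuration of an existing sub-agent,
// taking the name from the devcontainer, so the result can serve as the
// desired configuration for that devcontainer.
func (s SubAgent) CloneConfig(dc codersdk.WorkspaceAgentDevcontainer) SubAgent {
	return SubAgent{
		Name:            dc.Name,
		Directory:       s.Directory,
		OperatingSystem: s.OperatingSystem,
		Architecture:    s.Architecture,
		DisplayApps:     slices.Clone(s.DisplayApps),
	}
}

// EqualConfig reports whether two sub-agents describe the same
// configuration, ignoring identity fields such as the ID and auth token.
func (s SubAgent) EqualConfig(other SubAgent) bool {
	return s.Name == other.Name &&
		s.Directory == other.Directory &&
		s.OperatingSystem == other.OperatingSystem &&
		s.Architecture == other.Architecture &&
		slices.Equal(s.DisplayApps, other.DisplayApps)
}

Sorting DisplayApps before assigning them (the slices.Sort call in the hunk) keeps an order-sensitive comparison like slices.Equal stable across refreshes, so the recreation path only fires when the configuration genuinely changes.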
89 changes: 49 additions & 40 deletions agent/agentcontainers/api_test.go
@@ -212,6 +212,7 @@ func (w *fakeWatcher) sendEventWaitNextCalled(ctx context.Context, event fsnotif

// fakeSubAgentClient implements SubAgentClient for testing purposes.
type fakeSubAgentClient struct {
logger slog.Logger
agents map[uuid.UUID]agentcontainers.SubAgent

listErrC chan error // If set, send to return error, close to return nil.
@@ -240,6 +241,7 @@ func (m *fakeSubAgentClient) List(ctx context.Context) ([]agentcontainers.SubAge
}

func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.SubAgent) (agentcontainers.SubAgent, error) {
m.logger.Debug(ctx, "creating sub agent", slog.F("agent", agent))
if m.createErrC != nil {
select {
case <-ctx.Done():
@@ -261,6 +263,7 @@ func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.S
}

func (m *fakeSubAgentClient) Delete(ctx context.Context, id uuid.UUID) error {
m.logger.Debug(ctx, "deleting sub agent", slog.F("id", id.String()))
if m.deleteErrC != nil {
select {
case <-ctx.Done():
@@ -1245,6 +1248,7 @@ func TestAPI(t *testing.T) {
mClock = quartz.NewMock(t)
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
fakeSAC = &fakeSubAgentClient{
logger: logger.Named("fakeSubAgentClient"),
createErrC: make(chan error, 1),
deleteErrC: make(chan error, 1),
}
@@ -1270,7 +1274,7 @@ func TestAPI(t *testing.T) {

mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
}, nil).Times(1 + 3) // 1 initial call + 3 updates.
}, nil).Times(3) // 1 initial call + 2 updates.
gomock.InOrder(
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil),
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
@@ -1315,19 +1319,20 @@ func TestAPI(t *testing.T) {
tickerTrap.MustWait(ctx).MustRelease(ctx)
tickerTrap.Close()

// Ensure we only inject the agent once.
for i := range 3 {
_, aw := mClock.AdvanceNext()
aw.MustWait(ctx)
// Refresh twice to ensure idempotency of agent creation.
err = api.RefreshContainers(ctx)
require.NoError(t, err, "refresh containers should not fail")
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))

t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created))
err = api.RefreshContainers(ctx)
require.NoError(t, err, "refresh containers should not fail")
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))

// Verify agent was created.
require.Len(t, fakeSAC.created, 1)
assert.Equal(t, "test-container", fakeSAC.created[0].Name)
assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory)
assert.Len(t, fakeSAC.deleted, 0)
}
// Verify agent was created.
require.Len(t, fakeSAC.created, 1)
assert.Equal(t, "test-container", fakeSAC.created[0].Name)
assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory)
assert.Len(t, fakeSAC.deleted, 0)

t.Log("Agent injected successfully, now testing reinjection into the same container...")

@@ -1349,32 +1354,23 @@ func TestAPI(t *testing.T) {
// Expect the agent to be reinjected.
mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
}, nil).Times(3) // 3 updates.
}, nil).Times(1) // 1 update.
gomock.InOrder(
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil),
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil),
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil),
)

// Allow agent reinjection to succeed.
testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error {
assert.Equal(t, "pwd", cmd)
assert.Empty(t, args)
return nil
}) // Exec pwd.

// Ensure we only inject the agent once.
for i := range 3 {
_, aw := mClock.AdvanceNext()
aw.MustWait(ctx)
// Agent reinjection will succeed and we will not re-create the
// agent, nor re-probe pwd.
err = api.RefreshContainers(ctx)
require.NoError(t, err, "refresh containers should not fail")
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))

t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created))

// Verify that the agent was reused.
require.Len(t, fakeSAC.created, 1)
assert.Len(t, fakeSAC.deleted, 0)
}
// Verify that the agent was reused.
require.Len(t, fakeSAC.created, 1)
assert.Len(t, fakeSAC.deleted, 0)

t.Log("Agent reinjected successfully, now testing agent deletion and recreation...")

@@ -1383,7 +1379,7 @@ func TestAPI(t *testing.T) {
// Expect the agent to be injected.
mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
}, nil).Times(3) // 3 updates.
}, nil).Times(1) // 1 update.
gomock.InOrder(
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "new-test-container-id").Return(runtime.GOARCH, nil),
mCCLI.EXPECT().ExecAs(gomock.Any(), "new-test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
@@ -1404,7 +1400,20 @@ func TestAPI(t *testing.T) {
})
<-terminated

// Simulate the agent deletion.
fakeDCCLI.readConfig.MergedConfiguration.Customizations.Coder = []agentcontainers.CoderCustomization{
{
DisplayApps: map[codersdk.DisplayApp]bool{
codersdk.DisplayAppSSH: true,
codersdk.DisplayAppWebTerminal: true,
codersdk.DisplayAppVSCodeDesktop: true,
codersdk.DisplayAppVSCodeInsiders: true,
codersdk.DisplayAppPortForward: true,
},
},
}

// Simulate the agent deletion (this happens because the
// devcontainer configuration changed).
testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, nil)
// Expect the agent to be recreated.
testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil)
@@ -1414,13 +1423,9 @@ func TestAPI(t *testing.T) {
return nil
}) // Exec pwd.

// Advance the clock to run updaterLoop.
for i := range 3 {
_, aw := mClock.AdvanceNext()
aw.MustWait(ctx)

t.Logf("Iteration %d: agents created: %d, deleted: %d", i+1, len(fakeSAC.created), len(fakeSAC.deleted))
}
err = api.RefreshContainers(ctx)
require.NoError(t, err, "refresh containers should not fail")
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))

// Verify the agent was deleted and recreated.
require.Len(t, fakeSAC.deleted, 1, "there should be one deleted agent after recreation")
@@ -1453,6 +1458,7 @@ func TestAPI(t *testing.T) {
mClock = quartz.NewMock(t)
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
fakeSAC = &fakeSubAgentClient{
logger: logger.Named("fakeSubAgentClient"),
agents: map[uuid.UUID]agentcontainers.SubAgent{
existingAgentID: existingAgent,
},
@@ -1577,7 +1583,10 @@ func TestAPI(t *testing.T) {
logger = testutil.Logger(t)
mClock = quartz.NewMock(t)
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
fSAC = &fakeSubAgentClient{createErrC: make(chan error, 1)}
fSAC = &fakeSubAgentClient{
logger: logger.Named("fakeSubAgentClient"),
createErrC: make(chan error, 1),
}
fDCCLI = &fakeDevcontainerCLI{
readConfig: agentcontainers.DevcontainerConfig{
MergedConfiguration: agentcontainers.DevcontainerConfiguration{