-
Notifications
You must be signed in to change notification settings - Fork 892
fix: Add reaper to coder agent #2441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
99ab87e
5330e8c
b1cc536
1703e6d
2806ab9
8f2fa52
85d0635
41e0165
757257c
282b583
fb11daf
c66d7be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Package reaper contains logic for reaping subprocesses. It is | ||
// specifically used in the agent to avoid the accumulation of | ||
// zombie processes. | ||
package reaper |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
//go:build !linux | ||
|
||
package reaper | ||
|
||
import "github.com/hashicorp/go-reap" | ||
|
||
// IsChild reports whether the current process is the forked child.
// This build (non-Linux) never forks, so the answer is always false.
func IsChild() bool {
	const forked = false
	return forked
}
|
||
// IsInitProcess reports whether the current process should be treated as
// the init process (PID 1). This build (non-Linux) does not inspect the PID
// and always reports false.
func IsInitProcess() bool {
	const isInit = false
	return isInit
}
|
||
func ForkReap(pids reap.PidCh) error { | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
//go:build linux | ||
|
||
package reaper_test | ||
|
||
import ( | ||
"os" | ||
"os/exec" | ||
"testing" | ||
"time" | ||
|
||
"github.com/hashicorp/go-reap" | ||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/coder/coder/agent/reaper" | ||
) | ||
|
||
func TestReap(t *testing.T) { | ||
t.Parallel() | ||
|
||
// Don't run the reaper test in CI. It does weird | ||
// things like forkexecing which may have unintended | ||
// consequences in CI. | ||
if _, ok := os.LookupEnv("CI"); ok { | ||
t.Skip("Detected CI, skipping reaper tests") | ||
} | ||
|
||
// Because we're forkexecing these tests will try to run twice... | ||
if reaper.IsChild() { | ||
t.Skip("I'm a child!") | ||
} | ||
|
||
// OK checks that's the reaper is successfully reaping | ||
// exited processes and passing the PIDs through the shared | ||
// channel. | ||
t.Run("OK", func(t *testing.T) { | ||
pids := make(reap.PidCh, 1) | ||
err := reaper.ForkReap(pids) | ||
require.NoError(t, err) | ||
|
||
cmd := exec.Command("tail", "-f", "/dev/null") | ||
err = cmd.Start() | ||
require.NoError(t, err) | ||
|
||
cmd2 := exec.Command("tail", "-f", "/dev/null") | ||
err = cmd2.Start() | ||
require.NoError(t, err) | ||
|
||
err = cmd.Process.Kill() | ||
require.NoError(t, err) | ||
|
||
err = cmd2.Process.Kill() | ||
require.NoError(t, err) | ||
|
||
expectedPIDs := []int{cmd.Process.Pid, cmd2.Process.Pid} | ||
|
||
deadline := time.NewTimer(time.Second * 5) | ||
for i := 0; i < len(expectedPIDs); i++ { | ||
select { | ||
case <-deadline.C: | ||
t.Fatalf("Timed out waiting for process") | ||
case pid := <-pids: | ||
require.Contains(t, expectedPIDs, pid) | ||
} | ||
} | ||
}) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
//go:build linux | ||
|
||
package reaper | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"syscall" | ||
|
||
"github.com/hashicorp/go-reap" | ||
"golang.org/x/xerrors" | ||
) | ||
|
||
// agentEnvMark is a simple environment variable that we use as a marker | ||
// to indicate that the process is a child as opposed to the reaper. | ||
// Since we are forkexec'ing we need to be able to differentiate between | ||
// the two to avoid fork bombing ourselves. | ||
const agentEnvMark = "CODER_DO_NOT_REAP" | ||
|
||
// IsChild returns true if we're the forked process. | ||
func IsChild() bool { | ||
return os.Getenv(agentEnvMark) != "" | ||
} | ||
|
||
// IsInitProcess reports whether the current process is running as PID 1.
func IsInitProcess() bool {
	const initPID = 1
	self := os.Getpid()
	return self == initPID
}
|
||
// ForkReap spawns a goroutine that reaps children. In order to avoid | ||
// complications with spawning `exec.Commands` in the same process that | ||
// is reaping, we forkexec a child process. This prevents a race between | ||
// the reaper and an exec.Command waiting for its process to complete. | ||
// The provided 'pids' channel may be nil if the caller does not care about the | ||
// reaped children PIDs. | ||
func ForkReap(pids reap.PidCh) error { | ||
// Check if the process is the parent or the child. | ||
// If it's the child we want to skip attempting to reap. | ||
if IsChild() { | ||
return nil | ||
} | ||
|
||
go reap.ReapChildren(pids, nil, nil, nil) | ||
|
||
args := os.Args | ||
// This is simply done to help identify the real agent process | ||
// when viewing in something like 'ps'. | ||
args = append(args, "#Agent") | ||
|
||
pwd, err := os.Getwd() | ||
if err != nil { | ||
return xerrors.Errorf("get wd: %w", err) | ||
} | ||
|
||
pattrs := &syscall.ProcAttr{ | ||
Dir: pwd, | ||
// Add our marker for identifying the child process. | ||
Env: append(os.Environ(), fmt.Sprintf("%s=true", agentEnvMark)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we filter this out from sub-process envs (i.e. when we launch shells)? It probably doesn't matter for correctness, more of a cleanliness thing. We could also add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that it's certainly cleaner to filter it out. The only reason I didn't is because I did not want implementation details of the I think if it becomes an issue we devote some time to figuring out something unobtrusive. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I already merged this but I think a cleaner solution is to have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense and I agree that's a clean solution @sreya. 👍🏻 |
||
Sys: &syscall.SysProcAttr{ | ||
Setsid: true, | ||
}, | ||
Files: []uintptr{ | ||
uintptr(syscall.Stdin), | ||
uintptr(syscall.Stdout), | ||
uintptr(syscall.Stderr), | ||
}, | ||
} | ||
|
||
//#nosec G204 | ||
pid, _ := syscall.ForkExec(args[0], args, pattrs) | ||
|
||
var wstatus syscall.WaitStatus | ||
_, err = syscall.Wait4(pid, &wstatus, 0, nil) | ||
for xerrors.Is(err, syscall.EINTR) { | ||
_, err = syscall.Wait4(pid, &wstatus, 0, nil) | ||
} | ||
|
||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ import ( | |
"net/url" | ||
"os" | ||
"path/filepath" | ||
"runtime" | ||
"time" | ||
|
||
"cloud.google.com/go/compute/metadata" | ||
|
@@ -17,6 +18,7 @@ import ( | |
"cdr.dev/slog/sloggers/sloghuman" | ||
|
||
"github.com/coder/coder/agent" | ||
"github.com/coder/coder/agent/reaper" | ||
"github.com/coder/coder/cli/cliflag" | ||
"github.com/coder/coder/codersdk" | ||
"github.com/coder/retry" | ||
|
@@ -50,6 +52,23 @@ func workspaceAgent() *cobra.Command { | |
} | ||
defer logWriter.Close() | ||
logger := slog.Make(sloghuman.Sink(cmd.ErrOrStderr()), sloghuman.Sink(logWriter)).Leveled(slog.LevelDebug) | ||
|
||
isLinux := runtime.GOOS == "linux" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason to limit this to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was causing things to fail when it ran on macos. I don't think limiting it to |
||
|
||
// Spawn a reaper so that we don't accumulate a ton | ||
// of zombie processes. | ||
if reaper.IsInitProcess() && !reaper.IsChild() && isLinux { | ||
logger.Info(cmd.Context(), "spawning reaper process") | ||
err := reaper.ForkReap(nil) | ||
if err != nil { | ||
logger.Error(cmd.Context(), "failed to reap", slog.Error(err)) | ||
return xerrors.Errorf("fork reap: %w", err) | ||
} | ||
|
||
logger.Info(cmd.Context(), "reaper process exiting") | ||
return nil | ||
} | ||
|
||
client := codersdk.New(coderURL) | ||
|
||
if pprofEnabled { | ||
|
Uh oh!
There was an error while loading. Please reload this page.