Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 99ab87e

Browse files
committed
fix: Add reaper to coder agent
- The coder agent runs as PID 1 in some of our Docker workspaces. In such cases it is the responsibility of the init process to reap dead processes. Failing to do so can result in an inability to create new processes by running out of PIDs. This PR adds a reaper to our agent that is only spawned if it detects that it is PID 1.
1 parent c9691ea commit 99ab87e

File tree

5 files changed

+136
-1
lines changed

5 files changed

+136
-1
lines changed

agent/reaper/reaper.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package reaper
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"syscall"
7+
8+
"github.com/hashicorp/go-reap"
9+
"golang.org/x/xerrors"
10+
)
11+
12+
// agentEnvMark is a simple environment variable that we use as a marker
13+
// to indicate that the process is a child as opposed to the reaper.
14+
// Since we are forkexec'ing we need to be able to differentiate between
15+
// the two to avoid fork bombing ourselves.
16+
const agentEnvMark = "CODER_AGENT"
17+
18+
// IsChild returns true if we're the forked process.
19+
func IsChild() bool {
20+
return os.Getenv(agentEnvMark) != ""
21+
}
22+
23+
// ForkReap spawns a goroutine that reaps children. In order to avoid
24+
// complications with spawning `exec.Commands` in the same process that
25+
// is reaping, we forkexec a child process. This prevents a race between
26+
// the reaper and an exec.Command waiting for its process to complete.
27+
func ForkReap(pids reap.PidCh) error {
28+
// Check if the process is the parent or the child.
29+
// If it's the child we want to skip attempting to reap.
30+
if !IsChild() {
31+
go reap.ReapChildren(pids, nil, nil, nil)
32+
33+
args := os.Args
34+
// This is simply done to help identify the real agent process
35+
// when viewing in something like 'ps'.
36+
args = append(args, "#Agent")
37+
38+
pwd, err := os.Getwd()
39+
if err != nil {
40+
return xerrors.Errorf("get wd: %w", err)
41+
}
42+
43+
pattrs := &syscall.ProcAttr{
44+
Dir: pwd,
45+
// Add our marker for identifying the child process.
46+
Env: append(os.Environ(), fmt.Sprintf("%s=true", agentEnvMark)),
47+
Sys: &syscall.SysProcAttr{
48+
Setsid: true,
49+
},
50+
Files: []uintptr{
51+
uintptr(syscall.Stdin),
52+
uintptr(syscall.Stdout),
53+
uintptr(syscall.Stderr),
54+
},
55+
}
56+
57+
pid, _ := syscall.ForkExec(args[0], args, pattrs)
58+
59+
var wstatus syscall.WaitStatus
60+
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
61+
for xerrors.Is(err, syscall.EINTR) {
62+
_, err = syscall.Wait4(pid, &wstatus, 0, nil)
63+
}
64+
return nil
65+
}
66+
67+
return nil
68+
}

agent/reaper/reaper_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package reaper_test
2+
3+
import (
4+
"os/exec"
5+
"testing"
6+
"time"
7+
8+
"github.com/hashicorp/go-reap"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/coder/coder/agent/reaper"
12+
)
13+
14+
func TestReap(t *testing.T) {
15+
t.Parallel()
16+
17+
// Because we're forkexecing these tests will try to run twice...
18+
if reaper.IsChild() {
19+
t.Skip("I'm a child!")
20+
}
21+
22+
t.Run("OK", func(t *testing.T) {
23+
pids := make(reap.PidCh, 1)
24+
err := reaper.ForkReap(pids)
25+
require.NoError(t, err)
26+
27+
cmd := exec.Command("sleep", "5")
28+
err = cmd.Start()
29+
require.NoError(t, err)
30+
31+
cmd2 := exec.Command("sleep", "5")
32+
err = cmd2.Start()
33+
require.NoError(t, err)
34+
35+
err = cmd.Process.Kill()
36+
require.NoError(t, err)
37+
38+
err = cmd2.Process.Kill()
39+
require.NoError(t, err)
40+
41+
expectedPIDs := []int{cmd.Process.Pid, cmd2.Process.Pid}
42+
43+
deadline := time.NewTimer(time.Second * 5)
44+
select {
45+
case <-deadline.C:
46+
t.Fatalf("Timed out waiting for process")
47+
case pid := <-pids:
48+
require.Contains(t, expectedPIDs, pid)
49+
}
50+
})
51+
}

cli/agent.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"cdr.dev/slog/sloggers/sloghuman"
1818

1919
"github.com/coder/coder/agent"
20+
"github.com/coder/coder/agent/reaper"
2021
"github.com/coder/coder/cli/cliflag"
2122
"github.com/coder/coder/codersdk"
2223
"github.com/coder/retry"
@@ -35,6 +36,16 @@ func workspaceAgent() *cobra.Command {
3536
// This command isn't useful to manually execute.
3637
Hidden: true,
3738
RunE: func(cmd *cobra.Command, args []string) error {
39+
// Spawn a reaper so that we don't accumulate a ton
40+
// of zombie processes.
41+
if !reaper.IsChild() {
42+
err := reaper.ForkReap(nil)
43+
if err != nil {
44+
return xerrors.Errorf("fork reap: %w", err)
45+
}
46+
return nil
47+
}
48+
3849
rawURL, err := cmd.Flags().GetString(varAgentURL)
3950
if err != nil {
4051
return xerrors.Errorf("CODER_AGENT_URL must be set: %w", err)

go.mod

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,10 @@ require (
134134
storj.io/drpc v0.0.30
135135
)
136136

137-
require github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
137+
require (
138+
github.com/hashicorp/go-reap v0.0.0-20170704170343-bf58d8a43e7b // indirect
139+
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
140+
)
138141

139142
require (
140143
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,8 @@ github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHh
899899
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
900900
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
901901
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
902+
github.com/hashicorp/go-reap v0.0.0-20170704170343-bf58d8a43e7b h1:3GrpnZQBxcMj1gCXQLelfjCT1D5MPGTuGMKHVzSIH6A=
903+
github.com/hashicorp/go-reap v0.0.0-20170704170343-bf58d8a43e7b/go.mod h1:qIFzeFcJU3OIFk/7JreWXcUjFmcCaeHTH9KoNyHYVCs=
902904
github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU=
903905
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
904906
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=

0 commit comments

Comments
 (0)