From 4e9fe62d9b4537b03028136239156c660d006ebc Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Fri, 18 Feb 2022 22:30:12 +0000 Subject: [PATCH 1/3] fix: Leaking yamux session after HTTP handler is closed Closes #317. The httptest server cancels the context after the connection is closed, but if a connection takes a long time to close, the request would never end. This applies a context to the entire listener that cancels on test cleanup. After discussion with @bryphe-coder, reducing the parallel limit on Windows is likely to reduce failures as well. --- .github/workflows/coder.yaml | 10 +++++++--- coderd/coderdtest/coderdtest.go | 9 ++++++++- pty/pty_other.go | 7 +++++++ pty/start_other.go | 8 ++++++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.github/workflows/coder.yaml b/.github/workflows/coder.yaml index f75fee6d7aa5f..ca4e6b39232bf 100644 --- a/.github/workflows/coder.yaml +++ b/.github/workflows/coder.yaml @@ -150,9 +150,13 @@ jobs: terraform_wrapper: false - name: Test with Mock Database + shell: bash + env: + GOCOUNT: ${{ runner.os == 'Windows' && 3 || 5 }} + GOMAXPROCS: ${{ runner.os == 'Windows' && 1 || 2 }} run: gotestsum --junitfile="gotests.xml" --packages="./..." -- -covermode=atomic -coverprofile="gotests.coverage" - -timeout=3m -count=5 -race -short -parallel=2 + -timeout=3m -count=$GOCOUNT -race -short -failfast - name: Upload DataDog Trace if: (success() || failure()) && github.actor != 'dependabot[bot]' @@ -166,10 +170,10 @@ jobs: if: runner.os == 'Linux' run: DB=true gotestsum --junitfile="gotests.xml" --packages="./..." -- -covermode=atomic -coverprofile="gotests.coverage" -timeout=3m - -count=1 -race -parallel=2 + -count=1 -race -parallel=2 -failfast - name: Upload DataDog Trace - if: (success() || failure()) && github.actor != 'dependabot[bot]' + if: (success() || failure()) && github.actor != 'dependabot[bot]' && runner.os == 'Linux' env: DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }} DD_DATABASE: postgresql diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index dc7f782c83748..889f6241a442a 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "io" + "net" "net/http/httptest" "net/url" "os" @@ -59,7 +60,13 @@ func New(t *testing.T) *codersdk.Client { Database: db, Pubsub: pubsub, }) - srv := httptest.NewServer(handler) + srv := httptest.NewUnstartedServer(handler) + srv.Config.BaseContext = func(_ net.Listener) context.Context { + ctx, cancelFunc := context.WithCancel(context.Background()) + t.Cleanup(cancelFunc) + return ctx + } + srv.Start() serverURL, err := url.Parse(srv.URL) require.NoError(t, err) t.Cleanup(srv.Close) diff --git a/pty/pty_other.go b/pty/pty_other.go index dbdda408b1365..e2520a2387116 100644 --- a/pty/pty_other.go +++ b/pty/pty_other.go @@ -6,6 +6,7 @@ package pty import ( "io" "os" + "sync" "github.com/creack/pty" ) @@ -23,6 +24,7 @@ func newPty() (PTY, error) { } type otherPty struct { + mutex sync.Mutex pty, tty *os.File } @@ -41,6 +43,8 @@ func (p *otherPty) Output() io.ReadWriter { } func (p *otherPty) Resize(cols uint16, rows uint16) error { + p.mutex.Lock() + defer p.mutex.Unlock() return pty.Setsize(p.tty, &pty.Winsize{ Rows: rows, Cols: cols, @@ -48,6 +52,9 @@ func (p *otherPty) Resize(cols uint16, rows uint16) error { } func (p *otherPty) Close() error { + p.mutex.Lock() + defer p.mutex.Unlock() + err := p.pty.Close() if err != nil { return err diff --git a/pty/start_other.go b/pty/start_other.go index 103f55202efe3..2f1a74633130e 100644 --- a/pty/start_other.go +++ b/pty/start_other.go @@ -8,13 +8,17 @@ import ( "syscall" "github.com/creack/pty" + "golang.org/x/xerrors" ) func startPty(cmd *exec.Cmd) (PTY, error) { ptty, tty, err := pty.Open() if err != nil { - return nil, err + return nil, xerrors.Errorf("open: %w", err) } + defer func() { + _ = tty.Close() + }() cmd.SysProcAttr = &syscall.SysProcAttr{ Setsid: true, Setctty: true, @@ -25,7 +29,7 @@ func startPty(cmd *exec.Cmd) (PTY, error) { err = cmd.Start() if err != nil { _ = ptty.Close() - return nil, err + return nil, xerrors.Errorf("start: %w", err) } return &otherPty{ pty: ptty, From 328852a512a00e6b65a25f9f08f7c6169159a1bf Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sat, 19 Feb 2022 00:52:35 +0000 Subject: [PATCH 2/3] Switch to windows-2022 to improve decompression --- .github/workflows/coder.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coder.yaml b/.github/workflows/coder.yaml index ca4e6b39232bf..ddb897e3546e0 100644 --- a/.github/workflows/coder.yaml +++ b/.github/workflows/coder.yaml @@ -122,7 +122,7 @@ jobs: os: - ubuntu-latest - macos-latest - - windows-latest + - windows-2022 steps: - uses: actions/checkout@v2 From 69aec02c66a4531f55b28833be587a7a99268e22 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Sat, 19 Feb 2022 01:13:35 +0000 Subject: [PATCH 3/3] Invalidate cache on matrix OS --- .github/workflows/coder.yaml | 4 ++-- peer/conn.go | 15 +++++++++------ provisionerd/provisionerd.go | 3 +++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/coder.yaml b/.github/workflows/coder.yaml index ddb897e3546e0..08a63aad9807f 100644 --- a/.github/workflows/coder.yaml +++ b/.github/workflows/coder.yaml @@ -138,9 +138,9 @@ jobs: ~/.cache/go-build ~/Library/Caches/go-build %LocalAppData%\go-build - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + key: ${{ matrix.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | - ${{ runner.os }}-go- + ${{ matrix.os }}-go- - run: go install gotest.tools/gotestsum@latest diff --git a/peer/conn.go b/peer/conn.go index 3782c87aea06f..6666398a69da7 100644 --- a/peer/conn.go +++ b/peer/conn.go @@ -183,12 +183,15 @@ func (c *Conn) init() error { } }) c.rtc.OnConnectionStateChange(func(peerConnectionState webrtc.PeerConnectionState) { - if c.isClosed() { - // Make sure we don't log after Close() has been called. - return - } - c.opts.Logger.Debug(context.Background(), "rtc connection updated", - slog.F("state", peerConnectionState)) + go func() { + c.closeMutex.Lock() + defer c.closeMutex.Unlock() + if c.isClosed() { + return + } + c.opts.Logger.Debug(context.Background(), "rtc connection updated", + slog.F("state", peerConnectionState)) + }() switch peerConnectionState { case webrtc.PeerConnectionStateDisconnected: diff --git a/provisionerd/provisionerd.go b/provisionerd/provisionerd.go index 4c438d7360f66..b8449a38fbb05 100644 --- a/provisionerd/provisionerd.go +++ b/provisionerd/provisionerd.go @@ -110,10 +110,13 @@ func (p *provisionerDaemon) connect(ctx context.Context) { if errors.Is(err, context.Canceled) { return } + p.closeMutex.Lock() if p.isClosed() { + p.closeMutex.Unlock() return } p.opts.Logger.Warn(context.Background(), "failed to dial", slog.Error(err)) + p.closeMutex.Unlock() continue } p.opts.Logger.Debug(context.Background(), "connected")