diff --git a/agent/agent.go b/agent/agent.go index 1efb3e88f3dbe..8f42436f015e3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1104,6 +1104,18 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) { var wg sync.WaitGroup defer func() { defer wg.Wait() + + // If we call Close() before the output is read, the output + // will be lost. We set a deadline on the read so that we can + // wait for the output to be read before closing the PTY. + // OpenSSH also uses a 100ms timeout for reading from the PTY. + if dlErr := ptty.Output().Reader.SetReadDeadline(time.Now().Add(100 * time.Millisecond)); dlErr != nil { + a.logger.Warn(ctx, "failed to set read deadline, pty output may be lost", slog.Error(dlErr)) + } else { + // If we successfully set the deadline, we can immediately + // wait for the output copy goroutine to exit. + wg.Wait() + } closeErr := ptty.Close() if closeErr != nil { a.logger.Warn(ctx, "failed to close tty", slog.Error(closeErr)) @@ -1131,8 +1143,7 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) { // output being lost. To avoid this, we wait for the output copy to // start before waiting for the command to exit. This ensures that the // output copy goroutine will be scheduled before calling close on the - // pty. There is still a risk of data loss if a command produces a lot - // of output, see TestAgent_Session_TTY_HugeOutputIsNotLost (skipped). + // pty. This is a safety-net in case SetReadDeadline doesn't work. outputCopyStarted := make(chan struct{}) ptyOutput := func() io.Reader { defer close(outputCopyStarted) diff --git a/agent/agent_test.go b/agent/agent_test.go index 10ccbe51242a8..1b8da79be0623 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -410,14 +410,11 @@ func TestAgent_Session_TTY_HugeOutputIsNotLost(t *testing.T) { // it seems like it could be either. t.Skip("ConPTY appears to be inconsistent on Windows.") } - t.Skip("This test proves we have a bug where parts of large output on a PTY can be lost after the command exits, skipped to avoid test failures.") - - // This test is here to prevent prove we have a bug where quickly executing - // commands (with TTY) don't flush their output to the SSH session. This is - // due to the pty being closed before all the output has been copied, but - // protecting against this requires a non-trivial rewrite of the output - // processing (or figuring out a way to put the pty in a mode where this - // does not happen). + + // This test is here to prevent regressions where the PTY for commands is + // closed before the buffer is consumed. + // + // See: https://github.com/coder/coder/issues/6656 ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() //nolint:dogsled