Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6a1e7ee

Browse files
authored
feat: add file logger to coder ssh (coder#7646)
* coder ssh can log to file (Signed-off-by: Spike Curtis <[email protected]>)
* Update golden file (Signed-off-by: Spike Curtis <[email protected]>)
* generate CLI docs (Signed-off-by: Spike Curtis <[email protected]>)
* Fix imports, typo (Signed-off-by: Spike Curtis <[email protected]>)
* log more things! (Signed-off-by: Spike Curtis <[email protected]>)
--------- Signed-off-by: Spike Curtis <[email protected]>
1 parent a903d7c commit 6a1e7ee

File tree

4 files changed

+226
-76
lines changed

4 files changed

+226
-76
lines changed

cli/ssh.go

+157-76
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"net/url"
1111
"os"
1212
"os/exec"
13+
"path"
1314
"path/filepath"
1415
"strings"
1516
"time"
@@ -23,6 +24,9 @@ import (
2324
"golang.org/x/term"
2425
"golang.org/x/xerrors"
2526

27+
"cdr.dev/slog"
28+
"cdr.dev/slog/sloggers/sloghuman"
29+
2630
"github.com/coder/coder/agent/agentssh"
2731
"github.com/coder/coder/cli/clibase"
2832
"github.com/coder/coder/cli/cliui"
@@ -46,6 +50,8 @@ func (r *RootCmd) ssh() *clibase.Cmd {
4650
identityAgent string
4751
wsPollInterval time.Duration
4852
noWait bool
53+
logDir string
54+
logToFile bool
4955
)
5056
client := new(codersdk.Client)
5157
cmd := &clibase.Cmd{
@@ -56,10 +62,44 @@ func (r *RootCmd) ssh() *clibase.Cmd {
5662
clibase.RequireNArgs(1),
5763
r.InitClient(client),
5864
),
59-
Handler: func(inv *clibase.Invocation) error {
65+
Handler: func(inv *clibase.Invocation) (retErr error) {
6066
ctx, cancel := context.WithCancel(inv.Context())
6167
defer cancel()
6268

69+
logger := slog.Make() // empty logger
70+
defer func() {
71+
if retErr != nil {
72+
// catch and log all returned errors so we see them in the
73+
// log file (if there is one)
74+
logger.Error(ctx, "command exit", slog.Error(retErr))
75+
}
76+
}()
77+
if logToFile {
78+
// we need a way to ensure different ssh invocations don't clobber
79+
// each other's logs. Date-time strings will likely have collisions
80+
// in unit tests and/or scripts unless we extend precision out to
81+
// sub-millisecond, which seems unwieldy. A simple 5-character random
82+
// string will do it, since the operating system already tracks
83+
// dates and times for file IO.
84+
qual, err := cryptorand.String(5)
85+
if err != nil {
86+
return xerrors.Errorf("generate random qualifier: %w", err)
87+
}
88+
logPth := path.Join(logDir, fmt.Sprintf("coder-ssh-%s.log", qual))
89+
logFile, err := os.Create(logPth)
90+
if err != nil {
91+
return xerrors.Errorf("error opening %s for logging: %w", logPth, err)
92+
}
93+
logger = slog.Make(sloghuman.Sink(logFile))
94+
defer logFile.Close()
95+
if r.verbose {
96+
logger = logger.Leveled(slog.LevelDebug)
97+
}
98+
99+
// log HTTP requests
100+
client.Logger = logger
101+
}
102+
63103
workspace, workspaceAgent, err := getWorkspaceAndAgent(ctx, inv, client, codersdk.Me, inv.Args[0])
64104
if err != nil {
65105
return err
@@ -92,110 +132,71 @@ func (r *RootCmd) ssh() *clibase.Cmd {
92132
// We don't print the error because cliui.Agent does that for us.
93133
}
94134

95-
conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, &codersdk.DialWorkspaceAgentOptions{})
135+
conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, &codersdk.DialWorkspaceAgentOptions{
136+
Logger: logger,
137+
})
96138
if err != nil {
97-
return err
139+
return xerrors.Errorf("dial agent: %w", err)
98140
}
99141
defer conn.Close()
100142
conn.AwaitReachable(ctx)
101143
stopPolling := tryPollWorkspaceAutostop(ctx, client, workspace)
102144
defer stopPolling()
103145

104-
// Enure connection is closed if the context is canceled or
105-
// the workspace reaches the stopped state.
106-
//
107-
// Watching the stopped state is a work-around for cases
108-
// where the agent is not gracefully shut down and the
109-
// connection is left open. If, for instance, the networking
110-
// is stopped before the agent is shut down, the disconnect
111-
// will usually not propagate.
112-
//
113-
// See: https://github.com/coder/coder/issues/6180
114-
watchAndClose := func(closer func() error) {
115-
// Ensure session is ended on both context cancellation
116-
// and workspace stop.
117-
defer func() {
118-
_ = closer()
119-
}()
120-
121-
startWatchLoop:
122-
for {
123-
// (Re)connect to the coder server and watch workspace events.
124-
var wsWatch <-chan codersdk.Workspace
125-
var err error
126-
for r := retry.New(time.Second, 15*time.Second); r.Wait(ctx); {
127-
wsWatch, err = client.WatchWorkspace(ctx, workspace.ID)
128-
if err == nil {
129-
break
130-
}
131-
if ctx.Err() != nil {
132-
return
133-
}
134-
}
135-
136-
for {
137-
select {
138-
case <-ctx.Done():
139-
return
140-
case w, ok := <-wsWatch:
141-
if !ok {
142-
continue startWatchLoop
143-
}
144-
145-
// Transitioning to stop or delete could mean that
146-
// the agent will still gracefully stop. If a new
147-
// build is starting, there's no reason to wait for
148-
// the agent, it should be long gone.
149-
if workspace.LatestBuild.ID != w.LatestBuild.ID && w.LatestBuild.Transition == codersdk.WorkspaceTransitionStart {
150-
return
151-
}
152-
// Note, we only react to the stopped state here because we
153-
// want to give the agent a chance to gracefully shut down
154-
// during "stopping".
155-
if w.LatestBuild.Status == codersdk.WorkspaceStatusStopped {
156-
return
157-
}
158-
}
159-
}
160-
}
161-
}
162-
163146
if stdio {
164147
rawSSH, err := conn.SSH(ctx)
165148
if err != nil {
166-
return err
149+
return xerrors.Errorf("connect SSH: %w", err)
167150
}
168151
defer rawSSH.Close()
169-
go watchAndClose(rawSSH.Close)
152+
go watchAndClose(ctx, rawSSH.Close, logger, client, workspace)
170153

171154
go func() {
172155
// Ensure stdout copy closes incase stdin is closed
173156
// unexpectedly. Typically we wouldn't worry about
174157
// this since OpenSSH should kill the proxy command.
175158
defer rawSSH.Close()
176159

177-
_, _ = io.Copy(rawSSH, inv.Stdin)
160+
_, err := io.Copy(rawSSH, inv.Stdin)
161+
if err != nil {
162+
logger.Error(ctx, "copy stdin error", slog.Error(err))
163+
} else {
164+
logger.Debug(ctx, "copy stdin complete")
165+
}
178166
}()
179-
_, _ = io.Copy(inv.Stdout, rawSSH)
167+
_, err = io.Copy(inv.Stdout, rawSSH)
168+
if err != nil {
169+
logger.Error(ctx, "copy stdout error", slog.Error(err))
170+
} else {
171+
logger.Debug(ctx, "copy stdout complete")
172+
}
180173
return nil
181174
}
182175

183176
sshClient, err := conn.SSHClient(ctx)
184177
if err != nil {
185-
return err
178+
return xerrors.Errorf("ssh client: %w", err)
186179
}
187180
defer sshClient.Close()
188181

189182
sshSession, err := sshClient.NewSession()
190183
if err != nil {
191-
return err
184+
return xerrors.Errorf("ssh session: %w", err)
192185
}
193186
defer sshSession.Close()
194-
go watchAndClose(func() error {
195-
_ = sshSession.Close()
196-
_ = sshClient.Close()
197-
return nil
198-
})
187+
go watchAndClose(
188+
ctx,
189+
func() error {
190+
err := sshSession.Close()
191+
logger.Debug(ctx, "session close", slog.Error(err))
192+
err = sshClient.Close()
193+
logger.Debug(ctx, "client close", slog.Error(err))
194+
return nil
195+
},
196+
logger,
197+
client,
198+
workspace,
199+
)
199200

200201
if identityAgent == "" {
201202
identityAgent = os.Getenv("SSH_AUTH_SOCK")
@@ -257,7 +258,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
257258

258259
err = sshSession.RequestPty("xterm-256color", 128, 128, gossh.TerminalModes{})
259260
if err != nil {
260-
return err
261+
return xerrors.Errorf("request pty: %w", err)
261262
}
262263

263264
sshSession.Stdin = inv.Stdin
@@ -266,7 +267,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
266267

267268
err = sshSession.Shell()
268269
if err != nil {
269-
return err
270+
return xerrors.Errorf("start shell: %w", err)
270271
}
271272

272273
// Put cancel at the top of the defer stack to initiate
@@ -289,7 +290,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
289290
if errors.Is(err, &gossh.ExitMissingError{}) {
290291
return xerrors.New("SSH connection ended unexpectedly")
291292
}
292-
return err
293+
return xerrors.Errorf("session ended: %w", err)
293294
}
294295

295296
return nil
@@ -335,10 +336,90 @@ func (r *RootCmd) ssh() *clibase.Cmd {
335336
Description: "Specifies whether to wait for a workspace to become ready before logging in (only applicable when the login before ready option has not been enabled). Note that the workspace agent may still be in the process of executing the startup script and the workspace may be in an incomplete state.",
336337
Value: clibase.BoolOf(&noWait),
337338
},
339+
{
340+
Flag: "log-dir",
341+
Default: os.TempDir(),
342+
Description: "Specify the location for the log files.",
343+
Env: "CODER_SSH_LOG_DIR",
344+
Value: clibase.StringOf(&logDir),
345+
},
346+
{
347+
Flag: "log-to-file",
348+
FlagShorthand: "l",
349+
Env: "CODER_SSH_LOG_TO_FILE",
350+
Description: "Enable diagnostic logging to file.",
351+
Value: clibase.BoolOf(&logToFile),
352+
},
338353
}
339354
return cmd
340355
}
341356

357+
// watchAndClose ensures closer is called if the context is canceled or
358+
// the workspace reaches the stopped state.
359+
//
360+
// Watching the stopped state is a work-around for cases
361+
// where the agent is not gracefully shut down and the
362+
// connection is left open. If, for instance, the networking
363+
// is stopped before the agent is shut down, the disconnect
364+
// will usually not propagate.
365+
//
366+
// See: https://github.com/coder/coder/issues/6180
//
// The function blocks until one of its exit conditions is met, which is why
// the callers in this file start it with `go`. It returns, and thereby
// invokes closer once via the deferred call, when:
//   - ctx is done;
//   - a watched workspace update reports a latest build whose ID differs from
//     workspace.LatestBuild.ID and whose transition is "start"; or
//   - a watched workspace update reports the latest build status as stopped.
//
// An error returned by closer is not propagated; it is only written to
// logger. workspace is used as the baseline snapshot for the build-ID
// comparison and for the ID passed to client.WatchWorkspace.
367+
func watchAndClose(ctx context.Context, closer func() error, logger slog.Logger, client *codersdk.Client, workspace codersdk.Workspace) {
368+
// Ensure session is ended on both context cancellation
369+
// and workspace stop. Because this is deferred, it runs on every
// return path below.
370+
defer func() {
371+
err := closer()
372+
if err != nil {
373+
logger.Error(ctx, "error closing session", slog.Error(err))
374+
}
375+
}()
376+
377+
// Outer loop: each iteration (re)establishes the workspace watch.
startWatchLoop:
378+
for {
379+
logger.Debug(ctx, "(re)connecting to the coder server to watch workspace events.")
380+
var wsWatch <-chan codersdk.Workspace
381+
var err error
382+
// Keep trying to open the watch until it succeeds or r.Wait reports that
// we should stop. NOTE(review): presumably retry.New(1s, 15s) backs off
// between those bounds and r.Wait returns false once ctx is done —
// confirm against the retry package.
for r := retry.New(time.Second, 15*time.Second); r.Wait(ctx); {
383+
wsWatch, err = client.WatchWorkspace(ctx, workspace.ID)
384+
if err == nil {
385+
break
386+
}
387+
// The watch error itself is not logged; only a canceled/expired
// context ends the attempt early.
if ctx.Err() != nil {
388+
logger.Info(ctx, "context expired", slog.Error(ctx.Err()))
389+
return
390+
}
391+
}
392+
393+
// Inner loop: consume workspace updates until an exit condition is met.
// If the retry loop above ended without a successful watch, wsWatch is
// still nil; receiving from a nil channel blocks, so only the ctx.Done
// case can fire in that situation.
for {
394+
select {
395+
case <-ctx.Done():
396+
logger.Info(ctx, "context expired", slog.Error(ctx.Err()))
397+
return
398+
case w, ok := <-wsWatch:
399+
// A closed watch channel means the stream ended; go back and
// reconnect rather than treating it as a reason to close.
if !ok {
400+
continue startWatchLoop
401+
}
402+
403+
// Transitioning to stop or delete could mean that
404+
// the agent will still gracefully stop. If a new
405+
// build is starting, there's no reason to wait for
406+
// the agent, it should be long gone.
407+
if workspace.LatestBuild.ID != w.LatestBuild.ID && w.LatestBuild.Transition == codersdk.WorkspaceTransitionStart {
408+
logger.Info(ctx, "new build started")
409+
return
410+
}
411+
// Note, we only react to the stopped state here because we
412+
// want to give the agent a chance to gracefully shut down
413+
// during "stopping".
414+
if w.LatestBuild.Status == codersdk.WorkspaceStatusStopped {
415+
logger.Info(ctx, "workspace stopped")
416+
return
417+
}
418+
}
419+
}
420+
}
421+
}
422+
342423
// getWorkspaceAgent returns the workspace and agent selected using either the
343424
// `<workspace>[.<agent>]` syntax via `in` or picks a random workspace and agent
344425
// if `shuffle` is true.

cli/ssh_test.go

+44
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,50 @@ func TestSSH(t *testing.T) {
407407
pty.WriteLine("exit")
408408
<-cmdDone
409409
})
410+
411+
t.Run("FileLogging", func(t *testing.T) {
412+
t.Parallel()
413+
414+
dir := t.TempDir()
415+
416+
client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
417+
inv, root := clitest.New(t, "ssh", workspace.Name, "-l", "--log-dir", dir)
418+
clitest.SetupConfig(t, client, root)
419+
pty := ptytest.New(t).Attach(inv)
420+
421+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
422+
defer cancel()
423+
424+
cmdDone := tGo(t, func() {
425+
err := inv.WithContext(ctx).Run()
426+
assert.NoError(t, err)
427+
})
428+
pty.ExpectMatch("Waiting")
429+
430+
agentClient := agentsdk.New(client.URL)
431+
agentClient.SetSessionToken(agentToken)
432+
agentCloser := agent.New(agent.Options{
433+
Client: agentClient,
434+
Logger: slogtest.Make(t, nil).Named("agent"),
435+
})
436+
defer func() {
437+
_ = agentCloser.Close()
438+
}()
439+
440+
// Shells on Mac, Windows, and Linux all exit shells with the "exit" command.
441+
pty.WriteLine("exit")
442+
<-cmdDone
443+
444+
entries, err := os.ReadDir(dir)
445+
require.NoError(t, err)
446+
for _, e := range entries {
447+
t.Logf("logdir entry: %s", e.Name())
448+
if strings.HasPrefix(e.Name(), "coder-ssh") {
449+
return
450+
}
451+
}
452+
t.Fatal("failed to find ssh logfile")
453+
})
410454
}
411455

412456
//nolint:paralleltest // This test uses t.Setenv, parent test MUST NOT be parallel.

cli/testdata/coder_ssh_--help.golden

+6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ Start a shell into a workspace
1818
Specifies which identity agent to use (overrides $SSH_AUTH_SOCK),
1919
forward agent must also be enabled.
2020

21+
--log-dir string, $CODER_SSH_LOG_DIR (default: /tmp)
22+
Specify the location for the log files.
23+
24+
-l, --log-to-file bool, $CODER_SSH_LOG_TO_FILE
25+
Enable diagnostic logging to file.
26+
2127
--no-wait bool, $CODER_SSH_NO_WAIT
2228
Specifies whether to wait for a workspace to become ready before
2329
logging in (only applicable when the login before ready option has not

0 commit comments

Comments
 (0)