@@ -10,6 +10,7 @@ import (
10
10
"net/url"
11
11
"os"
12
12
"os/exec"
13
+ "path"
13
14
"path/filepath"
14
15
"strings"
15
16
"time"
@@ -23,6 +24,9 @@ import (
23
24
"golang.org/x/term"
24
25
"golang.org/x/xerrors"
25
26
27
+ "cdr.dev/slog"
28
+ "cdr.dev/slog/sloggers/sloghuman"
29
+
26
30
"github.com/coder/coder/agent/agentssh"
27
31
"github.com/coder/coder/cli/clibase"
28
32
"github.com/coder/coder/cli/cliui"
@@ -46,6 +50,8 @@ func (r *RootCmd) ssh() *clibase.Cmd {
46
50
identityAgent string
47
51
wsPollInterval time.Duration
48
52
noWait bool
53
+ logDir string
54
+ logToFile bool
49
55
)
50
56
client := new (codersdk.Client )
51
57
cmd := & clibase.Cmd {
@@ -56,10 +62,44 @@ func (r *RootCmd) ssh() *clibase.Cmd {
56
62
clibase .RequireNArgs (1 ),
57
63
r .InitClient (client ),
58
64
),
59
- Handler : func (inv * clibase.Invocation ) error {
65
+ Handler : func (inv * clibase.Invocation ) ( retErr error ) {
60
66
ctx , cancel := context .WithCancel (inv .Context ())
61
67
defer cancel ()
62
68
69
+ logger := slog .Make () // empty logger
70
+ defer func () {
71
+ if retErr != nil {
72
+ // catch and log all returned errors so we see them in the
73
+ // log file (if there is one)
74
+ logger .Error (ctx , "command exit" , slog .Error (retErr ))
75
+ }
76
+ }()
77
+ if logToFile {
78
+ // we need a way to ensure different ssh invocations don't clobber
79
+ // each other's logs. Date-time strings will likely have collisions
80
+ // in unit tests and/or scripts unless we extend precision out to
81
+ // sub-millisecond, which seems unwieldy. A simple 5-character random
82
+ // string will do it, since the operating system already tracks
83
+ // dates and times for file IO.
84
+ qual , err := cryptorand .String (5 )
85
+ if err != nil {
86
+ return xerrors .Errorf ("generate random qualifier: %w" , err )
87
+ }
88
+ logPth := path .Join (logDir , fmt .Sprintf ("coder-ssh-%s.log" , qual ))
89
+ logFile , err := os .Create (logPth )
90
+ if err != nil {
91
+ return xerrors .Errorf ("error opening %s for logging: %w" , logPth , err )
92
+ }
93
+ logger = slog .Make (sloghuman .Sink (logFile ))
94
+ defer logFile .Close ()
95
+ if r .verbose {
96
+ logger = logger .Leveled (slog .LevelDebug )
97
+ }
98
+
99
+ // log HTTP requests
100
+ client .Logger = logger
101
+ }
102
+
63
103
workspace , workspaceAgent , err := getWorkspaceAndAgent (ctx , inv , client , codersdk .Me , inv .Args [0 ])
64
104
if err != nil {
65
105
return err
@@ -92,110 +132,71 @@ func (r *RootCmd) ssh() *clibase.Cmd {
92
132
// We don't print the error because cliui.Agent does that for us.
93
133
}
94
134
95
- conn , err := client .DialWorkspaceAgent (ctx , workspaceAgent .ID , & codersdk.DialWorkspaceAgentOptions {})
135
+ conn , err := client .DialWorkspaceAgent (ctx , workspaceAgent .ID , & codersdk.DialWorkspaceAgentOptions {
136
+ Logger : logger ,
137
+ })
96
138
if err != nil {
97
- return err
139
+ return xerrors . Errorf ( "dial agent: %w" , err )
98
140
}
99
141
defer conn .Close ()
100
142
conn .AwaitReachable (ctx )
101
143
stopPolling := tryPollWorkspaceAutostop (ctx , client , workspace )
102
144
defer stopPolling ()
103
145
104
- // Enure connection is closed if the context is canceled or
105
- // the workspace reaches the stopped state.
106
- //
107
- // Watching the stopped state is a work-around for cases
108
- // where the agent is not gracefully shut down and the
109
- // connection is left open. If, for instance, the networking
110
- // is stopped before the agent is shut down, the disconnect
111
- // will usually not propagate.
112
- //
113
- // See: https://github.com/coder/coder/issues/6180
114
- watchAndClose := func (closer func () error ) {
115
- // Ensure session is ended on both context cancellation
116
- // and workspace stop.
117
- defer func () {
118
- _ = closer ()
119
- }()
120
-
121
- startWatchLoop:
122
- for {
123
- // (Re)connect to the coder server and watch workspace events.
124
- var wsWatch <- chan codersdk.Workspace
125
- var err error
126
- for r := retry .New (time .Second , 15 * time .Second ); r .Wait (ctx ); {
127
- wsWatch , err = client .WatchWorkspace (ctx , workspace .ID )
128
- if err == nil {
129
- break
130
- }
131
- if ctx .Err () != nil {
132
- return
133
- }
134
- }
135
-
136
- for {
137
- select {
138
- case <- ctx .Done ():
139
- return
140
- case w , ok := <- wsWatch :
141
- if ! ok {
142
- continue startWatchLoop
143
- }
144
-
145
- // Transitioning to stop or delete could mean that
146
- // the agent will still gracefully stop. If a new
147
- // build is starting, there's no reason to wait for
148
- // the agent, it should be long gone.
149
- if workspace .LatestBuild .ID != w .LatestBuild .ID && w .LatestBuild .Transition == codersdk .WorkspaceTransitionStart {
150
- return
151
- }
152
- // Note, we only react to the stopped state here because we
153
- // want to give the agent a chance to gracefully shut down
154
- // during "stopping".
155
- if w .LatestBuild .Status == codersdk .WorkspaceStatusStopped {
156
- return
157
- }
158
- }
159
- }
160
- }
161
- }
162
-
163
146
if stdio {
164
147
rawSSH , err := conn .SSH (ctx )
165
148
if err != nil {
166
- return err
149
+ return xerrors . Errorf ( "connect SSH: %w" , err )
167
150
}
168
151
defer rawSSH .Close ()
169
- go watchAndClose (rawSSH .Close )
152
+ go watchAndClose (ctx , rawSSH .Close , logger , client , workspace )
170
153
171
154
go func () {
172
155
// Ensure stdout copy closes incase stdin is closed
173
156
// unexpectedly. Typically we wouldn't worry about
174
157
// this since OpenSSH should kill the proxy command.
175
158
defer rawSSH .Close ()
176
159
177
- _ , _ = io .Copy (rawSSH , inv .Stdin )
160
+ _ , err := io .Copy (rawSSH , inv .Stdin )
161
+ if err != nil {
162
+ logger .Error (ctx , "copy stdin error" , slog .Error (err ))
163
+ } else {
164
+ logger .Debug (ctx , "copy stdin complete" )
165
+ }
178
166
}()
179
- _ , _ = io .Copy (inv .Stdout , rawSSH )
167
+ _ , err = io .Copy (inv .Stdout , rawSSH )
168
+ if err != nil {
169
+ logger .Error (ctx , "copy stdout error" , slog .Error (err ))
170
+ } else {
171
+ logger .Debug (ctx , "copy stdout complete" )
172
+ }
180
173
return nil
181
174
}
182
175
183
176
sshClient , err := conn .SSHClient (ctx )
184
177
if err != nil {
185
- return err
178
+ return xerrors . Errorf ( "ssh client: %w" , err )
186
179
}
187
180
defer sshClient .Close ()
188
181
189
182
sshSession , err := sshClient .NewSession ()
190
183
if err != nil {
191
- return err
184
+ return xerrors . Errorf ( "ssh session: %w" , err )
192
185
}
193
186
defer sshSession .Close ()
194
- go watchAndClose (func () error {
195
- _ = sshSession .Close ()
196
- _ = sshClient .Close ()
197
- return nil
198
- })
187
+ go watchAndClose (
188
+ ctx ,
189
+ func () error {
190
+ err := sshSession .Close ()
191
+ logger .Debug (ctx , "session close" , slog .Error (err ))
192
+ err = sshClient .Close ()
193
+ logger .Debug (ctx , "client close" , slog .Error (err ))
194
+ return nil
195
+ },
196
+ logger ,
197
+ client ,
198
+ workspace ,
199
+ )
199
200
200
201
if identityAgent == "" {
201
202
identityAgent = os .Getenv ("SSH_AUTH_SOCK" )
@@ -257,7 +258,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
257
258
258
259
err = sshSession .RequestPty ("xterm-256color" , 128 , 128 , gossh.TerminalModes {})
259
260
if err != nil {
260
- return err
261
+ return xerrors . Errorf ( "request pty: %w" , err )
261
262
}
262
263
263
264
sshSession .Stdin = inv .Stdin
@@ -266,7 +267,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
266
267
267
268
err = sshSession .Shell ()
268
269
if err != nil {
269
- return err
270
+ return xerrors . Errorf ( "start shell: %w" , err )
270
271
}
271
272
272
273
// Put cancel at the top of the defer stack to initiate
@@ -289,7 +290,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
289
290
if errors .Is (err , & gossh.ExitMissingError {}) {
290
291
return xerrors .New ("SSH connection ended unexpectedly" )
291
292
}
292
- return err
293
+ return xerrors . Errorf ( "session ended: %w" , err )
293
294
}
294
295
295
296
return nil
@@ -335,10 +336,90 @@ func (r *RootCmd) ssh() *clibase.Cmd {
335
336
Description : "Specifies whether to wait for a workspace to become ready before logging in (only applicable when the login before ready option has not been enabled). Note that the workspace agent may still be in the process of executing the startup script and the workspace may be in an incomplete state." ,
336
337
Value : clibase .BoolOf (& noWait ),
337
338
},
339
+ {
340
+ Flag : "log-dir" ,
341
+ Default : os .TempDir (),
342
+ Description : "Specify the location for the log files." ,
343
+ Env : "CODER_SSH_LOG_DIR" ,
344
+ Value : clibase .StringOf (& logDir ),
345
+ },
346
+ {
347
+ Flag : "log-to-file" ,
348
+ FlagShorthand : "l" ,
349
+ Env : "CODER_SSH_LOG_TO_FILE" ,
350
+ Description : "Enable diagnostic logging to file." ,
351
+ Value : clibase .BoolOf (& logToFile ),
352
+ },
338
353
}
339
354
return cmd
340
355
}
341
356
357
+ // watchAndClose ensures closer is called if the context is canceled or
358
+ // the workspace reaches the stopped state.
359
+ //
360
+ // Watching the stopped state is a work-around for cases
361
+ // where the agent is not gracefully shut down and the
362
+ // connection is left open. If, for instance, the networking
363
+ // is stopped before the agent is shut down, the disconnect
364
+ // will usually not propagate.
365
+ //
366
+ // See: https://github.com/coder/coder/issues/6180
367
+ func watchAndClose (ctx context.Context , closer func () error , logger slog.Logger , client * codersdk.Client , workspace codersdk.Workspace ) {
368
+ // Ensure session is ended on both context cancellation
369
+ // and workspace stop.
370
+ defer func () {
371
+ err := closer ()
372
+ if err != nil {
373
+ logger .Error (ctx , "error closing session" , slog .Error (err ))
374
+ }
375
+ }()
376
+
377
+ startWatchLoop:
378
+ for {
379
+ logger .Debug (ctx , "(re)connecting to the coder server to watch workspace events." )
380
+ var wsWatch <- chan codersdk.Workspace
381
+ var err error
382
+ for r := retry .New (time .Second , 15 * time .Second ); r .Wait (ctx ); {
383
+ wsWatch , err = client .WatchWorkspace (ctx , workspace .ID )
384
+ if err == nil {
385
+ break
386
+ }
387
+ if ctx .Err () != nil {
388
+ logger .Info (ctx , "context expired" , slog .Error (ctx .Err ()))
389
+ return
390
+ }
391
+ }
392
+
393
+ for {
394
+ select {
395
+ case <- ctx .Done ():
396
+ logger .Info (ctx , "context expired" , slog .Error (ctx .Err ()))
397
+ return
398
+ case w , ok := <- wsWatch :
399
+ if ! ok {
400
+ continue startWatchLoop
401
+ }
402
+
403
+ // Transitioning to stop or delete could mean that
404
+ // the agent will still gracefully stop. If a new
405
+ // build is starting, there's no reason to wait for
406
+ // the agent, it should be long gone.
407
+ if workspace .LatestBuild .ID != w .LatestBuild .ID && w .LatestBuild .Transition == codersdk .WorkspaceTransitionStart {
408
+ logger .Info (ctx , "new build started" )
409
+ return
410
+ }
411
+ // Note, we only react to the stopped state here because we
412
+ // want to give the agent a chance to gracefully shut down
413
+ // during "stopping".
414
+ if w .LatestBuild .Status == codersdk .WorkspaceStatusStopped {
415
+ logger .Info (ctx , "workspace stopped" )
416
+ return
417
+ }
418
+ }
419
+ }
420
+ }
421
+ }
422
+
342
423
// getWorkspaceAgent returns the workspace and agent selected using either the
343
424
// `<workspace>[.<agent>]` syntax via `in` or picks a random workspace and agent
344
425
// if `shuffle` is true.
0 commit comments