@@ -121,6 +121,7 @@ func New(options Options) io.Closer {
121
121
logDir : options .LogDir ,
122
122
tempDir : options .TempDir ,
123
123
lifecycleUpdate : make (chan struct {}, 1 ),
124
+ lifecycleReported : make (chan codersdk.WorkspaceAgentLifecycle , 1 ),
124
125
connStatsChan : make (chan * agentsdk.Stats , 1 ),
125
126
}
126
127
a .init (ctx )
@@ -149,9 +150,10 @@ type agent struct {
149
150
sessionToken atomic.Pointer [string ]
150
151
sshServer * ssh.Server
151
152
152
- lifecycleUpdate chan struct {}
153
- lifecycleMu sync.Mutex // Protects following.
154
- lifecycleState codersdk.WorkspaceAgentLifecycle
153
+ lifecycleUpdate chan struct {}
154
+ lifecycleReported chan codersdk.WorkspaceAgentLifecycle
155
+ lifecycleMu sync.RWMutex // Protects following.
156
+ lifecycleState codersdk.WorkspaceAgentLifecycle
155
157
156
158
network * tailnet.Conn
157
159
connStatsChan chan * agentsdk.Stats
@@ -207,9 +209,9 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
207
209
}
208
210
209
211
for r := retry .New (time .Second , 15 * time .Second ); r .Wait (ctx ); {
210
- a .lifecycleMu .Lock ()
212
+ a .lifecycleMu .RLock ()
211
213
state := a .lifecycleState
212
- a .lifecycleMu .Unlock ()
214
+ a .lifecycleMu .RUnlock ()
213
215
214
216
if state == lastReported {
215
217
break
@@ -222,6 +224,11 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
222
224
})
223
225
if err == nil {
224
226
lastReported = state
227
+ select {
228
+ case a .lifecycleReported <- state :
229
+ case <- a .lifecycleReported :
230
+ a .lifecycleReported <- state
231
+ }
225
232
break
226
233
}
227
234
if xerrors .Is (err , context .Canceled ) || xerrors .Is (err , context .DeadlineExceeded ) {
@@ -233,13 +240,20 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
233
240
}
234
241
}
235
242
243
+ // setLifecycle sets the lifecycle state and notifies the lifecycle loop.
244
+ // The state is only updated if it's a valid state transition.
236
245
func (a * agent ) setLifecycle (ctx context.Context , state codersdk.WorkspaceAgentLifecycle ) {
237
246
a .lifecycleMu .Lock ()
238
- defer a .lifecycleMu .Unlock ()
239
-
240
- a .logger .Debug (ctx , "set lifecycle state" , slog .F ("state" , state ), slog .F ("previous" , a .lifecycleState ))
241
-
247
+ lastState := a .lifecycleState
248
+ if slices .Index (codersdk .WorkspaceAgentLifecycleOrder , lastState ) > slices .Index (codersdk .WorkspaceAgentLifecycleOrder , state ) {
249
+ a .logger .Warn (ctx , "attempted to set lifecycle state to a previous state" , slog .F ("last" , lastState ), slog .F ("state" , state ))
250
+ a .lifecycleMu .Unlock ()
251
+ return
252
+ }
242
253
a .lifecycleState = state
254
+ a .logger .Debug (ctx , "set lifecycle state" , slog .F ("state" , state ), slog .F ("last" , lastState ))
255
+ a .lifecycleMu .Unlock ()
256
+
243
257
select {
244
258
case a .lifecycleUpdate <- struct {}{}:
245
259
default :
@@ -299,9 +313,10 @@ func (a *agent) run(ctx context.Context) error {
299
313
}
300
314
}
301
315
316
+ lifecycleState := codersdk .WorkspaceAgentLifecycleReady
302
317
scriptDone := make (chan error , 1 )
303
318
scriptStart := time .Now ()
304
- err : = a .trackConnGoroutine (func () {
319
+ err = a .trackConnGoroutine (func () {
305
320
defer close (scriptDone )
306
321
scriptDone <- a .runStartupScript (ctx , metadata .StartupScript )
307
322
})
@@ -329,16 +344,17 @@ func (a *agent) run(ctx context.Context) error {
329
344
if errors .Is (err , context .Canceled ) {
330
345
return
331
346
}
332
- execTime := time .Since (scriptStart )
333
- lifecycleStatus := codersdk .WorkspaceAgentLifecycleReady
334
- if err != nil {
335
- a .logger .Warn (ctx , "startup script failed" , slog .F ("execution_time" , execTime ), slog .Error (err ))
336
- lifecycleStatus = codersdk .WorkspaceAgentLifecycleStartError
337
- } else {
338
- a .logger .Info (ctx , "startup script completed" , slog .F ("execution_time" , execTime ))
347
+ // Only log if there was a startup script.
348
+ if metadata .StartupScript != "" {
349
+ execTime := time .Since (scriptStart )
350
+ if err != nil {
351
+ a .logger .Warn (ctx , "startup script failed" , slog .F ("execution_time" , execTime ), slog .Error (err ))
352
+ lifecycleState = codersdk .WorkspaceAgentLifecycleStartError
353
+ } else {
354
+ a .logger .Info (ctx , "startup script completed" , slog .F ("execution_time" , execTime ))
355
+ }
339
356
}
340
-
341
- a .setLifecycle (ctx , lifecycleStatus )
357
+ a .setLifecycle (ctx , lifecycleState )
342
358
}()
343
359
}
344
360
@@ -606,14 +622,22 @@ func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error
606
622
}
607
623
608
624
// runStartupScript runs the given script under the "startup" lifecycle label
// by delegating to runScript, and returns whatever error runScript reports.
// runScript returns nil without doing anything when script is empty.
func (a * agent ) runStartupScript (ctx context.Context , script string ) error {
625
+ return a .runScript (ctx , "startup" , script )
626
+ }
627
+
628
// runShutdownScript runs the given script under the "shutdown" lifecycle
// label by delegating to runScript, and returns whatever error runScript
// reports. The label is what runScript uses to name the script's log file
// and to word its log and error messages.
+ func (a * agent ) runShutdownScript (ctx context.Context , script string ) error {
629
+ return a .runScript (ctx , "shutdown" , script )
630
+ }
631
+
632
+ func (a * agent ) runScript (ctx context.Context , lifecycle , script string ) error {
609
633
if script == "" {
610
634
return nil
611
635
}
612
636
613
- a .logger .Info (ctx , "running startup script" , slog .F ("script" , script ))
614
- writer , err := a .filesystem .OpenFile (filepath .Join (a .logDir , "coder-startup -script.log" ), os .O_CREATE | os .O_RDWR , 0o600 )
637
+ a .logger .Info (ctx , "running script" , slog . F ( "lifecycle" , lifecycle ) , slog .F ("script" , script ))
638
+ writer , err := a .filesystem .OpenFile (filepath .Join (a .logDir , fmt . Sprintf ( "coder-%s -script.log" , lifecycle ) ), os .O_CREATE | os .O_RDWR , 0o600 )
615
639
if err != nil {
616
- return xerrors .Errorf ("open startup script log file: %w" , err )
640
+ return xerrors .Errorf ("open %s script log file: %w" , lifecycle , err )
617
641
}
618
642
defer func () {
619
643
_ = writer .Close ()
@@ -774,7 +798,7 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri
774
798
775
799
rawMetadata := a .metadata .Load ()
776
800
if rawMetadata == nil {
777
- return nil , xerrors .Errorf ("no metadata was provided: %w" , err )
801
+ return nil , xerrors .Errorf ("no metadata was provided" )
778
802
}
779
803
metadata , valid := rawMetadata .(agentsdk.Metadata )
780
804
if ! valid {
@@ -1290,13 +1314,73 @@ func (a *agent) Close() error {
1290
1314
if a .isClosed () {
1291
1315
return nil
1292
1316
}
1317
+
1318
+ ctx := context .Background ()
1319
+ a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleShuttingDown )
1320
+
1321
+ lifecycleState := codersdk .WorkspaceAgentLifecycleOff
1322
+ if metadata , ok := a .metadata .Load ().(agentsdk.Metadata ); ok && metadata .ShutdownScript != "" {
1323
+ scriptDone := make (chan error , 1 )
1324
+ scriptStart := time .Now ()
1325
+ go func () {
1326
+ defer close (scriptDone )
1327
+ scriptDone <- a .runShutdownScript (ctx , metadata .ShutdownScript )
1328
+ }()
1329
+
1330
+ var timeout <- chan time.Time
1331
+ // If timeout is zero, an older version of the coder
1332
+ // provider was used. Otherwise a timeout is always > 0.
1333
+ if metadata .ShutdownScriptTimeout > 0 {
1334
+ t := time .NewTimer (metadata .ShutdownScriptTimeout )
1335
+ defer t .Stop ()
1336
+ timeout = t .C
1337
+ }
1338
+
1339
+ var err error
1340
+ select {
1341
+ case err = <- scriptDone :
1342
+ case <- timeout :
1343
+ a .logger .Warn (ctx , "shutdown script timed out" )
1344
+ a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleShutdownTimeout )
1345
+ err = <- scriptDone // The script can still complete after a timeout.
1346
+ }
1347
+ execTime := time .Since (scriptStart )
1348
+ if err != nil {
1349
+ a .logger .Warn (ctx , "shutdown script failed" , slog .F ("execution_time" , execTime ), slog .Error (err ))
1350
+ lifecycleState = codersdk .WorkspaceAgentLifecycleShutdownError
1351
+ } else {
1352
+ a .logger .Info (ctx , "shutdown script completed" , slog .F ("execution_time" , execTime ))
1353
+ }
1354
+ }
1355
+
1356
+ // Set final state and wait for it to be reported because context
1357
+ // cancellation will stop the report loop.
1358
+ a .setLifecycle (ctx , lifecycleState )
1359
+
1360
+ // Wait for the lifecycle to be reported, but don't wait forever so
1361
+ // that we don't break user expectations.
1362
+ ctx , cancel := context .WithTimeout (ctx , 5 * time .Second )
1363
+ defer cancel ()
1364
+ lifecycleWaitLoop:
1365
+ for {
1366
+ select {
1367
+ case <- ctx .Done ():
1368
+ break lifecycleWaitLoop
1369
+ case s := <- a .lifecycleReported :
1370
+ if s == lifecycleState {
1371
+ break lifecycleWaitLoop
1372
+ }
1373
+ }
1374
+ }
1375
+
1293
1376
close (a .closed )
1294
1377
a .closeCancel ()
1378
+ _ = a .sshServer .Close ()
1295
1379
if a .network != nil {
1296
1380
_ = a .network .Close ()
1297
1381
}
1298
- _ = a .sshServer .Close ()
1299
1382
a .connCloseWait .Wait ()
1383
+
1300
1384
return nil
1301
1385
}
1302
1386
0 commit comments