@@ -30,6 +30,7 @@ import (
30
30
"github.com/spf13/afero"
31
31
"go.uber.org/atomic"
32
32
gossh "golang.org/x/crypto/ssh"
33
+ "golang.org/x/exp/slices"
33
34
"golang.org/x/xerrors"
34
35
"tailscale.com/net/speedtest"
35
36
"tailscale.com/tailcfg"
@@ -90,7 +91,7 @@ func New(options Options) io.Closer {
90
91
}
91
92
}
92
93
ctx , cancelFunc := context .WithCancel (context .Background ())
93
- server := & agent {
94
+ a := & agent {
94
95
reconnectingPTYTimeout : options .ReconnectingPTYTimeout ,
95
96
logger : options .Logger ,
96
97
closeCancel : cancelFunc ,
@@ -101,8 +102,8 @@ func New(options Options) io.Closer {
101
102
filesystem : options .Filesystem ,
102
103
tempDir : options .TempDir ,
103
104
}
104
- server .init (ctx )
105
- return server
105
+ a .init (ctx )
106
+ return a
106
107
}
107
108
108
109
type agent struct {
@@ -225,6 +226,25 @@ func (a *agent) run(ctx context.Context) error {
225
226
_ = network .Close ()
226
227
return xerrors .New ("agent is closed" )
227
228
}
229
+
230
+ // Report statistics from the created network.
231
+ cl , err := a .client .AgentReportStats (ctx , a .logger , func () * codersdk.AgentStats {
232
+ stats := network .ExtractTrafficStats ()
233
+ return convertAgentStats (stats )
234
+ })
235
+ if err != nil {
236
+ a .logger .Error (ctx , "report stats" , slog .Error (err ))
237
+ } else {
238
+ if err = a .trackConnGoroutine (func () {
239
+ // This is OK because the agent never re-creates the tailnet
240
+ // and the only shutdown indicator is agent.Close().
241
+ <- a .closed
242
+ _ = cl .Close ()
243
+ }); err != nil {
244
+ a .logger .Debug (ctx , "report stats goroutine" , slog .Error (err ))
245
+ _ = cl .Close ()
246
+ }
247
+ }
228
248
} else {
229
249
// Update the DERP map!
230
250
network .SetDERPMap (metadata .DERPMap )
@@ -300,10 +320,12 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
300
320
}
301
321
}()
302
322
if err = a .trackConnGoroutine (func () {
323
+ logger := a .logger .Named ("reconnecting-pty" )
324
+
303
325
for {
304
326
conn , err := reconnectingPTYListener .Accept ()
305
327
if err != nil {
306
- a . logger .Debug (ctx , "accept pty failed" , slog .Error (err ))
328
+ logger .Debug (ctx , "accept pty failed" , slog .Error (err ))
307
329
return
308
330
}
309
331
// This cannot use a JSON decoder, since that can
@@ -324,7 +346,9 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
324
346
if err != nil {
325
347
continue
326
348
}
327
- go a .handleReconnectingPTY (ctx , msg , conn )
349
+ go func () {
350
+ _ = a .handleReconnectingPTY (ctx , logger , msg , conn )
351
+ }()
328
352
}
329
353
}); err != nil {
330
354
return nil , err
@@ -556,28 +580,6 @@ func (a *agent) init(ctx context.Context) {
556
580
}
557
581
558
582
go a .runLoop (ctx )
559
- cl , err := a .client .AgentReportStats (ctx , a .logger , func () * codersdk.AgentStats {
560
- stats := map [netlogtype.Connection ]netlogtype.Counts {}
561
- a .closeMutex .Lock ()
562
- if a .network != nil {
563
- stats = a .network .ExtractTrafficStats ()
564
- }
565
- a .closeMutex .Unlock ()
566
- return convertAgentStats (stats )
567
- })
568
- if err != nil {
569
- a .logger .Error (ctx , "report stats" , slog .Error (err ))
570
- return
571
- }
572
-
573
- if err = a .trackConnGoroutine (func () {
574
- <- a .closed
575
- _ = cl .Close ()
576
- }); err != nil {
577
- a .logger .Error (ctx , "report stats goroutine" , slog .Error (err ))
578
- _ = cl .Close ()
579
- return
580
- }
581
583
}
582
584
583
585
func convertAgentStats (counts map [netlogtype.Connection ]netlogtype.Counts ) * codersdk.AgentStats {
@@ -798,38 +800,56 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
798
800
return cmd .Wait ()
799
801
}
800
802
801
- func (a * agent ) handleReconnectingPTY (ctx context.Context , msg codersdk.ReconnectingPTYInit , conn net.Conn ) {
803
+ func (a * agent ) handleReconnectingPTY (ctx context.Context , logger slog. Logger , msg codersdk.ReconnectingPTYInit , conn net.Conn ) ( retErr error ) {
802
804
defer conn .Close ()
803
805
804
806
connectionID := uuid .NewString ()
807
+ logger = logger .With (slog .F ("id" , msg .ID ), slog .F ("connection_id" , connectionID ))
808
+
809
+ defer func () {
810
+ if err := retErr ; err != nil {
811
+ a .closeMutex .Lock ()
812
+ closed := a .isClosed ()
813
+ a .closeMutex .Unlock ()
814
+
815
+ // If the agent is closed, we don't want to
816
+ // log this as an error since it's expected.
817
+ if closed {
818
+ logger .Debug (ctx , "session error after agent close" , slog .Error (err ))
819
+ } else {
820
+ logger .Error (ctx , "session error" , slog .Error (err ))
821
+ }
822
+ }
823
+ logger .Debug (ctx , "session closed" )
824
+ }()
825
+
805
826
var rpty * reconnectingPTY
806
827
rawRPTY , ok := a .reconnectingPTYs .Load (msg .ID )
807
828
if ok {
829
+ logger .Debug (ctx , "connecting to existing session" )
808
830
rpty , ok = rawRPTY .(* reconnectingPTY )
809
831
if ! ok {
810
- a .logger .Error (ctx , "found invalid type in reconnecting pty map" , slog .F ("id" , msg .ID ))
811
- return
832
+ return xerrors .Errorf ("found invalid type in reconnecting pty map: %T" , rawRPTY )
812
833
}
813
834
} else {
835
+ logger .Debug (ctx , "creating new session" )
836
+
814
837
// Empty command will default to the user's shell!
815
838
cmd , err := a .createCommand (ctx , msg .Command , nil )
816
839
if err != nil {
817
- a .logger .Error (ctx , "create reconnecting pty command" , slog .Error (err ))
818
- return
840
+ return xerrors .Errorf ("create command: %w" , err )
819
841
}
820
842
cmd .Env = append (cmd .Env , "TERM=xterm-256color" )
821
843
822
844
// Default to buffer 64KiB.
823
845
circularBuffer , err := circbuf .NewBuffer (64 << 10 )
824
846
if err != nil {
825
- a .logger .Error (ctx , "create circular buffer" , slog .Error (err ))
826
- return
847
+ return xerrors .Errorf ("create circular buffer: %w" , err )
827
848
}
828
849
829
850
ptty , process , err := pty .Start (cmd )
830
851
if err != nil {
831
- a .logger .Error (ctx , "start reconnecting pty command" , slog .F ("id" , msg .ID ), slog .Error (err ))
832
- return
852
+ return xerrors .Errorf ("start command: %w" , err )
833
853
}
834
854
835
855
ctx , cancelFunc := context .WithCancel (ctx )
@@ -873,7 +893,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, msg codersdk.Reconnec
873
893
_ , err = rpty .circularBuffer .Write (part )
874
894
rpty .circularBufferMutex .Unlock ()
875
895
if err != nil {
876
- a . logger .Error (ctx , "reconnecting pty write buffer" , slog .Error (err ), slog . F ( "id" , msg . ID ))
896
+ logger .Error (ctx , "write to circular buffer" , slog .Error (err ))
877
897
break
878
898
}
879
899
rpty .activeConnsMutex .Lock ()
@@ -889,23 +909,27 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, msg codersdk.Reconnec
889
909
rpty .Close ()
890
910
a .reconnectingPTYs .Delete (msg .ID )
891
911
}); err != nil {
892
- a .logger .Error (ctx , "start reconnecting pty routine" , slog .F ("id" , msg .ID ), slog .Error (err ))
893
- return
912
+ return xerrors .Errorf ("start routine: %w" , err )
894
913
}
895
914
}
896
915
// Resize the PTY to initial height + width.
897
916
err := rpty .ptty .Resize (msg .Height , msg .Width )
898
917
if err != nil {
899
918
// We can continue after this, it's not fatal!
900
- a . logger .Error (ctx , "resize reconnecting pty" , slog . F ( "id" , msg . ID ) , slog .Error (err ))
919
+ logger .Error (ctx , "resize" , slog .Error (err ))
901
920
}
902
921
// Write any previously stored data for the TTY.
903
922
rpty .circularBufferMutex .RLock ()
904
- _ , err = conn . Write (rpty .circularBuffer .Bytes ())
923
+ prevBuf := slices . Clone (rpty .circularBuffer .Bytes ())
905
924
rpty .circularBufferMutex .RUnlock ()
925
+ // Note that there is a small race here between writing buffered
926
+ // data and storing conn in activeConns. This is likely a very minor
927
+ // edge case, but we should look into ways to avoid it. Holding
928
+ // activeConnsMutex would be one option, but holding this mutex
929
+ // while also holding circularBufferMutex seems dangerous.
930
+ _ , err = conn .Write (prevBuf )
906
931
if err != nil {
907
- a .logger .Warn (ctx , "write reconnecting pty buffer" , slog .F ("id" , msg .ID ), slog .Error (err ))
908
- return
932
+ return xerrors .Errorf ("write buffer to conn: %w" , err )
909
933
}
910
934
// Multiple connections to the same TTY are permitted.
911
935
// This could easily be used for terminal sharing, but
@@ -946,16 +970,16 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, msg codersdk.Reconnec
946
970
for {
947
971
err = decoder .Decode (& req )
948
972
if xerrors .Is (err , io .EOF ) {
949
- return
973
+ return nil
950
974
}
951
975
if err != nil {
952
- a . logger .Warn (ctx , "reconnecting pty buffer read error" , slog . F ( "id" , msg . ID ) , slog .Error (err ))
953
- return
976
+ logger .Warn (ctx , "read conn" , slog .Error (err ))
977
+ return nil
954
978
}
955
979
_ , err = rpty .ptty .Input ().Write ([]byte (req .Data ))
956
980
if err != nil {
957
- a . logger .Warn (ctx , "write to reconnecting pty" , slog . F ( "id" , msg . ID ) , slog .Error (err ))
958
- return
981
+ logger .Warn (ctx , "write to pty" , slog .Error (err ))
982
+ return nil
959
983
}
960
984
// Check if a resize needs to happen!
961
985
if req .Height == 0 || req .Width == 0 {
@@ -964,7 +988,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, msg codersdk.Reconnec
964
988
err = rpty .ptty .Resize (req .Height , req .Width )
965
989
if err != nil {
966
990
// We can continue after this, it's not fatal!
967
- a . logger .Error (ctx , "resize reconnecting pty" , slog . F ( "id" , msg . ID ) , slog .Error (err ))
991
+ logger .Error (ctx , "resize" , slog .Error (err ))
968
992
}
969
993
}
970
994
}
0 commit comments