Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 39050d2

Browse files
authored
Merge branch 'main' into jon/rmjfrog
2 parents 49b8d2f + 6330b0d commit 39050d2

File tree

114 files changed

+8432
-3000
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+8432
-3000
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated files
22
agent/agentcontainers/acmock/acmock.go linguist-generated=true
33
agent/agentcontainers/dcspec/dcspec_gen.go linguist-generated=true
4+
agent/agentcontainers/testdata/devcontainercli/*/*.log linguist-generated=true
45
coderd/apidoc/docs.go linguist-generated=true
56
docs/reference/api/*.md linguist-generated=true
67
docs/reference/cli/*.md linguist-generated=true

.github/workflows/typos.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,6 @@ extend-exclude = [
4242
"site/src/pages/SetupPage/countries.tsx",
4343
"provisioner/terraform/testdata/**",
4444
# notifications' golden files confuse the detector because of quoted-printable encoding
45-
"coderd/notifications/testdata/**"
45+
"coderd/notifications/testdata/**",
46+
"agent/agentcontainers/testdata/devcontainercli/**"
4647
]

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,6 @@ result
7979

8080
# Zed
8181
.zed_server
82+
83+
# dlv debug binaries for go tests
84+
__debug_bin*

agent/agent.go

+30-19
Original file line numberDiff line numberDiff line change
@@ -229,13 +229,21 @@ type agent struct {
229229
// we track 2 contexts and associated cancel functions: "graceful" which is Done when it is time
230230
// to start gracefully shutting down and "hard" which is Done when it is time to close
231231
// everything down (regardless of whether graceful shutdown completed).
232-
gracefulCtx context.Context
233-
gracefulCancel context.CancelFunc
234-
hardCtx context.Context
235-
hardCancel context.CancelFunc
236-
closeWaitGroup sync.WaitGroup
232+
gracefulCtx context.Context
233+
gracefulCancel context.CancelFunc
234+
hardCtx context.Context
235+
hardCancel context.CancelFunc
236+
237+
// closeMutex protects the following:
237238
closeMutex sync.Mutex
239+
closeWaitGroup sync.WaitGroup
238240
coordDisconnected chan struct{}
241+
closing bool
242+
// note that once the network is set to non-nil, it is never modified, as with the statsReporter. So, routines
243+
// that run after createOrUpdateNetwork and check the networkOK checkpoint do not need to hold the lock to use them.
244+
network *tailnet.Conn
245+
statsReporter *statsReporter
246+
// end fields protected by closeMutex
239247

240248
environmentVariables map[string]string
241249

@@ -259,9 +267,7 @@ type agent struct {
259267
reportConnectionsMu sync.Mutex
260268
reportConnections []*proto.ReportConnectionRequest
261269

262-
network *tailnet.Conn
263-
statsReporter *statsReporter
264-
logSender *agentsdk.LogSender
270+
logSender *agentsdk.LogSender
265271

266272
prometheusRegistry *prometheus.Registry
267273
// metrics are prometheus registered metrics that will be collected and
@@ -274,6 +280,8 @@ type agent struct {
274280
}
275281

276282
func (a *agent) TailnetConn() *tailnet.Conn {
283+
a.closeMutex.Lock()
284+
defer a.closeMutex.Unlock()
277285
return a.network
278286
}
279287

@@ -1205,15 +1213,15 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
12051213
}
12061214
a.closeMutex.Lock()
12071215
// Re-check if agent was closed while initializing the network.
1208-
closed := a.isClosed()
1209-
if !closed {
1216+
closing := a.closing
1217+
if !closing {
12101218
a.network = network
12111219
a.statsReporter = newStatsReporter(a.logger, network, a)
12121220
}
12131221
a.closeMutex.Unlock()
1214-
if closed {
1222+
if closing {
12151223
_ = network.Close()
1216-
return xerrors.New("agent is closed")
1224+
return xerrors.New("agent is closing")
12171225
}
12181226
} else {
12191227
// Update the wireguard IPs if the agent ID changed.
@@ -1328,8 +1336,8 @@ func (*agent) wireguardAddresses(agentID uuid.UUID) []netip.Prefix {
13281336
func (a *agent) trackGoroutine(fn func()) error {
13291337
a.closeMutex.Lock()
13301338
defer a.closeMutex.Unlock()
1331-
if a.isClosed() {
1332-
return xerrors.New("track conn goroutine: agent is closed")
1339+
if a.closing {
1340+
return xerrors.New("track conn goroutine: agent is closing")
13331341
}
13341342
a.closeWaitGroup.Add(1)
13351343
go func() {
@@ -1547,7 +1555,7 @@ func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTai
15471555
func (a *agent) setCoordDisconnected() chan struct{} {
15481556
a.closeMutex.Lock()
15491557
defer a.closeMutex.Unlock()
1550-
if a.isClosed() {
1558+
if a.closing {
15511559
return nil
15521560
}
15531561
disconnected := make(chan struct{})
@@ -1772,7 +1780,10 @@ func (a *agent) HTTPDebug() http.Handler {
17721780

17731781
func (a *agent) Close() error {
17741782
a.closeMutex.Lock()
1775-
defer a.closeMutex.Unlock()
1783+
network := a.network
1784+
coordDisconnected := a.coordDisconnected
1785+
a.closing = true
1786+
a.closeMutex.Unlock()
17761787
if a.isClosed() {
17771788
return nil
17781789
}
@@ -1849,7 +1860,7 @@ lifecycleWaitLoop:
18491860
select {
18501861
case <-a.hardCtx.Done():
18511862
a.logger.Warn(context.Background(), "timed out waiting for Coordinator RPC disconnect")
1852-
case <-a.coordDisconnected:
1863+
case <-coordDisconnected:
18531864
a.logger.Debug(context.Background(), "coordinator RPC disconnected")
18541865
}
18551866

@@ -1860,8 +1871,8 @@ lifecycleWaitLoop:
18601871
}
18611872

18621873
a.hardCancel()
1863-
if a.network != nil {
1864-
_ = a.network.Close()
1874+
if network != nil {
1875+
_ = network.Close()
18651876
}
18661877
a.closeWaitGroup.Wait()
18671878

agent/agent_test.go

+48
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,54 @@ func TestMain(m *testing.M) {
6868

6969
var sshPorts = []uint16{workspacesdk.AgentSSHPort, workspacesdk.AgentStandardSSHPort}
7070

71+
// TestAgent_CloseWhileStarting is a regression test for https://github.com/coder/coder/issues/17328
72+
func TestAgent_ImmediateClose(t *testing.T) {
73+
t.Parallel()
74+
75+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
76+
defer cancel()
77+
78+
logger := slogtest.Make(t, &slogtest.Options{
79+
// Agent can drop errors when shutting down, and some, like the
80+
// fasthttplistener connection closed error, are unexported.
81+
IgnoreErrors: true,
82+
}).Leveled(slog.LevelDebug)
83+
manifest := agentsdk.Manifest{
84+
AgentID: uuid.New(),
85+
AgentName: "test-agent",
86+
WorkspaceName: "test-workspace",
87+
WorkspaceID: uuid.New(),
88+
}
89+
90+
coordinator := tailnet.NewCoordinator(logger)
91+
t.Cleanup(func() {
92+
_ = coordinator.Close()
93+
})
94+
statsCh := make(chan *proto.Stats, 50)
95+
fs := afero.NewMemMapFs()
96+
client := agenttest.NewClient(t, logger.Named("agenttest"), manifest.AgentID, manifest, statsCh, coordinator)
97+
t.Cleanup(client.Close)
98+
99+
options := agent.Options{
100+
Client: client,
101+
Filesystem: fs,
102+
Logger: logger.Named("agent"),
103+
ReconnectingPTYTimeout: 0,
104+
EnvironmentVariables: map[string]string{},
105+
}
106+
107+
agentUnderTest := agent.New(options)
108+
t.Cleanup(func() {
109+
_ = agentUnderTest.Close()
110+
})
111+
112+
// wait until the agent has connected and is starting to find races in the startup code
113+
_ = testutil.RequireRecvCtx(ctx, t, client.GetStartup())
114+
t.Log("Closing Agent")
115+
err := agentUnderTest.Close()
116+
require.NoError(t, err)
117+
}
118+
71119
// NOTE: These tests only work when your default shell is bash for some reason.
72120

73121
func TestAgent_Stats_SSH(t *testing.T) {

0 commit comments

Comments
 (0)