From e1a4f0b701c039036632575535579f8288502e48 Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 17:43:51 +0000 Subject: [PATCH 01/10] fix: close server pty connections on client disconnect --- coderd/httpapi/websocket.go | 23 ++++++++++++++++++++--- coderd/workspaceapps/proxy.go | 7 +++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/coderd/httpapi/websocket.go b/coderd/httpapi/websocket.go index 629dcac8131f3..b22193bfadf65 100644 --- a/coderd/httpapi/websocket.go +++ b/coderd/httpapi/websocket.go @@ -2,8 +2,10 @@ package httpapi import ( "context" + "errors" "time" + "golang.org/x/xerrors" "nhooyr.io/websocket" "cdr.dev/slog" @@ -31,7 +33,8 @@ func Heartbeat(ctx context.Context, conn *websocket.Conn) { // Heartbeat loops to ping a WebSocket to keep it alive. It calls `exit` on ping // failure. func HeartbeatClose(ctx context.Context, logger slog.Logger, exit func(), conn *websocket.Conn) { - ticker := time.NewTicker(15 * time.Second) + inverval := 15 * time.Second + ticker := time.NewTicker(inverval) defer ticker.Stop() for { @@ -40,12 +43,26 @@ func HeartbeatClose(ctx context.Context, logger slog.Logger, exit func(), conn * return case <-ticker.C: } - err := conn.Ping(ctx) + err := pingWithTimeout(ctx, conn, inverval) if err != nil { + // context.DeadlineExceeded is expected when the client disconnects without sending a close frame + if !errors.Is(err, context.DeadlineExceeded) { + logger.Error(ctx, "failed to heartbeat ping", slog.Error(err)) + } _ = conn.Close(websocket.StatusGoingAway, "Ping failed") - logger.Info(ctx, "failed to heartbeat ping", slog.Error(err)) exit() return } } } + +func pingWithTimeout(ctx context.Context, conn *websocket.Conn, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + err := conn.Ping(ctx) + if err != nil { + return xerrors.Errorf("failed to ping: %w", err) + } + + return nil +} diff --git a/coderd/workspaceapps/proxy.go b/coderd/workspaceapps/proxy.go index 69f1aadca49b2..c6cd01395db5c 100644 --- a/coderd/workspaceapps/proxy.go +++ b/coderd/workspaceapps/proxy.go @@ -593,7 +593,6 @@ func (s *Server) proxyWorkspaceApp(rw http.ResponseWriter, r *http.Request, appT tracing.EndHTTPSpan(r, http.StatusOK, trace.SpanFromContext(ctx)) report := newStatsReportFromSignedToken(appToken) - s.collectStats(report) defer func() { // We must use defer here because ServeHTTP may panic. report.SessionEndedAt = dbtime.Now() @@ -614,7 +613,8 @@ func (s *Server) proxyWorkspaceApp(rw http.ResponseWriter, r *http.Request, appT // @Success 101 // @Router /workspaceagents/{workspaceagent}/pty [get] func (s *Server) workspaceAgentPTY(rw http.ResponseWriter, r *http.Request) { - ctx := r.Context() + ctx, cancel := context.WithCancel(r.Context()) + defer cancel() s.websocketWaitMutex.Lock() s.websocketWaitGroup.Add(1) @@ -670,12 +670,11 @@ func (s *Server) workspaceAgentPTY(rw http.ResponseWriter, r *http.Request) { }) return } + go httpapi.HeartbeatClose(ctx, s.Logger, cancel, conn) ctx, wsNetConn := WebsocketNetConn(ctx, conn, websocket.MessageBinary) defer wsNetConn.Close() // Also closes conn. - go httpapi.Heartbeat(ctx, conn) - agentConn, release, err := s.AgentProvider.AgentConn(ctx, appToken.AgentID) if err != nil { log.Debug(ctx, "dial workspace agent", slog.Error(err)) From 3e2717dab82230796806c42b7d87f83b34612b09 Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 18:00:06 +0000 Subject: [PATCH 02/10] fix typo --- coderd/httpapi/websocket.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coderd/httpapi/websocket.go b/coderd/httpapi/websocket.go index b22193bfadf65..2d6f131fd5aa3 100644 --- a/coderd/httpapi/websocket.go +++ b/coderd/httpapi/websocket.go @@ -33,8 +33,8 @@ func Heartbeat(ctx context.Context, conn *websocket.Conn) { // Heartbeat loops to ping a WebSocket to keep it alive. It calls `exit` on ping // failure. func HeartbeatClose(ctx context.Context, logger slog.Logger, exit func(), conn *websocket.Conn) { - inverval := 15 * time.Second - ticker := time.NewTicker(inverval) + interval := 15 * time.Second + ticker := time.NewTicker(interval) defer ticker.Stop() for { @@ -43,7 +43,7 @@ func HeartbeatClose(ctx context.Context, logger slog.Logger, exit func(), conn * return case <-ticker.C: } - err := pingWithTimeout(ctx, conn, inverval) + err := pingWithTimeout(ctx, conn, interval) if err != nil { // context.DeadlineExceeded is expected when the client disconnects without sending a close frame if !errors.Is(err, context.DeadlineExceeded) { From 5171f716c6d020c6770c187c5407000e3e767a5c Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 18:51:57 +0000 Subject: [PATCH 03/10] fix: stop incrementing on empty agent stats --- coderd/insights_test.go | 6 +- coderd/workspacestats/batcher.go | 5 +- .../workspacestats/batcher_internal_test.go | 6 +- coderd/workspacestats/reporter.go | 100 ++++++++---------- coderd/workspacestats/tracker.go | 1 - .../workspacestatstest/batcher.go | 3 +- 6 files changed, 52 insertions(+), 69 deletions(-) diff --git a/coderd/insights_test.go b/coderd/insights_test.go index 06fe8d46ca5ac..bf8aa4bc44506 100644 --- a/coderd/insights_test.go +++ b/coderd/insights_test.go @@ -700,14 +700,13 @@ func TestTemplateInsights_Golden(t *testing.T) { connectionCount = 0 } for createdAt.Before(stat.endedAt) { - err = batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{ + batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{ ConnectionCount: connectionCount, SessionCountVscode: stat.sessionCountVSCode, SessionCountJetbrains: stat.sessionCountJetBrains, SessionCountReconnectingPty: stat.sessionCountReconnectingPTY, SessionCountSsh: stat.sessionCountSSH, }, false) - require.NoError(t, err, "want no error inserting agent stats") createdAt = createdAt.Add(30 * time.Second) } } @@ -1599,14 +1598,13 @@ func TestUserActivityInsights_Golden(t *testing.T) { connectionCount = 0 } for createdAt.Before(stat.endedAt) { - err = batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{ + batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{ ConnectionCount: connectionCount, SessionCountVscode: stat.sessionCountVSCode, SessionCountJetbrains: stat.sessionCountJetBrains, SessionCountReconnectingPty: stat.sessionCountReconnectingPTY, SessionCountSsh: stat.sessionCountSSH, }, false) - require.NoError(t, err, "want no error inserting agent stats") createdAt = createdAt.Add(30 * time.Second) } } diff --git a/coderd/workspacestats/batcher.go b/coderd/workspacestats/batcher.go index 1f14c5cec5a17..46efc69170562 100644 --- a/coderd/workspacestats/batcher.go +++ b/coderd/workspacestats/batcher.go @@ -25,7 +25,7 @@ const ( ) type Batcher interface { - Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) error + Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) } // DBBatcher holds a buffer of agent stats and periodically flushes them to @@ -139,7 +139,7 @@ func (b *DBBatcher) Add( workspaceID uuid.UUID, st *agentproto.Stats, usage bool, -) error { +) { b.mu.Lock() defer b.mu.Unlock() @@ -176,7 +176,6 @@ func (b *DBBatcher) Add( b.flushLever <- struct{}{} b.flushForced.Store(true) } - return nil } // Run runs the batcher. diff --git a/coderd/workspacestats/batcher_internal_test.go b/coderd/workspacestats/batcher_internal_test.go index 2f7a25b152127..1d96789d57376 100644 --- a/coderd/workspacestats/batcher_internal_test.go +++ b/coderd/workspacestats/batcher_internal_test.go @@ -63,7 +63,7 @@ func TestBatchStats(t *testing.T) { // Given: a single data point is added for workspace t2 := t1.Add(time.Second) t.Logf("inserting 1 stat") - require.NoError(t, b.Add(t2.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false)) + b.Add(t2.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false) // When: it becomes time to report stats // Signal a tick and wait for a flush to complete. @@ -87,9 +87,9 @@ func TestBatchStats(t *testing.T) { t.Logf("inserting %d stats", defaultBufferSize) for i := 0; i < defaultBufferSize; i++ { if i%2 == 0 { - require.NoError(t, b.Add(t3.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false)) + b.Add(t3.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false) } else { - require.NoError(t, b.Add(t3.Add(time.Millisecond), deps2.Agent.ID, deps2.User.ID, deps2.Template.ID, deps2.Workspace.ID, randStats(t), false)) + b.Add(t3.Add(time.Millisecond), deps2.Agent.ID, deps2.User.ID, deps2.Template.ID, deps2.Workspace.ID, randStats(t), false) } } }() diff --git a/coderd/workspacestats/reporter.go b/coderd/workspacestats/reporter.go index fecfd1b1eda92..7b9894c644e38 100644 --- a/coderd/workspacestats/reporter.go +++ b/coderd/workspacestats/reporter.go @@ -6,7 +6,6 @@ import ( "time" "github.com/google/uuid" - "golang.org/x/sync/errgroup" "golang.org/x/xerrors" "cdr.dev/slog" @@ -119,69 +118,58 @@ func (r *Reporter) ReportAppStats(ctx context.Context, stats []workspaceapps.Sta } func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspace database.Workspace, workspaceAgent database.WorkspaceAgent, templateName string, stats *agentproto.Stats, usage bool) error { - if stats.ConnectionCount > 0 { - var nextAutostart time.Time - if workspace.AutostartSchedule.String != "" { - templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID) - // If the template schedule fails to load, just default to bumping - // without the next transition and log it. - if err != nil { - r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min", - slog.F("workspace_id", workspace.ID), - slog.F("template_id", workspace.TemplateID), - slog.Error(err), - ) - } else { - next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule) - if allowed { - nextAutostart = next - } - } - } - ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart) - } + // update agent stats + r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage) - var errGroup errgroup.Group - errGroup.Go(func() error { - err := r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage) - if err != nil { - r.opts.Logger.Error(ctx, "add agent stats to batcher", slog.Error(err)) - return xerrors.Errorf("insert workspace agent stats batch: %w", err) - } - return nil - }) - errGroup.Go(func() error { - err := r.opts.Database.UpdateWorkspaceLastUsedAt(ctx, database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, - }) + // update prometheus metrics + if r.opts.UpdateAgentMetricsFn != nil { + user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID) if err != nil { - return xerrors.Errorf("update workspace LastUsedAt: %w", err) + return xerrors.Errorf("get user: %w", err) } + + r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{ + Username: user.Username, + WorkspaceName: workspace.Name, + AgentName: workspaceAgent.Name, + TemplateName: templateName, + }, stats.Metrics) return nil - }) - if r.opts.UpdateAgentMetricsFn != nil { - errGroup.Go(func() error { - user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID) - if err != nil { - return xerrors.Errorf("get user: %w", err) - } + } - r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{ - Username: user.Username, - WorkspaceName: workspace.Name, - AgentName: workspaceAgent.Name, - TemplateName: templateName, - }, stats.Metrics) - return nil - }) + // if no active sessions we do not bump activity + if stats.SessionCountJetbrains == 0 && stats.SessionCountReconnectingPty == 0 && stats.SessionCountSsh == 0 && stats.SessionCountVscode == 0 { + return nil } - err := errGroup.Wait() - if err != nil { - return xerrors.Errorf("update stats in database: %w", err) + + // check next autostart + var nextAutostart time.Time + if workspace.AutostartSchedule.String != "" { + templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID) + // If the template schedule fails to load, just default to bumping + // without the next transition and log it. + if err != nil { + r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min", + slog.F("workspace_id", workspace.ID), + slog.F("template_id", workspace.TemplateID), + slog.Error(err), + ) + } else { + next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule) + if allowed { + nextAutostart = next + } + } } - err = r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{}) + // bump workspace activity + ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart) + + // bump workspace last_used_at + r.opts.UsageTracker.Add(workspace.ID) + + // notify workspace update + err := r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{}) if err != nil { r.opts.Logger.Warn(ctx, "failed to publish workspace agent stats", slog.F("workspace_id", workspace.ID), slog.Error(err)) diff --git a/coderd/workspacestats/tracker.go b/coderd/workspacestats/tracker.go index 33532247b36e0..f55edde3b57e6 100644 --- a/coderd/workspacestats/tracker.go +++ b/coderd/workspacestats/tracker.go @@ -130,7 +130,6 @@ func (tr *UsageTracker) flush(now time.Time) { authCtx := dbauthz.AsSystemRestricted(ctx) tr.flushLock.Lock() defer tr.flushLock.Unlock() - // nolint:gocritic // (#13146) Will be moved soon as part of refactor. if err := tr.s.BatchUpdateWorkspaceLastUsedAt(authCtx, database.BatchUpdateWorkspaceLastUsedAtParams{ LastUsedAt: now, IDs: ids, diff --git a/coderd/workspacestats/workspacestatstest/batcher.go b/coderd/workspacestats/workspacestatstest/batcher.go index 2f5dd7d13aa0a..592e244518790 100644 --- a/coderd/workspacestats/workspacestatstest/batcher.go +++ b/coderd/workspacestats/workspacestatstest/batcher.go @@ -25,7 +25,7 @@ type StatsBatcher struct { var _ workspacestats.Batcher = &StatsBatcher{} -func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) error { +func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) { b.Mu.Lock() defer b.Mu.Unlock() b.Called++ @@ -36,5 +36,4 @@ func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUI b.LastWorkspaceID = workspaceID b.LastStats = st b.LastUsage = usage - return nil } From 61f2b7e17f0ebfea5c3656cc45db1353bafc8cfc Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 19:14:19 +0000 Subject: [PATCH 04/10] fix tests --- coderd/agentapi/stats_test.go | 36 ++--------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/coderd/agentapi/stats_test.go b/coderd/agentapi/stats_test.go index 57534208be110..a8c2ef0bbcee3 100644 --- a/coderd/agentapi/stats_test.go +++ b/coderd/agentapi/stats_test.go @@ -230,12 +230,6 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) - // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, - }).Return(nil) - _, err := api.UpdateStats(context.Background(), req) require.NoError(t, err) }) @@ -319,6 +313,7 @@ func TestUpdateStates(t *testing.T) { "dean": 2, }, ConnectionCount: 3, + SessionCountSsh: 3, }, } ) @@ -355,7 +350,7 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) - // We expect an activity bump because ConnectionCount > 0. However, the + // We expect an activity bump because SessionCountSsh > 0. However, the // next autostart time will be set on the bump. dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{ WorkspaceID: workspace.ID, @@ -466,29 +461,9 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) - // We expect an activity bump because ConnectionCount > 0. - dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{ - WorkspaceID: workspace.ID, - NextAutostart: time.Time{}.UTC(), - }).Return(nil) - - // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, - }).Return(nil) - // User gets fetched to hit the UpdateAgentMetricsFn. dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil) - // Ensure that pubsub notifications are sent. - notifyDescription := make(chan []byte) - ps.Subscribe(codersdk.WorkspaceNotifyChannel(workspace.ID), func(_ context.Context, description []byte) { - go func() { - notifyDescription <- description - }() - }) - resp, err := api.UpdateStats(context.Background(), req) require.NoError(t, err) require.Equal(t, &agentproto.UpdateStatsResponse{ @@ -502,13 +477,6 @@ func TestUpdateStates(t *testing.T) { require.EqualValues(t, 0, batcher.LastStats.SessionCountJetbrains) require.EqualValues(t, 0, batcher.LastStats.SessionCountVscode) require.EqualValues(t, 0, batcher.LastStats.SessionCountReconnectingPty) - ctx := testutil.Context(t, testutil.WaitShort) - select { - case <-ctx.Done(): - t.Error("timed out while waiting for pubsub notification") - case description := <-notifyDescription: - require.Equal(t, description, []byte{}) - } require.True(t, updateAgentMetricsFnCalled) }) } From 1b9be00c559d5949c7d997bea442470c736a9902 Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 19:22:40 +0000 Subject: [PATCH 05/10] Revert "fix tests" This reverts commit 61f2b7e17f0ebfea5c3656cc45db1353bafc8cfc. --- coderd/agentapi/stats_test.go | 36 +++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/coderd/agentapi/stats_test.go b/coderd/agentapi/stats_test.go index a8c2ef0bbcee3..57534208be110 100644 --- a/coderd/agentapi/stats_test.go +++ b/coderd/agentapi/stats_test.go @@ -230,6 +230,12 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) + // Workspace last used at gets bumped. + dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ + ID: workspace.ID, + LastUsedAt: now, + }).Return(nil) + _, err := api.UpdateStats(context.Background(), req) require.NoError(t, err) }) @@ -313,7 +319,6 @@ func TestUpdateStates(t *testing.T) { "dean": 2, }, ConnectionCount: 3, - SessionCountSsh: 3, }, } ) @@ -350,7 +355,7 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) - // We expect an activity bump because SessionCountSsh > 0. However, the + // We expect an activity bump because ConnectionCount > 0. However, the // next autostart time will be set on the bump. dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{ WorkspaceID: workspace.ID, @@ -461,9 +466,29 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) + // We expect an activity bump because ConnectionCount > 0. + dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{ + WorkspaceID: workspace.ID, + NextAutostart: time.Time{}.UTC(), + }).Return(nil) + + // Workspace last used at gets bumped. + dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ + ID: workspace.ID, + LastUsedAt: now, + }).Return(nil) + // User gets fetched to hit the UpdateAgentMetricsFn. dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil) + // Ensure that pubsub notifications are sent. + notifyDescription := make(chan []byte) + ps.Subscribe(codersdk.WorkspaceNotifyChannel(workspace.ID), func(_ context.Context, description []byte) { + go func() { + notifyDescription <- description + }() + }) + resp, err := api.UpdateStats(context.Background(), req) require.NoError(t, err) require.Equal(t, &agentproto.UpdateStatsResponse{ @@ -477,6 +502,13 @@ func TestUpdateStates(t *testing.T) { require.EqualValues(t, 0, batcher.LastStats.SessionCountJetbrains) require.EqualValues(t, 0, batcher.LastStats.SessionCountVscode) require.EqualValues(t, 0, batcher.LastStats.SessionCountReconnectingPty) + ctx := testutil.Context(t, testutil.WaitShort) + select { + case <-ctx.Done(): + t.Error("timed out while waiting for pubsub notification") + case description := <-notifyDescription: + require.Equal(t, description, []byte{}) + } require.True(t, updateAgentMetricsFnCalled) }) } From c8bc07e29202b36f072efbabdff0e271a9fe730b Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 19:23:34 +0000 Subject: [PATCH 06/10] revert --- coderd/workspacestats/reporter.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coderd/workspacestats/reporter.go b/coderd/workspacestats/reporter.go index 7b9894c644e38..d907eee47f1aa 100644 --- a/coderd/workspacestats/reporter.go +++ b/coderd/workspacestats/reporter.go @@ -137,8 +137,8 @@ func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspac return nil } - // if no active sessions we do not bump activity - if stats.SessionCountJetbrains == 0 && stats.SessionCountReconnectingPty == 0 && stats.SessionCountSsh == 0 && stats.SessionCountVscode == 0 { + // if no active connections we do not bump activity + if stats.ConnectionCount == 0 { return nil } From 082a599009443078da4748e0c3862eab1c83003d Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 19:43:21 +0000 Subject: [PATCH 07/10] fix --- coderd/agentapi/stats_test.go | 19 ++++--------------- coderd/workspacestats/reporter.go | 1 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/coderd/agentapi/stats_test.go b/coderd/agentapi/stats_test.go index 57534208be110..781f8560f5daa 100644 --- a/coderd/agentapi/stats_test.go +++ b/coderd/agentapi/stats_test.go @@ -108,6 +108,7 @@ func TestUpdateStates(t *testing.T) { Database: dbM, Pubsub: ps, StatsBatcher: batcher, + UsageTracker: workspacestats.NewTracker(dbM), TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { updateAgentMetricsFnCalled = true @@ -132,21 +133,15 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) + // User gets fetched to hit the UpdateAgentMetricsFn. + dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil) + // We expect an activity bump because ConnectionCount > 0. dbM.EXPECT().ActivityBumpWorkspace(gomock.Any(), database.ActivityBumpWorkspaceParams{ WorkspaceID: workspace.ID, NextAutostart: time.Time{}.UTC(), }).Return(nil) - // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, - }).Return(nil) - - // User gets fetched to hit the UpdateAgentMetricsFn. - dbM.EXPECT().GetUserByID(gomock.Any(), user.ID).Return(user, nil) - // Ensure that pubsub notifications are sent. notifyDescription := make(chan []byte) ps.Subscribe(codersdk.WorkspaceNotifyChannel(workspace.ID), func(_ context.Context, description []byte) { @@ -230,12 +225,6 @@ func TestUpdateStates(t *testing.T) { TemplateName: template.Name, }, nil) - // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, - }).Return(nil) - _, err := api.UpdateStats(context.Background(), req) require.NoError(t, err) }) diff --git a/coderd/workspacestats/reporter.go b/coderd/workspacestats/reporter.go index d907eee47f1aa..6bb1b2dea4028 100644 --- a/coderd/workspacestats/reporter.go +++ b/coderd/workspacestats/reporter.go @@ -134,7 +134,6 @@ func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspac AgentName: workspaceAgent.Name, TemplateName: templateName, }, stats.Metrics) - return nil } // if no active connections we do not bump activity From 12b1ccf19ca5f588eb961591a9f85e46d5607eb7 Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Wed, 23 Oct 2024 19:44:30 +0000 Subject: [PATCH 08/10] fix --- coderd/agentapi/stats_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/coderd/agentapi/stats_test.go b/coderd/agentapi/stats_test.go index 781f8560f5daa..6e52992ef3362 100644 --- a/coderd/agentapi/stats_test.go +++ b/coderd/agentapi/stats_test.go @@ -208,6 +208,7 @@ func TestUpdateStates(t *testing.T) { StatsReporter: workspacestats.NewReporter(workspacestats.ReporterOptions{ Database: dbM, Pubsub: ps, + UsageTracker: workspacestats.NewTracker(dbM), StatsBatcher: batcher, TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), // Ignored when nil. @@ -319,6 +320,7 @@ func TestUpdateStates(t *testing.T) { StatsReporter: workspacestats.NewReporter(workspacestats.ReporterOptions{ Database: dbM, Pubsub: ps, + UsageTracker: workspacestats.NewTracker(dbM), StatsBatcher: batcher, TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { @@ -428,6 +430,7 @@ func TestUpdateStates(t *testing.T) { Database: dbM, Pubsub: ps, StatsBatcher: batcher, + UsageTracker: workspacestats.NewTracker(dbM), TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { updateAgentMetricsFnCalled = true From 7412e77d93b79ee5ccf970ecbb5e834ee288dc5b Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Fri, 25 Oct 2024 16:02:23 +0000 Subject: [PATCH 09/10] fix tests --- coderd/agentapi/stats_test.go | 52 +++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/coderd/agentapi/stats_test.go b/coderd/agentapi/stats_test.go index 6e52992ef3362..49314aa6e9f5b 100644 --- a/coderd/agentapi/stats_test.go +++ b/coderd/agentapi/stats_test.go @@ -69,6 +69,11 @@ func TestUpdateStates(t *testing.T) { } batcher = &workspacestatstest.StatsBatcher{} updateAgentMetricsFnCalled = false + tickCh = make(chan time.Time) + flushCh = make(chan int, 1) + wut = workspacestats.NewTracker(dbM, + workspacestats.TrackerWithTickFlush(tickCh, flushCh), + ) req = &agentproto.UpdateStatsRequest{ Stats: &agentproto.Stats{ @@ -108,7 +113,7 @@ func TestUpdateStates(t *testing.T) { Database: dbM, Pubsub: ps, StatsBatcher: batcher, - UsageTracker: workspacestats.NewTracker(dbM), + UsageTracker: wut, TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { updateAgentMetricsFnCalled = true @@ -126,6 +131,7 @@ func TestUpdateStates(t *testing.T) { return now }, } + defer wut.Close() // Workspace gets fetched. dbM.EXPECT().GetWorkspaceByAgentID(gomock.Any(), agent.ID).Return(database.GetWorkspaceByAgentIDRow{ @@ -142,6 +148,12 @@ func TestUpdateStates(t *testing.T) { NextAutostart: time.Time{}.UTC(), }).Return(nil) + // Workspace last used at gets bumped. + dbM.EXPECT().BatchUpdateWorkspaceLastUsedAt(gomock.Any(), database.BatchUpdateWorkspaceLastUsedAtParams{ + IDs: []uuid.UUID{workspace.ID}, + LastUsedAt: now, + }).Return(nil) + // Ensure that pubsub notifications are sent. notifyDescription := make(chan []byte) ps.Subscribe(codersdk.WorkspaceNotifyChannel(workspace.ID), func(_ context.Context, description []byte) { @@ -156,6 +168,10 @@ func TestUpdateStates(t *testing.T) { ReportInterval: durationpb.New(10 * time.Second), }, resp) + tickCh <- now + count := <-flushCh + require.Equal(t, 1, count, "expected one flush with one id") + batcher.Mu.Lock() defer batcher.Mu.Unlock() require.Equal(t, int64(1), batcher.Called) @@ -301,6 +317,11 @@ func TestUpdateStates(t *testing.T) { } batcher = &workspacestatstest.StatsBatcher{} updateAgentMetricsFnCalled = false + tickCh = make(chan time.Time) + flushCh = make(chan int, 1) + wut = workspacestats.NewTracker(dbM, + workspacestats.TrackerWithTickFlush(tickCh, flushCh), + ) req = &agentproto.UpdateStatsRequest{ Stats: &agentproto.Stats{ @@ -320,7 +341,7 @@ func TestUpdateStates(t *testing.T) { StatsReporter: workspacestats.NewReporter(workspacestats.ReporterOptions{ Database: dbM, Pubsub: ps, - UsageTracker: workspacestats.NewTracker(dbM), + UsageTracker: wut, StatsBatcher: batcher, TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { @@ -339,6 +360,7 @@ func TestUpdateStates(t *testing.T) { return now }, } + defer wut.Close() // Workspace gets fetched. dbM.EXPECT().GetWorkspaceByAgentID(gomock.Any(), agent.ID).Return(database.GetWorkspaceByAgentIDRow{ @@ -354,9 +376,9 @@ func TestUpdateStates(t *testing.T) { }).Return(nil) // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, - LastUsedAt: now, + dbM.EXPECT().BatchUpdateWorkspaceLastUsedAt(gomock.Any(), database.BatchUpdateWorkspaceLastUsedAtParams{ + IDs: []uuid.UUID{workspace.ID}, + LastUsedAt: now.UTC(), }).Return(nil) // User gets fetched to hit the UpdateAgentMetricsFn. @@ -368,6 +390,10 @@ func TestUpdateStates(t *testing.T) { ReportInterval: durationpb.New(15 * time.Second), }, resp) + tickCh <- now + count := <-flushCh + require.Equal(t, 1, count, "expected one flush with one id") + require.True(t, updateAgentMetricsFnCalled) }) @@ -391,6 +417,11 @@ func TestUpdateStates(t *testing.T) { } batcher = &workspacestatstest.StatsBatcher{} updateAgentMetricsFnCalled = false + tickCh = make(chan time.Time) + flushCh = make(chan int, 1) + wut = workspacestats.NewTracker(dbM, + workspacestats.TrackerWithTickFlush(tickCh, flushCh), + ) req = &agentproto.UpdateStatsRequest{ Stats: &agentproto.Stats{ @@ -421,6 +452,7 @@ func TestUpdateStates(t *testing.T) { }, } ) + defer wut.Close() api := agentapi.StatsAPI{ AgentFn: func(context.Context) (database.WorkspaceAgent, error) { return agent, nil @@ -430,7 +462,7 @@ func TestUpdateStates(t *testing.T) { Database: dbM, Pubsub: ps, StatsBatcher: batcher, - UsageTracker: workspacestats.NewTracker(dbM), + UsageTracker: wut, TemplateScheduleStore: templateScheduleStorePtr(templateScheduleStore), UpdateAgentMetricsFn: func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) { updateAgentMetricsFnCalled = true @@ -465,8 +497,8 @@ func TestUpdateStates(t *testing.T) { }).Return(nil) // Workspace last used at gets bumped. - dbM.EXPECT().UpdateWorkspaceLastUsedAt(gomock.Any(), database.UpdateWorkspaceLastUsedAtParams{ - ID: workspace.ID, + dbM.EXPECT().BatchUpdateWorkspaceLastUsedAt(gomock.Any(), database.BatchUpdateWorkspaceLastUsedAtParams{ + IDs: []uuid.UUID{workspace.ID}, LastUsedAt: now, }).Return(nil) @@ -487,6 +519,10 @@ func TestUpdateStates(t *testing.T) { ReportInterval: durationpb.New(10 * time.Second), }, resp) + tickCh <- now + count := <-flushCh + require.Equal(t, 1, count, "expected one flush with one id") + batcher.Mu.Lock() defer batcher.Mu.Unlock() require.EqualValues(t, 1, batcher.Called) From 0b8e06eceaac18ef890fce73496a078f249d5510 Mon Sep 17 00:00:00 2001 From: Garrett Delfosse Date: Fri, 25 Oct 2024 16:29:11 +0000 Subject: [PATCH 10/10] add flush --- coderd/workspaceagentsrpc_test.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/coderd/workspaceagentsrpc_test.go b/coderd/workspaceagentsrpc_test.go index ca8f334d4e766..df57442462e2f 100644 --- a/coderd/workspaceagentsrpc_test.go +++ b/coderd/workspaceagentsrpc_test.go @@ -3,6 +3,7 @@ package coderd_test import ( "context" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -11,6 +12,7 @@ import ( "github.com/coder/coder/v2/coderd/coderdtest" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbfake" + "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/codersdk/agentsdk" "github.com/coder/coder/v2/provisionersdk/proto" "github.com/coder/coder/v2/testutil" @@ -20,7 +22,12 @@ import ( func TestWorkspaceAgentReportStats(t *testing.T) { t.Parallel() - client, db := coderdtest.NewWithDatabase(t, nil) + tickCh := make(chan time.Time) + flushCh := make(chan int, 1) + client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{ + WorkspaceUsageTrackerFlush: flushCh, + WorkspaceUsageTrackerTick: tickCh, + }) user := coderdtest.CreateFirstUser(t, client) r := dbfake.WorkspaceBuild(t, db, database.Workspace{ OrganizationID: user.OrganizationID, @@ -53,6 +60,10 @@ func TestWorkspaceAgentReportStats(t *testing.T) { }) require.NoError(t, err) + tickCh <- dbtime.Now() + count := <-flushCh + require.Equal(t, 1, count, "expected one flush with one id") + newWorkspace, err := client.Workspace(context.Background(), r.Workspace.ID) require.NoError(t, err)