From c6de43a906b2f0574695840286336e80af597e36 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 18 Mar 2024 20:27:21 +0200 Subject: [PATCH 1/2] feat(coderd/database): add `dbrollup` service to rollup insights --- coderd/coderd.go | 18 ++- coderd/coderdtest/coderdtest.go | 2 + coderd/database/dbgen/dbgen.go | 32 +++++ coderd/database/dbpurge/dbpurge.go | 1 + coderd/database/dbrollup/dbrollup.go | 130 +++++++++++++++++ coderd/database/dbrollup/dbrollup_test.go | 167 ++++++++++++++++++++++ coderd/database/lock.go | 5 +- 7 files changed, 350 insertions(+), 5 deletions(-) create mode 100644 coderd/database/dbrollup/dbrollup.go create mode 100644 coderd/database/dbrollup/dbrollup_test.go diff --git a/coderd/coderd.go b/coderd/coderd.go index 47066f0f439c4..636e2e9992f1b 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -47,6 +47,7 @@ import ( "github.com/coder/coder/v2/coderd/batchstats" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbrollup" "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/externalauth" @@ -180,6 +181,7 @@ type Options struct { UpdateAgentMetrics func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) StatsBatcher *batchstats.Batcher + DBRollupInterval time.Duration WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions @@ -342,6 +344,9 @@ func New(options *Options) *API { if options.StatsBatcher == nil { panic("developer error: options.StatsBatcher is nil") } + if options.DBRollupInterval == 0 { + options.DBRollupInterval = dbrollup.DefaultInterval + } siteCacheDir := options.CacheDir if siteCacheDir != "" { @@ -414,7 +419,13 @@ func New(options *Options) *API { ctx, options.Logger.Named("acquirer"), options.Database, - options.Pubsub), + options.Pubsub, + ), + rolluper: dbrollup.New( + 
options.Logger, + options.Database, + options.DBRollupInterval, + ), workspaceUsageTracker: options.WorkspaceUsageTracker, } @@ -1197,7 +1208,9 @@ type API struct { statsBatcher *batchstats.Batcher Acquirer *provisionerdserver.Acquirer - + // rolluper rolls up template usage stats from raw agent and app + // stats. This is used to provide insights in the WebUI. + rolluper *dbrollup.Rolluper workspaceUsageTracker *workspaceusage.Tracker } @@ -1212,6 +1225,7 @@ func (api *API) Close() error { api.WebsocketWaitGroup.Wait() api.WebsocketWaitMutex.Unlock() + api.rolluper.Close() api.metricsCache.Close() if api.updateChecker != nil { api.updateChecker.Close() diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 1ce314b0220ec..cba981750f059 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -147,6 +147,7 @@ type Options struct { WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions AllowWorkspaceRenames bool NewTicker func(duration time.Duration) (<-chan time.Time, func()) + DBRollupInterval time.Duration WorkspaceUsageTrackerFlush chan int WorkspaceUsageTrackerTick chan time.Time } @@ -491,6 +492,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can WorkspaceAppsStatsCollectorOptions: options.WorkspaceAppsStatsCollectorOptions, AllowWorkspaceRenames: options.AllowWorkspaceRenames, NewTicker: options.NewTicker, + DBRollupInterval: options.DBRollupInterval, WorkspaceUsageTracker: wuTracker, } } diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index da5f214e7f314..707e977178cde 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -489,6 +489,38 @@ func WorkspaceApp(t testing.TB, db database.Store, orig database.WorkspaceApp) d return resource } +func WorkspaceAppStat(t testing.TB, db database.Store, orig database.WorkspaceAppStat) database.WorkspaceAppStat { + // This is not going to be correct, but 
our query doesn't return the ID. + id, err := cryptorand.Int63() + require.NoError(t, err, "generate id") + + scheme := database.WorkspaceAppStat{ + ID: takeFirst(orig.ID, id), + UserID: takeFirst(orig.UserID, uuid.New()), + WorkspaceID: takeFirst(orig.WorkspaceID, uuid.New()), + AgentID: takeFirst(orig.AgentID, uuid.New()), + AccessMethod: takeFirst(orig.AccessMethod, ""), + SlugOrPort: takeFirst(orig.SlugOrPort, ""), + SessionID: takeFirst(orig.SessionID, uuid.New()), + SessionStartedAt: takeFirst(orig.SessionStartedAt, dbtime.Now().Add(-time.Minute)), + SessionEndedAt: takeFirst(orig.SessionEndedAt, dbtime.Now()), + Requests: takeFirst(orig.Requests, 1), + } + err = db.InsertWorkspaceAppStats(genCtx, database.InsertWorkspaceAppStatsParams{ + UserID: []uuid.UUID{scheme.UserID}, + WorkspaceID: []uuid.UUID{scheme.WorkspaceID}, + AgentID: []uuid.UUID{scheme.AgentID}, + AccessMethod: []string{scheme.AccessMethod}, + SlugOrPort: []string{scheme.SlugOrPort}, + SessionID: []uuid.UUID{scheme.SessionID}, + SessionStartedAt: []time.Time{scheme.SessionStartedAt}, + SessionEndedAt: []time.Time{scheme.SessionEndedAt}, + Requests: []int32{scheme.Requests}, + }) + require.NoError(t, err, "insert workspace agent stat") + return scheme +} + func WorkspaceResource(t testing.TB, db database.Store, orig database.WorkspaceResource) database.WorkspaceResource { resource, err := db.InsertWorkspaceResource(genCtx, database.InsertWorkspaceResourceParams{ ID: takeFirst(orig.ID, uuid.New()), diff --git a/coderd/database/dbpurge/dbpurge.go b/coderd/database/dbpurge/dbpurge.go index d3fc56a8c5f21..c4b5a609a3179 100644 --- a/coderd/database/dbpurge/dbpurge.go +++ b/coderd/database/dbpurge/dbpurge.go @@ -24,6 +24,7 @@ const ( // This is for cleaning up old, unused resources from the database that take up space. 
func New(ctx context.Context, logger slog.Logger, db database.Store) io.Closer { closed := make(chan struct{}) + logger = logger.Named("dbpurge") ctx, cancelFunc := context.WithCancel(ctx) //nolint:gocritic // The system purges old db records without user input. diff --git a/coderd/database/dbrollup/dbrollup.go b/coderd/database/dbrollup/dbrollup.go new file mode 100644 index 0000000000000..55eaaf171aa84 --- /dev/null +++ b/coderd/database/dbrollup/dbrollup.go @@ -0,0 +1,130 @@ +package dbrollup + +import ( + "context" + "time" + + "golang.org/x/sync/errgroup" + + "cdr.dev/slog" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" +) + +const ( + // DefaultInterval is the default time between rollups. + // Rollups will be synchronized with the clock so that + // they happen 13:00, 13:05, 13:10, etc. + DefaultInterval = 5 * time.Minute +) + +type Rolluper struct { + cancel context.CancelFunc + closed chan struct{} + db database.Store + logger slog.Logger +} + +// New creates a new DB rollup service that periodically runs rollup queries. +// It is the caller's responsibility to call Close on the returned instance. +// +// This is for e.g. generating insights data (template_usage_stats) from +// raw data (workspace_agent_stats, workspace_app_stats). +func New(logger slog.Logger, db database.Store, interval time.Duration) *Rolluper { + ctx, cancel := context.WithCancel(context.Background()) + + r := &Rolluper{ + cancel: cancel, + closed: make(chan struct{}), + db: db, + logger: logger.Named("dbrollup"), + } + + //nolint:gocritic // The system rolls up database tables without user input. 
+ ctx = dbauthz.AsSystemRestricted(ctx) + go r.start(ctx, interval) + + return r +} + +func (r *Rolluper) start(ctx context.Context, interval time.Duration) { + defer close(r.closed) + + do := func() { + var eg errgroup.Group + + r.logger.Debug(ctx, "rolling up data") + now := time.Now() + + // Track whether or not we performed a rollup (we got the advisory lock). + templateUsageStats := false + + eg.Go(func() error { + return r.db.InTx(func(tx database.Store) error { + // Acquire a lock to ensure that only one instance of + // the rollup is running at a time. + ok, err := tx.TryAcquireLock(ctx, database.LockIDDBRollup) + if err != nil { + return err + } + if !ok { + return nil + } + + templateUsageStats = true + return tx.UpsertTemplateUsageStats(ctx) + }, nil) + }) + + err := eg.Wait() + if err != nil { + if database.IsQueryCanceledError(err) { + return + } + // Only log if Close hasn't been called. + if ctx.Err() == nil { + r.logger.Error(ctx, "failed to rollup data", slog.Error(err)) + } + } else { + r.logger.Debug(ctx, + "rolled up data", + slog.F("took", time.Since(now)), + slog.F("template_usage_stats", templateUsageStats), + ) + } + } + + // Perform do immediately and on every tick of the ticker, + // disregarding the execution time of do. This ensures that + // the rollup is performed every interval assuming do does + // not take longer than the interval to execute. + t := time.NewTicker(time.Microsecond) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + // Ensure we're on the interval. + now := time.Now() + next := now.Add(interval).Truncate(interval) // Ensure we're on the interval and synced with the clock. + d := next.Sub(now) + // Safety check (shouldn't be possible). 
+ if d <= 0 { + d = interval + } + t.Reset(d) + + do() + + r.logger.Debug(ctx, "next rollup at", slog.F("next", next)) + } + } +} + +func (r *Rolluper) Close() error { + r.cancel() + <-r.closed + return nil +} diff --git a/coderd/database/dbrollup/dbrollup_test.go b/coderd/database/dbrollup/dbrollup_test.go new file mode 100644 index 0000000000000..57909b774b77e --- /dev/null +++ b/coderd/database/dbrollup/dbrollup_test.go @@ -0,0 +1,167 @@ +package dbrollup_test + +import ( + "context" + "database/sql" + "errors" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/slogtest" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbmem" + "github.com/coder/coder/v2/coderd/database/dbrollup" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/testutil" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestRollup_Close(t *testing.T) { + t.Parallel() + rolluper := dbrollup.New(slogtest.Make(t, nil), dbmem.New(), dbrollup.DefaultInterval) + err := rolluper.Close() + require.NoError(t, err) +} + +func TestRollupTemplateUsageStats(t *testing.T) { + t.Parallel() + + db, ps := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure()) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) + + anHourAgo := dbtime.Now().Add(-time.Hour).Truncate(time.Hour) + anHourAndSixMonthsAgo := anHourAgo.AddDate(0, -6, 0) + + org := dbgen.Organization(t, db, database.Organization{}) + user := dbgen.User(t, db, database.User{Name: "user1"}) + tpl := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID}) + ver := dbgen.TemplateVersion(t, db, database.TemplateVersion{ + OrganizationID: org.ID, + TemplateID: uuid.NullUUID{UUID: 
tpl.ID, Valid: true}, + CreatedBy: user.ID, + }) + ws := dbgen.Workspace(t, db, database.Workspace{ + OrganizationID: org.ID, + TemplateID: tpl.ID, OwnerID: user.ID, + }) + job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{OrganizationID: org.ID}) + build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ + WorkspaceID: ws.ID, + JobID: job.ID, + TemplateVersionID: ver.ID, + }) + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID}) + agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) + app := dbgen.WorkspaceApp(t, db, database.WorkspaceApp{AgentID: agent.ID}) + + // Stats inserted 6 months + 1 day ago, should be excluded. + _ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{ + TemplateID: tpl.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + UserID: user.ID, + CreatedAt: anHourAndSixMonthsAgo.AddDate(0, 0, -1), + ConnectionMedianLatencyMS: 1, + ConnectionCount: 1, + SessionCountSSH: 1, + }) + _ = dbgen.WorkspaceAppStat(t, db, database.WorkspaceAppStat{ + UserID: user.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + SessionStartedAt: anHourAndSixMonthsAgo.AddDate(0, 0, -1), + SessionEndedAt: anHourAndSixMonthsAgo.AddDate(0, 0, -1).Add(time.Minute), + SlugOrPort: app.Slug, + }) + + // Stats inserted 6 months - 1 day ago, should be rolled up. 
+ wags1 := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{ + TemplateID: tpl.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + UserID: user.ID, + CreatedAt: anHourAndSixMonthsAgo.AddDate(0, 0, 1), + ConnectionMedianLatencyMS: 1, + ConnectionCount: 1, + SessionCountReconnectingPTY: 1, + }) + wags2 := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{ + TemplateID: tpl.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + UserID: user.ID, + CreatedAt: wags1.CreatedAt.Add(time.Minute), + ConnectionMedianLatencyMS: 1, + ConnectionCount: 1, + SessionCountReconnectingPTY: 1, + }) + // wags2 and waps1 overlap, so total usage is 4 - 1. + waps1 := dbgen.WorkspaceAppStat(t, db, database.WorkspaceAppStat{ + UserID: user.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + SessionStartedAt: wags2.CreatedAt, + SessionEndedAt: wags2.CreatedAt.Add(time.Minute), + SlugOrPort: app.Slug, + }) + waps2 := dbgen.WorkspaceAppStat(t, db, database.WorkspaceAppStat{ + UserID: user.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + SessionStartedAt: waps1.SessionEndedAt, + SessionEndedAt: waps1.SessionEndedAt.Add(time.Minute), + SlugOrPort: app.Slug, + }) + _ = waps2 // Keep the name for documentation. + + // The data is already present, so we can rely on initial rollup to occur. + rolluper := dbrollup.New(logger, db, dbrollup.DefaultInterval) + defer rolluper.Close() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + testutil.Go(t, func() { + <-ctx.Done() + _ = rolluper.Close() + }) + + var stats []database.TemplateUsageStat + var err error + require.Eventually(t, func() bool { + stats, err = db.GetTemplateUsageStats(ctx, database.GetTemplateUsageStatsParams{ + StartTime: anHourAndSixMonthsAgo.Add(-time.Minute), + EndTime: anHourAgo, + }) + if err != nil { + // Stop looping on unexpected errors. 
+ return !errors.Is(err, sql.ErrNoRows) + } + return len(stats) > 0 + }, testutil.WaitShort, testutil.IntervalFast) + require.NoError(t, err) + require.Len(t, stats, 1) + + require.Equal(t, database.TemplateUsageStat{ + TemplateID: tpl.ID, + UserID: user.ID, + StartTime: wags1.CreatedAt, + EndTime: wags1.CreatedAt.Add(30 * time.Minute), + MedianLatencyMs: sql.NullFloat64{Float64: 1, Valid: true}, + UsageMins: 3, + ReconnectingPtyMins: 2, + AppUsageMins: database.StringMapOfInt{ + app.Slug: 2, + }, + }, stats[0]) +} diff --git a/coderd/database/lock.go b/coderd/database/lock.go index a007e5e03e752..65dd6eb84a832 100644 --- a/coderd/database/lock.go +++ b/coderd/database/lock.go @@ -6,10 +6,9 @@ import "hash/fnv" // change. If locks are deprecated, they should be kept in this list to avoid // reusing the same ID. const ( - // Keep the unused iota here so we don't need + 1 every time - lockIDUnused = iota - LockIDDeploymentSetup + LockIDDeploymentSetup = iota + 1 LockIDEnterpriseDeploymentSetup + LockIDDBRollup ) // GenLockID generates a unique and consistent lock ID from a given string. 
From 2040b875d7ff7d0732bf972cefbe1d985d2ec2d8 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 22 Mar 2024 13:01:00 +0000 Subject: [PATCH 2/2] refactor and add test for locking --- coderd/coderd.go | 23 ++- coderd/coderdtest/coderdtest.go | 5 +- coderd/database/dbrollup/dbrollup.go | 85 ++++++++--- coderd/database/dbrollup/dbrollup_test.go | 172 ++++++++++++++++------ 4 files changed, 207 insertions(+), 78 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 636e2e9992f1b..7c62a62bc20dc 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -181,7 +181,6 @@ type Options struct { UpdateAgentMetrics func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) StatsBatcher *batchstats.Batcher - DBRollupInterval time.Duration WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions @@ -194,6 +193,9 @@ type Options struct { // NewTicker is used for unit tests to replace "time.NewTicker". NewTicker func(duration time.Duration) (tick <-chan time.Time, done func()) + // DatabaseRolluper rolls up template usage stats from raw agent and app + // stats. This is used to provide insights in the WebUI. + DatabaseRolluper *dbrollup.Rolluper // WorkspaceUsageTracker tracks workspace usage by the CLI. 
WorkspaceUsageTracker *workspaceusage.Tracker } @@ -344,9 +346,6 @@ func New(options *Options) *API { if options.StatsBatcher == nil { panic("developer error: options.StatsBatcher is nil") } - if options.DBRollupInterval == 0 { - options.DBRollupInterval = dbrollup.DefaultInterval - } siteCacheDir := options.CacheDir if siteCacheDir != "" { @@ -371,6 +370,10 @@ func New(options *Options) *API { OIDC: options.OIDCConfig, } + if options.DatabaseRolluper == nil { + options.DatabaseRolluper = dbrollup.New(options.Logger.Named("dbrollup"), options.Database) + } + if options.WorkspaceUsageTracker == nil { options.WorkspaceUsageTracker = workspaceusage.New(options.Database, workspaceusage.WithLogger(options.Logger.Named("workspace_usage_tracker")), @@ -421,11 +424,7 @@ func New(options *Options) *API { options.Database, options.Pubsub, ), - rolluper: dbrollup.New( - options.Logger, - options.Database, - options.DBRollupInterval, - ), + dbRolluper: options.DatabaseRolluper, workspaceUsageTracker: options.WorkspaceUsageTracker, } @@ -1208,9 +1207,9 @@ type API struct { statsBatcher *batchstats.Batcher Acquirer *provisionerdserver.Acquirer - // rolluper rolls up template usage stats from raw agent and app + // dbRolluper rolls up template usage stats from raw agent and app // stats. This is used to provide insights in the WebUI. 
- rolluper *dbrollup.Rolluper + dbRolluper *dbrollup.Rolluper workspaceUsageTracker *workspaceusage.Tracker } @@ -1225,7 +1224,7 @@ func (api *API) Close() error { api.WebsocketWaitGroup.Wait() api.WebsocketWaitMutex.Unlock() - api.rolluper.Close() + api.dbRolluper.Close() api.metricsCache.Close() if api.updateChecker != nil { api.updateChecker.Close() diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index cba981750f059..60bec647874fe 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -57,6 +57,7 @@ import ( "github.com/coder/coder/v2/coderd/batchstats" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbrollup" "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/externalauth" @@ -147,7 +148,7 @@ type Options struct { WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions AllowWorkspaceRenames bool NewTicker func(duration time.Duration) (<-chan time.Time, func()) - DBRollupInterval time.Duration + DatabaseRolluper *dbrollup.Rolluper WorkspaceUsageTrackerFlush chan int WorkspaceUsageTrackerTick chan time.Time } @@ -492,7 +493,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can WorkspaceAppsStatsCollectorOptions: options.WorkspaceAppsStatsCollectorOptions, AllowWorkspaceRenames: options.AllowWorkspaceRenames, NewTicker: options.NewTicker, - DBRollupInterval: options.DBRollupInterval, + DatabaseRolluper: options.DatabaseRolluper, WorkspaceUsageTracker: wuTracker, } } diff --git a/coderd/database/dbrollup/dbrollup.go b/coderd/database/dbrollup/dbrollup.go index 55eaaf171aa84..5f67bd0dd3c24 100644 --- a/coderd/database/dbrollup/dbrollup.go +++ b/coderd/database/dbrollup/dbrollup.go @@ -2,6 +2,7 @@ package dbrollup import ( "context" + "flag" "time" "golang.org/x/sync/errgroup" @@ 
-19,11 +20,38 @@ const ( DefaultInterval = 5 * time.Minute ) +type Event struct { + TemplateUsageStats bool +} + type Rolluper struct { - cancel context.CancelFunc - closed chan struct{} - db database.Store - logger slog.Logger + cancel context.CancelFunc + closed chan struct{} + db database.Store + logger slog.Logger + interval time.Duration + event chan<- Event +} + +type Option func(*Rolluper) + +// WithInterval sets the interval between rollups. +func WithInterval(interval time.Duration) Option { + return func(r *Rolluper) { + r.interval = interval + } +} + +// WithEventChannel sets the event channel to use for rollup events. +// +// This is only used for testing. +func WithEventChannel(ch chan<- Event) Option { + if flag.Lookup("test.v") == nil { + panic("developer error: WithEventChannel is not to be used outside of tests") + } + return func(r *Rolluper) { + r.event = ch + } } // New creates a new DB rollup service that periodically runs rollup queries. @@ -31,24 +59,29 @@ type Rolluper struct { // // This is for e.g. generating insights data (template_usage_stats) from // raw data (workspace_agent_stats, workspace_app_stats). -func New(logger slog.Logger, db database.Store, interval time.Duration) *Rolluper { +func New(logger slog.Logger, db database.Store, opts ...Option) *Rolluper { ctx, cancel := context.WithCancel(context.Background()) r := &Rolluper{ - cancel: cancel, - closed: make(chan struct{}), - db: db, - logger: logger.Named("dbrollup"), + cancel: cancel, + closed: make(chan struct{}), + db: db, + logger: logger, + interval: DefaultInterval, + } + + for _, opt := range opts { + opt(r) } //nolint:gocritic // The system rolls up database tables without user input. 
ctx = dbauthz.AsSystemRestricted(ctx) - go r.start(ctx, interval) + go r.start(ctx) return r } -func (r *Rolluper) start(ctx context.Context, interval time.Duration) { +func (r *Rolluper) start(ctx context.Context) { defer close(r.closed) do := func() { @@ -58,7 +91,7 @@ func (r *Rolluper) start(ctx context.Context, interval time.Duration) { now := time.Now() // Track whether or not we performed a rollup (we got the advisory lock). - templateUsageStats := false + var ev Event eg.Go(func() error { return r.db.InTx(func(tx database.Store) error { @@ -72,7 +105,7 @@ func (r *Rolluper) start(ctx context.Context, interval time.Duration) { return nil } - templateUsageStats = true + ev.TemplateUsageStats = true return tx.UpsertTemplateUsageStats(ctx) }, nil) }) @@ -86,12 +119,22 @@ func (r *Rolluper) start(ctx context.Context, interval time.Duration) { if ctx.Err() == nil { r.logger.Error(ctx, "failed to rollup data", slog.Error(err)) } - } else { - r.logger.Debug(ctx, - "rolled up data", - slog.F("took", time.Since(now)), - slog.F("template_usage_stats", templateUsageStats), - ) + return + } + + r.logger.Debug(ctx, + "rolled up data", + slog.F("took", time.Since(now)), + slog.F("event", ev), + ) + + // For testing. + if r.event != nil { + select { + case <-ctx.Done(): + return + case r.event <- ev: + } } } @@ -108,11 +151,11 @@ func (r *Rolluper) start(ctx context.Context, interval time.Duration) { case <-t.C: // Ensure we're on the interval. now := time.Now() - next := now.Add(interval).Truncate(interval) // Ensure we're on the interval and synced with the clock. + next := now.Add(r.interval).Truncate(r.interval) // Ensure we're on the interval and synced with the clock. d := next.Sub(now) // Safety check (shouldn't be possible). 
if d <= 0 { - d = interval + d = r.interval } t.Reset(d) diff --git a/coderd/database/dbrollup/dbrollup_test.go b/coderd/database/dbrollup/dbrollup_test.go index 57909b774b77e..e2db4ce96b1ee 100644 --- a/coderd/database/dbrollup/dbrollup_test.go +++ b/coderd/database/dbrollup/dbrollup_test.go @@ -3,7 +3,6 @@ package dbrollup_test import ( "context" "database/sql" - "errors" "testing" "time" @@ -29,11 +28,113 @@ func TestMain(m *testing.M) { func TestRollup_Close(t *testing.T) { t.Parallel() - rolluper := dbrollup.New(slogtest.Make(t, nil), dbmem.New(), dbrollup.DefaultInterval) + rolluper := dbrollup.New(slogtest.Make(t, nil), dbmem.New(), dbrollup.WithInterval(250*time.Millisecond)) err := rolluper.Close() require.NoError(t, err) } +type wrapUpsertDB struct { + database.Store + resume <-chan struct{} +} + +func (w *wrapUpsertDB) InTx(fn func(database.Store) error, opts *sql.TxOptions) error { + return w.Store.InTx(func(tx database.Store) error { + return fn(&wrapUpsertDB{Store: tx, resume: w.resume}) + }, opts) +} + +func (w *wrapUpsertDB) UpsertTemplateUsageStats(ctx context.Context) error { + <-w.resume + return w.Store.UpsertTemplateUsageStats(ctx) +} + +func TestRollup_TwoInstancesUseLocking(t *testing.T) { + t.Parallel() + + if !dbtestutil.WillUsePostgres() { + t.Skip("Skipping test; only works with PostgreSQL.") + } + + db, ps := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure()) + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug) + + var ( + org = dbgen.Organization(t, db, database.Organization{}) + user = dbgen.User(t, db, database.User{Name: "user1"}) + tpl = dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID}) + ver = dbgen.TemplateVersion(t, db, database.TemplateVersion{OrganizationID: org.ID, TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, CreatedBy: user.ID}) + ws = dbgen.Workspace(t, db, database.Workspace{OrganizationID: org.ID, TemplateID: tpl.ID, OwnerID: user.ID}) + 
job = dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{OrganizationID: org.ID}) + build = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: job.ID, TemplateVersionID: ver.ID}) + res = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID}) + agent = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) + ) + + refTime := dbtime.Now().Truncate(time.Hour) + _ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{ + TemplateID: tpl.ID, + WorkspaceID: ws.ID, + AgentID: agent.ID, + UserID: user.ID, + CreatedAt: refTime.Add(-time.Minute), + ConnectionMedianLatencyMS: 1, + ConnectionCount: 1, + SessionCountSSH: 1, + }) + + closeRolluper := func(rolluper *dbrollup.Rolluper, resume chan struct{}) { + close(resume) + err := rolluper.Close() + require.NoError(t, err) + } + + interval := dbrollup.WithInterval(250 * time.Millisecond) + events1 := make(chan dbrollup.Event) + resume1 := make(chan struct{}, 1) + rolluper1 := dbrollup.New( + logger.Named("dbrollup1"), + &wrapUpsertDB{Store: db, resume: resume1}, + interval, + dbrollup.WithEventChannel(events1), + ) + defer closeRolluper(rolluper1, resume1) + + events2 := make(chan dbrollup.Event) + resume2 := make(chan struct{}, 1) + rolluper2 := dbrollup.New( + logger.Named("dbrollup2"), + &wrapUpsertDB{Store: db, resume: resume2}, + interval, + dbrollup.WithEventChannel(events2), + ) + defer closeRolluper(rolluper2, resume2) + + ctx := testutil.Context(t, testutil.WaitMedium) + + // One of the rollup instances should roll up and the other should not. 
+ var ev1, ev2 dbrollup.Event + select { + case <-ctx.Done(): + t.Fatal("timed out waiting for rollup to occur") + case ev1 = <-events1: + resume2 <- struct{}{} + ev2 = <-events2 + case ev2 = <-events2: + resume1 <- struct{}{} + ev1 = <-events1 + } + + require.NotEqual(t, ev1, ev2, "one of the rollup instances should have rolled up and the other not") + + rows, err := db.GetTemplateUsageStats(ctx, database.GetTemplateUsageStatsParams{ + StartTime: refTime.Add(-time.Hour).Truncate(time.Hour), + EndTime: refTime, + }) + require.NoError(t, err) + require.Len(t, rows, 1) +} + func TestRollupTemplateUsageStats(t *testing.T) { t.Parallel() @@ -43,27 +144,18 @@ func TestRollupTemplateUsageStats(t *testing.T) { anHourAgo := dbtime.Now().Add(-time.Hour).Truncate(time.Hour) anHourAndSixMonthsAgo := anHourAgo.AddDate(0, -6, 0) - org := dbgen.Organization(t, db, database.Organization{}) - user := dbgen.User(t, db, database.User{Name: "user1"}) - tpl := dbgen.Template(t, db, database.Template{OrganizationID: org.ID, CreatedBy: user.ID}) - ver := dbgen.TemplateVersion(t, db, database.TemplateVersion{ - OrganizationID: org.ID, - TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, - CreatedBy: user.ID, - }) - ws := dbgen.Workspace(t, db, database.Workspace{ - OrganizationID: org.ID, - TemplateID: tpl.ID, OwnerID: user.ID, - }) - job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{OrganizationID: org.ID}) - build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ - WorkspaceID: ws.ID, - JobID: job.ID, - TemplateVersionID: ver.ID, - }) - res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID}) - agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) - app := dbgen.WorkspaceApp(t, db, database.WorkspaceApp{AgentID: agent.ID}) + var ( + org = dbgen.Organization(t, db, database.Organization{}) + user = dbgen.User(t, db, database.User{Name: "user1"}) + tpl = dbgen.Template(t, db, database.Template{OrganizationID: 
org.ID, CreatedBy: user.ID}) + ver = dbgen.TemplateVersion(t, db, database.TemplateVersion{OrganizationID: org.ID, TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, CreatedBy: user.ID}) + ws = dbgen.Workspace(t, db, database.Workspace{OrganizationID: org.ID, TemplateID: tpl.ID, OwnerID: user.ID}) + job = dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{OrganizationID: org.ID}) + build = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: job.ID, TemplateVersionID: ver.ID}) + res = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID}) + agent = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) + app = dbgen.WorkspaceApp(t, db, database.WorkspaceApp{AgentID: agent.ID}) + ) // Stats inserted 6 months + 1 day ago, should be excluded. _ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{ @@ -126,29 +218,23 @@ func TestRollupTemplateUsageStats(t *testing.T) { _ = waps2 // Keep the name for documentation. // The data is already present, so we can rely on initial rollup to occur. 
- rolluper := dbrollup.New(logger, db, dbrollup.DefaultInterval) + events := make(chan dbrollup.Event, 1) + rolluper := dbrollup.New(logger, db, dbrollup.WithInterval(250*time.Millisecond), dbrollup.WithEventChannel(events)) defer rolluper.Close() - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) - defer cancel() - testutil.Go(t, func() { - <-ctx.Done() - _ = rolluper.Close() - }) + ctx := testutil.Context(t, testutil.WaitMedium) + + select { + case <-ctx.Done(): + t.Fatal("timed out waiting for rollup to occur") + case ev := <-events: + require.True(t, ev.TemplateUsageStats, "expected template usage stats to be rolled up") + } - var stats []database.TemplateUsageStat - var err error - require.Eventually(t, func() bool { - stats, err = db.GetTemplateUsageStats(ctx, database.GetTemplateUsageStatsParams{ - StartTime: anHourAndSixMonthsAgo.Add(-time.Minute), - EndTime: anHourAgo, - }) - if err != nil { - // Stop looping on unexpected errors. - return !errors.Is(err, sql.ErrNoRows) - } - return len(stats) > 0 - }, testutil.WaitShort, testutil.IntervalFast) + stats, err := db.GetTemplateUsageStats(ctx, database.GetTemplateUsageStatsParams{ + StartTime: anHourAndSixMonthsAgo.Add(-time.Minute), + EndTime: anHourAgo, + }) require.NoError(t, err) require.Len(t, stats, 1)