Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e0483e3

Browse files
authored
feat: add prebuilds metrics collector (#17547)
Closes coder/internal#509 --------- Signed-off-by: Danny Kopping <[email protected]>
1 parent b47d54d commit e0483e3

File tree

7 files changed

+548
-26
lines changed

7 files changed

+548
-26
lines changed

coderd/prebuilds/api.go

+13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55

66
"github.com/google/uuid"
77
"golang.org/x/xerrors"
8+
9+
"github.com/coder/coder/v2/coderd/database"
810
)
911

1012
var ErrNoClaimablePrebuiltWorkspaces = xerrors.New("no claimable prebuilt workspaces found")
@@ -25,12 +27,23 @@ type ReconciliationOrchestrator interface {
2527
}
2628

2729
type Reconciler interface {
30+
StateSnapshotter
31+
2832
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
2933
// It takes a global snapshot of the system state and then reconciles each preset
3034
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
3135
ReconcileAll(ctx context.Context) error
3236
}
3337

38+
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
39+
type StateSnapshotter interface {
40+
// SnapshotState captures the current state of all prebuilds across templates.
41+
// It creates a global database snapshot that can be viewed as a collection of PresetSnapshots,
42+
// each representing the state of prebuilds for a specific preset.
43+
// MUST be called inside a repeatable-read transaction.
44+
SnapshotState(ctx context.Context, store database.Store) (*GlobalSnapshot, error)
45+
}
46+
3447
type Claimer interface {
3548
Claim(ctx context.Context, userID uuid.UUID, name string, presetID uuid.UUID) (*uuid.UUID, error)
3649
Initiator() uuid.UUID

enterprise/coderd/coderd.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,6 @@ func (api *API) setupPrebuilds(featureEnabled bool) (agplprebuilds.Reconciliatio
11651165
}
11661166

11671167
reconciler := prebuilds.NewStoreReconciler(api.Database, api.Pubsub, api.DeploymentValues.Prebuilds,
1168-
api.Logger.Named("prebuilds"), quartz.NewReal())
1168+
api.Logger.Named("prebuilds"), quartz.NewReal(), api.PrometheusRegistry)
11691169
return reconciler, prebuilds.EnterpriseClaimer{}
11701170
}

enterprise/coderd/prebuilds/claim_test.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"time"
1111

1212
"github.com/google/uuid"
13+
"github.com/prometheus/client_golang/prometheus"
1314
"github.com/stretchr/testify/require"
1415
"golang.org/x/xerrors"
1516

@@ -142,7 +143,7 @@ func TestClaimPrebuild(t *testing.T) {
142143
EntitlementsUpdateInterval: time.Second,
143144
})
144145

145-
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
146+
reconciler := prebuilds.NewStoreReconciler(spy, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry())
146147
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(spy)
147148
api.AGPL.PrebuildsClaimer.Store(&claimer)
148149

@@ -419,7 +420,7 @@ func TestClaimPrebuild_CheckDifferentErrors(t *testing.T) {
419420
EntitlementsUpdateInterval: time.Second,
420421
})
421422

422-
reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t))
423+
reconciler := prebuilds.NewStoreReconciler(errorStore, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), api.PrometheusRegistry)
423424
var claimer agplprebuilds.Claimer = prebuilds.NewEnterpriseClaimer(errorStore)
424425
api.AGPL.PrebuildsClaimer.Store(&claimer)
425426

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package prebuilds
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"cdr.dev/slog"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
11+
"github.com/coder/coder/v2/coderd/database"
12+
"github.com/coder/coder/v2/coderd/database/dbauthz"
13+
"github.com/coder/coder/v2/coderd/prebuilds"
14+
)
15+
16+
var (
17+
labels = []string{"template_name", "preset_name", "organization_name"}
18+
createdPrebuildsDesc = prometheus.NewDesc(
19+
"coderd_prebuilt_workspaces_created_total",
20+
"Total number of prebuilt workspaces that have been created to meet the desired instance count of each "+
21+
"template preset.",
22+
labels,
23+
nil,
24+
)
25+
failedPrebuildsDesc = prometheus.NewDesc(
26+
"coderd_prebuilt_workspaces_failed_total",
27+
"Total number of prebuilt workspaces that failed to build.",
28+
labels,
29+
nil,
30+
)
31+
claimedPrebuildsDesc = prometheus.NewDesc(
32+
"coderd_prebuilt_workspaces_claimed_total",
33+
"Total number of prebuilt workspaces which were claimed by users. Claiming refers to creating a workspace "+
34+
"with a preset selected for which eligible prebuilt workspaces are available and one is reassigned to a user.",
35+
labels,
36+
nil,
37+
)
38+
desiredPrebuildsDesc = prometheus.NewDesc(
39+
"coderd_prebuilt_workspaces_desired",
40+
"Target number of prebuilt workspaces that should be available for each template preset.",
41+
labels,
42+
nil,
43+
)
44+
runningPrebuildsDesc = prometheus.NewDesc(
45+
"coderd_prebuilt_workspaces_running",
46+
"Current number of prebuilt workspaces that are in a running state. These workspaces have started "+
47+
"successfully but may not yet be claimable by users (see coderd_prebuilt_workspaces_eligible).",
48+
labels,
49+
nil,
50+
)
51+
eligiblePrebuildsDesc = prometheus.NewDesc(
52+
"coderd_prebuilt_workspaces_eligible",
53+
"Current number of prebuilt workspaces that are eligible to be claimed by users. These are workspaces that "+
54+
"have completed their build process with their agent reporting 'ready' status.",
55+
labels,
56+
nil,
57+
)
58+
)
59+
60+
type MetricsCollector struct {
61+
database database.Store
62+
logger slog.Logger
63+
snapshotter prebuilds.StateSnapshotter
64+
}
65+
66+
// Compile-time assertion that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = (*MetricsCollector)(nil)
67+
68+
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
69+
return &MetricsCollector{
70+
database: db,
71+
logger: logger.Named("prebuilds_metrics_collector"),
72+
snapshotter: snapshotter,
73+
}
74+
}
75+
76+
func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
77+
descCh <- createdPrebuildsDesc
78+
descCh <- failedPrebuildsDesc
79+
descCh <- claimedPrebuildsDesc
80+
descCh <- desiredPrebuildsDesc
81+
descCh <- runningPrebuildsDesc
82+
descCh <- eligiblePrebuildsDesc
83+
}
84+
85+
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
86+
// nolint:gocritic // We need to set an authz context to read metrics from the db.
87+
ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second)
88+
defer cancel()
89+
prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx)
90+
if err != nil {
91+
mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
92+
return
93+
}
94+
95+
for _, metric := range prebuildMetrics {
96+
metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
97+
metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
98+
metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName)
99+
}
100+
101+
snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database)
102+
if err != nil {
103+
mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err))
104+
return
105+
}
106+
107+
for _, preset := range snapshot.Presets {
108+
if !preset.UsingActiveVersion {
109+
continue
110+
}
111+
112+
presetSnapshot, err := snapshot.FilterByPreset(preset.ID)
113+
if err != nil {
114+
mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err))
115+
continue
116+
}
117+
state := presetSnapshot.CalculateState()
118+
119+
metricsCh <- prometheus.MustNewConstMetric(desiredPrebuildsDesc, prometheus.GaugeValue, float64(state.Desired), preset.TemplateName, preset.Name, preset.OrganizationName)
120+
metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName)
121+
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
122+
}
123+
}

0 commit comments

Comments
 (0)