@@ -124,45 +124,45 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
124
124
duration = 1 * time .Minute
125
125
}
126
126
127
- agentsGauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
127
+ agentsGauge := NewCachedGaugeVec ( prometheus .NewGaugeVec (prometheus.GaugeOpts {
128
128
Namespace : "coderd" ,
129
129
Subsystem : "agents" ,
130
130
Name : "up" ,
131
131
Help : "The number of active agents per workspace." ,
132
- }, []string {"username" , "workspace_name" })
132
+ }, []string {"username" , "workspace_name" }))
133
133
err := registerer .Register (agentsGauge )
134
134
if err != nil {
135
135
return nil , err
136
136
}
137
137
138
- agentsConnectionsGauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
138
+ agentsConnectionsGauge := NewCachedGaugeVec ( prometheus .NewGaugeVec (prometheus.GaugeOpts {
139
139
Namespace : "coderd" ,
140
140
Subsystem : "agents" ,
141
141
Name : "connections" ,
142
142
Help : "Agent connections with statuses." ,
143
- }, []string {"agent_name" , "username" , "workspace_name" , "status" , "lifecycle_state" , "tailnet_node" })
143
+ }, []string {"agent_name" , "username" , "workspace_name" , "status" , "lifecycle_state" , "tailnet_node" }))
144
144
err = registerer .Register (agentsConnectionsGauge )
145
145
if err != nil {
146
146
return nil , err
147
147
}
148
148
149
- agentsConnectionLatenciesGauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
149
+ agentsConnectionLatenciesGauge := NewCachedGaugeVec ( prometheus .NewGaugeVec (prometheus.GaugeOpts {
150
150
Namespace : "coderd" ,
151
151
Subsystem : "agents" ,
152
152
Name : "connection_latencies_seconds" ,
153
153
Help : "Agent connection latencies in seconds." ,
154
- }, []string {"agent_id" , "username" , "workspace_name" , "derp_region" , "preferred" })
154
+ }, []string {"agent_id" , "username" , "workspace_name" , "derp_region" , "preferred" }))
155
155
err = registerer .Register (agentsConnectionLatenciesGauge )
156
156
if err != nil {
157
157
return nil , err
158
158
}
159
159
160
- agentsAppsGauge := prometheus .NewGaugeVec (prometheus.GaugeOpts {
160
+ agentsAppsGauge := NewCachedGaugeVec ( prometheus .NewGaugeVec (prometheus.GaugeOpts {
161
161
Namespace : "coderd" ,
162
162
Subsystem : "agents" ,
163
163
Name : "apps" ,
164
164
Help : "Agent applications with statuses." ,
165
- }, []string {"agent_name" , "username" , "workspace_name" , "app_name" , "health" })
165
+ }, []string {"agent_name" , "username" , "workspace_name" , "app_name" , "health" }))
166
166
err = registerer .Register (agentsAppsGauge )
167
167
if err != nil {
168
168
return nil , err
@@ -203,35 +203,30 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
203
203
continue
204
204
}
205
205
206
- agentsGauge .Reset ()
207
- agentsConnectionsGauge .Reset ()
208
- agentsConnectionLatenciesGauge .Reset ()
209
- agentsAppsGauge .Reset ()
210
-
211
206
for _ , workspace := range workspaceRows {
212
207
user , err := db .GetUserByID (ctx , workspace .OwnerID )
213
208
if err != nil {
214
209
logger .Error (ctx , "can't get user" , slog .F ("user_id" , workspace .OwnerID ), slog .Error (err ))
215
- agentsGauge .WithLabelValues (user .Username , workspace .Name ). Add ( 0 )
210
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
216
211
continue
217
212
}
218
213
219
214
agents , err := db .GetWorkspaceAgentsInLatestBuildByWorkspaceID (ctx , workspace .ID )
220
215
if err != nil {
221
216
logger .Error (ctx , "can't get workspace agents" , slog .F ("workspace_id" , workspace .ID ), slog .Error (err ))
222
- agentsGauge .WithLabelValues (user .Username , workspace .Name ). Add ( 0 )
217
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
223
218
continue
224
219
}
225
220
226
221
if len (agents ) == 0 {
227
222
logger .Debug (ctx , "workspace agents are unavailable" , slog .F ("workspace_id" , workspace .ID ))
228
- agentsGauge .WithLabelValues (user .Username , workspace .Name ). Add ( 0 )
223
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
229
224
continue
230
225
}
231
226
232
227
for _ , agent := range agents {
233
228
// Collect information about agents
234
- agentsGauge .WithLabelValues (user .Username , workspace .Name ). Add ( 1 )
229
+ agentsGauge .WithLabelValues (VectorOperationAdd , 1 , user .Username , workspace .Name )
235
230
236
231
connectionStatus := agent .Status (agentInactiveDisconnectTimeout )
237
232
node := (* coordinator .Load ()).Node (agent .ID )
@@ -241,7 +236,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
241
236
tailnetNode = node .ID .String ()
242
237
}
243
238
244
- agentsConnectionsGauge .WithLabelValues (agent .Name , user .Username , workspace .Name , string (connectionStatus .Status ), string (agent .LifecycleState ), tailnetNode ). Set ( 1 )
239
+ agentsConnectionsGauge .WithLabelValues (VectorOperationSet , 1 , agent .Name , user .Username , workspace .Name , string (connectionStatus .Status ), string (agent .LifecycleState ), tailnetNode )
245
240
246
241
if node == nil {
247
242
logger .Debug (ctx , "can't read in-memory node for agent" , slog .F ("agent_id" , agent .ID ))
@@ -266,7 +261,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
266
261
}
267
262
}
268
263
269
- agentsConnectionLatenciesGauge .WithLabelValues (agent .Name , user .Username , workspace .Name , region .RegionName , fmt .Sprintf ("%v" , node .PreferredDERP == regionID )). Set ( latency )
264
+ agentsConnectionLatenciesGauge .WithLabelValues (VectorOperationSet , latency , agent .Name , user .Username , workspace .Name , region .RegionName , fmt .Sprintf ("%v" , node .PreferredDERP == regionID ))
270
265
}
271
266
}
272
267
@@ -278,11 +273,16 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
278
273
}
279
274
280
275
for _ , app := range apps {
281
- agentsAppsGauge .WithLabelValues (agent .Name , user .Username , workspace .Name , app .DisplayName , string (app .Health )). Add ( 1 )
276
+ agentsAppsGauge .WithLabelValues (VectorOperationAdd , 1 , agent .Name , user .Username , workspace .Name , app .DisplayName , string (app .Health ))
282
277
}
283
278
}
284
279
}
285
280
281
+ agentsGauge .Commit ()
282
+ agentsConnectionsGauge .Commit ()
283
+ agentsConnectionLatenciesGauge .Commit ()
284
+ agentsAppsGauge .Commit ()
285
+
286
286
logger .Debug (ctx , "Agent metrics collection is done" )
287
287
metricsCollectorAgents .Observe (timer .ObserveDuration ().Seconds ())
288
288
}
0 commit comments