Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 663b5d5

Browse files
committed
fix
1 parent 8764f89 commit 663b5d5

File tree

1 file changed

+57
-36
lines changed

1 file changed

+57
-36
lines changed

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 57 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -122,24 +122,24 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
122122
duration = 15 * time.Second // TODO 5 * time.Minute
123123
}
124124

125-
workspaceAgentsGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
125+
agentsGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
126126
Namespace: "coderd",
127127
Subsystem: "agents",
128128
Name: "up",
129129
Help: "The number of active agents per workspace.",
130130
}, []string{"username", "workspace_name"})
131-
err := registerer.Register(workspaceAgentsGauge)
131+
err := registerer.Register(agentsGauge)
132132
if err != nil {
133133
return nil, err
134134
}
135135

136-
agentsConnectionGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
136+
agentsConnectionsGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
137137
Namespace: "coderd",
138138
Subsystem: "agents",
139139
Name: "connections",
140140
Help: "Agent connections with statuses.",
141141
}, []string{"agent_name", "username", "workspace_name", "status", "lifecycle_state", "tailnet_node"})
142-
err = registerer.Register(agentsConnectionGauge)
142+
err = registerer.Register(agentsConnectionsGauge)
143143
if err != nil {
144144
return nil, err
145145
}
@@ -155,6 +155,17 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
155155
return nil, err
156156
}
157157

158+
agentsAppsGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
159+
Namespace: "coderd",
160+
Subsystem: "agents",
161+
Name: "apps",
162+
Help: "Agent applications with statuses.",
163+
}, []string{"agent_name", "username", "workspace_name", "app_name", "health"})
164+
err = registerer.Register(agentsAppsGauge)
165+
if err != nil {
166+
return nil, err
167+
}
168+
158169
// nolint:gocritic // Prometheus must collect metrics for all Coder users.
159170
ctx, cancelFunc := context.WithCancel(dbauthz.AsSystemRestricted(ctx))
160171
ticker := time.NewTicker(duration)
@@ -167,7 +178,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
167178
case <-ticker.C:
168179
}
169180

170-
logger.Info(ctx, "Collect agent metrics now")
181+
logger.Debug(ctx, "Collect agent metrics now")
171182

172183
workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
173184
AgentInactiveDisconnectTimeoutSeconds: int64(agentInactiveDisconnectTimeout.Seconds()),
@@ -177,34 +188,35 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
177188
continue
178189
}
179190

180-
workspaceAgentsGauge.Reset()
181-
agentsConnectionGauge.Reset()
191+
agentsGauge.Reset()
192+
agentsConnectionsGauge.Reset()
182193
agentsConnectionLatenciesGauge.Reset()
194+
agentsAppsGauge.Reset()
183195

184196
for _, workspace := range workspaceRows {
185197
user, err := db.GetUserByID(ctx, workspace.OwnerID)
186198
if err != nil {
187-
logger.Error(ctx, "can't get user", slog.Error(err), slog.F("user_id", workspace.OwnerID))
188-
workspaceAgentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
199+
logger.Error(ctx, "can't get user", slog.F("user_id", workspace.OwnerID), slog.Error(err))
200+
agentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
189201
continue
190202
}
191203

192204
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
193205
if err != nil {
194-
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_name", workspace.Name), slog.Error(err))
195-
workspaceAgentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
206+
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
207+
agentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
196208
continue
197209
}
198210

199211
if len(agents) == 0 {
200-
logger.Info(ctx, "workspace agents are unavailable", slog.F("workspace_name", workspace.Name))
201-
workspaceAgentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
212+
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
213+
agentsGauge.WithLabelValues(user.Username, workspace.Name).Add(0)
202214
continue
203215
}
204216

205217
for _, agent := range agents {
206218
// Collect information about agents
207-
workspaceAgentsGauge.WithLabelValues(user.Username, workspace.Name).Add(1)
219+
agentsGauge.WithLabelValues(user.Username, workspace.Name).Add(1)
208220

209221
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
210222
node := (*coordinator.Load()).Node(agent.ID)
@@ -214,37 +226,46 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
214226
tailnetNode = node.ID.String()
215227
}
216228

217-
agentsConnectionGauge.WithLabelValues(agent.Name, user.Username, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode).Set(1)
229+
agentsConnectionsGauge.WithLabelValues(agent.Name, user.Username, workspace.Name, string(connectionStatus.Status), string(agent.LifecycleState), tailnetNode).Set(1)
218230

219231
if node == nil {
220-
logger.Info(ctx, "can't read in-memory node for agent", slog.F("workspace_name", workspace.Name), slog.F("agent_name", agent.Name))
232+
logger.Debug(ctx, "can't read in-memory node for agent", slog.F("agent_id", agent.ID))
221233
continue
222-
}
234+
} else {
235+
// Collect information about connection latencies
236+
for rawRegion, latency := range node.DERPLatency {
237+
regionParts := strings.SplitN(rawRegion, "-", 2)
238+
regionID, err := strconv.Atoi(regionParts[0])
239+
if err != nil {
240+
logger.Error(ctx, "can't convert DERP region", slog.F("agent_id", agent.ID), slog.F("raw_region", rawRegion), slog.Error(err))
241+
continue
242+
}
223243

224-
// Collect information about connection latencies
225-
for rawRegion, latency := range node.DERPLatency {
226-
regionParts := strings.SplitN(rawRegion, "-", 2)
227-
regionID, err := strconv.Atoi(regionParts[0])
228-
if err != nil {
229-
logger.Error(ctx, "can't convert DERP region", slog.Error(err), slog.F("agent_name", agent.Name), slog.F("raw_region", rawRegion))
230-
continue
231-
}
232-
region, found := derpMap.Regions[regionID]
233-
if !found {
234-
// It's possible that a workspace agent is using an old DERPMap
235-
// and reports regions that do not exist. If that's the case,
236-
// report the region as unknown!
237-
region = &tailcfg.DERPRegion{
238-
RegionID: regionID,
239-
RegionName: fmt.Sprintf("Unnamed %d", regionID),
244+
region, found := derpMap.Regions[regionID]
245+
if !found {
246+
// It's possible that a workspace agent is using an old DERPMap
247+
// and reports regions that do not exist. If that's the case,
248+
// report the region as unknown!
249+
region = &tailcfg.DERPRegion{
250+
RegionID: regionID,
251+
RegionName: fmt.Sprintf("Unnamed %d", regionID),
252+
}
240253
}
254+
255+
agentsConnectionLatenciesGauge.WithLabelValues(agent.Name, user.Username, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID)).Set(latency)
241256
}
257+
}
242258

243-
agentsConnectionLatenciesGauge.WithLabelValues(agent.Name, user.Username, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID)).Set(latency)
259+
// Collect information about registered applications
260+
apps, err := db.GetWorkspaceAppsByAgentID(ctx, agent.ID)
261+
if err != nil {
262+
logger.Error(ctx, "can't get workspace apps", slog.F("agent_id", agent.ID), slog.Error(err))
263+
continue
244264
}
245265

246-
// FIXME IDE?
247-
// FIXME connection_type ide
266+
for _, app := range apps {
267+
agentsAppsGauge.WithLabelValues(agent.Name, user.Username, workspace.Name, app.DisplayName, string(app.Health)).Add(1)
268+
}
248269
}
249270
}
250271
}

0 commit comments

Comments
 (0)