Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7998f89

Browse files
chore: handle missing datapoints
1 parent 1550cc6 commit 7998f89

File tree

3 files changed

+156
-137
lines changed

3 files changed

+156
-137
lines changed

coderd/agentapi/resources_monitoring.go

+16-53
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"database/sql"
66
"errors"
77
"fmt"
8-
"slices"
98
"time"
109

1110
"golang.org/x/xerrors"
@@ -15,6 +14,7 @@ import (
1514
"github.com/google/uuid"
1615

1716
"github.com/coder/coder/v2/agent/proto"
17+
"github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
1818
"github.com/coder/coder/v2/coderd/database"
1919
"github.com/coder/coder/v2/coderd/database/dbauthz"
2020
"github.com/coder/coder/v2/coderd/database/dbtime"
@@ -122,7 +122,7 @@ func (a *ResourcesMonitoringAPI) monitorMemory(ctx context.Context, datapoints [
122122
usageDatapoints = append(usageDatapoints, datapoint.Memory)
123123
}
124124

125-
usageStates := calculateMemoryUsageStates(monitor, usageDatapoints)
125+
usageStates := resourcesmonitor.CalculateMemoryUsageStates(monitor, usageDatapoints)
126126

127127
oldState := monitor.State
128128
newState := a.calculateNextState(oldState, usageStates)
@@ -198,7 +198,7 @@ func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints
198198
return VolumeNotFoundError{Volume: monitor.Path}
199199
}
200200

201-
usageStates := calculateVolumeUsageStates(monitor, datapoints)
201+
usageStates := resourcesmonitor.CalculateVolumeUsageStates(monitor, datapoints)
202202

203203
oldState := monitor.State
204204
newState := a.calculateNextState(oldState, usageStates)
@@ -256,19 +256,22 @@ func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints
256256

257257
func (a *ResourcesMonitoringAPI) calculateNextState(
258258
oldState database.WorkspaceAgentMonitorState,
259-
states []database.WorkspaceAgentMonitorState,
259+
states []resourcesmonitor.State,
260260
) database.WorkspaceAgentMonitorState {
261-
// If we do not have an OK in the last `X` datapoints, then we are
262-
// in an alert state.
263-
lastXStates := states[max(len(states)-a.ConsecutiveNOKsToAlert, 0):]
264-
if !slices.Contains(lastXStates, database.WorkspaceAgentMonitorStateOK) {
261+
// If there are enough consecutive NOK states, we should be in an
262+
// alert state.
263+
consecutiveNOKs := resourcesmonitor.CalculateConsecutiveNOK(states)
264+
if consecutiveNOKs >= a.ConsecutiveNOKsToAlert {
265265
return database.WorkspaceAgentMonitorStateNOK
266266
}
267267

268-
nokCount := 0
268+
nokCount, okCount := 0, 0
269269
for _, state := range states {
270-
if state == database.WorkspaceAgentMonitorStateNOK {
271-
nokCount++
270+
switch state {
271+
case resourcesmonitor.StateOK:
272+
okCount += 1
273+
case resourcesmonitor.StateNOK:
274+
nokCount += 1
272275
}
273276
}
274277

@@ -277,51 +280,11 @@ func (a *ResourcesMonitoringAPI) calculateNextState(
277280
return database.WorkspaceAgentMonitorStateNOK
278281
}
279282

280-
// If there are no NOK datapoints, we should be in an OK state.
281-
if nokCount == 0 {
283+
// If all datapoints are OK, we should be in an OK state
284+
if okCount == len(states) {
282285
return database.WorkspaceAgentMonitorStateOK
283286
}
284287

285288
// Otherwise we stay in the same state as last.
286289
return oldState
287290
}
288-
289-
func calculateMemoryUsageStates(
290-
monitor database.WorkspaceAgentMemoryResourceMonitor,
291-
datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage,
292-
) []database.WorkspaceAgentMonitorState {
293-
states := make([]database.WorkspaceAgentMonitorState, 0, len(datapoints))
294-
295-
for _, datapoint := range datapoints {
296-
percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100)
297-
298-
state := database.WorkspaceAgentMonitorStateOK
299-
if percent >= monitor.Threshold {
300-
state = database.WorkspaceAgentMonitorStateNOK
301-
}
302-
303-
states = append(states, state)
304-
}
305-
306-
return states
307-
}
308-
309-
func calculateVolumeUsageStates(
310-
monitor database.WorkspaceAgentVolumeResourceMonitor,
311-
datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage,
312-
) []database.WorkspaceAgentMonitorState {
313-
states := make([]database.WorkspaceAgentMonitorState, 0, len(datapoints))
314-
315-
for _, datapoint := range datapoints {
316-
percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100)
317-
318-
state := database.WorkspaceAgentMonitorStateOK
319-
if percent >= monitor.Threshold {
320-
state = database.WorkspaceAgentMonitorStateNOK
321-
}
322-
323-
states = append(states, state)
324-
}
325-
326-
return states
327-
}

coderd/agentapi/resources_monitoring_test.go

+60-84
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ func TestMemoryResourceMonitorDebounce(t *testing.T) {
8484
// 5. OK -> NOK |> sends a notification as debounce period exceeded
8585

8686
api, user, clock, notifyEnq := resourceMonitorAPI(t)
87+
api.ConsecutiveNOKsToAlert = 1
8788

8889
// Given: A monitor in an OK state
8990
dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
@@ -197,103 +198,76 @@ func TestMemoryResourceMonitor(t *testing.T) {
197198
t.Parallel()
198199

199200
tests := []struct {
200-
name string
201-
memoryUsage []int64
202-
memoryTotal int64
203-
thresholdPercent int32
204-
minimumNOKs int
205-
consecutiveNOKs int
206-
previousState database.WorkspaceAgentMonitorState
207-
expectState database.WorkspaceAgentMonitorState
208-
shouldNotify bool
201+
name string
202+
memoryUsage []int64
203+
memoryTotal int64
204+
previousState database.WorkspaceAgentMonitorState
205+
expectState database.WorkspaceAgentMonitorState
206+
shouldNotify bool
209207
}{
210208
{
211-
name: "WhenOK/NeverExceedsThreshold",
212-
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
213-
memoryTotal: 10,
214-
thresholdPercent: 80,
215-
consecutiveNOKs: 4,
216-
minimumNOKs: 10,
217-
previousState: database.WorkspaceAgentMonitorStateOK,
218-
expectState: database.WorkspaceAgentMonitorStateOK,
219-
shouldNotify: false,
209+
name: "WhenOK/NeverExceedsThreshold",
210+
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
211+
memoryTotal: 10,
212+
previousState: database.WorkspaceAgentMonitorStateOK,
213+
expectState: database.WorkspaceAgentMonitorStateOK,
214+
shouldNotify: false,
220215
},
221216
{
222-
name: "WhenOK/ShouldStayInOK",
223-
memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
224-
memoryTotal: 10,
225-
thresholdPercent: 80,
226-
consecutiveNOKs: 4,
227-
minimumNOKs: 10,
228-
previousState: database.WorkspaceAgentMonitorStateOK,
229-
expectState: database.WorkspaceAgentMonitorStateOK,
230-
shouldNotify: false,
217+
name: "WhenOK/ShouldStayInOK",
218+
memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
219+
memoryTotal: 10,
220+
previousState: database.WorkspaceAgentMonitorStateOK,
221+
expectState: database.WorkspaceAgentMonitorStateOK,
222+
shouldNotify: false,
231223
},
232224
{
233-
name: "WhenOK/ConsecutiveExceedsThreshold",
234-
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
235-
memoryTotal: 10,
236-
thresholdPercent: 80,
237-
consecutiveNOKs: 4,
238-
minimumNOKs: 10,
239-
previousState: database.WorkspaceAgentMonitorStateOK,
240-
expectState: database.WorkspaceAgentMonitorStateNOK,
241-
shouldNotify: true,
225+
name: "WhenOK/ConsecutiveExceedsThreshold",
226+
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
227+
memoryTotal: 10,
228+
previousState: database.WorkspaceAgentMonitorStateOK,
229+
expectState: database.WorkspaceAgentMonitorStateNOK,
230+
shouldNotify: true,
242231
},
243232
{
244-
name: "WhenOK/MinimumExceedsThreshold",
245-
memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
246-
memoryTotal: 10,
247-
thresholdPercent: 80,
248-
minimumNOKs: 4,
249-
consecutiveNOKs: 10,
250-
previousState: database.WorkspaceAgentMonitorStateOK,
251-
expectState: database.WorkspaceAgentMonitorStateNOK,
252-
shouldNotify: true,
233+
name: "WhenOK/MinimumExceedsThreshold",
234+
memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
235+
memoryTotal: 10,
236+
previousState: database.WorkspaceAgentMonitorStateOK,
237+
expectState: database.WorkspaceAgentMonitorStateNOK,
238+
shouldNotify: true,
253239
},
254240
{
255-
name: "WhenNOK/NeverExceedsThreshold",
256-
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
257-
memoryTotal: 10,
258-
thresholdPercent: 80,
259-
consecutiveNOKs: 4,
260-
minimumNOKs: 10,
261-
previousState: database.WorkspaceAgentMonitorStateNOK,
262-
expectState: database.WorkspaceAgentMonitorStateOK,
263-
shouldNotify: false,
241+
name: "WhenNOK/NeverExceedsThreshold",
242+
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
243+
memoryTotal: 10,
244+
previousState: database.WorkspaceAgentMonitorStateNOK,
245+
expectState: database.WorkspaceAgentMonitorStateOK,
246+
shouldNotify: false,
264247
},
265248
{
266-
name: "WhenNOK/ShouldStayInNOK",
267-
memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
268-
memoryTotal: 10,
269-
thresholdPercent: 80,
270-
consecutiveNOKs: 4,
271-
minimumNOKs: 10,
272-
previousState: database.WorkspaceAgentMonitorStateNOK,
273-
expectState: database.WorkspaceAgentMonitorStateNOK,
274-
shouldNotify: false,
249+
name: "WhenNOK/ShouldStayInNOK",
250+
memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
251+
memoryTotal: 10,
252+
previousState: database.WorkspaceAgentMonitorStateNOK,
253+
expectState: database.WorkspaceAgentMonitorStateNOK,
254+
shouldNotify: false,
275255
},
276256
{
277-
name: "WhenNOK/ConsecutiveExceedsThreshold",
278-
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
279-
memoryTotal: 10,
280-
thresholdPercent: 80,
281-
consecutiveNOKs: 4,
282-
minimumNOKs: 10,
283-
previousState: database.WorkspaceAgentMonitorStateNOK,
284-
expectState: database.WorkspaceAgentMonitorStateNOK,
285-
shouldNotify: false,
257+
name: "WhenNOK/ConsecutiveExceedsThreshold",
258+
memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
259+
memoryTotal: 10,
260+
previousState: database.WorkspaceAgentMonitorStateNOK,
261+
expectState: database.WorkspaceAgentMonitorStateNOK,
262+
shouldNotify: false,
286263
},
287264
{
288-
name: "WhenNOK/MinimumExceedsThreshold",
289-
memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
290-
memoryTotal: 10,
291-
thresholdPercent: 80,
292-
minimumNOKs: 4,
293-
consecutiveNOKs: 10,
294-
previousState: database.WorkspaceAgentMonitorStateNOK,
295-
expectState: database.WorkspaceAgentMonitorStateNOK,
296-
shouldNotify: false,
265+
name: "WhenNOK/MinimumExceedsThreshold",
266+
memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
267+
memoryTotal: 10,
268+
previousState: database.WorkspaceAgentMonitorStateNOK,
269+
expectState: database.WorkspaceAgentMonitorStateNOK,
270+
shouldNotify: false,
297271
},
298272
}
299273

@@ -304,8 +278,8 @@ func TestMemoryResourceMonitor(t *testing.T) {
304278
t.Parallel()
305279

306280
api, user, clock, notifyEnq := resourceMonitorAPI(t)
307-
api.MinimumNOKsToAlert = tt.minimumNOKs
308-
api.ConsecutiveNOKsToAlert = tt.consecutiveNOKs
281+
api.MinimumNOKsToAlert = 4
282+
api.ConsecutiveNOKsToAlert = 10
309283

310284
datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.memoryUsage))
311285
collectedAt := clock.Now()
@@ -323,7 +297,7 @@ func TestMemoryResourceMonitor(t *testing.T) {
323297
dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
324298
AgentID: api.AgentID,
325299
State: tt.previousState,
326-
Threshold: tt.thresholdPercent,
300+
Threshold: 80,
327301
})
328302

329303
clock.Set(collectedAt)
@@ -373,6 +347,7 @@ func TestVolumeResourceMonitorDebounce(t *testing.T) {
373347
secondVolumePath := "/dev/coder"
374348

375349
api, _, clock, notifyEnq := resourceMonitorAPI(t)
350+
api.MinimumNOKsToAlert = 1
376351

377352
// Given:
378353
// - First monitor in an OK state
@@ -709,6 +684,7 @@ func TestVolumeResourceMonitorMultiple(t *testing.T) {
709684
t.Parallel()
710685

711686
api, _, clock, notifyEnq := resourceMonitorAPI(t)
687+
api.ConsecutiveNOKsToAlert = 1
712688

713689
// Given: two different volume resource monitors
714690
dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{

0 commit comments

Comments
 (0)