diff --git a/coderd/agentapi/api.go b/coderd/agentapi/api.go index 7f9fda63cb98c..3922dfc4bcad0 100644 --- a/coderd/agentapi/api.go +++ b/coderd/agentapi/api.go @@ -17,10 +17,12 @@ import ( "cdr.dev/slog" agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" "github.com/coder/coder/v2/coderd/appearance" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/externalauth" + "github.com/coder/coder/v2/coderd/notifications" "github.com/coder/coder/v2/coderd/prometheusmetrics" "github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/coderd/workspacestats" @@ -29,6 +31,7 @@ import ( "github.com/coder/coder/v2/codersdk/agentsdk" "github.com/coder/coder/v2/tailnet" tailnetproto "github.com/coder/coder/v2/tailnet/proto" + "github.com/coder/quartz" ) // API implements the DRPC agent API interface from agent/proto. This struct is @@ -59,7 +62,9 @@ type Options struct { Ctx context.Context Log slog.Logger + Clock quartz.Clock Database database.Store + NotificationsEnqueuer notifications.Enqueuer Pubsub pubsub.Pubsub DerpMapFn func() *tailcfg.DERPMap TailnetCoordinator *atomic.Pointer[tailnet.Coordinator] @@ -82,6 +87,10 @@ type Options struct { } func New(opts Options) *API { + if opts.Clock == nil { + opts.Clock = quartz.NewReal() + } + api := &API{ opts: opts, mu: sync.Mutex{}, @@ -104,9 +113,22 @@ func New(opts Options) *API { } api.ResourcesMonitoringAPI = &ResourcesMonitoringAPI{ - Log: opts.Log, - AgentID: opts.AgentID, - Database: opts.Database, + AgentID: opts.AgentID, + WorkspaceID: opts.WorkspaceID, + Clock: opts.Clock, + Database: opts.Database, + NotificationsEnqueuer: opts.NotificationsEnqueuer, + Debounce: 5 * time.Minute, + + Config: resourcesmonitor.Config{ + NumDatapoints: 20, + CollectionInterval: 10 * time.Second, + + Alert: resourcesmonitor.AlertConfig{ + MinimumNOKsPercent: 20, + ConsecutiveNOKsPercent: 50, + }, + }, } api.StatsAPI = &StatsAPI{ diff --git a/coderd/agentapi/resources_monitoring.go b/coderd/agentapi/resources_monitoring.go index 0bce9b5104be6..e21c9bc7581d8 100644 --- a/coderd/agentapi/resources_monitoring.go +++ b/coderd/agentapi/resources_monitoring.go @@ -4,20 +4,35 @@ import ( "context" "database/sql" "errors" + "fmt" + "time" "golang.org/x/xerrors" + "cdr.dev/slog" + "github.com/google/uuid" - "cdr.dev/slog" "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/quartz" ) type ResourcesMonitoringAPI struct { - AgentID uuid.UUID - Database database.Store - Log slog.Logger + AgentID uuid.UUID + WorkspaceID uuid.UUID + + Log slog.Logger + Clock quartz.Clock + Database database.Store + NotificationsEnqueuer notifications.Enqueuer + + Debounce time.Duration + Config resourcesmonitor.Config } func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context.Context, _ *proto.GetResourcesMonitoringConfigurationRequest) (*proto.GetResourcesMonitoringConfigurationResponse, error) { @@ -33,8 +48,8 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context return &proto.GetResourcesMonitoringConfigurationResponse{ Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{ - CollectionIntervalSeconds: 10, - 
NumDatapoints: 20, + CollectionIntervalSeconds: int32(a.Config.CollectionInterval.Seconds()), + NumDatapoints: a.Config.NumDatapoints, }, Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory { if memoryErr != nil { @@ -60,8 +75,182 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context } func (a *ResourcesMonitoringAPI) PushResourcesMonitoringUsage(ctx context.Context, req *proto.PushResourcesMonitoringUsageRequest) (*proto.PushResourcesMonitoringUsageResponse, error) { - a.Log.Info(ctx, "resources monitoring usage received", - slog.F("request", req)) + var err error + + if memoryErr := a.monitorMemory(ctx, req.Datapoints); memoryErr != nil { + err = errors.Join(err, xerrors.Errorf("monitor memory: %w", memoryErr)) + } + + if volumeErr := a.monitorVolumes(ctx, req.Datapoints); volumeErr != nil { + err = errors.Join(err, xerrors.Errorf("monitor volume: %w", volumeErr)) + } + + return &proto.PushResourcesMonitoringUsageResponse{}, err +} + +func (a *ResourcesMonitoringAPI) monitorMemory(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error { + monitor, err := a.Database.FetchMemoryResourceMonitorsByAgentID(ctx, a.AgentID) + if err != nil { + // It is valid for an agent to not have a memory monitor, so we + // do not want to treat it as an error. + if errors.Is(err, sql.ErrNoRows) { + return nil + } + + return xerrors.Errorf("fetch memory resource monitor: %w", err) + } + + if !monitor.Enabled { + return nil + } + + usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, 0, len(datapoints)) + for _, datapoint := range datapoints { + usageDatapoints = append(usageDatapoints, datapoint.Memory) + } + + usageStates := resourcesmonitor.CalculateMemoryUsageStates(monitor, usageDatapoints) + + oldState := monitor.State + newState := resourcesmonitor.NextState(a.Config, oldState, usageStates) + + debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState) + + //nolint:gocritic // We need to be able to update the resource monitor here. + err = a.Database.UpdateMemoryResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateMemoryResourceMonitorParams{ + AgentID: a.AgentID, + State: newState, + UpdatedAt: dbtime.Time(a.Clock.Now()), + DebouncedUntil: dbtime.Time(debouncedUntil), + }) + if err != nil { + return xerrors.Errorf("update workspace monitor: %w", err) + } + + if !shouldNotify { + return nil + } + + workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID) + if err != nil { + return xerrors.Errorf("get workspace by id: %w", err) + } + + _, err = a.NotificationsEnqueuer.EnqueueWithData( + // nolint:gocritic // We need to be able to send the notification. + dbauthz.AsNotifier(ctx), + workspace.OwnerID, + notifications.TemplateWorkspaceOutOfMemory, + map[string]string{ + "workspace": workspace.Name, + "threshold": fmt.Sprintf("%d%%", monitor.Threshold), + }, + map[string]any{ + // NOTE(DanielleMaywood): + // When notifications are enqueued, they are checked to be + // unique within a single day. This means that if we attempt + // to send two OOM notifications for the same workspace on + // the same day, the enqueuer will prevent us from sending + // a second one. We inject a timestamp to make the + // notifications appear different enough to circumvent this + // deduplication logic. 
+ "timestamp": a.Clock.Now(), + }, + "workspace-monitor-memory", + ) + if err != nil { + return xerrors.Errorf("notify workspace OOM: %w", err) + } + + return nil +} + +func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error { + volumeMonitors, err := a.Database.FetchVolumesResourceMonitorsByAgentID(ctx, a.AgentID) + if err != nil { + return xerrors.Errorf("get or insert volume monitor: %w", err) + } + + outOfDiskVolumes := make([]map[string]any, 0) + + for _, monitor := range volumeMonitors { + if !monitor.Enabled { + continue + } + + usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, 0, len(datapoints)) + for _, datapoint := range datapoints { + var usage *proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage + + for _, volume := range datapoint.Volumes { + if volume.Volume == monitor.Path { + usage = volume + break + } + } + + usageDatapoints = append(usageDatapoints, usage) + } + + usageStates := resourcesmonitor.CalculateVolumeUsageStates(monitor, usageDatapoints) + + oldState := monitor.State + newState := resourcesmonitor.NextState(a.Config, oldState, usageStates) + + debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState) + + if shouldNotify { + outOfDiskVolumes = append(outOfDiskVolumes, map[string]any{ + "path": monitor.Path, + "threshold": fmt.Sprintf("%d%%", monitor.Threshold), + }) + } + + //nolint:gocritic // We need to be able to update the resource monitor here. + if err := a.Database.UpdateVolumeResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateVolumeResourceMonitorParams{ + AgentID: a.AgentID, + Path: monitor.Path, + State: newState, + UpdatedAt: dbtime.Time(a.Clock.Now()), + DebouncedUntil: dbtime.Time(debouncedUntil), + }); err != nil { + return xerrors.Errorf("update workspace monitor: %w", err) + } + } + + if len(outOfDiskVolumes) == 0 { + return nil + } + + workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID) + if err != nil { + return xerrors.Errorf("get workspace by id: %w", err) + } + + if _, err := a.NotificationsEnqueuer.EnqueueWithData( + // nolint:gocritic // We need to be able to send the notification. + dbauthz.AsNotifier(ctx), + workspace.OwnerID, + notifications.TemplateWorkspaceOutOfDisk, + map[string]string{ + "workspace": workspace.Name, + }, + map[string]any{ + "volumes": outOfDiskVolumes, + // NOTE(DanielleMaywood): + // When notifications are enqueued, they are checked to be + // unique within a single day. This means that if we attempt + // to send two OOM notifications for the same workspace on + // the same day, the enqueuer will prevent us from sending + // a second one. We are inject a timestamp to make the + // notifications appear different enough to circumvent this + // deduplication logic. 
+ "timestamp": a.Clock.Now(), + }, + "workspace-monitor-volumes", + ); err != nil { + return xerrors.Errorf("notify workspace OOD: %w", err) + } - return &proto.PushResourcesMonitoringUsageResponse{}, nil + return nil } diff --git a/coderd/agentapi/resources_monitoring_test.go b/coderd/agentapi/resources_monitoring_test.go new file mode 100644 index 0000000000000..087ccfd24e459 --- /dev/null +++ b/coderd/agentapi/resources_monitoring_test.go @@ -0,0 +1,944 @@ +package agentapi_test + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + + agentproto "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/agentapi" + "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/coder/v2/coderd/notifications/notificationstest" + "github.com/coder/quartz" +) + +func resourceMonitorAPI(t *testing.T) (*agentapi.ResourcesMonitoringAPI, database.User, *quartz.Mock, *notificationstest.FakeEnqueuer) { + t.Helper() + + db, _ := dbtestutil.NewDB(t) + user := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + template := dbgen.Template(t, db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{ + TemplateID: uuid.NullUUID{Valid: true, UUID: template.ID}, + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + workspace := dbgen.Workspace(t, db, database.WorkspaceTable{ + OrganizationID: org.ID, + TemplateID: template.ID, + OwnerID: user.ID, + }) + job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ + Type: database.ProvisionerJobTypeWorkspaceBuild, + }) + build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ + JobID: job.ID, + WorkspaceID: workspace.ID, + TemplateVersionID: templateVersion.ID, + }) + resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ + JobID: build.JobID, + }) + agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ + ResourceID: resource.ID, + }) + + notifyEnq := ¬ificationstest.FakeEnqueuer{} + clock := quartz.NewMock(t) + + return &agentapi.ResourcesMonitoringAPI{ + AgentID: agent.ID, + WorkspaceID: workspace.ID, + Clock: clock, + Database: db, + NotificationsEnqueuer: notifyEnq, + Config: resourcesmonitor.Config{ + NumDatapoints: 20, + CollectionInterval: 10 * time.Second, + + Alert: resourcesmonitor.AlertConfig{ + MinimumNOKsPercent: 20, + ConsecutiveNOKsPercent: 50, + }, + }, + Debounce: 1 * time.Minute, + }, user, clock, notifyEnq +} + +func TestMemoryResourceMonitorDebounce(t *testing.T) { + t.Parallel() + + // This test is a bit of a long one. We're testing that + // when a monitor goes into an alert state, it doesn't + // allow another notification to occur until after the + // debounce period. + // + // 1. OK -> NOK |> sends a notification + // 2. NOK -> OK |> does nothing + // 3. OK -> NOK |> does nothing due to debounce period + // 4. NOK -> OK |> does nothing + // 5. 
OK -> NOK |> sends a notification as debounce period exceeded + + api, user, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 100 + + // Given: A monitor in an OK state + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: The monitor is given a state that will trigger NOK + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect there to be a notification sent + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + notifyEnq.Clear() + + // When: The monitor moves to an OK state from NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no new notifications + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to a NOK state before the debounced time. + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no new notifications (showing the debouncer working) + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to an OK state from NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We still expect no new notifications + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: The monitor moves back to a NOK state after the debounce period. 
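+ // (Illustrative note: the clock has already advanced by 3*(Debounce/4) across the previous pushes, so + // advancing by Debounce/4 + 1s lands just past the debounce deadline set when the first notification fired.)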
+ clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect a notification + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) +} + +func TestMemoryResourceMonitor(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + memoryUsage []int64 + memoryTotal int64 + previousState database.WorkspaceAgentMonitorState + expectState database.WorkspaceAgentMonitorState + shouldNotify bool + }{ + { + name: "WhenOK/NeverExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ShouldStayInOK", + memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ConsecutiveExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenOK/MinimumExceedsThreshold", + memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenNOK/NeverExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ShouldStayInNOK", + memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ConsecutiveExceedsThreshold", + memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/MinimumExceedsThreshold", + memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + memoryTotal: 10, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + api, user, clock, notifyEnq := resourceMonitorAPI(t) + + datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.memoryUsage)) + collectedAt := clock.Now() + for _, usage := range tt.memoryUsage { + collectedAt = collectedAt.Add(15 * time.Second) + datapoints = 
append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + CollectedAt: timestamppb.New(collectedAt), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: usage, + Total: tt.memoryTotal, + }, + }) + } + + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: tt.previousState, + Threshold: 80, + }) + + clock.Set(collectedAt) + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: datapoints, + }) + require.NoError(t, err) + + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + if tt.shouldNotify { + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + } else { + require.Len(t, sent, 0) + } + }) + } +} + +func TestMemoryResourceMonitorMissingData(t *testing.T) { + t.Parallel() + + t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) { + t.Parallel() + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in an OK state. + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two NOK datapoints. + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Memory: nil, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 10, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no notifications, as this unknown prevents us knowing we should alert. + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory)) + require.Len(t, sent, 0) + + // Then: We expect the monitor to still be in an OK state. + monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitor.State) + }) + + t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) { + t.Parallel() + + api, _, clock, _ := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in a NOK state. + dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: api.AgentID, + State: database.WorkspaceAgentMonitorStateNOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two OK datapoints. 
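+ // (Illustrative note: NextState only returns a monitor to OK when every datapoint in the window is OK; + // the nil Memory datapoint below becomes StateUnknown, so the monitor should hold its previous NOK state.)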
+ _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Memory: nil, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{ + Used: 1, + Total: 10, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect the monitor to still be in a NOK state. + monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitor.State) + }) +} + +func TestVolumeResourceMonitorDebounce(t *testing.T) { + t.Parallel() + + // This test is an even longer one. We're testing + // that the debounce logic is independent per + // volume monitor. We interleave the triggering + // of each monitor to ensure the debounce logic + // is monitor independent. + // + // First Monitor: + // 1. OK -> NOK |> sends a notification + // 2. NOK -> OK |> does nothing + // 3. OK -> NOK |> does nothing due to debounce period + // 4. NOK -> OK |> does nothing + // 5. OK -> NOK |> sends a notification as debounce period exceeded + // 6. NOK -> OK |> does nothing + // + // Second Monitor: + // 1. OK -> OK |> does nothing + // 2. OK -> NOK |> sends a notification + // 3. NOK -> OK |> does nothing + // 4. OK -> NOK |> does nothing due to debounce period + // 5. NOK -> OK |> does nothing + // 6. OK -> NOK |> sends a notification as debounce period exceeded + // + + firstVolumePath := "/home/coder" + secondVolumePath := "/dev/coder" + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + + // Given: + // - First monitor in an OK state + // - Second monitor in an OK state + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: firstVolumePath, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: secondVolumePath, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: + // - First monitor is in a NOK state + // - Second monitor is in an OK state + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the first monitor + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes := requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, firstVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves to NOK + clock.Advance(api.Debounce / 4) + _, err = 
api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the second monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, secondVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to NOK before debounce period has ended + // - Second monitor moves back to OK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect no new notifications + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves back to NOK + clock.Advance(api.Debounce / 4) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect no new notifications. 
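+ // (Illustrative note: NOK -> OK transitions never notify, and the second monitor's OK -> NOK still falls + // inside the debounce window opened by its earlier notification.)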
+ sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + notifyEnq.Clear() + + // When: + // - First monitor moves back to a NOK state after the debounce period + // - Second monitor moves back to OK + clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 10, Total: 10}, + {Volume: secondVolumePath, Used: 1, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the first monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, firstVolumePath, volumes[0]["path"]) + notifyEnq.Clear() + + // When: + // - First monitor moves back to OK + // - Second monitor moves back to NOK after the debounce period + clock.Advance(api.Debounce/4 + 1*time.Second) + _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + {Volume: firstVolumePath, Used: 1, Total: 10}, + {Volume: secondVolumePath, Used: 10, Total: 10}, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: + // - We expect a notification from only the second monitor + sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + volumes = requireVolumeData(t, sent[0]) + require.Len(t, volumes, 1) + require.Equal(t, secondVolumePath, volumes[0]["path"]) +} + +func TestVolumeResourceMonitor(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + volumePath string + volumeUsage []int64 + volumeTotal int64 + thresholdPercent int32 + previousState database.WorkspaceAgentMonitorState + expectState database.WorkspaceAgentMonitorState + shouldNotify bool + }{ + { + name: "WhenOK/NeverExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ShouldStayInOK", + volumePath: "/home/coder", + volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenOK/ConsecutiveExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenOK/MinimumExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + 
volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: true, + }, + { + name: "WhenNOK/NeverExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ShouldStayInNOK", + volumePath: "/home/coder", + volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/ConsecutiveExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + { + name: "WhenNOK/MinimumExceedsThreshold", + volumePath: "/home/coder", + volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9}, + volumeTotal: 10, + thresholdPercent: 80, + previousState: database.WorkspaceAgentMonitorStateNOK, + expectState: database.WorkspaceAgentMonitorStateNOK, + shouldNotify: false, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + api, user, clock, notifyEnq := resourceMonitorAPI(t) + + datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.volumeUsage)) + collectedAt := clock.Now() + for _, volumeUsage := range tt.volumeUsage { + collectedAt = collectedAt.Add(15 * time.Second) + + volumeDatapoints := []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: tt.volumePath, + Used: volumeUsage, + Total: tt.volumeTotal, + }, + } + + datapoints = append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + CollectedAt: timestamppb.New(collectedAt), + Volumes: volumeDatapoints, + }) + } + + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: tt.volumePath, + State: tt.previousState, + Threshold: tt.thresholdPercent, + }) + + clock.Set(collectedAt) + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: datapoints, + }) + require.NoError(t, err) + + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + if tt.shouldNotify { + require.Len(t, sent, 1) + require.Equal(t, user.ID, sent[0].UserID) + } else { + require.Len(t, sent, 0) + } + }) + } +} + +func TestVolumeResourceMonitorMultiple(t *testing.T) { + t.Parallel() + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 100 + + // Given: two different volume resource monitors + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: "/home/coder", + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: "/dev/coder", + State: 
database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: both of them move to a NOK state + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: "/home/coder", + Used: 10, + Total: 10, + }, + { + Volume: "/dev/coder", + Used: 10, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect a notification to alert with information about both + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 1) + + volumes := requireVolumeData(t, sent[0]) + require.Len(t, volumes, 2) + require.Equal(t, "/home/coder", volumes[0]["path"]) + require.Equal(t, "/dev/coder", volumes[1]["path"]) +} + +func TestVolumeResourceMonitorMissingData(t *testing.T) { + t.Parallel() + + t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) { + t.Parallel() + + volumePath := "/home/coder" + + api, _, clock, notifyEnq := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in an OK state. + dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: volumePath, + State: database.WorkspaceAgentMonitorStateOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two NOK datapoints. + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 10, + Total: 10, + }, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{}, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 10, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect no notifications, as this unknown prevents us knowing we should alert. + sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk)) + require.Len(t, sent, 0) + + // Then: We expect the monitor to still be in an OK state. + monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Len(t, monitors, 1) + require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitors[0].State) + }) + + t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) { + t.Parallel() + + volumePath := "/home/coder" + + api, _, clock, _ := resourceMonitorAPI(t) + api.Config.Alert.ConsecutiveNOKsPercent = 50 + api.Config.Alert.MinimumNOKsPercent = 100 + + // Given: A monitor in a NOK state. 
+ dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{ + AgentID: api.AgentID, + Path: volumePath, + State: database.WorkspaceAgentMonitorStateNOK, + Threshold: 80, + }) + + // When: A datapoint is missing, surrounded by two OK datapoints. + _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{ + Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{ + { + CollectedAt: timestamppb.New(clock.Now()), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 1, + Total: 10, + }, + }, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{}, + }, + { + CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)), + Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{ + { + Volume: volumePath, + Used: 1, + Total: 10, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Then: We expect the monitor to still be in a NOK state. + monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID) + require.NoError(t, err) + require.Len(t, monitors, 1) + require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitors[0].State) + }) +} + +func requireVolumeData(t *testing.T, notif *notificationstest.FakeNotification) []map[string]any { + t.Helper() + + volumesData := notif.Data["volumes"] + require.IsType(t, []map[string]any{}, volumesData) + + return volumesData.([]map[string]any) +} diff --git a/coderd/agentapi/resourcesmonitor/resources_monitor.go b/coderd/agentapi/resourcesmonitor/resources_monitor.go new file mode 100644 index 0000000000000..9b1749cd0abd6 --- /dev/null +++ b/coderd/agentapi/resourcesmonitor/resources_monitor.go @@ -0,0 +1,129 @@ +package resourcesmonitor + +import ( + "math" + "time" + + "github.com/coder/coder/v2/agent/proto" + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/util/slice" +) + +type State int + +const ( + StateOK State = iota + StateNOK + StateUnknown +) + +type AlertConfig struct { + // What percentage of datapoints in a row are + // required to put the monitor in an alert state. + ConsecutiveNOKsPercent int + + // What percentage of datapoints in a window are + // required to put the monitor in an alert state. + MinimumNOKsPercent int +} + +type Config struct { + // How many datapoints should the agent send + NumDatapoints int32 + + // How long the agent should wait between + // collecting each datapoint. 
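+ // (Illustrative: with the defaults wired up in api.go, 20 datapoints collected every 10 seconds, each + // push covers roughly a 200-second window.)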
+ CollectionInterval time.Duration + + Alert AlertConfig +} + +func CalculateMemoryUsageStates( + monitor database.WorkspaceAgentMemoryResourceMonitor, + datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, +) []State { + states := make([]State, 0, len(datapoints)) + + for _, datapoint := range datapoints { + state := StateUnknown + + if datapoint != nil { + percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100) + + if percent < monitor.Threshold { + state = StateOK + } else { + state = StateNOK + } + } + + states = append(states, state) + } + + return states +} + +func CalculateVolumeUsageStates( + monitor database.WorkspaceAgentVolumeResourceMonitor, + datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, +) []State { + states := make([]State, 0, len(datapoints)) + + for _, datapoint := range datapoints { + state := StateUnknown + + if datapoint != nil { + percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100) + + if percent < monitor.Threshold { + state = StateOK + } else { + state = StateNOK + } + } + + states = append(states, state) + } + + return states +} + +func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []State) database.WorkspaceAgentMonitorState { + // If there are enough consecutive NOK states, we should be in an + // alert state. + consecutiveNOKs := slice.CountConsecutive(StateNOK, states...) + if percent(consecutiveNOKs, len(states)) >= c.Alert.ConsecutiveNOKsPercent { + return database.WorkspaceAgentMonitorStateNOK + } + + // We do not explicitly handle StateUnknown because it could have + // been either StateOK or StateNOK if collection didn't fail. As + // it could be either, our best bet is to ignore it. + nokCount, okCount := 0, 0 + for _, state := range states { + switch state { + case StateOK: + okCount++ + case StateNOK: + nokCount++ + } + } + + // If there are enough NOK datapoints, we should be in an alert state. + if percent(nokCount, len(states)) >= c.Alert.MinimumNOKsPercent { + return database.WorkspaceAgentMonitorStateNOK + } + + // If all datapoints are OK, we should be in an OK state + if okCount == len(states) { + return database.WorkspaceAgentMonitorStateOK + } + + // Otherwise we stay in the same state as last. 
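+ // (Worked example with the defaults from api.go: over 20 datapoints, ConsecutiveNOKsPercent=50 trips + // the alert once 10 consecutive datapoints breach the threshold, while MinimumNOKsPercent=20 trips it + // once any 4 datapoints in the window do.)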
+ return oldState +} + +func percent[T int](numerator, denominator T) int { + percent := float64(numerator*100) / float64(denominator) + return int(math.Round(percent)) +} diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 89a17ce580d04..9e616dd79dcbc 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -289,6 +289,24 @@ var ( Scope: rbac.ScopeAll, }.WithCachedASTValue() + subjectResourceMonitor = rbac.Subject{ + FriendlyName: "Resource Monitor", + ID: uuid.Nil.String(), + Roles: rbac.Roles([]rbac.Role{ + { + Identifier: rbac.RoleIdentifier{Name: "resourcemonitor"}, + DisplayName: "Resource Monitor", + Site: rbac.Permissions(map[string][]policy.Action{ + // The workspace monitor needs to be able to update monitors + rbac.ResourceWorkspaceAgentResourceMonitor.Type: {policy.ActionUpdate}, + }), + Org: map[string][]rbac.Permission{}, + User: []rbac.Permission{}, + }, + }), + Scope: rbac.ScopeAll, + }.WithCachedASTValue() + subjectSystemRestricted = rbac.Subject{ FriendlyName: "System", ID: uuid.Nil.String(), @@ -376,6 +394,12 @@ func AsNotifier(ctx context.Context) context.Context { return context.WithValue(ctx, authContextKey{}, subjectNotifier) } +// AsResourceMonitor returns a context with an actor that has permissions required for +// updating resource monitors. +func AsResourceMonitor(ctx context.Context) context.Context { + return context.WithValue(ctx, authContextKey{}, subjectResourceMonitor) +} + // AsSystemRestricted returns a context with an actor that has permissions // required for various system operations (login, logout, metrics cache). func AsSystemRestricted(ctx context.Context) context.Context { @@ -3677,6 +3701,14 @@ func (q *querier) UpdateMemberRoles(ctx context.Context, arg database.UpdateMemb return q.db.UpdateMemberRoles(ctx, arg) } +func (q *querier) UpdateMemoryResourceMonitor(ctx context.Context, arg database.UpdateMemoryResourceMonitorParams) error { + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil { + return err + } + + return q.db.UpdateMemoryResourceMonitor(ctx, arg) +} + func (q *querier) UpdateNotificationTemplateMethodByID(ctx context.Context, arg database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceNotificationTemplate); err != nil { return database.NotificationTemplate{}, err @@ -4073,6 +4105,14 @@ func (q *querier) UpdateUserStatus(ctx context.Context, arg database.UpdateUserS return updateWithReturn(q.log, q.auth, fetch, q.db.UpdateUserStatus)(ctx, arg) } +func (q *querier) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error { + if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil { + return err + } + + return q.db.UpdateVolumeResourceMonitor(ctx, arg) +} + func (q *querier) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { fetch := func(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) { w, err := q.db.GetWorkspaceByID(ctx, arg.ID) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 24ecf0b8eca47..3bf63c3300f13 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -4725,43 +4725,78 @@ func (s 
*MethodTestSuite) TestOAuth2ProviderAppTokens() { } func (s *MethodTestSuite) TestResourcesMonitor() { - s.Run("InsertMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { - dbtestutil.DisableForeignKeysAndTriggers(s.T(), db) - check.Args(database.InsertMemoryResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) - })) + createAgent := func(t *testing.T, db database.Store) (database.WorkspaceAgent, database.WorkspaceTable) { + t.Helper() - s.Run("InsertVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { - dbtestutil.DisableForeignKeysAndTriggers(s.T(), db) - check.Args(database.InsertVolumeResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) - })) - - s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { - u := dbgen.User(s.T(), db, database.User{}) - o := dbgen.Organization(s.T(), db, database.Organization{}) - tpl := dbgen.Template(s.T(), db, database.Template{ + u := dbgen.User(t, db, database.User{}) + o := dbgen.Organization(t, db, database.Organization{}) + tpl := dbgen.Template(t, db, database.Template{ OrganizationID: o.ID, CreatedBy: u.ID, }) - tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ + tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{ TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, OrganizationID: o.ID, CreatedBy: u.ID, }) - w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{ + w := dbgen.Workspace(t, db, database.WorkspaceTable{ TemplateID: tpl.ID, OrganizationID: o.ID, OwnerID: u.ID, }) - j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{ + j := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ Type: database.ProvisionerJobTypeWorkspaceBuild, }) - b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{ + b := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ JobID: j.ID, WorkspaceID: w.ID, TemplateVersionID: tv.ID, }) - res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID}) - agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID}) + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: b.JobID}) + agt := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ResourceID: res.ID}) + + return agt, w + } + + s.Run("InsertMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.InsertMemoryResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) + })) + + s.Run("InsertVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.InsertVolumeResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate) + })) + + s.Run("UpdateMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + check.Args(database.UpdateMemoryResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionUpdate) + })) + + s.Run("UpdateVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) { + agt, _ := createAgent(s.T(), db) + + 
check.Args(database.UpdateVolumeResourceMonitorParams{ + AgentID: agt.ID, + State: database.WorkspaceAgentMonitorStateOK, + }).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionUpdate) + })) + + s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { + agt, w := createAgent(s.T(), db) + dbgen.WorkspaceAgentMemoryResourceMonitor(s.T(), db, database.WorkspaceAgentMemoryResourceMonitor{ AgentID: agt.ID, Enabled: true, @@ -4776,32 +4811,8 @@ func (s *MethodTestSuite) TestResourcesMonitor() { })) s.Run("FetchVolumesResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) { - u := dbgen.User(s.T(), db, database.User{}) - o := dbgen.Organization(s.T(), db, database.Organization{}) - tpl := dbgen.Template(s.T(), db, database.Template{ - OrganizationID: o.ID, - CreatedBy: u.ID, - }) - tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ - TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true}, - OrganizationID: o.ID, - CreatedBy: u.ID, - }) - w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{ - TemplateID: tpl.ID, - OrganizationID: o.ID, - OwnerID: u.ID, - }) - j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{ - Type: database.ProvisionerJobTypeWorkspaceBuild, - }) - b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{ - JobID: j.ID, - WorkspaceID: w.ID, - TemplateVersionID: tv.ID, - }) - res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID}) - agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID}) + agt, w := createAgent(s.T(), db) + dbgen.WorkspaceAgentVolumeResourceMonitor(s.T(), db, database.WorkspaceAgentVolumeResourceMonitor{ AgentID: agt.ID, Path: "/var/lib", diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index cfd360f740183..9c4ebbe8bb8ca 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -1038,10 +1038,13 @@ func OAuth2ProviderAppToken(t testing.TB, db database.Store, seed database.OAuth func WorkspaceAgentMemoryResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentMemoryResourceMonitor) database.WorkspaceAgentMemoryResourceMonitor { monitor, err := db.InsertMemoryResourceMonitor(genCtx, database.InsertMemoryResourceMonitorParams{ - AgentID: takeFirst(seed.AgentID, uuid.New()), - Enabled: takeFirst(seed.Enabled, true), - Threshold: takeFirst(seed.Threshold, 100), - CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + AgentID: takeFirst(seed.AgentID, uuid.New()), + Enabled: takeFirst(seed.Enabled, true), + State: takeFirst(seed.State, database.WorkspaceAgentMonitorStateOK), + Threshold: takeFirst(seed.Threshold, 100), + CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + UpdatedAt: takeFirst(seed.UpdatedAt, dbtime.Now()), + DebouncedUntil: takeFirst(seed.DebouncedUntil, time.Time{}), }) require.NoError(t, err, "insert workspace agent memory resource monitor") return monitor @@ -1049,11 +1052,14 @@ func WorkspaceAgentMemoryResourceMonitor(t testing.TB, db database.Store, seed d func WorkspaceAgentVolumeResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentVolumeResourceMonitor) database.WorkspaceAgentVolumeResourceMonitor { monitor, err := db.InsertVolumeResourceMonitor(genCtx, database.InsertVolumeResourceMonitorParams{ - AgentID: takeFirst(seed.AgentID, uuid.New()), - Path: takeFirst(seed.Path, "/"), - Enabled: takeFirst(seed.Enabled, true), - Threshold: takeFirst(seed.Threshold, 100), - CreatedAt: 
takeFirst(seed.CreatedAt, dbtime.Now()), + AgentID: takeFirst(seed.AgentID, uuid.New()), + Path: takeFirst(seed.Path, "/"), + Enabled: takeFirst(seed.Enabled, true), + State: takeFirst(seed.State, database.WorkspaceAgentMonitorStateOK), + Threshold: takeFirst(seed.Threshold, 100), + CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()), + UpdatedAt: takeFirst(seed.UpdatedAt, dbtime.Now()), + DebouncedUntil: takeFirst(seed.DebouncedUntil, time.Time{}), }) require.NoError(t, err, "insert workspace agent volume resource monitor") return monitor diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 808e7b1a8a16c..7f56ea5f463e5 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -7989,7 +7989,16 @@ func (q *FakeQuerier) InsertMemoryResourceMonitor(_ context.Context, arg databas q.mutex.Lock() defer q.mutex.Unlock() - monitor := database.WorkspaceAgentMemoryResourceMonitor(arg) + //nolint:unconvert // The structs field-order differs so this is needed. + monitor := database.WorkspaceAgentMemoryResourceMonitor(database.WorkspaceAgentMemoryResourceMonitor{ + AgentID: arg.AgentID, + Enabled: arg.Enabled, + State: arg.State, + Threshold: arg.Threshold, + CreatedAt: arg.CreatedAt, + UpdatedAt: arg.UpdatedAt, + DebouncedUntil: arg.DebouncedUntil, + }) q.workspaceAgentMemoryResourceMonitors = append(q.workspaceAgentMemoryResourceMonitors, monitor) return monitor, nil @@ -8676,11 +8685,14 @@ func (q *FakeQuerier) InsertVolumeResourceMonitor(_ context.Context, arg databas defer q.mutex.Unlock() monitor := database.WorkspaceAgentVolumeResourceMonitor{ - AgentID: arg.AgentID, - Path: arg.Path, - Enabled: arg.Enabled, - Threshold: arg.Threshold, - CreatedAt: arg.CreatedAt, + AgentID: arg.AgentID, + Path: arg.Path, + Enabled: arg.Enabled, + State: arg.State, + Threshold: arg.Threshold, + CreatedAt: arg.CreatedAt, + UpdatedAt: arg.UpdatedAt, + DebouncedUntil: arg.DebouncedUntil, } q.workspaceAgentVolumeResourceMonitors = append(q.workspaceAgentVolumeResourceMonitors, monitor) @@ -9691,6 +9703,30 @@ func (q *FakeQuerier) UpdateMemberRoles(_ context.Context, arg database.UpdateMe return database.OrganizationMember{}, sql.ErrNoRows } +func (q *FakeQuerier) UpdateMemoryResourceMonitor(_ context.Context, arg database.UpdateMemoryResourceMonitorParams) error { + err := validateDatabaseType(arg) + if err != nil { + return err + } + + q.mutex.Lock() + defer q.mutex.Unlock() + + for i, monitor := range q.workspaceAgentMemoryResourceMonitors { + if monitor.AgentID != arg.AgentID { + continue + } + + monitor.State = arg.State + monitor.UpdatedAt = arg.UpdatedAt + monitor.DebouncedUntil = arg.DebouncedUntil + q.workspaceAgentMemoryResourceMonitors[i] = monitor + return nil + } + + return nil +} + func (*FakeQuerier) UpdateNotificationTemplateMethodByID(_ context.Context, _ database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) { // Not implementing this function because it relies on state in the database which is created with migrations. // We could consider using code-generation to align the database state and dbmem, but it's not worth it right now. 
diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go
index 808e7b1a8a16c..7f56ea5f463e5 100644
--- a/coderd/database/dbmem/dbmem.go
+++ b/coderd/database/dbmem/dbmem.go
@@ -7989,7 +7989,16 @@ func (q *FakeQuerier) InsertMemoryResourceMonitor(_ context.Context, arg databas
 	q.mutex.Lock()
 	defer q.mutex.Unlock()

-	monitor := database.WorkspaceAgentMemoryResourceMonitor(arg)
+	//nolint:unconvert // The argument struct's field order differs from the model's, so we cannot convert arg directly.
+	monitor := database.WorkspaceAgentMemoryResourceMonitor(database.WorkspaceAgentMemoryResourceMonitor{
+		AgentID:        arg.AgentID,
+		Enabled:        arg.Enabled,
+		State:          arg.State,
+		Threshold:      arg.Threshold,
+		CreatedAt:      arg.CreatedAt,
+		UpdatedAt:      arg.UpdatedAt,
+		DebouncedUntil: arg.DebouncedUntil,
+	})

 	q.workspaceAgentMemoryResourceMonitors = append(q.workspaceAgentMemoryResourceMonitors, monitor)
 	return monitor, nil
@@ -8676,11 +8685,14 @@ func (q *FakeQuerier) InsertVolumeResourceMonitor(_ context.Context, arg databas
 	defer q.mutex.Unlock()

 	monitor := database.WorkspaceAgentVolumeResourceMonitor{
-		AgentID:   arg.AgentID,
-		Path:      arg.Path,
-		Enabled:   arg.Enabled,
-		Threshold: arg.Threshold,
-		CreatedAt: arg.CreatedAt,
+		AgentID:        arg.AgentID,
+		Path:           arg.Path,
+		Enabled:        arg.Enabled,
+		State:          arg.State,
+		Threshold:      arg.Threshold,
+		CreatedAt:      arg.CreatedAt,
+		UpdatedAt:      arg.UpdatedAt,
+		DebouncedUntil: arg.DebouncedUntil,
 	}

 	q.workspaceAgentVolumeResourceMonitors = append(q.workspaceAgentVolumeResourceMonitors, monitor)
@@ -9691,6 +9703,30 @@ func (q *FakeQuerier) UpdateMemberRoles(_ context.Context, arg database.UpdateMe
 	return database.OrganizationMember{}, sql.ErrNoRows
 }

+func (q *FakeQuerier) UpdateMemoryResourceMonitor(_ context.Context, arg database.UpdateMemoryResourceMonitorParams) error {
+	err := validateDatabaseType(arg)
+	if err != nil {
+		return err
+	}
+
+	q.mutex.Lock()
+	defer q.mutex.Unlock()
+
+	for i, monitor := range q.workspaceAgentMemoryResourceMonitors {
+		if monitor.AgentID != arg.AgentID {
+			continue
+		}
+
+		monitor.State = arg.State
+		monitor.UpdatedAt = arg.UpdatedAt
+		monitor.DebouncedUntil = arg.DebouncedUntil
+		q.workspaceAgentMemoryResourceMonitors[i] = monitor
+		return nil
+	}
+
+	return nil
+}
+
 func (*FakeQuerier) UpdateNotificationTemplateMethodByID(_ context.Context, _ database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) {
 	// Not implementing this function because it relies on state in the database which is created with migrations.
 	// We could consider using code-generation to align the database state and dbmem, but it's not worth it right now.
@@ -10469,6 +10505,30 @@ func (q *FakeQuerier) UpdateUserStatus(_ context.Context, arg database.UpdateUse
 	return database.User{}, sql.ErrNoRows
 }

+func (q *FakeQuerier) UpdateVolumeResourceMonitor(_ context.Context, arg database.UpdateVolumeResourceMonitorParams) error {
+	err := validateDatabaseType(arg)
+	if err != nil {
+		return err
+	}
+
+	q.mutex.Lock()
+	defer q.mutex.Unlock()
+
+	for i, monitor := range q.workspaceAgentVolumeResourceMonitors {
+		if monitor.AgentID != arg.AgentID || monitor.Path != arg.Path {
+			continue
+		}
+
+		monitor.State = arg.State
+		monitor.UpdatedAt = arg.UpdatedAt
+		monitor.DebouncedUntil = arg.DebouncedUntil
+		q.workspaceAgentVolumeResourceMonitors[i] = monitor
+		return nil
+	}
+
+	return nil
+}
+
 func (q *FakeQuerier) UpdateWorkspace(_ context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) {
 	if err := validateDatabaseType(arg); err != nil {
 		return database.WorkspaceTable{}, err
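Both fake updates silently succeed when no monitor matches, mirroring the :exec UPDATE queries they stand in for; the volume variant is keyed on both agent ID and path. A quick sanity check of that behavior (hypothetical test, assuming the dbmem store above):

package dbmem_test

import (
	"context"
	"testing"

	"github.com/google/uuid"
	"github.com/stretchr/testify/require"

	"github.com/coder/coder/v2/coderd/database"
	"github.com/coder/coder/v2/coderd/database/dbmem"
	"github.com/coder/coder/v2/coderd/database/dbtime"
)

// Hypothetical test: updating an (agent_id, path) pair that was never
// inserted is a no-op rather than an error, just like an UPDATE whose
// WHERE clause matches zero rows on Postgres.
func TestUpdateVolumeMonitorNoMatch(t *testing.T) {
	db := dbmem.New()
	err := db.UpdateVolumeResourceMonitor(context.Background(), database.UpdateVolumeResourceMonitorParams{
		AgentID:   uuid.New(),
		Path:      "/does/not/exist",
		State:     database.WorkspaceAgentMonitorStateNOK,
		UpdatedAt: dbtime.Now(),
	})
	require.NoError(t, err)
}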
diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go
index fc84f556aabfb..665c10658a5bc 100644
--- a/coderd/database/dbmetrics/querymetrics.go
+++ b/coderd/database/dbmetrics/querymetrics.go
@@ -2331,6 +2331,13 @@ func (m queryMetricsStore) UpdateMemberRoles(ctx context.Context, arg database.U
 	return member, err
 }

+func (m queryMetricsStore) UpdateMemoryResourceMonitor(ctx context.Context, arg database.UpdateMemoryResourceMonitorParams) error {
+	start := time.Now()
+	r0 := m.s.UpdateMemoryResourceMonitor(ctx, arg)
+	m.queryLatencies.WithLabelValues("UpdateMemoryResourceMonitor").Observe(time.Since(start).Seconds())
+	return r0
+}
+
 func (m queryMetricsStore) UpdateNotificationTemplateMethodByID(ctx context.Context, arg database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) {
 	start := time.Now()
 	r0, r1 := m.s.UpdateNotificationTemplateMethodByID(ctx, arg)
@@ -2569,6 +2576,13 @@ func (m queryMetricsStore) UpdateUserStatus(ctx context.Context, arg database.Up
 	return user, err
 }

+func (m queryMetricsStore) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error {
+	start := time.Now()
+	r0 := m.s.UpdateVolumeResourceMonitor(ctx, arg)
+	m.queryLatencies.WithLabelValues("UpdateVolumeResourceMonitor").Observe(time.Since(start).Seconds())
+	return r0
+}
+
 func (m queryMetricsStore) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) {
 	start := time.Now()
 	workspace, err := m.s.UpdateWorkspace(ctx, arg)
diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go
index d51631316a3cd..c7711505d7d51 100644
--- a/coderd/database/dbmock/dbmock.go
+++ b/coderd/database/dbmock/dbmock.go
@@ -4965,6 +4965,20 @@ func (mr *MockStoreMockRecorder) UpdateMemberRoles(ctx, arg any) *gomock.Call {
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateMemberRoles", reflect.TypeOf((*MockStore)(nil).UpdateMemberRoles), ctx, arg)
 }

+// UpdateMemoryResourceMonitor mocks base method.
+func (m *MockStore) UpdateMemoryResourceMonitor(ctx context.Context, arg database.UpdateMemoryResourceMonitorParams) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "UpdateMemoryResourceMonitor", ctx, arg)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// UpdateMemoryResourceMonitor indicates an expected call of UpdateMemoryResourceMonitor.
+func (mr *MockStoreMockRecorder) UpdateMemoryResourceMonitor(ctx, arg any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateMemoryResourceMonitor", reflect.TypeOf((*MockStore)(nil).UpdateMemoryResourceMonitor), ctx, arg)
+}
+
 // UpdateNotificationTemplateMethodByID mocks base method.
 func (m *MockStore) UpdateNotificationTemplateMethodByID(ctx context.Context, arg database.UpdateNotificationTemplateMethodByIDParams) (database.NotificationTemplate, error) {
 	m.ctrl.T.Helper()
@@ -5456,6 +5470,20 @@ func (mr *MockStoreMockRecorder) UpdateUserStatus(ctx, arg any) *gomock.Call {
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateUserStatus", reflect.TypeOf((*MockStore)(nil).UpdateUserStatus), ctx, arg)
 }

+// UpdateVolumeResourceMonitor mocks base method.
+func (m *MockStore) UpdateVolumeResourceMonitor(ctx context.Context, arg database.UpdateVolumeResourceMonitorParams) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "UpdateVolumeResourceMonitor", ctx, arg)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// UpdateVolumeResourceMonitor indicates an expected call of UpdateVolumeResourceMonitor.
+func (mr *MockStoreMockRecorder) UpdateVolumeResourceMonitor(ctx, arg any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateVolumeResourceMonitor", reflect.TypeOf((*MockStore)(nil).UpdateVolumeResourceMonitor), ctx, arg)
+}
+
 // UpdateWorkspace mocks base method.
 func (m *MockStore) UpdateWorkspace(ctx context.Context, arg database.UpdateWorkspaceParams) (database.WorkspaceTable, error) {
 	m.ctrl.T.Helper()
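The generated mock lets unit tests assert that a state transition is persisted without a real database. A rough sketch of how that might look (hypothetical test, assuming the go.uber.org/mock runtime used by the generated file):

package dbmock_example_test

import (
	"context"
	"testing"

	"github.com/google/uuid"
	"github.com/stretchr/testify/require"
	"go.uber.org/mock/gomock"

	"github.com/coder/coder/v2/coderd/database"
	"github.com/coder/coder/v2/coderd/database/dbmock"
)

// Hypothetical sketch: expect exactly one monitor update, then invoke
// it directly in place of the code under test.
func TestExpectMonitorUpdate(t *testing.T) {
	ctrl := gomock.NewController(t)
	db := dbmock.NewMockStore(ctrl)

	db.EXPECT().
		UpdateMemoryResourceMonitor(gomock.Any(), gomock.AssignableToTypeOf(database.UpdateMemoryResourceMonitorParams{})).
		Return(nil).
		Times(1)

	err := db.UpdateMemoryResourceMonitor(context.Background(), database.UpdateMemoryResourceMonitorParams{
		AgentID: uuid.New(),
		State:   database.WorkspaceAgentMonitorStateNOK,
	})
	require.NoError(t, err)
}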
diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql
index 44bf68a36eb40..e699b34bd5433 100644
--- a/coderd/database/dump.sql
+++ b/coderd/database/dump.sql
@@ -244,6 +244,11 @@ CREATE TYPE workspace_agent_lifecycle_state AS ENUM (
     'off'
 );

+CREATE TYPE workspace_agent_monitor_state AS ENUM (
+    'OK',
+    'NOK'
+);
+
 CREATE TYPE workspace_agent_script_timing_stage AS ENUM (
     'start',
     'stop',
@@ -1510,7 +1515,10 @@ CREATE TABLE workspace_agent_memory_resource_monitors (
     agent_id uuid NOT NULL,
     enabled boolean NOT NULL,
     threshold integer NOT NULL,
-    created_at timestamp with time zone NOT NULL
+    created_at timestamp with time zone NOT NULL,
+    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    state workspace_agent_monitor_state DEFAULT 'OK'::workspace_agent_monitor_state NOT NULL,
+    debounced_until timestamp with time zone DEFAULT '0001-01-01 00:00:00+00'::timestamp with time zone NOT NULL
 );

 CREATE UNLOGGED TABLE workspace_agent_metadata (
@@ -1595,7 +1603,10 @@ CREATE TABLE workspace_agent_volume_resource_monitors (
     enabled boolean NOT NULL,
     threshold integer NOT NULL,
     path text NOT NULL,
-    created_at timestamp with time zone NOT NULL
+    created_at timestamp with time zone NOT NULL,
+    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    state workspace_agent_monitor_state DEFAULT 'OK'::workspace_agent_monitor_state NOT NULL,
+    debounced_until timestamp with time zone DEFAULT '0001-01-01 00:00:00+00'::timestamp with time zone NOT NULL
 );

 CREATE TABLE workspace_agents (
diff --git a/coderd/database/migrations/000294_workspace_monitors_state.down.sql b/coderd/database/migrations/000294_workspace_monitors_state.down.sql
new file mode 100644
index 0000000000000..c3c6ce7c614ac
--- /dev/null
+++ b/coderd/database/migrations/000294_workspace_monitors_state.down.sql
@@ -0,0 +1,11 @@
+ALTER TABLE workspace_agent_volume_resource_monitors
+    DROP COLUMN updated_at,
+    DROP COLUMN state,
+    DROP COLUMN debounced_until;
+
+ALTER TABLE workspace_agent_memory_resource_monitors
+    DROP COLUMN updated_at,
+    DROP COLUMN state,
+    DROP COLUMN debounced_until;
+
+DROP TYPE workspace_agent_monitor_state;
diff --git a/coderd/database/migrations/000294_workspace_monitors_state.up.sql b/coderd/database/migrations/000294_workspace_monitors_state.up.sql
new file mode 100644
index 0000000000000..a6b1f7609d7da
--- /dev/null
+++ b/coderd/database/migrations/000294_workspace_monitors_state.up.sql
@@ -0,0 +1,14 @@
+CREATE TYPE workspace_agent_monitor_state AS ENUM (
+    'OK',
+    'NOK'
+);
+
+ALTER TABLE workspace_agent_memory_resource_monitors
+    ADD COLUMN updated_at timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    ADD COLUMN state workspace_agent_monitor_state NOT NULL DEFAULT 'OK',
+    ADD COLUMN debounced_until timestamp with time zone NOT NULL DEFAULT '0001-01-01 00:00:00'::timestamptz;
+
+ALTER TABLE workspace_agent_volume_resource_monitors
+    ADD COLUMN updated_at timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    ADD COLUMN state workspace_agent_monitor_state NOT NULL DEFAULT 'OK',
+    ADD COLUMN debounced_until timestamp with time zone NOT NULL DEFAULT '0001-01-01 00:00:00'::timestamptz;
diff --git a/coderd/database/modelmethods.go b/coderd/database/modelmethods.go
index 63e03ccb27f40..171c0454563de 100644
--- a/coderd/database/modelmethods.go
+++ b/coderd/database/modelmethods.go
@@ -527,3 +527,31 @@ func (k CryptoKey) CanVerify(now time.Time) bool {
 func (r GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerRow) RBACObject() rbac.Object {
 	return r.ProvisionerJob.RBACObject()
 }
+
+func (m WorkspaceAgentMemoryResourceMonitor) Debounce(
+	by time.Duration,
+	now time.Time,
+	oldState, newState WorkspaceAgentMonitorState,
+) (debouncedUntil time.Time, shouldNotify bool) {
+	if now.After(m.DebouncedUntil) &&
+		oldState == WorkspaceAgentMonitorStateOK &&
+		newState == WorkspaceAgentMonitorStateNOK {
+		return now.Add(by), true
+	}
+
+	return m.DebouncedUntil, false
+}
+
+func (m WorkspaceAgentVolumeResourceMonitor) Debounce(
+	by time.Duration,
+	now time.Time,
+	oldState, newState WorkspaceAgentMonitorState,
+) (debouncedUntil time.Time, shouldNotify bool) {
+	if now.After(m.DebouncedUntil) &&
+		oldState == WorkspaceAgentMonitorStateOK &&
+		newState == WorkspaceAgentMonitorStateNOK {
+		return now.Add(by), true
+	}
+
+	return m.DebouncedUntil, false
+}
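The Debounce contract is easiest to see with concrete values: a notification fires only on an OK to NOK transition once any previous debounce window has lapsed, and the window then moves forward by the debounce duration. An illustrative test of the method shown above (times are arbitrary):

package database_test

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/coder/coder/v2/coderd/database"
)

// Illustrative test of the debounce rules, assuming a 5-minute window.
func TestMonitorDebounce(t *testing.T) {
	now := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)
	m := database.WorkspaceAgentMemoryResourceMonitor{} // DebouncedUntil is zero

	// First OK -> NOK with no active window: notify and debounce until 12:05.
	until, notify := m.Debounce(5*time.Minute, now,
		database.WorkspaceAgentMonitorStateOK, database.WorkspaceAgentMonitorStateNOK)
	require.True(t, notify)
	require.Equal(t, now.Add(5*time.Minute), until)

	// A second OK -> NOK flap inside the window stays silent, and the
	// stored DebouncedUntil is returned unchanged.
	m.DebouncedUntil = until
	_, notify = m.Debounce(5*time.Minute, now.Add(time.Minute),
		database.WorkspaceAgentMonitorStateOK, database.WorkspaceAgentMonitorStateNOK)
	require.False(t, notify)

	// Recovery (NOK -> OK) never notifies, even after the window lapses.
	_, notify = m.Debounce(5*time.Minute, now.Add(10*time.Minute),
		database.WorkspaceAgentMonitorStateNOK, database.WorkspaceAgentMonitorStateOK)
	require.False(t, notify)
}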
diff --git a/coderd/database/models.go b/coderd/database/models.go
index 9ddcba7897699..5411591eed51c 100644
--- a/coderd/database/models.go
+++ b/coderd/database/models.go
@@ -1976,6 +1976,64 @@ func AllWorkspaceAgentLifecycleStateValues() []WorkspaceAgentLifecycleState {
 	}
 }

+type WorkspaceAgentMonitorState string
+
+const (
+	WorkspaceAgentMonitorStateOK  WorkspaceAgentMonitorState = "OK"
+	WorkspaceAgentMonitorStateNOK WorkspaceAgentMonitorState = "NOK"
+)
+
+func (e *WorkspaceAgentMonitorState) Scan(src interface{}) error {
+	switch s := src.(type) {
+	case []byte:
+		*e = WorkspaceAgentMonitorState(s)
+	case string:
+		*e = WorkspaceAgentMonitorState(s)
+	default:
+		return fmt.Errorf("unsupported scan type for WorkspaceAgentMonitorState: %T", src)
+	}
+	return nil
+}
+
+type NullWorkspaceAgentMonitorState struct {
+	WorkspaceAgentMonitorState WorkspaceAgentMonitorState `json:"workspace_agent_monitor_state"`
+	Valid                      bool                       `json:"valid"` // Valid is true if WorkspaceAgentMonitorState is not NULL
+}
+
+// Scan implements the Scanner interface.
+func (ns *NullWorkspaceAgentMonitorState) Scan(value interface{}) error {
+	if value == nil {
+		ns.WorkspaceAgentMonitorState, ns.Valid = "", false
+		return nil
+	}
+	ns.Valid = true
+	return ns.WorkspaceAgentMonitorState.Scan(value)
+}
+
+// Value implements the driver Valuer interface.
+func (ns NullWorkspaceAgentMonitorState) Value() (driver.Value, error) {
+	if !ns.Valid {
+		return nil, nil
+	}
+	return string(ns.WorkspaceAgentMonitorState), nil
+}
+
+func (e WorkspaceAgentMonitorState) Valid() bool {
+	switch e {
+	case WorkspaceAgentMonitorStateOK,
+		WorkspaceAgentMonitorStateNOK:
+		return true
+	}
+	return false
+}
+
+func AllWorkspaceAgentMonitorStateValues() []WorkspaceAgentMonitorState {
+	return []WorkspaceAgentMonitorState{
+		WorkspaceAgentMonitorStateOK,
+		WorkspaceAgentMonitorStateNOK,
+	}
+}
+
 // What stage the script was run in.
 type WorkspaceAgentScriptTimingStage string

@@ -3185,10 +3243,13 @@ type WorkspaceAgentLogSource struct {
 }

 type WorkspaceAgentMemoryResourceMonitor struct {
-	AgentID   uuid.UUID `db:"agent_id" json:"agent_id"`
-	Enabled   bool      `db:"enabled" json:"enabled"`
-	Threshold int32     `db:"threshold" json:"threshold"`
-	CreatedAt time.Time `db:"created_at" json:"created_at"`
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	Enabled        bool                       `db:"enabled" json:"enabled"`
+	Threshold      int32                      `db:"threshold" json:"threshold"`
+	CreatedAt      time.Time                  `db:"created_at" json:"created_at"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
 }

 type WorkspaceAgentMetadatum struct {
@@ -3259,11 +3320,14 @@ type WorkspaceAgentStat struct {
 }

 type WorkspaceAgentVolumeResourceMonitor struct {
-	AgentID   uuid.UUID `db:"agent_id" json:"agent_id"`
-	Enabled   bool      `db:"enabled" json:"enabled"`
-	Threshold int32     `db:"threshold" json:"threshold"`
-	Path      string    `db:"path" json:"path"`
-	CreatedAt time.Time `db:"created_at" json:"created_at"`
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	Enabled        bool                       `db:"enabled" json:"enabled"`
+	Threshold      int32                      `db:"threshold" json:"threshold"`
+	Path           string                     `db:"path" json:"path"`
+	CreatedAt      time.Time                  `db:"created_at" json:"created_at"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
 }

 type WorkspaceApp struct {
diff --git a/coderd/database/querier.go b/coderd/database/querier.go
index 31c4a18a5808a..42b88d855e4c3 100644
--- a/coderd/database/querier.go
+++ b/coderd/database/querier.go
@@ -480,6 +480,7 @@ type sqlcQuerier interface {
 	UpdateGroupByID(ctx context.Context, arg UpdateGroupByIDParams) (Group, error)
 	UpdateInactiveUsersToDormant(ctx context.Context, arg UpdateInactiveUsersToDormantParams) ([]UpdateInactiveUsersToDormantRow, error)
 	UpdateMemberRoles(ctx context.Context, arg UpdateMemberRolesParams) (OrganizationMember, error)
+	UpdateMemoryResourceMonitor(ctx context.Context, arg UpdateMemoryResourceMonitorParams) error
 	UpdateNotificationTemplateMethodByID(ctx context.Context, arg UpdateNotificationTemplateMethodByIDParams) (NotificationTemplate, error)
 	UpdateOAuth2ProviderAppByID(ctx context.Context, arg UpdateOAuth2ProviderAppByIDParams) (OAuth2ProviderApp, error)
 	UpdateOAuth2ProviderAppSecretByID(ctx context.Context, arg UpdateOAuth2ProviderAppSecretByIDParams) (OAuth2ProviderAppSecret, error)
@@ -514,6 +515,7 @@ type sqlcQuerier interface {
 	UpdateUserQuietHoursSchedule(ctx context.Context, arg UpdateUserQuietHoursScheduleParams) (User, error)
 	UpdateUserRoles(ctx context.Context, arg UpdateUserRolesParams) (User, error)
 	UpdateUserStatus(ctx context.Context, arg UpdateUserStatusParams) (User, error)
+	UpdateVolumeResourceMonitor(ctx context.Context, arg UpdateVolumeResourceMonitorParams) error
 	UpdateWorkspace(ctx context.Context, arg UpdateWorkspaceParams) (WorkspaceTable, error)
 	UpdateWorkspaceAgentConnectionByID(ctx context.Context, arg UpdateWorkspaceAgentConnectionByIDParams) error
 	UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, arg UpdateWorkspaceAgentLifecycleStateByIDParams) error
diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go
index dc9b04c2244f0..58722dc152005 100644
--- a/coderd/database/queries.sql.go
+++ b/coderd/database/queries.sql.go
@@ -12044,7 +12044,7 @@ func (q *sqlQuerier) UpsertWorkspaceAgentPortShare(ctx context.Context, arg Upse
 const fetchMemoryResourceMonitorsByAgentID = `-- name: FetchMemoryResourceMonitorsByAgentID :one
 SELECT
-	agent_id, enabled, threshold, created_at
+	agent_id, enabled, threshold, created_at, updated_at, state, debounced_until
 FROM
 	workspace_agent_memory_resource_monitors
 WHERE
@@ -12059,13 +12059,16 @@ func (q *sqlQuerier) FetchMemoryResourceMonitorsByAgentID(ctx context.Context, a
 		&i.Enabled,
 		&i.Threshold,
 		&i.CreatedAt,
+		&i.UpdatedAt,
+		&i.State,
+		&i.DebouncedUntil,
 	)
 	return i, err
 }

 const fetchVolumesResourceMonitorsByAgentID = `-- name: FetchVolumesResourceMonitorsByAgentID :many
 SELECT
-	agent_id, enabled, threshold, path, created_at
+	agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until
 FROM
 	workspace_agent_volume_resource_monitors
 WHERE
@@ -12087,6 +12090,9 @@ func (q *sqlQuerier) FetchVolumesResourceMonitorsByAgentID(ctx context.Context,
 			&i.Threshold,
 			&i.Path,
 			&i.CreatedAt,
+			&i.UpdatedAt,
+			&i.State,
+			&i.DebouncedUntil,
 		); err != nil {
 			return nil, err
 		}
@@ -12106,26 +12112,35 @@ INSERT INTO
 	workspace_agent_memory_resource_monitors (
 		agent_id,
 		enabled,
+		state,
 		threshold,
-		created_at
+		created_at,
+		updated_at,
+		debounced_until
 	)
 VALUES
-	($1, $2, $3, $4) RETURNING agent_id, enabled, threshold, created_at
+	($1, $2, $3, $4, $5, $6, $7) RETURNING agent_id, enabled, threshold, created_at, updated_at, state, debounced_until
 `

 type InsertMemoryResourceMonitorParams struct {
-	AgentID   uuid.UUID `db:"agent_id" json:"agent_id"`
-	Enabled   bool      `db:"enabled" json:"enabled"`
-	Threshold int32     `db:"threshold" json:"threshold"`
-	CreatedAt time.Time `db:"created_at" json:"created_at"`
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	Enabled        bool                       `db:"enabled" json:"enabled"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	Threshold      int32                      `db:"threshold" json:"threshold"`
+	CreatedAt      time.Time                  `db:"created_at" json:"created_at"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
 }

 func (q *sqlQuerier) InsertMemoryResourceMonitor(ctx context.Context, arg InsertMemoryResourceMonitorParams) (WorkspaceAgentMemoryResourceMonitor, error) {
 	row := q.db.QueryRowContext(ctx, insertMemoryResourceMonitor,
 		arg.AgentID,
 		arg.Enabled,
+		arg.State,
 		arg.Threshold,
 		arg.CreatedAt,
+		arg.UpdatedAt,
+		arg.DebouncedUntil,
 	)
 	var i WorkspaceAgentMemoryResourceMonitor
 	err := row.Scan(
@@ -12133,6 +12148,9 @@ func (q *sqlQuerier) InsertMemoryResourceMonitor(ctx context.Context, arg Insert
 		&i.Enabled,
 		&i.Threshold,
 		&i.CreatedAt,
+		&i.UpdatedAt,
+		&i.State,
+		&i.DebouncedUntil,
 	)
 	return i, err
 }
@@ -12143,19 +12161,25 @@ INSERT INTO
 	agent_id,
 	path,
 	enabled,
+	state,
 	threshold,
-	created_at
+	created_at,
+	updated_at,
+	debounced_until
 )
 VALUES
-	($1, $2, $3, $4, $5) RETURNING agent_id, enabled, threshold, path, created_at
+	($1, $2, $3, $4, $5, $6, $7, $8) RETURNING agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until
 `

 type InsertVolumeResourceMonitorParams struct {
-	AgentID   uuid.UUID `db:"agent_id" json:"agent_id"`
-	Path      string    `db:"path" json:"path"`
-	Enabled   bool      `db:"enabled" json:"enabled"`
-	Threshold int32     `db:"threshold" json:"threshold"`
-	CreatedAt time.Time `db:"created_at" json:"created_at"`
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	Path           string                     `db:"path" json:"path"`
+	Enabled        bool                       `db:"enabled" json:"enabled"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	Threshold      int32                      `db:"threshold" json:"threshold"`
+	CreatedAt      time.Time                  `db:"created_at" json:"created_at"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
 }

 func (q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg InsertVolumeResourceMonitorParams) (WorkspaceAgentVolumeResourceMonitor, error) {
@@ -12163,8 +12187,11 @@ func (q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg Insert
 		arg.AgentID,
 		arg.Path,
 		arg.Enabled,
+		arg.State,
 		arg.Threshold,
 		arg.CreatedAt,
+		arg.UpdatedAt,
+		arg.DebouncedUntil,
 	)
 	var i WorkspaceAgentVolumeResourceMonitor
 	err := row.Scan(
@@ -12173,10 +12200,69 @@ func (q *sqlQuerier) InsertVolumeResourceMonitor(ctx context.Context, arg Insert
 		&i.Threshold,
 		&i.Path,
 		&i.CreatedAt,
+		&i.UpdatedAt,
+		&i.State,
+		&i.DebouncedUntil,
 	)
 	return i, err
 }

+const updateMemoryResourceMonitor = `-- name: UpdateMemoryResourceMonitor :exec
+UPDATE workspace_agent_memory_resource_monitors
+SET
+	updated_at = $2,
+	state = $3,
+	debounced_until = $4
+WHERE
+	agent_id = $1
+`
+
+type UpdateMemoryResourceMonitorParams struct {
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
+}
+
+func (q *sqlQuerier) UpdateMemoryResourceMonitor(ctx context.Context, arg UpdateMemoryResourceMonitorParams) error {
+	_, err := q.db.ExecContext(ctx, updateMemoryResourceMonitor,
+		arg.AgentID,
+		arg.UpdatedAt,
+		arg.State,
+		arg.DebouncedUntil,
+	)
+	return err
+}
+
+const updateVolumeResourceMonitor = `-- name: UpdateVolumeResourceMonitor :exec
+UPDATE workspace_agent_volume_resource_monitors
+SET
+	updated_at = $3,
+	state = $4,
+	debounced_until = $5
+WHERE
+	agent_id = $1 AND path = $2
+`
+
+type UpdateVolumeResourceMonitorParams struct {
+	AgentID        uuid.UUID                  `db:"agent_id" json:"agent_id"`
+	Path           string                     `db:"path" json:"path"`
+	UpdatedAt      time.Time                  `db:"updated_at" json:"updated_at"`
+	State          WorkspaceAgentMonitorState `db:"state" json:"state"`
+	DebouncedUntil time.Time                  `db:"debounced_until" json:"debounced_until"`
+}
+
+func (q *sqlQuerier) UpdateVolumeResourceMonitor(ctx context.Context, arg UpdateVolumeResourceMonitorParams) error {
+	_, err := q.db.ExecContext(ctx, updateVolumeResourceMonitor,
+		arg.AgentID,
+		arg.Path,
+		arg.UpdatedAt,
+		arg.State,
+		arg.DebouncedUntil,
+	)
+	return err
+}
+
 const deleteOldWorkspaceAgentLogs = `-- name: DeleteOldWorkspaceAgentLogs :exec
 WITH latest_builds AS (
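Note the parameter ordering in the volume update: $1 and $2 form the (agent_id, path) key, so the SET values start at $3. A hypothetical round-trip against the in-memory store ties the generated pieces together: insert via the dbgen helper, flip state with the new :exec query, then read it back through the fetch query.

package dbmem_test

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/coder/coder/v2/coderd/database"
	"github.com/coder/coder/v2/coderd/database/dbgen"
	"github.com/coder/coder/v2/coderd/database/dbmem"
	"github.com/coder/coder/v2/coderd/database/dbtime"
)

// Hypothetical round-trip test; real callers wrap ctx with
// dbauthz.AsResourceMonitor, which dbmem does not require.
func TestVolumeMonitorStateRoundTrip(t *testing.T) {
	ctx := context.Background()
	db := dbmem.New()

	seeded := dbgen.WorkspaceAgentVolumeResourceMonitor(t, db, database.WorkspaceAgentVolumeResourceMonitor{
		Path: "/home/coder",
	})

	err := db.UpdateVolumeResourceMonitor(ctx, database.UpdateVolumeResourceMonitorParams{
		AgentID:   seeded.AgentID,
		Path:      seeded.Path,
		State:     database.WorkspaceAgentMonitorStateNOK,
		UpdatedAt: dbtime.Now(),
	})
	require.NoError(t, err)

	monitors, err := db.FetchVolumesResourceMonitorsByAgentID(ctx, seeded.AgentID)
	require.NoError(t, err)
	require.Len(t, monitors, 1)
	require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitors[0].State)
}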
diff --git a/coderd/database/queries/workspaceagentresourcemonitors.sql b/coderd/database/queries/workspaceagentresourcemonitors.sql
index e70ef85f3cbd5..84ee5c67b37ef 100644
--- a/coderd/database/queries/workspaceagentresourcemonitors.sql
+++ b/coderd/database/queries/workspaceagentresourcemonitors.sql
@@ -19,11 +19,14 @@ INSERT INTO
 	workspace_agent_memory_resource_monitors (
 		agent_id,
 		enabled,
+		state,
 		threshold,
-		created_at
+		created_at,
+		updated_at,
+		debounced_until
 	)
 VALUES
-	($1, $2, $3, $4) RETURNING *;
+	($1, $2, $3, $4, $5, $6, $7) RETURNING *;

 -- name: InsertVolumeResourceMonitor :one
 INSERT INTO
@@ -31,8 +34,29 @@ INSERT INTO
 	agent_id,
 	path,
 	enabled,
+	state,
 	threshold,
-	created_at
+	created_at,
+	updated_at,
+	debounced_until
 )
 VALUES
-	($1, $2, $3, $4, $5) RETURNING *;
+	($1, $2, $3, $4, $5, $6, $7, $8) RETURNING *;
+
+-- name: UpdateMemoryResourceMonitor :exec
+UPDATE workspace_agent_memory_resource_monitors
+SET
+	updated_at = $2,
+	state = $3,
+	debounced_until = $4
+WHERE
+	agent_id = $1;
+
+-- name: UpdateVolumeResourceMonitor :exec
+UPDATE workspace_agent_volume_resource_monitors
+SET
+	updated_at = $3,
+	state = $4,
+	debounced_until = $5
+WHERE
+	agent_id = $1 AND path = $2;
diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go
index 2a58aa421f1c8..b928be1b52481 100644
--- a/coderd/provisionerdserver/provisionerdserver.go
+++ b/coderd/provisionerdserver/provisionerdserver.go
@@ -1981,10 +1981,13 @@ func InsertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
 	if prAgent.ResourcesMonitoring != nil {
 		if prAgent.ResourcesMonitoring.Memory != nil {
 			_, err = db.InsertMemoryResourceMonitor(ctx, database.InsertMemoryResourceMonitorParams{
-				AgentID:   agentID,
-				Enabled:   prAgent.ResourcesMonitoring.Memory.Enabled,
-				Threshold: prAgent.ResourcesMonitoring.Memory.Threshold,
-				CreatedAt: dbtime.Now(),
+				AgentID:        agentID,
+				Enabled:        prAgent.ResourcesMonitoring.Memory.Enabled,
+				Threshold:      prAgent.ResourcesMonitoring.Memory.Threshold,
+				State:          database.WorkspaceAgentMonitorStateOK,
+				CreatedAt:      dbtime.Now(),
+				UpdatedAt:      dbtime.Now(),
+				DebouncedUntil: time.Time{},
 			})
 			if err != nil {
 				return xerrors.Errorf("failed to insert agent memory resource monitor into db: %w", err)
@@ -1992,11 +1995,14 @@ func InsertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
 		}
 		for _, volume := range prAgent.ResourcesMonitoring.Volumes {
 			_, err = db.InsertVolumeResourceMonitor(ctx, database.InsertVolumeResourceMonitorParams{
-				AgentID:   agentID,
-				Path:      volume.Path,
-				Enabled:   volume.Enabled,
-				Threshold: volume.Threshold,
-				CreatedAt: dbtime.Now(),
+				AgentID:        agentID,
+				Path:           volume.Path,
+				Enabled:        volume.Enabled,
+				Threshold:      volume.Threshold,
+				State:          database.WorkspaceAgentMonitorStateOK,
+				CreatedAt:      dbtime.Now(),
+				UpdatedAt:      dbtime.Now(),
+				DebouncedUntil: time.Time{},
 			})
 			if err != nil {
 				return xerrors.Errorf("failed to insert agent volume resource monitor into db: %w", err)
diff --git a/coderd/rbac/object_gen.go b/coderd/rbac/object_gen.go
index 547e10859b5b7..e5323225120b5 100644
--- a/coderd/rbac/object_gen.go
+++ b/coderd/rbac/object_gen.go
@@ -299,6 +299,7 @@ var (
 	// Valid Actions
 	//  - "ActionCreate" :: create workspace agent resource monitor
 	//  - "ActionRead" :: read workspace agent resource monitor
+	//  - "ActionUpdate" :: update workspace agent resource monitor
 	ResourceWorkspaceAgentResourceMonitor = Object{
 		Type: "workspace_agent_resource_monitor",
 	}
diff --git a/coderd/rbac/policy/policy.go b/coderd/rbac/policy/policy.go
index 6dc64f6660248..c06a2117cb4e9 100644
--- a/coderd/rbac/policy/policy.go
+++ b/coderd/rbac/policy/policy.go
@@ -306,6 +306,7 @@ var RBACPermissions = map[string]PermissionDefinition{
 		Actions: map[Action]ActionDefinition{
 			ActionRead:   actDef("read workspace agent resource monitor"),
 			ActionCreate: actDef("create workspace agent resource monitor"),
+			ActionUpdate: actDef("update workspace agent resource monitor"),
 		},
 	},
 }
diff --git a/coderd/rbac/roles_test.go b/coderd/rbac/roles_test.go
index 6db591d028454..db0d9832579fc 100644
--- a/coderd/rbac/roles_test.go
+++ b/coderd/rbac/roles_test.go
@@ -779,7 +779,7 @@ func TestRolePermissions(t *testing.T) {
 		},
 		{
 			Name:     "ResourceMonitor",
-			Actions:  []policy.Action{policy.ActionRead, policy.ActionCreate},
+			Actions:  []policy.Action{policy.ActionRead, policy.ActionCreate, policy.ActionUpdate},
 			Resource: rbac.ResourceWorkspaceAgentResourceMonitor,
 			AuthorizeMap: map[bool][]hasAuthSubjects{
 				true:  {owner},
diff --git a/coderd/util/slice/slice.go b/coderd/util/slice/slice.go
index 2a62e23592d84..508827dfaae81 100644
--- a/coderd/util/slice/slice.go
+++ b/coderd/util/slice/slice.go
@@ -177,3 +177,21 @@ func DifferenceFunc[T any](a []T, b []T, equal func(a, b T) bool) []T {
 	}
 	return tmp
 }
+
+// CountConsecutive returns the length of the longest run of consecutive
+// occurrences of needle in haystack.
+func CountConsecutive[T comparable](needle T, haystack ...T) int {
+	maxLength := 0
+	curLength := 0
+
+	for _, v := range haystack {
+		if v == needle {
+			curLength++
+		} else {
+			maxLength = max(maxLength, curLength)
+			curLength = 0
+		}
+	}
+
+	return max(maxLength, curLength)
+}
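CountConsecutive is what lets the alert logic distinguish a sustained breach from a flapping one: it measures the longest unbroken run, not the total number of occurrences. An illustrative test, including how such a run might compare against a 50% consecutive threshold like the ConsecutiveNOKsPercent value configured in api.go (the threshold math here is a simplification; the real calculation lives in the resourcesmonitor package):

package slice_test

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/coder/coder/v2/coderd/util/slice"
)

// Illustrative test: the longest NOK streak below is 3, even though
// there are 4 NOKs in total.
func TestCountConsecutiveNOKs(t *testing.T) {
	const ok, nok = "OK", "NOK"
	states := []string{ok, nok, nok, nok, ok, nok}

	run := slice.CountConsecutive(nok, states...)
	require.Equal(t, 3, run)

	// With 6 datapoints and a 50% consecutive threshold, a run of 3
	// meets the bar: 3 >= 6*50/100.
	require.GreaterOrEqual(t, run, len(states)*50/100)
}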
diff --git a/coderd/workspaceagentsrpc.go b/coderd/workspaceagentsrpc.go
index cbb3a1bc44b8a..c794c9c14349b 100644
--- a/coderd/workspaceagentsrpc.go
+++ b/coderd/workspaceagentsrpc.go
@@ -143,7 +143,9 @@ func (api *API) workspaceAgentRPC(rw http.ResponseWriter, r *http.Request) {
 		Ctx:                   api.ctx,
 		Log:                   logger,
+		Clock:                 api.Clock,
 		Database:              api.Database,
+		NotificationsEnqueuer: api.NotificationsEnqueuer,
 		Pubsub:                api.Pubsub,
 		DerpMapFn:             api.DERPMap,
 		TailnetCoordinator:    &api.TailnetCoordinator,
diff --git a/codersdk/rbacresources_gen.go b/codersdk/rbacresources_gen.go
index 8afb1858ca15c..f4d7790d40b76 100644
--- a/codersdk/rbacresources_gen.go
+++ b/codersdk/rbacresources_gen.go
@@ -92,7 +92,7 @@ var RBACResourceActions = map[RBACResource][]RBACAction{
 	ResourceTemplate:                      {ActionCreate, ActionDelete, ActionRead, ActionUpdate, ActionUse, ActionViewInsights},
 	ResourceUser:                          {ActionCreate, ActionDelete, ActionRead, ActionReadPersonal, ActionUpdate, ActionUpdatePersonal},
 	ResourceWorkspace:                     {ActionApplicationConnect, ActionCreate, ActionDelete, ActionRead, ActionSSH, ActionWorkspaceStart, ActionWorkspaceStop, ActionUpdate},
-	ResourceWorkspaceAgentResourceMonitor: {ActionCreate, ActionRead},
+	ResourceWorkspaceAgentResourceMonitor: {ActionCreate, ActionRead, ActionUpdate},
 	ResourceWorkspaceDormant:              {ActionApplicationConnect, ActionCreate, ActionDelete, ActionRead, ActionSSH, ActionWorkspaceStart, ActionWorkspaceStop, ActionUpdate},
 	ResourceWorkspaceProxy:                {ActionCreate, ActionDelete, ActionRead, ActionUpdate},
 }
diff --git a/site/src/api/rbacresourcesGenerated.ts b/site/src/api/rbacresourcesGenerated.ts
index e557ceddbdda6..437f89ec776a7 100644
--- a/site/src/api/rbacresourcesGenerated.ts
+++ b/site/src/api/rbacresourcesGenerated.ts
@@ -171,6 +171,7 @@ export const RBACResourceActions: Partial<
 	workspace_agent_resource_monitor: {
 		create: "create workspace agent resource monitor",
 		read: "read workspace agent resource monitor",
+		update: "update workspace agent resource monitor",
 	},
 	workspace_dormant: {
 		application_connect: "connect to workspace apps via browser",
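The new update action appears in all three generated surfaces: the rbac policy, the Go SDK map, and the site's TypeScript map. A small sanity check that the Go SDK agrees with the policy addition (hypothetical test; the constants are taken from codersdk/rbacresources_gen.go above):

package codersdk_test

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/coder/coder/v2/codersdk"
)

// Hypothetical check: the generated action list for workspace agent
// resource monitors should now include the update action.
func TestResourceMonitorHasUpdateAction(t *testing.T) {
	actions := codersdk.RBACResourceActions[codersdk.ResourceWorkspaceAgentResourceMonitor]
	require.Contains(t, actions, codersdk.ActionUpdate)
}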