Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

fix(coderd): extract provisionerdserver.StaleInterval to 90 seconds #15643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,8 @@ func New(options *Options) *API {
CurrentVersion: buildinfo.Version(),
CurrentAPIMajorVersion: proto.CurrentMajor,
Store: options.Database,
// TimeNow and StaleInterval set to defaults, see healthcheck/provisioner.go
StaleInterval: provisionerdserver.StaleInterval,
// TimeNow set to default, see healthcheck/provisioner.go
},
})
}
Expand Down
2 changes: 1 addition & 1 deletion coderd/healthcheck/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (r *ProvisionerDaemonsReport) Run(ctx context.Context, opts *ProvisionerDae
now := opts.TimeNow()

if opts.StaleInterval == 0 {
opts.StaleInterval = provisionerdserver.DefaultHeartbeatInterval * 3
opts.StaleInterval = provisionerdserver.StaleInterval
}

if opts.CurrentVersion == "" {
Expand Down
106 changes: 80 additions & 26 deletions coderd/healthcheck/provisioner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@ import (
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/provisionerdserver"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/provisionerd/proto"
"github.com/coder/coder/v2/testutil"
)

func TestProvisionerDaemonReport(t *testing.T) {
t.Parallel()

now := dbtime.Now()
var (
now = dbtime.Now()
oneHourAgo = now.Add(-time.Hour)
staleThreshold = now.Add(-provisionerdserver.StaleInterval).Add(-time.Second)
)

for _, tt := range []struct {
name string
Expand Down Expand Up @@ -65,7 +71,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentVersion: "v1.2.3",
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityOK,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-ok"), withVersion("v1.2.3"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand All @@ -88,7 +96,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityWarning,
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-old"), withVersion("v1.1.2"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -116,7 +126,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityError,
expectedWarningCode: health.CodeUnknown,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-invalid-version", "invalid", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-invalid-version"), withVersion("invalid"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -144,7 +156,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityError,
expectedWarningCode: health.CodeUnknown,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-invalid-api", "v1.2.3", "invalid", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-invalid-api"), withVersion("v1.2.3"), withAPIVersion("invalid"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -172,7 +186,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: 2,
expectedSeverity: health.SeverityWarning,
expectedWarningCode: health.CodeProvisionerDaemonAPIMajorVersionDeprecated,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-old-api", "v2.3.4", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-old-api"), withVersion("v2.3.4"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -200,7 +216,10 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityWarning,
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0", now), fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-ok"), withVersion("v1.2.3"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
fakeProvisionerDaemon(t, withName("pd-old"), withVersion("v1.1.2"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -241,7 +260,10 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityWarning,
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0", now), fakeProvisionerDaemon(t, "pd-new", "v2.3.4", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-ok"), withVersion("v1.2.3"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
fakeProvisionerDaemon(t, withName("pd-new"), withVersion("v2.3.4"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand Down Expand Up @@ -281,7 +303,10 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentVersion: "v2.3.4",
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityOK,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemonStale(t, "pd-stale", "v1.2.3", "0.9", now.Add(-5*time.Minute), now), fakeProvisionerDaemon(t, "pd-ok", "v2.3.4", "1.0", now)},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-stale"), withVersion("v1.2.3"), withAPIVersion("0.9"), withCreatedAt(oneHourAgo), withLastSeenAt(staleThreshold)),
fakeProvisionerDaemon(t, withName("pd-ok"), withVersion("v2.3.4"), withAPIVersion("1.0"), withCreatedAt(now), withLastSeenAt(now)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{
{
ProvisionerDaemon: codersdk.ProvisionerDaemon{
Expand All @@ -304,8 +329,10 @@ func TestProvisionerDaemonReport(t *testing.T) {
currentAPIMajorVersion: proto.CurrentMajor,
expectedSeverity: health.SeverityError,
expectedWarningCode: health.CodeProvisionerDaemonsNoProvisionerDaemons,
provisionerDaemons: []database.ProvisionerDaemon{fakeProvisionerDaemonStale(t, "pd-ok", "v1.2.3", "0.9", now.Add(-5*time.Minute), now)},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{},
provisionerDaemons: []database.ProvisionerDaemon{
fakeProvisionerDaemon(t, withName("pd-stale"), withVersion("v1.2.3"), withAPIVersion("0.9"), withCreatedAt(oneHourAgo), withLastSeenAt(staleThreshold)),
},
expectedItems: []healthsdk.ProvisionerDaemonsReportItem{},
},
} {
tt := tt
Expand Down Expand Up @@ -353,25 +380,52 @@ func TestProvisionerDaemonReport(t *testing.T) {
}
}

func fakeProvisionerDaemon(t *testing.T, name, version, apiVersion string, now time.Time) database.ProvisionerDaemon {
// withName builds an option that overwrites the Name field of the
// provisioner daemon it is applied to with s.
func withName(s string) func(*database.ProvisionerDaemon) {
	setName := func(daemon *database.ProvisionerDaemon) {
		daemon.Name = s
	}
	return setName
}

// withCreatedAt builds an option that overwrites the CreatedAt field of the
// provisioner daemon it is applied to with at.
func withCreatedAt(at time.Time) func(*database.ProvisionerDaemon) {
	setCreatedAt := func(daemon *database.ProvisionerDaemon) {
		daemon.CreatedAt = at
	}
	return setCreatedAt
}

// withLastSeenAt builds an option that marks the provisioner daemon's
// LastSeenAt as valid and sets its time to at.
func withLastSeenAt(at time.Time) func(*database.ProvisionerDaemon) {
	return func(daemon *database.ProvisionerDaemon) {
		// Assign both fields in one literal; sql.NullTime has no other fields.
		daemon.LastSeenAt = sql.NullTime{Time: at, Valid: true}
	}
}

// withVersion builds an option that overwrites the Version field of the
// provisioner daemon it is applied to with v.
func withVersion(v string) func(*database.ProvisionerDaemon) {
	setVersion := func(daemon *database.ProvisionerDaemon) {
		daemon.Version = v
	}
	return setVersion
}

// withAPIVersion builds an option that overwrites the APIVersion field of the
// provisioner daemon it is applied to with v.
func withAPIVersion(v string) func(*database.ProvisionerDaemon) {
	setAPIVersion := func(daemon *database.ProvisionerDaemon) {
		daemon.APIVersion = v
	}
	return setAPIVersion
}

func fakeProvisionerDaemon(t *testing.T, opts ...func(*database.ProvisionerDaemon)) database.ProvisionerDaemon {
t.Helper()
return database.ProvisionerDaemon{
pd := database.ProvisionerDaemon{
ID: uuid.Nil,
Name: name,
CreatedAt: now,
LastSeenAt: sql.NullTime{Time: now, Valid: true},
Name: testutil.GetRandomName(t),
CreatedAt: time.Time{},
LastSeenAt: sql.NullTime{},
Provisioners: []database.ProvisionerType{database.ProvisionerTypeEcho, database.ProvisionerTypeTerraform},
ReplicaID: uuid.NullUUID{},
Tags: map[string]string{},
Version: version,
APIVersion: apiVersion,
Version: "",
APIVersion: "",
}
}

func fakeProvisionerDaemonStale(t *testing.T, name, version, apiVersion string, lastSeenAt, now time.Time) database.ProvisionerDaemon {
t.Helper()
d := fakeProvisionerDaemon(t, name, version, apiVersion, now)
d.LastSeenAt.Valid = true
d.LastSeenAt.Time = lastSeenAt
return d
for _, o := range opts {
o(&pd)
}
return pd
}
4 changes: 4 additions & 0 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ const (
// DefaultHeartbeatInterval is the interval at which the provisioner daemon
// will update its last seen at timestamp in the database.
DefaultHeartbeatInterval = time.Minute

// StaleInterval is the amount of time that must elapse since a provisioner
// daemon's last heartbeat before it is reported as 'stale'.
StaleInterval = 90 * time.Second
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I configure my heartbeat interval to be greater than 90 sec, it reads like this will introduce false negatives where a heartbeat simply hasn't run but the provisioner presents as stale. Am I missing something?

Copy link
Member Author

@johnstcn johnstcn Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, but we don't expose that as a coderd configuration knob yet. The only way you could configure that is by editing the source code right now.

)

type Options struct {
Expand Down
8 changes: 4 additions & 4 deletions coderd/templateversions.go
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,7 @@ func (api *API) postTemplateVersionsByOrganization(rw http.ResponseWriter, r *ht

// Check for eligible provisioners. This allows us to log a message warning deployment administrators
// of users submitting jobs for which no provisioners are available.
matchedProvisioners, err = checkProvisioners(ctx, tx, organization.ID, tags, api.DeploymentValues.Provisioner.DaemonPollInterval.Value())
matchedProvisioners, err = checkProvisioners(ctx, tx, organization.ID, tags)
if err != nil {
api.Logger.Error(ctx, "failed to check eligible provisioner daemons for job", slog.Error(err))
} else if matchedProvisioners.Count == 0 {
Expand Down Expand Up @@ -1819,7 +1819,7 @@ func (api *API) publishTemplateUpdate(ctx context.Context, templateID uuid.UUID)
}
}

func checkProvisioners(ctx context.Context, store database.Store, orgID uuid.UUID, wantTags map[string]string, pollInterval time.Duration) (codersdk.MatchedProvisioners, error) {
func checkProvisioners(ctx context.Context, store database.Store, orgID uuid.UUID, wantTags map[string]string) (codersdk.MatchedProvisioners, error) {
// Check for eligible provisioners. This allows us to return a warning to the user if they
// submit a job for which no provisioner is available.
eligibleProvisioners, err := store.GetProvisionerDaemonsByOrganization(ctx, database.GetProvisionerDaemonsByOrganizationParams{
Expand All @@ -1831,15 +1831,15 @@ func checkProvisioners(ctx context.Context, store database.Store, orgID uuid.UUI
return codersdk.MatchedProvisioners{}, xerrors.Errorf("provisioner daemons by organization: %w", err)
}

threePollsAgo := time.Now().Add(-3 * pollInterval)
staleInterval := time.Now().Add(-provisionerdserver.StaleInterval)
mostRecentlySeen := codersdk.NullTime{}
var matched codersdk.MatchedProvisioners
for _, provisioner := range eligibleProvisioners {
if !provisioner.LastSeenAt.Valid {
continue
}
matched.Count++
if provisioner.LastSeenAt.Time.After(threePollsAgo) {
if provisioner.LastSeenAt.Time.After(staleInterval) {
matched.Available++
}
if provisioner.LastSeenAt.Time.After(mostRecentlySeen.Time) {
Expand Down
Loading