Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 36c2cf8

Browse files
johnstcnmafredri
andauthored
fix(coderd/database): exclude canceled jobs in queue position (#15835)
When calculating the queue position in `GetProvisionerJobsByIDsWithQueuePosition` we only counted jobs with `started_at = NULL`. This is misleading, as it allows canceling or canceled jobs to take up rows in the computed queue position, giving an impression that the queue is larger than it really is. This modifies the query to also exclude jobs with a null `canceled_at`, `completed_at`, or `error` field for the purposes of calculating the queue position, and also adds a test to validate this behaviour. (Note: due to the behaviour of `dbgen.ProvisionerJob` with `dbmem` I had to use other proxy methods to validate the corresponding dbmem implementation.) --------- Co-authored-by: Mathias Fredriksson <[email protected]>
1 parent edb0cb1 commit 36c2cf8

File tree

4 files changed

+220
-30
lines changed

4 files changed

+220
-30
lines changed

coderd/database/dbmem/dbmem.go

+81-24
Original file line numberDiff line numberDiff line change
@@ -3804,35 +3804,92 @@ func (q *FakeQuerier) GetProvisionerJobsByIDsWithQueuePosition(_ context.Context
38043804
q.mutex.RLock()
38053805
defer q.mutex.RUnlock()
38063806

3807-
jobs := make([]database.GetProvisionerJobsByIDsWithQueuePositionRow, 0)
3808-
queuePosition := int64(1)
3807+
// WITH pending_jobs AS (
3808+
// SELECT
3809+
// id, created_at
3810+
// FROM
3811+
// provisioner_jobs
3812+
// WHERE
3813+
// started_at IS NULL
3814+
// AND
3815+
// canceled_at IS NULL
3816+
// AND
3817+
// completed_at IS NULL
3818+
// AND
3819+
// error IS NULL
3820+
// ),
3821+
type pendingJobRow struct {
3822+
ID uuid.UUID
3823+
CreatedAt time.Time
3824+
}
3825+
pendingJobs := make([]pendingJobRow, 0)
38093826
for _, job := range q.provisionerJobs {
3810-
for _, id := range ids {
3811-
if id == job.ID {
3812-
// clone the Tags before appending, since maps are reference types and
3813-
// we don't want the caller to be able to mutate the map we have inside
3814-
// dbmem!
3815-
job.Tags = maps.Clone(job.Tags)
3816-
job := database.GetProvisionerJobsByIDsWithQueuePositionRow{
3817-
ProvisionerJob: job,
3818-
}
3819-
if !job.ProvisionerJob.StartedAt.Valid {
3820-
job.QueuePosition = queuePosition
3821-
}
3822-
jobs = append(jobs, job)
3823-
break
3824-
}
3825-
}
3826-
if !job.StartedAt.Valid {
3827-
queuePosition++
3827+
if job.StartedAt.Valid ||
3828+
job.CanceledAt.Valid ||
3829+
job.CompletedAt.Valid ||
3830+
job.Error.Valid {
3831+
continue
38283832
}
3833+
pendingJobs = append(pendingJobs, pendingJobRow{
3834+
ID: job.ID,
3835+
CreatedAt: job.CreatedAt,
3836+
})
38293837
}
3830-
for _, job := range jobs {
3831-
if !job.ProvisionerJob.StartedAt.Valid {
3832-
// Set it to the max position!
3833-
job.QueueSize = queuePosition
3838+
3839+
// queue_position AS (
3840+
// SELECT
3841+
// id,
3842+
// ROW_NUMBER() OVER (ORDER BY created_at ASC) AS queue_position
3843+
// FROM
3844+
// pending_jobs
3845+
// ),
3846+
slices.SortFunc(pendingJobs, func(a, b pendingJobRow) int {
3847+
c := a.CreatedAt.Compare(b.CreatedAt)
3848+
return c
3849+
})
3850+
3851+
queuePosition := make(map[uuid.UUID]int64)
3852+
for idx, pj := range pendingJobs {
3853+
queuePosition[pj.ID] = int64(idx + 1)
3854+
}
3855+
3856+
// queue_size AS (
3857+
// SELECT COUNT(*) AS count FROM pending_jobs
3858+
// ),
3859+
queueSize := len(pendingJobs)
3860+
3861+
// SELECT
3862+
// sqlc.embed(pj),
3863+
// COALESCE(qp.queue_position, 0) AS queue_position,
3864+
// COALESCE(qs.count, 0) AS queue_size
3865+
// FROM
3866+
// provisioner_jobs pj
3867+
// LEFT JOIN
3868+
// queue_position qp ON pj.id = qp.id
3869+
// LEFT JOIN
3870+
// queue_size qs ON TRUE
3871+
// WHERE
3872+
// pj.id IN (...)
3873+
jobs := make([]database.GetProvisionerJobsByIDsWithQueuePositionRow, 0)
3874+
for _, job := range q.provisionerJobs {
3875+
if !slices.Contains(ids, job.ID) {
3876+
continue
3877+
}
3878+
// clone the Tags before appending, since maps are reference types and
3879+
// we don't want the caller to be able to mutate the map we have inside
3880+
// dbmem!
3881+
job.Tags = maps.Clone(job.Tags)
3882+
job := database.GetProvisionerJobsByIDsWithQueuePositionRow{
3883+
// sqlc.embed(pj),
3884+
ProvisionerJob: job,
3885+
// COALESCE(qp.queue_position, 0) AS queue_position,
3886+
QueuePosition: queuePosition[job.ID],
3887+
// COALESCE(qs.count, 0) AS queue_size
3888+
QueueSize: int64(queueSize),
38343889
}
3890+
jobs = append(jobs, job)
38353891
}
3892+
38363893
return jobs, nil
38373894
}
38383895

coderd/database/querier_test.go

+121
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/google/uuid"
1515
"github.com/prometheus/client_golang/prometheus"
16+
"github.com/stretchr/testify/assert"
1617
"github.com/stretchr/testify/require"
1718

1819
"cdr.dev/slog/sloggers/slogtest"
@@ -2037,6 +2038,126 @@ func TestExpectOne(t *testing.T) {
20372038
})
20382039
}
20392040

2041+
func TestGetProvisionerJobsByIDsWithQueuePosition(t *testing.T) {
2042+
t.Parallel()
2043+
if !dbtestutil.WillUsePostgres() {
2044+
t.SkipNow()
2045+
}
2046+
2047+
db, _ := dbtestutil.NewDB(t)
2048+
now := dbtime.Now()
2049+
ctx := testutil.Context(t, testutil.WaitShort)
2050+
2051+
// Given the following provisioner jobs:
2052+
allJobs := []database.ProvisionerJob{
2053+
// Pending. This will be the last in the queue because
2054+
// it was created most recently.
2055+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2056+
CreatedAt: now.Add(-time.Minute),
2057+
StartedAt: sql.NullTime{},
2058+
CanceledAt: sql.NullTime{},
2059+
CompletedAt: sql.NullTime{},
2060+
Error: sql.NullString{},
2061+
}),
2062+
2063+
// Another pending. This will come first in the queue
2064+
// because it was created before the previous job.
2065+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2066+
CreatedAt: now.Add(-2 * time.Minute),
2067+
StartedAt: sql.NullTime{},
2068+
CanceledAt: sql.NullTime{},
2069+
CompletedAt: sql.NullTime{},
2070+
Error: sql.NullString{},
2071+
}),
2072+
2073+
// Running
2074+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2075+
CreatedAt: now.Add(-3 * time.Minute),
2076+
StartedAt: sql.NullTime{Valid: true, Time: now},
2077+
CanceledAt: sql.NullTime{},
2078+
CompletedAt: sql.NullTime{},
2079+
Error: sql.NullString{},
2080+
}),
2081+
2082+
// Succeeded
2083+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2084+
CreatedAt: now.Add(-4 * time.Minute),
2085+
StartedAt: sql.NullTime{Valid: true, Time: now},
2086+
CanceledAt: sql.NullTime{},
2087+
CompletedAt: sql.NullTime{Valid: true, Time: now},
2088+
Error: sql.NullString{},
2089+
}),
2090+
2091+
// Canceling
2092+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2093+
CreatedAt: now.Add(-5 * time.Minute),
2094+
StartedAt: sql.NullTime{},
2095+
CanceledAt: sql.NullTime{Valid: true, Time: now},
2096+
CompletedAt: sql.NullTime{},
2097+
Error: sql.NullString{},
2098+
}),
2099+
2100+
// Canceled
2101+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2102+
CreatedAt: now.Add(-6 * time.Minute),
2103+
StartedAt: sql.NullTime{},
2104+
CanceledAt: sql.NullTime{Valid: true, Time: now},
2105+
CompletedAt: sql.NullTime{Valid: true, Time: now},
2106+
Error: sql.NullString{},
2107+
}),
2108+
2109+
// Failed
2110+
dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
2111+
CreatedAt: now.Add(-7 * time.Minute),
2112+
StartedAt: sql.NullTime{},
2113+
CanceledAt: sql.NullTime{},
2114+
CompletedAt: sql.NullTime{},
2115+
Error: sql.NullString{String: "failed", Valid: true},
2116+
}),
2117+
}
2118+
2119+
// Assert invariant: the jobs are in the expected order
2120+
require.Len(t, allJobs, 7, "expected 7 jobs")
2121+
for idx, status := range []database.ProvisionerJobStatus{
2122+
database.ProvisionerJobStatusPending,
2123+
database.ProvisionerJobStatusPending,
2124+
database.ProvisionerJobStatusRunning,
2125+
database.ProvisionerJobStatusSucceeded,
2126+
database.ProvisionerJobStatusCanceling,
2127+
database.ProvisionerJobStatusCanceled,
2128+
database.ProvisionerJobStatusFailed,
2129+
} {
2130+
require.Equal(t, status, allJobs[idx].JobStatus, "expected job %d to have status %s", idx, status)
2131+
}
2132+
2133+
var jobIDs []uuid.UUID
2134+
for _, job := range allJobs {
2135+
jobIDs = append(jobIDs, job.ID)
2136+
}
2137+
2138+
// When: we fetch the jobs by their IDs
2139+
actualJobs, err := db.GetProvisionerJobsByIDsWithQueuePosition(ctx, jobIDs)
2140+
require.NoError(t, err)
2141+
require.Len(t, actualJobs, len(allJobs), "should return all jobs")
2142+
2143+
// Then: the jobs should be returned in the correct order (by IDs in the input slice)
2144+
for idx, job := range actualJobs {
2145+
assert.EqualValues(t, allJobs[idx], job.ProvisionerJob)
2146+
}
2147+
2148+
// Then: the queue size should be set correctly
2149+
for _, job := range actualJobs {
2150+
assert.EqualValues(t, job.QueueSize, 2, "should have queue size 2")
2151+
}
2152+
2153+
// Then: the queue position should be set correctly:
2154+
var queuePositions []int64
2155+
for _, job := range actualJobs {
2156+
queuePositions = append(queuePositions, job.QueuePosition)
2157+
}
2158+
assert.EqualValues(t, []int64{2, 1, 0, 0, 0, 0, 0}, queuePositions, "expected queue positions to be set correctly")
2159+
}
2160+
20402161
func TestGroupRemovalTrigger(t *testing.T) {
20412162
t.Parallel()
20422163

coderd/database/queries.sql.go

+9-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/provisionerjobs.sql

+9-3
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,29 @@ WHERE
5050
id = ANY(@ids :: uuid [ ]);
5151

5252
-- name: GetProvisionerJobsByIDsWithQueuePosition :many
53-
WITH unstarted_jobs AS (
53+
WITH pending_jobs AS (
5454
SELECT
5555
id, created_at
5656
FROM
5757
provisioner_jobs
5858
WHERE
5959
started_at IS NULL
60+
AND
61+
canceled_at IS NULL
62+
AND
63+
completed_at IS NULL
64+
AND
65+
error IS NULL
6066
),
6167
queue_position AS (
6268
SELECT
6369
id,
6470
ROW_NUMBER() OVER (ORDER BY created_at ASC) AS queue_position
6571
FROM
66-
unstarted_jobs
72+
pending_jobs
6773
),
6874
queue_size AS (
69-
SELECT COUNT(*) as count FROM unstarted_jobs
75+
SELECT COUNT(*) AS count FROM pending_jobs
7076
)
7177
SELECT
7278
sqlc.embed(pj),

0 commit comments

Comments
 (0)