Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2f55e29

Browse files
authored
fix: complete job and mark workspace as deleted when no provisioners are available (#18465)
Alternate fix for #18080. Modifies wsbuilder to complete the provisioner job and mark the workspace as deleted if it is clear that no provisioner will be able to pick up the delete build. This has the significant advantage of not deviating too much from the current semantics of `POST /api/v2/workspacebuilds`; by contrast, #18460 ends up returning a 204 on orphan delete because no build is created. The downside is that we have to duplicate some responsibilities of provisionerdserver in wsbuilder. There is a slight gotcha to this approach, though: if you stop a provisioner and then immediately try to orphan-delete, the job will still be created because of the provisioner heartbeat interval. However, you can cancel it and try again.
1 parent c3bc1e7 commit 2f55e29

File tree

7 files changed

+502
-86
lines changed

7 files changed

+502
-86
lines changed

cli/delete_test.go

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ import (
55
"database/sql"
66
"fmt"
77
"io"
8+
"net/http"
89
"testing"
910
"time"
1011

@@ -60,28 +61,35 @@ func TestDelete(t *testing.T) {
6061
t.Parallel()
6162
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
6263
owner := coderdtest.CreateFirstUser(t, client)
63-
version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil)
64-
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
65-
template := coderdtest.CreateTemplate(t, client, owner.OrganizationID, version.ID)
66-
workspace := coderdtest.CreateWorkspace(t, client, template.ID)
67-
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
64+
templateAdmin, _ := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID, rbac.RoleTemplateAdmin())
65+
version := coderdtest.CreateTemplateVersion(t, templateAdmin, owner.OrganizationID, nil)
66+
coderdtest.AwaitTemplateVersionJobCompleted(t, templateAdmin, version.ID)
67+
template := coderdtest.CreateTemplate(t, templateAdmin, owner.OrganizationID, version.ID)
68+
workspace := coderdtest.CreateWorkspace(t, templateAdmin, template.ID)
69+
coderdtest.AwaitWorkspaceBuildJobCompleted(t, templateAdmin, workspace.LatestBuild.ID)
70+
71+
ctx := testutil.Context(t, testutil.WaitShort)
6872
inv, root := clitest.New(t, "delete", workspace.Name, "-y", "--orphan")
73+
clitest.SetupConfig(t, templateAdmin, root)
6974

70-
//nolint:gocritic // Deleting orphaned workspaces requires an admin.
71-
clitest.SetupConfig(t, client, root)
7275
doneChan := make(chan struct{})
7376
pty := ptytest.New(t).Attach(inv)
7477
inv.Stderr = pty.Output()
7578
go func() {
7679
defer close(doneChan)
77-
err := inv.Run()
80+
err := inv.WithContext(ctx).Run()
7881
// When running with the race detector on, we sometimes get an EOF.
7982
if err != nil {
8083
assert.ErrorIs(t, err, io.EOF)
8184
}
8285
}()
8386
pty.ExpectMatch("has been deleted")
84-
<-doneChan
87+
testutil.TryReceive(ctx, t, doneChan)
88+
89+
_, err := client.Workspace(ctx, workspace.ID)
90+
require.Error(t, err)
91+
cerr := coderdtest.SDKError(t, err)
92+
require.Equal(t, http.StatusGone, cerr.StatusCode())
8593
})
8694

8795
// Super orphaned, as the workspace doesn't even have a user.

coderd/database/dbmem/dbmem.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4497,7 +4497,8 @@ func (q *FakeQuerier) GetProvisionerDaemons(_ context.Context) ([]database.Provi
44974497
defer q.mutex.RUnlock()
44984498

44994499
if len(q.provisionerDaemons) == 0 {
4500-
return nil, sql.ErrNoRows
4500+
// Returning err=nil here for consistency with real querier
4501+
return []database.ProvisionerDaemon{}, nil
45014502
}
45024503
// copy the data so that the caller can't manipulate any data inside dbmem
45034504
// after returning

coderd/workspacebuilds.go

Lines changed: 41 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ package coderd
33
import (
44
"context"
55
"database/sql"
6+
"encoding/json"
67
"errors"
78
"fmt"
89
"math"
@@ -433,20 +434,56 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) {
433434
return
434435
}
435436

437+
var queuePos database.GetProvisionerJobsByIDsWithQueuePositionRow
436438
if provisionerJob != nil {
439+
queuePos.ProvisionerJob = *provisionerJob
440+
queuePos.QueuePosition = 0
437441
if err := provisionerjobs.PostJob(api.Pubsub, *provisionerJob); err != nil {
438442
// Client probably doesn't care about this error, so just log it.
439443
api.Logger.Error(ctx, "failed to post provisioner job to pubsub", slog.Error(err))
440444
}
445+
446+
// We may need to complete the audit if wsbuilder determined that
447+
// no provisioner could handle an orphan-delete job and completed it.
448+
if createBuild.Orphan && createBuild.Transition == codersdk.WorkspaceTransitionDelete && provisionerJob.CompletedAt.Valid {
449+
api.Logger.Warn(ctx, "orphan delete handled by wsbuilder due to no eligible provisioners",
450+
slog.F("workspace_id", workspace.ID),
451+
slog.F("workspace_build_id", workspaceBuild.ID),
452+
slog.F("provisioner_job_id", provisionerJob.ID),
453+
)
454+
buildResourceInfo := audit.AdditionalFields{
455+
WorkspaceName: workspace.Name,
456+
BuildNumber: strconv.Itoa(int(workspaceBuild.BuildNumber)),
457+
BuildReason: workspaceBuild.Reason,
458+
WorkspaceID: workspace.ID,
459+
WorkspaceOwner: workspace.OwnerName,
460+
}
461+
briBytes, err := json.Marshal(buildResourceInfo)
462+
if err != nil {
463+
api.Logger.Error(ctx, "failed to marshal build resource info for audit", slog.Error(err))
464+
}
465+
auditor := api.Auditor.Load()
466+
bag := audit.BaggageFromContext(ctx)
467+
audit.BackgroundAudit(ctx, &audit.BackgroundAuditParams[database.WorkspaceBuild]{
468+
Audit: *auditor,
469+
Log: api.Logger,
470+
UserID: provisionerJob.InitiatorID,
471+
OrganizationID: workspace.OrganizationID,
472+
RequestID: provisionerJob.ID,
473+
IP: bag.IP,
474+
Action: database.AuditActionDelete,
475+
Old: previousWorkspaceBuild,
476+
New: *workspaceBuild,
477+
Status: http.StatusOK,
478+
AdditionalFields: briBytes,
479+
})
480+
}
441481
}
442482

443483
apiBuild, err := api.convertWorkspaceBuild(
444484
*workspaceBuild,
445485
workspace,
446-
database.GetProvisionerJobsByIDsWithQueuePositionRow{
447-
ProvisionerJob: *provisionerJob,
448-
QueuePosition: 0,
449-
},
486+
queuePos,
450487
[]database.WorkspaceResource{},
451488
[]database.WorkspaceResourceMetadatum{},
452489
[]database.WorkspaceAgent{},

coderd/workspacebuilds_test.go

Lines changed: 163 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package coderd_test
22

33
import (
4+
"bytes"
45
"context"
56
"database/sql"
67
"errors"
@@ -25,6 +26,7 @@ import (
2526
"github.com/coder/coder/v2/coderd/coderdtest/oidctest"
2627
"github.com/coder/coder/v2/coderd/database"
2728
"github.com/coder/coder/v2/coderd/database/dbauthz"
29+
"github.com/coder/coder/v2/coderd/database/dbfake"
2830
"github.com/coder/coder/v2/coderd/database/dbgen"
2931
"github.com/coder/coder/v2/coderd/database/dbtestutil"
3032
"github.com/coder/coder/v2/coderd/database/dbtime"
@@ -371,42 +373,174 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) {
371373

372374
t.Run("Orphan", func(t *testing.T) {
373375
t.Parallel()
374-
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
375-
first := coderdtest.CreateFirstUser(t, client)
376-
377-
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
378-
defer cancel()
379376

380-
version := coderdtest.CreateTemplateVersion(t, client, first.OrganizationID, nil)
381-
template := coderdtest.CreateTemplate(t, client, first.OrganizationID, version.ID)
382-
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
377+
t.Run("WithoutDelete", func(t *testing.T) {
378+
t.Parallel()
379+
client, store := coderdtest.NewWithDatabase(t, nil)
380+
first := coderdtest.CreateFirstUser(t, client)
381+
templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin())
382+
383+
r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
384+
OwnerID: templateAdminUser.ID,
385+
OrganizationID: first.OrganizationID,
386+
}).Do()
387+
388+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
389+
defer cancel()
390+
391+
// Trying to orphan without delete transition fails.
392+
_, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{
393+
TemplateVersionID: r.TemplateVersion.ID,
394+
Transition: codersdk.WorkspaceTransitionStart,
395+
Orphan: true,
396+
})
397+
require.Error(t, err, "Orphan is only permitted when deleting a workspace.")
398+
cerr := coderdtest.SDKError(t, err)
399+
require.Equal(t, http.StatusBadRequest, cerr.StatusCode())
400+
})
383401

384-
workspace := coderdtest.CreateWorkspace(t, client, template.ID)
385-
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
402+
t.Run("WithState", func(t *testing.T) {
403+
t.Parallel()
404+
client, store := coderdtest.NewWithDatabase(t, nil)
405+
first := coderdtest.CreateFirstUser(t, client)
406+
templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin())
407+
408+
r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
409+
OwnerID: templateAdminUser.ID,
410+
OrganizationID: first.OrganizationID,
411+
}).Do()
412+
413+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
414+
defer cancel()
415+
416+
// Providing both state and orphan fails.
417+
_, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{
418+
TemplateVersionID: r.TemplateVersion.ID,
419+
Transition: codersdk.WorkspaceTransitionDelete,
420+
ProvisionerState: []byte(" "),
421+
Orphan: true,
422+
})
423+
require.Error(t, err)
424+
cerr := coderdtest.SDKError(t, err)
425+
require.Equal(t, http.StatusBadRequest, cerr.StatusCode())
426+
})
386427

387-
// Providing both state and orphan fails.
388-
_, err := client.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{
389-
TemplateVersionID: workspace.LatestBuild.TemplateVersionID,
390-
Transition: codersdk.WorkspaceTransitionDelete,
391-
ProvisionerState: []byte(" "),
392-
Orphan: true,
428+
t.Run("NoPermission", func(t *testing.T) {
429+
t.Parallel()
430+
client, store := coderdtest.NewWithDatabase(t, nil)
431+
first := coderdtest.CreateFirstUser(t, client)
432+
member, memberUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID)
433+
434+
r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
435+
OwnerID: memberUser.ID,
436+
OrganizationID: first.OrganizationID,
437+
}).Do()
438+
439+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
440+
defer cancel()
441+
442+
// Trying to orphan without being a template admin fails.
443+
_, err := member.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{
444+
TemplateVersionID: r.TemplateVersion.ID,
445+
Transition: codersdk.WorkspaceTransitionDelete,
446+
Orphan: true,
447+
})
448+
require.Error(t, err)
449+
cerr := coderdtest.SDKError(t, err)
450+
require.Equal(t, http.StatusForbidden, cerr.StatusCode())
393451
})
394-
require.Error(t, err)
395-
cerr := coderdtest.SDKError(t, err)
396-
require.Equal(t, http.StatusBadRequest, cerr.StatusCode())
397452

398-
// Regular orphan operation succeeds.
399-
build, err := client.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{
400-
TemplateVersionID: workspace.LatestBuild.TemplateVersionID,
401-
Transition: codersdk.WorkspaceTransitionDelete,
402-
Orphan: true,
453+
t.Run("OK", func(t *testing.T) {
454+
// Include a provisioner so that we can test that provisionerdserver
455+
// performs deletion.
456+
auditor := audit.NewMock()
457+
client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{IncludeProvisionerDaemon: true, Auditor: auditor})
458+
first := coderdtest.CreateFirstUser(t, client)
459+
templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin())
460+
461+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
462+
defer cancel()
463+
// This is a valid zip file. Without this the job will fail to complete.
464+
// TODO: add this to dbfake by default.
465+
zipBytes := make([]byte, 22)
466+
zipBytes[0] = 80
467+
zipBytes[1] = 75
468+
zipBytes[2] = 0o5
469+
zipBytes[3] = 0o6
470+
uploadRes, err := client.Upload(ctx, codersdk.ContentTypeZip, bytes.NewReader(zipBytes))
471+
require.NoError(t, err)
472+
473+
tv := dbfake.TemplateVersion(t, store).
474+
FileID(uploadRes.ID).
475+
Seed(database.TemplateVersion{
476+
OrganizationID: first.OrganizationID,
477+
CreatedBy: templateAdminUser.ID,
478+
}).
479+
Do()
480+
481+
r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
482+
OwnerID: templateAdminUser.ID,
483+
OrganizationID: first.OrganizationID,
484+
TemplateID: tv.Template.ID,
485+
}).Do()
486+
487+
auditor.ResetLogs()
488+
// Regular orphan operation succeeds.
489+
build, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{
490+
TemplateVersionID: r.TemplateVersion.ID,
491+
Transition: codersdk.WorkspaceTransitionDelete,
492+
Orphan: true,
493+
})
494+
require.NoError(t, err)
495+
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, build.ID)
496+
497+
// Validate that the deletion was audited.
498+
require.True(t, auditor.Contains(t, database.AuditLog{
499+
ResourceID: build.ID,
500+
Action: database.AuditActionDelete,
501+
}))
403502
})
404-
require.NoError(t, err)
405-
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, build.ID)
406503

407-
_, err = client.Workspace(ctx, workspace.ID)
408-
require.Error(t, err)
409-
require.Equal(t, http.StatusGone, coderdtest.SDKError(t, err).StatusCode())
504+
t.Run("NoProvisioners", func(t *testing.T) {
505+
t.Parallel()
506+
auditor := audit.NewMock()
507+
client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{Auditor: auditor})
508+
first := coderdtest.CreateFirstUser(t, client)
509+
templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin())
510+
511+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
512+
defer cancel()
513+
r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{
514+
OwnerID: templateAdminUser.ID,
515+
OrganizationID: first.OrganizationID,
516+
}).Do()
517+
518+
// nolint:gocritic // For testing
519+
daemons, err := store.GetProvisionerDaemons(dbauthz.AsSystemReadProvisionerDaemons(ctx))
520+
require.NoError(t, err)
521+
require.Empty(t, daemons, "Provisioner daemons should be empty for this test")
522+
523+
// Orphan deletion still succeeds despite no provisioners being available.
524+
build, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{
525+
TemplateVersionID: r.TemplateVersion.ID,
526+
Transition: codersdk.WorkspaceTransitionDelete,
527+
Orphan: true,
528+
})
529+
require.NoError(t, err)
530+
require.Equal(t, codersdk.WorkspaceTransitionDelete, build.Transition)
531+
require.Equal(t, codersdk.ProvisionerJobSucceeded, build.Job.Status)
532+
require.Empty(t, build.Job.Error)
533+
534+
ws, err := client.Workspace(ctx, r.Workspace.ID)
535+
require.Empty(t, ws)
536+
require.Equal(t, http.StatusGone, coderdtest.SDKError(t, err).StatusCode())
537+
538+
// Validate that the deletion was audited.
539+
require.True(t, auditor.Contains(t, database.AuditLog{
540+
ResourceID: build.ID,
541+
Action: database.AuditActionDelete,
542+
}))
543+
})
410544
})
411545
}
412546

0 commit comments

Comments
 (0)