Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ab3b3d5

Browse files
authored
feat: add debouncing to provisionerd rpc calls (coder#5198)
1 parent 5457dd0 commit ab3b3d5

File tree

14 files changed

+123
-46
lines changed

14 files changed

+123
-46
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ vendor
1414
.eslintcache
1515
yarn-error.log
1616
gotests.coverage
17+
gotests.xml
1718
.idea
1819
.gitpod.yml
1920
.DS_Store

cli/deployment/config.go

+12
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,18 @@ func newConfig() *codersdk.DeploymentConfig {
388388
Flag: "provisioner-daemons",
389389
Default: 3,
390390
},
391+
DaemonPollInterval: &codersdk.DeploymentConfigField[time.Duration]{
392+
Name: "Poll Interval",
393+
Usage: "Time to wait before polling for a new job.",
394+
Flag: "provisioner-daemon-poll-interval",
395+
Default: time.Second,
396+
},
397+
DaemonPollJitter: &codersdk.DeploymentConfigField[time.Duration]{
398+
Name: "Poll Jitter",
399+
Usage: "Random jitter added to the poll interval.",
400+
Flag: "provisioner-daemon-poll-jitter",
401+
Default: 100 * time.Millisecond,
402+
},
391403
ForceCancelInterval: &codersdk.DeploymentConfigField[time.Duration]{
392404
Name: "Force Cancel Interval",
393405
Usage: "Time to force cancel provisioning tasks that are stuck.",

cli/deployment/config_test.go

+19-14
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package deployment_test
22

33
import (
44
"testing"
5+
"time"
56

67
"github.com/spf13/pflag"
78
"github.com/stretchr/testify/require"
@@ -25,20 +26,22 @@ func TestConfig(t *testing.T) {
2526
}{{
2627
Name: "Deployment",
2728
Env: map[string]string{
28-
"CODER_ADDRESS": "0.0.0.0:8443",
29-
"CODER_ACCESS_URL": "https://dev.coder.com",
30-
"CODER_PG_CONNECTION_URL": "some-url",
31-
"CODER_PPROF_ADDRESS": "something",
32-
"CODER_PPROF_ENABLE": "true",
33-
"CODER_PROMETHEUS_ADDRESS": "hello-world",
34-
"CODER_PROMETHEUS_ENABLE": "true",
35-
"CODER_PROVISIONER_DAEMONS": "5",
36-
"CODER_SECURE_AUTH_COOKIE": "true",
37-
"CODER_SSH_KEYGEN_ALGORITHM": "potato",
38-
"CODER_TELEMETRY": "false",
39-
"CODER_TELEMETRY_TRACE": "false",
40-
"CODER_WILDCARD_ACCESS_URL": "something-wildcard.com",
41-
"CODER_UPDATE_CHECK": "false",
29+
"CODER_ADDRESS": "0.0.0.0:8443",
30+
"CODER_ACCESS_URL": "https://dev.coder.com",
31+
"CODER_PG_CONNECTION_URL": "some-url",
32+
"CODER_PPROF_ADDRESS": "something",
33+
"CODER_PPROF_ENABLE": "true",
34+
"CODER_PROMETHEUS_ADDRESS": "hello-world",
35+
"CODER_PROMETHEUS_ENABLE": "true",
36+
"CODER_PROVISIONER_DAEMONS": "5",
37+
"CODER_PROVISIONER_DAEMON_POLL_INTERVAL": "5s",
38+
"CODER_PROVISIONER_DAEMON_POLL_JITTER": "1s",
39+
"CODER_SECURE_AUTH_COOKIE": "true",
40+
"CODER_SSH_KEYGEN_ALGORITHM": "potato",
41+
"CODER_TELEMETRY": "false",
42+
"CODER_TELEMETRY_TRACE": "false",
43+
"CODER_WILDCARD_ACCESS_URL": "something-wildcard.com",
44+
"CODER_UPDATE_CHECK": "false",
4245
},
4346
Valid: func(config *codersdk.DeploymentConfig) {
4447
require.Equal(t, config.Address.Value, "0.0.0.0:8443")
@@ -49,6 +52,8 @@ func TestConfig(t *testing.T) {
4952
require.Equal(t, config.Prometheus.Address.Value, "hello-world")
5053
require.Equal(t, config.Prometheus.Enable.Value, true)
5154
require.Equal(t, config.Provisioner.Daemons.Value, 5)
55+
require.Equal(t, config.Provisioner.DaemonPollInterval.Value, 5*time.Second)
56+
require.Equal(t, config.Provisioner.DaemonPollJitter.Value, 1*time.Second)
5257
require.Equal(t, config.SecureAuthCookie.Value, true)
5358
require.Equal(t, config.SSHKeygenAlgorithm.Value, "potato")
5459
require.Equal(t, config.Telemetry.Enable.Value, false)

cli/root_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222
"github.com/coder/coder/testutil"
2323
)
2424

25+
// To update the golden files:
26+
// make update-golden-files
2527
var updateGoldenFiles = flag.Bool("update", false, "update .golden files")
2628

2729
//nolint:tparallel,paralleltest // These test sets env vars.

cli/server.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -970,13 +970,16 @@ func newProvisionerDaemon(
970970
}()
971971
provisioners[string(database.ProvisionerTypeEcho)] = sdkproto.NewDRPCProvisionerClient(echoClient)
972972
}
973+
debounce := time.Second
973974
return provisionerd.New(func(ctx context.Context) (proto.DRPCProvisionerDaemonClient, error) {
974975
// This debounces calls to listen every second. Read the comment
975976
// in provisionerdserver.go to learn more!
976-
return coderAPI.CreateInMemoryProvisionerDaemon(ctx, time.Second)
977+
return coderAPI.CreateInMemoryProvisionerDaemon(ctx, debounce)
977978
}, &provisionerd.Options{
978979
Logger: logger,
979-
PollInterval: 500 * time.Millisecond,
980+
JobPollInterval: cfg.Provisioner.DaemonPollInterval.Value,
981+
JobPollJitter: cfg.Provisioner.DaemonPollJitter.Value,
982+
JobPollDebounce: debounce,
980983
UpdateInterval: 500 * time.Millisecond,
981984
ForceCancelInterval: cfg.Provisioner.ForceCancelInterval.Value,
982985
Provisioners: provisioners,

cli/testdata/coder_server_--help.golden

+9
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,15 @@ Flags:
128128
--prometheus-enable Serve prometheus metrics on the address
129129
defined by prometheus address.
130130
Consumes $CODER_PROMETHEUS_ENABLE
131+
--provisioner-daemon-poll-interval duration Time to wait before polling for a new
132+
job.
133+
Consumes
134+
$CODER_PROVISIONER_DAEMON_POLL_INTERVAL
135+
(default 1s)
136+
--provisioner-daemon-poll-jitter duration Random jitter added to the poll interval.
137+
Consumes
138+
$CODER_PROVISIONER_DAEMON_POLL_JITTER
139+
(default 100ms)
131140
--provisioner-daemons int Number of provisioner daemons to create
132141
on start. If builds are stuck in queued
133142
state for a long time, consider

coderd/coderd.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -643,8 +643,8 @@ func compressHandler(h http.Handler) http.Handler {
643643
return cmp.Handler(h)
644644
}
645645

646-
// CreateInMemoryProvisionerDaemon is an in-memory connection to a provisionerd. Useful when starting coderd and provisionerd
647-
// in the same process.
646+
// CreateInMemoryProvisionerDaemon is an in-memory connection to a provisionerd.
647+
// Useful when starting coderd and provisionerd in the same process.
648648
func (api *API) CreateInMemoryProvisionerDaemon(ctx context.Context, debounce time.Duration) (client proto.DRPCProvisionerDaemonClient, err error) {
649649
clientSession, serverSession := provisionersdk.MemTransportPipe()
650650
defer func() {

coderd/coderdtest/coderdtest.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ func NewProvisionerDaemon(t *testing.T, coderAPI *coderd.API) io.Closer {
340340
}, &provisionerd.Options{
341341
Filesystem: fs,
342342
Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug),
343-
PollInterval: 50 * time.Millisecond,
343+
JobPollInterval: 50 * time.Millisecond,
344344
UpdateInterval: 250 * time.Millisecond,
345345
ForceCancelInterval: time.Second,
346346
Provisioners: provisionerd.Provisioners{
@@ -375,7 +375,7 @@ func NewExternalProvisionerDaemon(t *testing.T, client *codersdk.Client, org uui
375375
}, &provisionerd.Options{
376376
Filesystem: fs,
377377
Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug),
378-
PollInterval: 50 * time.Millisecond,
378+
JobPollInterval: 50 * time.Millisecond,
379379
UpdateInterval: 250 * time.Millisecond,
380380
ForceCancelInterval: time.Second,
381381
Provisioners: provisionerd.Provisioners{

codersdk/deploymentconfig.go

+2
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ type GitAuthConfig struct {
135135

136136
type ProvisionerConfig struct {
137137
Daemons *DeploymentConfigField[int] `json:"daemons" typescript:",notnull"`
138+
DaemonPollInterval *DeploymentConfigField[time.Duration] `json:"daemon_poll_interval" typescript:",notnull"`
139+
DaemonPollJitter *DeploymentConfigField[time.Duration] `json:"daemon_poll_jitter" typescript:",notnull"`
138140
ForceCancelInterval *DeploymentConfigField[time.Duration] `json:"force_cancel_interval" typescript:",notnull"`
139141
}
140142

enterprise/cli/provisionerdaemons.go

+17-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ import (
77
"os/signal"
88
"time"
99

10+
"github.com/spf13/cobra"
11+
"golang.org/x/xerrors"
12+
1013
"cdr.dev/slog"
1114
"cdr.dev/slog/sloggers/sloghuman"
1215
agpl "github.com/coder/coder/cli"
@@ -20,9 +23,6 @@ import (
2023
provisionerdproto "github.com/coder/coder/provisionerd/proto"
2124
"github.com/coder/coder/provisionersdk"
2225
"github.com/coder/coder/provisionersdk/proto"
23-
24-
"github.com/spf13/cobra"
25-
"golang.org/x/xerrors"
2626
)
2727

2828
func provisionerDaemons() *cobra.Command {
@@ -37,8 +37,10 @@ func provisionerDaemons() *cobra.Command {
3737

3838
func provisionerDaemonStart() *cobra.Command {
3939
var (
40-
cacheDir string
41-
rawTags []string
40+
cacheDir string
41+
rawTags []string
42+
pollInterval time.Duration
43+
pollJitter time.Duration
4244
)
4345
cmd := &cobra.Command{
4446
Use: "start",
@@ -111,11 +113,12 @@ func provisionerDaemonStart() *cobra.Command {
111113
codersdk.ProvisionerTypeTerraform,
112114
}, tags)
113115
}, &provisionerd.Options{
114-
Logger: logger,
115-
PollInterval: 500 * time.Millisecond,
116-
UpdateInterval: 500 * time.Millisecond,
117-
Provisioners: provisioners,
118-
WorkDirectory: tempDir,
116+
Logger: logger,
117+
JobPollInterval: pollInterval,
118+
JobPollJitter: pollJitter,
119+
UpdateInterval: 500 * time.Millisecond,
120+
Provisioners: provisioners,
121+
WorkDirectory: tempDir,
119122
})
120123

121124
var exitErr error
@@ -150,6 +153,10 @@ func provisionerDaemonStart() *cobra.Command {
150153
"Specify a directory to cache provisioner job files.")
151154
cliflag.StringArrayVarP(cmd.Flags(), &rawTags, "tag", "t", "CODER_PROVISIONERD_TAGS", []string{},
152155
"Specify a list of tags to target provisioner jobs.")
156+
cliflag.DurationVarP(cmd.Flags(), &pollInterval, "poll-interval", "", "CODER_PROVISIONERD_POLL_INTERVAL", time.Second,
157+
"Specify the interval for which the provisioner daemon should poll for jobs.")
158+
cliflag.DurationVarP(cmd.Flags(), &pollJitter, "poll-jitter", "", "CODER_PROVISIONERD_POLL_JITTER", 100*time.Millisecond,
159+
"Random jitter added to the poll interval.")
153160

154161
return cmd
155162
}

enterprise/coderd/provisionerdaemons.go

-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"storj.io/drpc/drpcserver"
1919

2020
"cdr.dev/slog"
21-
2221
"github.com/coder/coder/coderd"
2322
"github.com/coder/coder/coderd/database"
2423
"github.com/coder/coder/coderd/httpapi"

provisionerd/provisionerd.go

+45-10
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"cdr.dev/slog"
2525
"github.com/coder/coder/coderd/tracing"
26+
"github.com/coder/coder/cryptorand"
2627
"github.com/coder/coder/provisionerd/proto"
2728
"github.com/coder/coder/provisionerd/runner"
2829
sdkproto "github.com/coder/coder/provisionersdk/proto"
@@ -52,7 +53,9 @@ type Options struct {
5253
ForceCancelInterval time.Duration
5354
UpdateInterval time.Duration
5455
LogBufferInterval time.Duration
55-
PollInterval time.Duration
56+
JobPollInterval time.Duration
57+
JobPollJitter time.Duration
58+
JobPollDebounce time.Duration
5659
Provisioners Provisioners
5760
WorkDirectory string
5861
}
@@ -62,8 +65,11 @@ func New(clientDialer Dialer, opts *Options) *Server {
6265
if opts == nil {
6366
opts = &Options{}
6467
}
65-
if opts.PollInterval == 0 {
66-
opts.PollInterval = 5 * time.Second
68+
if opts.JobPollInterval == 0 {
69+
opts.JobPollInterval = 5 * time.Second
70+
}
71+
if opts.JobPollJitter == 0 {
72+
opts.JobPollJitter = time.Second
6773
}
6874
if opts.UpdateInterval == 0 {
6975
opts.UpdateInterval = 5 * time.Second
@@ -207,8 +213,8 @@ func (p *Server) connect(ctx context.Context) {
207213
if p.isClosed() {
208214
return
209215
}
210-
ticker := time.NewTicker(p.opts.PollInterval)
211-
defer ticker.Stop()
216+
timer := time.NewTimer(p.opts.JobPollInterval)
217+
defer timer.Stop()
212218
for {
213219
client, ok := p.client()
214220
if !ok {
@@ -219,13 +225,23 @@ func (p *Server) connect(ctx context.Context) {
219225
return
220226
case <-client.DRPCConn().Closed():
221227
return
222-
case <-ticker.C:
228+
case <-timer.C:
223229
p.acquireJob(ctx)
230+
timer.Reset(p.nextInterval())
224231
}
225232
}
226233
}()
227234
}
228235

236+
func (p *Server) nextInterval() time.Duration {
237+
r, err := cryptorand.Float64()
238+
if err != nil {
239+
panic("get random float:" + err.Error())
240+
}
241+
242+
return p.opts.JobPollInterval + time.Duration(float64(p.opts.JobPollJitter)*r)
243+
}
244+
229245
func (p *Server) client() (proto.DRPCProvisionerDaemonClient, bool) {
230246
rawClient := p.clientValue.Load()
231247
if rawClient == nil {
@@ -248,6 +264,11 @@ func (p *Server) isRunningJob() bool {
248264
}
249265
}
250266

267+
var (
268+
lastAcquire time.Time
269+
lastAcquireMutex sync.RWMutex
270+
)
271+
251272
// Locks a job in the database, and runs it!
252273
func (p *Server) acquireJob(ctx context.Context) {
253274
p.mutex.Lock()
@@ -263,6 +284,18 @@ func (p *Server) acquireJob(ctx context.Context) {
263284
return
264285
}
265286

287+
// This prevents loads of provisioner daemons from consistently sending
288+
// requests when no jobs are available.
289+
//
290+
// The debounce only occurs when no job is returned, so if loads of jobs are
291+
// added at once, they will start after at most this duration.
292+
lastAcquireMutex.RLock()
293+
if !lastAcquire.IsZero() && time.Since(lastAcquire) < p.opts.JobPollDebounce {
294+
lastAcquireMutex.RUnlock()
295+
return
296+
}
297+
lastAcquireMutex.RUnlock()
298+
266299
var err error
267300
client, ok := p.client()
268301
if !ok {
@@ -271,17 +304,19 @@ func (p *Server) acquireJob(ctx context.Context) {
271304

272305
job, err := client.AcquireJob(ctx, &proto.Empty{})
273306
if err != nil {
274-
if errors.Is(err, context.Canceled) {
275-
return
276-
}
277-
if errors.Is(err, yamux.ErrSessionShutdown) {
307+
if errors.Is(err, context.Canceled) ||
308+
errors.Is(err, yamux.ErrSessionShutdown) ||
309+
errors.Is(err, fasthttputil.ErrInmemoryListenerClosed) {
278310
return
279311
}
280312

281313
p.opts.Logger.Warn(ctx, "acquire job", slog.Error(err))
282314
return
283315
}
284316
if job.JobId == "" {
317+
lastAcquireMutex.Lock()
318+
lastAcquire = time.Now()
319+
lastAcquireMutex.Unlock()
285320
return
286321
}
287322

provisionerd/provisionerd_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -1053,11 +1053,11 @@ func createTar(t *testing.T, files map[string]string) []byte {
10531053
// Creates a provisionerd implementation with the provided dialer and provisioners.
10541054
func createProvisionerd(t *testing.T, dialer provisionerd.Dialer, provisioners provisionerd.Provisioners) *provisionerd.Server {
10551055
server := provisionerd.New(dialer, &provisionerd.Options{
1056-
Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug),
1057-
PollInterval: 50 * time.Millisecond,
1058-
UpdateInterval: 50 * time.Millisecond,
1059-
Provisioners: provisioners,
1060-
WorkDirectory: t.TempDir(),
1056+
Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug),
1057+
JobPollInterval: 50 * time.Millisecond,
1058+
UpdateInterval: 50 * time.Millisecond,
1059+
Provisioners: provisioners,
1060+
WorkDirectory: t.TempDir(),
10611061
})
10621062
t.Cleanup(func() {
10631063
_ = server.Close()

site/src/api/typesGenerated.ts

+2
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,8 @@ export interface PrometheusConfig {
527527
// From codersdk/deploymentconfig.go
528528
export interface ProvisionerConfig {
529529
readonly daemons: DeploymentConfigField<number>
530+
readonly daemon_poll_interval: DeploymentConfigField<number>
531+
readonly daemon_poll_jitter: DeploymentConfigField<number>
530532
readonly force_cancel_interval: DeploymentConfigField<number>
531533
}
532534

0 commit comments

Comments
 (0)