@@ -23,6 +23,7 @@ import (
23
23
24
24
"cdr.dev/slog"
25
25
"github.com/coder/coder/coderd/tracing"
26
+ "github.com/coder/coder/cryptorand"
26
27
"github.com/coder/coder/provisionerd/proto"
27
28
"github.com/coder/coder/provisionerd/runner"
28
29
sdkproto "github.com/coder/coder/provisionersdk/proto"
@@ -52,7 +53,9 @@ type Options struct {
52
53
ForceCancelInterval time.Duration
53
54
UpdateInterval time.Duration
54
55
LogBufferInterval time.Duration
55
- PollInterval time.Duration
56
+ JobPollInterval time.Duration
57
+ JobPollJitter time.Duration
58
+ JobPollDebounce time.Duration
56
59
Provisioners Provisioners
57
60
WorkDirectory string
58
61
}
@@ -62,8 +65,11 @@ func New(clientDialer Dialer, opts *Options) *Server {
62
65
if opts == nil {
63
66
opts = & Options {}
64
67
}
65
- if opts .PollInterval == 0 {
66
- opts .PollInterval = 5 * time .Second
68
+ if opts .JobPollInterval == 0 {
69
+ opts .JobPollInterval = 5 * time .Second
70
+ }
71
+ if opts .JobPollJitter == 0 {
72
+ opts .JobPollJitter = time .Second
67
73
}
68
74
if opts .UpdateInterval == 0 {
69
75
opts .UpdateInterval = 5 * time .Second
@@ -207,8 +213,8 @@ func (p *Server) connect(ctx context.Context) {
207
213
if p .isClosed () {
208
214
return
209
215
}
210
- ticker := time .NewTicker (p .opts .PollInterval )
211
- defer ticker .Stop ()
216
+ timer := time .NewTimer (p .opts .JobPollInterval )
217
+ defer timer .Stop ()
212
218
for {
213
219
client , ok := p .client ()
214
220
if ! ok {
@@ -219,13 +225,23 @@ func (p *Server) connect(ctx context.Context) {
219
225
return
220
226
case <- client .DRPCConn ().Closed ():
221
227
return
222
- case <- ticker .C :
228
+ case <- timer .C :
223
229
p .acquireJob (ctx )
230
+ timer .Reset (p .nextInterval ())
224
231
}
225
232
}
226
233
}()
227
234
}
228
235
236
+ func (p * Server ) nextInterval () time.Duration {
237
+ r , err := cryptorand .Float64 ()
238
+ if err != nil {
239
+ panic ("get random float:" + err .Error ())
240
+ }
241
+
242
+ return p .opts .JobPollInterval + time .Duration (float64 (p .opts .JobPollJitter )* r )
243
+ }
244
+
229
245
func (p * Server ) client () (proto.DRPCProvisionerDaemonClient , bool ) {
230
246
rawClient := p .clientValue .Load ()
231
247
if rawClient == nil {
@@ -248,6 +264,11 @@ func (p *Server) isRunningJob() bool {
248
264
}
249
265
}
250
266
267
// lastAcquire is the time at which an acquire attempt last came back without
// a job. It is package-level state and starts out as the zero time.
var lastAcquire time.Time

// lastAcquireMutex guards reads and writes of lastAcquire.
var lastAcquireMutex sync.RWMutex
271
+
251
272
// Locks a job in the database, and runs it!
252
273
func (p * Server ) acquireJob (ctx context.Context ) {
253
274
p .mutex .Lock ()
@@ -263,6 +284,18 @@ func (p *Server) acquireJob(ctx context.Context) {
263
284
return
264
285
}
265
286
287
+ // This prevents loads of provisioner daemons from consistently sending
288
+ // requests when no jobs are available.
289
+ //
290
+ // The debounce only occurs when no job is returned, so if loads of jobs are
291
+ // added at once, they will start after at most this duration.
292
+ lastAcquireMutex .RLock ()
293
+ if ! lastAcquire .IsZero () && time .Since (lastAcquire ) < p .opts .JobPollDebounce {
294
+ lastAcquireMutex .RUnlock ()
295
+ return
296
+ }
297
+ lastAcquireMutex .RUnlock ()
298
+
266
299
var err error
267
300
client , ok := p .client ()
268
301
if ! ok {
@@ -271,17 +304,19 @@ func (p *Server) acquireJob(ctx context.Context) {
271
304
272
305
job , err := client .AcquireJob (ctx , & proto.Empty {})
273
306
if err != nil {
274
- if errors .Is (err , context .Canceled ) {
275
- return
276
- }
277
- if errors .Is (err , yamux .ErrSessionShutdown ) {
307
+ if errors .Is (err , context .Canceled ) ||
308
+ errors .Is (err , yamux .ErrSessionShutdown ) ||
309
+ errors .Is (err , fasthttputil .ErrInmemoryListenerClosed ) {
278
310
return
279
311
}
280
312
281
313
p .opts .Logger .Warn (ctx , "acquire job" , slog .Error (err ))
282
314
return
283
315
}
284
316
if job .JobId == "" {
317
+ lastAcquireMutex .Lock ()
318
+ lastAcquire = time .Now ()
319
+ lastAcquireMutex .Unlock ()
285
320
return
286
321
}
287
322
0 commit comments