@@ -35,14 +35,18 @@ resource "null_resource" "permission_check" {
 }

 locals {
-  workspace_pod_name     = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  workspace_pod_instance = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  service_account_name = "scaletest-sa"
-  cpu = 2
-  memory = 2
-  home_disk_size = 10
-  scaletest_run_id = "scaletest-${time_static.start_time.rfc3339}"
-  scaletest_run_dir = "/home/coder/${local.scaletest_run_id}"
+  workspace_pod_name = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_instance = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_termination_grace_period_seconds = 7200 # 2 hours (cleanup timeout).
+  service_account_name = "scaletest-sa"
+  cpu = 16
+  memory = 64
+  home_disk_size = 10
+  scaletest_run_id = "scaletest-${time_static.start_time.rfc3339}"
+  scaletest_run_dir = "/home/coder/${local.scaletest_run_id}"
+  grafana_url = "https://stats.dev.c8s.io"
+  grafana_dashboard_uid = "qLVSTR-Vz"
+  grafana_dashboard_name = "coderv2-loadtest-dashboard"
 }

 data "coder_provisioner" "me" {
@@ -91,15 +95,14 @@ data "coder_parameter" "job_concurrency" {
   order = 11
   type = "number"
   name = "Job concurrency"
-  default = 10
+  default = 0
   description = "The number of concurrent jobs (e.g. when producing workspace traffic)."
   mutable = true

   # Setting zero = unlimited, but perhaps not a good idea,
   # we can raise this limit instead.
   validation {
-    min = 1
-    max = 100
+    min = 0
   }
 }

@@ -197,6 +200,121 @@ data "coder_parameter" "num_workspaces" {
   }
 }

+
+data "coder_parameter" "load_scenarios" {
+  order = 22
+  name = "Load Scenarios"
+  type = "list(string)"
+  description = "The load scenarios to run."
+  mutable = true
+  ephemeral = true
+  default = jsonencode([
+    "SSH Traffic",
+    "Web Terminal Traffic",
+    "Dashboard Traffic",
+  ])
+}
+
+data "coder_parameter" "load_scenario_ssh_traffic_duration" {
+  order = 23
+  name = "SSH Traffic Duration"
+  type = "number"
+  description = "The duration of the SSH traffic load scenario in minutes."
+  mutable = true
+  default = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_bytes_per_tick" {
+  order = 24
+  name = "SSH Bytes Per Tick"
+  type = "number"
+  description = "The number of bytes to send per tick in the SSH traffic load scenario."
+  mutable = true
+  default = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_tick_interval" {
+  order = 25
+  name = "SSH Tick Interval"
+  type = "number"
+  description = "The number of milliseconds between each tick in the SSH traffic load scenario."
+  mutable = true
+  default = 100
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_traffic_duration" {
+  order = 26
+  name = "Web Terminal Traffic Duration"
+  type = "number"
+  description = "The duration of the web terminal traffic load scenario in minutes."
+  mutable = true
+  default = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_bytes_per_tick" {
+  order = 27
+  name = "Web Terminal Bytes Per Tick"
+  type = "number"
+  description = "The number of bytes to send per tick in the web terminal traffic load scenario."
+  mutable = true
+  default = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_tick_interval" {
+  order = 28
+  name = "Web Terminal Tick Interval"
+  type = "number"
+  description = "The number of milliseconds between each tick in the web terminal traffic load scenario."
+  mutable = true
+  default = 100
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_dashboard_traffic_duration" {
+  order = 29
+  name = "Dashboard Traffic Duration"
+  type = "number"
+  description = "The duration of the dashboard traffic load scenario in minutes."
+  mutable = true
+  default = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_baseline_duration" {
+  order = 26
+  name = "Baseline Wait Duration"
+  type = "number"
+  description = "The duration to wait before starting a load scenario in minutes."
+  mutable = true
+  default = 5
+  validation {
+    min = 0
+    max = 60
+  }
+}
+
 data "coder_parameter" "namespace" {
   order = 999
   type = "string"
@@ -221,21 +339,38 @@ resource "coder_agent" "main" {
     CODER_CONFIG_DIR : "/home/coder/.config/coderv2",
     CODER_USER_TOKEN : data.coder_workspace.me.owner_session_token,
     CODER_URL : data.coder_workspace.me.access_url,
+    CODER_USER : data.coder_workspace.me.owner,
+    CODER_WORKSPACE : data.coder_workspace.me.name,

     # Global scaletest envs that may affect each `coder exp scaletest` invocation.
     CODER_SCALETEST_PROMETHEUS_ADDRESS : "0.0.0.0:21112",
     CODER_SCALETEST_PROMETHEUS_WAIT : "60s",
     CODER_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
     CODER_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",

+    # Expose as params as well, for reporting (TODO(mafredri): refactor, only have one).
+    SCALETEST_PARAM_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
+    SCALETEST_PARAM_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
+
     # Local envs passed as arguments to `coder exp scaletest` invocations.
     SCALETEST_RUN_ID : local.scaletest_run_id,
     SCALETEST_RUN_DIR : local.scaletest_run_dir,
-    SCALETEST_TEMPLATE : data.coder_parameter.workspace_template.value,
-    SCALETEST_SKIP_CLEANUP : "1",
-    SCALETEST_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
-    SCALETEST_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
-    SCALETEST_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+
+    SCALETEST_PARAM_TEMPLATE : data.coder_parameter.workspace_template.value,
+    SCALETEST_PARAM_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
+    SCALETEST_PARAM_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
+    SCALETEST_PARAM_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+    SCALETEST_PARAM_LOAD_SCENARIOS : data.coder_parameter.load_scenarios.value,
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_ssh_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_ssh_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_ssh_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_web_terminal_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_web_terminal_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}",
+
+    GRAFANA_URL : local.grafana_url,

     SCRIPTS_ZIP : filebase64(data.archive_file.scripts_zip.output_path),
     SCRIPTS_DIR : "/tmp/scripts",
@@ -244,12 +379,13 @@ resource "coder_agent" "main" {
     vscode = false
     ssh_helper = false
   }
-  startup_script_timeout = 3600
-  shutdown_script_timeout = 1800
+  startup_script_timeout = 86400
+  shutdown_script_timeout = 7200
   startup_script_behavior = "blocking"
   startup_script = file("startup.sh")
   shutdown_script = file("shutdown.sh")

+  # IDEA(mafredri): It would be pretty cool to define metadata to expect JSON output, each field/item could become a separate metadata item.
   # Scaletest metadata.
   metadata {
     display_name = "Scaletest status"
@@ -332,7 +468,7 @@ resource "coder_app" "grafana" {
   agent_id = coder_agent.main.id
   slug = "00-grafana"
   display_name = "Grafana"
-  url = "https://stats.dev.c8s.io/d/qLVSTR-Vz/coderv2-loadtest-dashboard?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
+  url = "${local.grafana_url}/d/${local.grafana_dashboard_uid}/${local.grafana_dashboard_name}?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
   icon = "https://grafana.com/static/assets/img/fav32.png"
   external = true
 }
@@ -409,7 +545,7 @@ resource "kubernetes_pod" "main" {
   }
   # Set the pod delete timeout to termination_grace_period_seconds + 1m.
   timeouts {
-    delete = "32m"
+    delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}m"
   }
   spec {
     security_context {
@@ -421,8 +557,9 @@ resource "kubernetes_pod" "main" {
     service_account_name = local.service_account_name

     # Allow the coder agent to perform graceful shutdown and cleanup of
-    # scaletest resources, 30 minutes (cleanup timeout) + 1 minute.
-    termination_grace_period_seconds = 1860
+    # scaletest resources. We add an extra minute to ensure work
+    # completion is prioritized over the timeout.
+    termination_grace_period_seconds = local.workspace_pod_termination_grace_period_seconds + 60

     container {
       name = "dev"
@@ -440,6 +577,24 @@ resource "kubernetes_pod" "main" {
         name = "CODER_AGENT_LOG_DIR"
         value = "${local.scaletest_run_dir}/logs"
       }
+      env {
+        name = "GRAFANA_API_TOKEN"
+        value_from {
+          secret_key_ref {
+            name = data.kubernetes_secret.grafana_editor_api_token.metadata[0].name
+            key = "token"
+          }
+        }
+      }
+      env {
+        name = "SLACK_WEBHOOK_URL"
+        value_from {
+          secret_key_ref {
+            name = data.kubernetes_secret.slack_scaletest_notifications_webhook_url.metadata[0].name
+            key = "url"
+          }
+        }
+      }
       resources {
         # Set requests and limits values such that we can do performant
         # execution of `coder scaletest` commands.
@@ -496,7 +651,7 @@ resource "kubernetes_pod" "main" {
           match_expressions {
             key = "cloud.google.com/gke-nodepool"
             operator = "In"
-            values = ["big-misc"] # Avoid placing on the same nodes as scaletest workspaces.
+            values = ["big-workspacetraffic"] # Avoid placing on the same nodes as scaletest workspaces.
           }
         }
       }
@@ -505,6 +660,20 @@ resource "kubernetes_pod" "main" {
   }
 }

+data "kubernetes_secret" "grafana_editor_api_token" {
+  metadata {
+    name = "grafana-editor-api-token"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
+data "kubernetes_secret" "slack_scaletest_notifications_webhook_url" {
+  metadata {
+    name = "slack-scaletest-notifications-webhook-url"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
 resource "kubernetes_manifest" "pod_monitor" {
   count = data.coder_workspace.me.start_count
   manifest = {