@@ -35,14 +35,18 @@ resource "null_resource" "permission_check" {
 }
 
 locals {
-  workspace_pod_name     = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  workspace_pod_instance = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  service_account_name   = "scaletest-sa"
-  cpu                    = 2
-  memory                 = 2
-  home_disk_size         = 10
-  scaletest_run_id       = "scaletest-${time_static.start_time.rfc3339}"
-  scaletest_run_dir      = "/home/coder/${local.scaletest_run_id}"
+  workspace_pod_name                             = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_instance                         = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_termination_grace_period_seconds = 7200 # 2 hours (cleanup timeout).
+  service_account_name                           = "scaletest-sa"
+  cpu                                            = 16
+  memory                                         = 64
+  home_disk_size                                 = 10
+  scaletest_run_id                               = "scaletest-${time_static.start_time.rfc3339}"
+  scaletest_run_dir                              = "/home/coder/${local.scaletest_run_id}"
+  grafana_url                                    = "https://stats.dev.c8s.io"
+  grafana_dashboard_uid                          = "qLVSTR-Vz"
+  grafana_dashboard_name                         = "coderv2-loadtest-dashboard"
 }
 
 data "coder_provisioner" "me" {
@@ -91,15 +95,14 @@ data "coder_parameter" "job_concurrency" {
   order       = 11
   type        = "number"
   name        = "Job concurrency"
-  default     = 10
+  default     = 0
   description = "The number of concurrent jobs (e.g. when producing workspace traffic)."
   mutable     = true
 
   # Setting zero = unlimited, but perhaps not a good idea,
   # we can raise this limit instead.
   validation {
-    min = 1
-    max = 100
+    min = 0
   }
 }
 
@@ -197,6 +200,121 @@ data "coder_parameter" "num_workspaces" {
   }
 }
 
+
+data "coder_parameter" "load_scenarios" {
+  order       = 22
+  name        = "Load Scenarios"
+  type        = "list(string)"
+  description = "The load scenarios to run."
+  mutable     = true
+  ephemeral   = true
+  default = jsonencode([
+    "SSH Traffic",
+    "Web Terminal Traffic",
+    "Dashboard Traffic",
+  ])
+}
+
+data "coder_parameter" "load_scenario_ssh_traffic_duration" {
+  order       = 23
+  name        = "SSH Traffic Duration"
+  type        = "number"
+  description = "The duration of the SSH traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_bytes_per_tick" {
+  order       = 24
+  name        = "SSH Bytes Per Tick"
+  type        = "number"
+  description = "The number of bytes to send per tick in the SSH traffic load scenario."
+  mutable     = true
+  default     = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_tick_interval" {
+  order       = 25
+  name        = "SSH Tick Interval"
+  type        = "number"
+  description = "The number of milliseconds between each tick in the SSH traffic load scenario."
+  mutable     = true
+  default     = 100
+  validation {
+    min = 1
+  }
+}
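+# With the defaults above, the SSH scenario sends roughly 1024 bytes every
+# 100 ms, i.e. about 10 kB/s of generated traffic (presumably per workspace
+# connection).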
+
+data "coder_parameter" "load_scenario_web_terminal_traffic_duration" {
+  order       = 26
+  name        = "Web Terminal Traffic Duration"
+  type        = "number"
+  description = "The duration of the web terminal traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_bytes_per_tick" {
+  order       = 27
+  name        = "Web Terminal Bytes Per Tick"
+  type        = "number"
+  description = "The number of bytes to send per tick in the web terminal traffic load scenario."
+  mutable     = true
+  default     = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_tick_interval" {
+  order       = 28
+  name        = "Web Terminal Tick Interval"
+  type        = "number"
+  description = "The number of milliseconds between each tick in the web terminal traffic load scenario."
+  mutable     = true
+  default     = 100
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_dashboard_traffic_duration" {
+  order       = 29
+  name        = "Dashboard Traffic Duration"
+  type        = "number"
+  description = "The duration of the dashboard traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_baseline_duration" {
+  order       = 30
+  name        = "Baseline Wait Duration"
+  type        = "number"
+  description = "The duration to wait before starting a load scenario in minutes."
+  mutable     = true
+  default     = 5
+  validation {
+    min = 0
+    max = 60
+  }
+}
+
 data "coder_parameter" "namespace" {
   order       = 999
   type        = "string"
@@ -221,21 +339,38 @@ resource "coder_agent" "main" {
     CODER_CONFIG_DIR : "/home/coder/.config/coderv2",
     CODER_USER_TOKEN : data.coder_workspace.me.owner_session_token,
     CODER_URL : data.coder_workspace.me.access_url,
+    CODER_USER : data.coder_workspace.me.owner,
+    CODER_WORKSPACE : data.coder_workspace.me.name,
 
     # Global scaletest envs that may affect each `coder exp scaletest` invocation.
     CODER_SCALETEST_PROMETHEUS_ADDRESS : "0.0.0.0:21112",
     CODER_SCALETEST_PROMETHEUS_WAIT : "60s",
     CODER_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
     CODER_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
 
+    # Expose as params as well, for reporting (TODO(mafredri): refactor, only have one).
+    SCALETEST_PARAM_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
+    SCALETEST_PARAM_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
+
     # Local envs passed as arguments to `coder exp scaletest` invocations.
     SCALETEST_RUN_ID : local.scaletest_run_id,
     SCALETEST_RUN_DIR : local.scaletest_run_dir,
-    SCALETEST_TEMPLATE : data.coder_parameter.workspace_template.value,
-    SCALETEST_SKIP_CLEANUP : "1",
-    SCALETEST_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
-    SCALETEST_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
-    SCALETEST_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+
+    SCALETEST_PARAM_TEMPLATE : data.coder_parameter.workspace_template.value,
+    SCALETEST_PARAM_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
+    SCALETEST_PARAM_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
+    SCALETEST_PARAM_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+    SCALETEST_PARAM_LOAD_SCENARIOS : data.coder_parameter.load_scenarios.value,
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_ssh_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_ssh_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_ssh_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_web_terminal_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_web_terminal_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}",
+
+    GRAFANA_URL : local.grafana_url,
 
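+    # SCRIPTS_ZIP below is the base64-encoded zip produced by archive_file;
+    # presumably the startup script extracts it into SCRIPTS_DIR.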
     SCRIPTS_ZIP : filebase64(data.archive_file.scripts_zip.output_path),
     SCRIPTS_DIR : "/tmp/scripts",
@@ -244,12 +379,13 @@ resource "coder_agent" "main" {
     vscode      = false
     ssh_helper  = false
   }
-  startup_script_timeout  = 3600
-  shutdown_script_timeout = 1800
+  startup_script_timeout  = 86400
+  shutdown_script_timeout = 7200
   startup_script_behavior = "blocking"
   startup_script          = file("startup.sh")
   shutdown_script         = file("shutdown.sh")
 
+  # IDEA(mafredri): It would be pretty cool to define metadata to expect JSON output, each field/item could become a separate metadata item.
   # Scaletest metadata.
   metadata {
     display_name = "Scaletest status"
@@ -332,7 +468,7 @@ resource "coder_app" "grafana" {
   agent_id     = coder_agent.main.id
   slug         = "00-grafana"
   display_name = "Grafana"
-  url          = "https://stats.dev.c8s.io/d/qLVSTR-Vz/coderv2-loadtest-dashboard?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
+  url          = "${local.grafana_url}/d/${local.grafana_dashboard_uid}/${local.grafana_dashboard_name}?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
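+  # The from/to query parameters are Grafana's time-range values in epoch
+  # milliseconds, hence the * 1000 on the unix timestamp.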
   icon         = "https://grafana.com/static/assets/img/fav32.png"
   external     = true
 }
@@ -409,7 +545,7 @@ resource "kubernetes_pod" "main" {
   }
   # Set the pod delete timeout to termination_grace_period_seconds + 1m.
   timeouts {
-    delete = "32m"
+    delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}m"
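+    # With the locals above this works out to (7200 + 120) / 60 = 122, i.e. a
+    # "122m" timeout: the termination grace period (121 minutes) plus one minute.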
   }
   spec {
     security_context {
@@ -421,8 +557,9 @@ resource "kubernetes_pod" "main" {
     service_account_name = local.service_account_name
 
     # Allow the coder agent to perform graceful shutdown and cleanup of
-    # scaletest resources, 30 minutes (cleanup timeout) + 1 minute.
-    termination_grace_period_seconds = 1860
+    # scaletest resources. We add an extra minute to ensure work
+    # completion is prioritized over the timeout.
+    termination_grace_period_seconds = local.workspace_pod_termination_grace_period_seconds + 60
 
     container {
       name = "dev"
@@ -440,6 +577,24 @@ resource "kubernetes_pod" "main" {
         name  = "CODER_AGENT_LOG_DIR"
         value = "${local.scaletest_run_dir}/logs"
       }
+      env {
+        name = "GRAFANA_API_TOKEN"
+        value_from {
+          secret_key_ref {
+            name = data.kubernetes_secret.grafana_editor_api_token.metadata[0].name
+            key  = "token"
+          }
+        }
+      }
+      env {
+        name = "SLACK_WEBHOOK_URL"
+        value_from {
+          secret_key_ref {
+            name = data.kubernetes_secret.slack_scaletest_notifications_webhook_url.metadata[0].name
+            key  = "url"
+          }
+        }
+      }
       resources {
         # Set requests and limits values such that we can do performant
         # execution of `coder scaletest` commands.
@@ -496,7 +651,7 @@ resource "kubernetes_pod" "main" {
             match_expressions {
               key      = "cloud.google.com/gke-nodepool"
               operator = "In"
-              values   = ["big-misc"] # Avoid placing on the same nodes as scaletest workspaces.
+              values   = ["big-workspacetraffic"] # Avoid placing on the same nodes as scaletest workspaces.
             }
           }
         }
@@ -505,6 +660,20 @@ resource "kubernetes_pod" "main" {
   }
 }
 
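+# The data sources below assume these secrets already exist in the workspace
+# namespace. A minimal sketch of how they could be created (placeholder values):
+#
+#   kubectl -n <namespace> create secret generic grafana-editor-api-token \
+#     --from-literal=token=<grafana-api-token>
+#   kubectl -n <namespace> create secret generic slack-scaletest-notifications-webhook-url \
+#     --from-literal=url=<slack-webhook-url>
+#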
+data "kubernetes_secret" "grafana_editor_api_token" {
+  metadata {
+    name      = "grafana-editor-api-token"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
+data "kubernetes_secret" "slack_scaletest_notifications_webhook_url" {
+  metadata {
+    name      = "slack-scaletest-notifications-webhook-url"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
 resource "kubernetes_manifest" "pod_monitor" {
   count    = data.coder_workspace.me.start_count
   manifest = {