Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7b46433

Browse files
authored
feat: start remote writing prometheus data (#16235)
Part of coder/internal#150 - Stop creating the VPC since we rely on an existing VPC in the scaletest GCP project now - Add prometheus remote writer to the new prometheus deployment
1 parent f27e73d commit 7b46433

File tree

7 files changed

+184
-46
lines changed

7 files changed

+184
-46
lines changed

scaletest/terraform/action/coder_traffic.tf

+13-21
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,20 @@ locals {
55

66
traffic_types = {
77
ssh = {
8-
wait_duration = "0m"
9-
duration = "30m"
10-
job_timeout = "35m"
8+
duration = "30m"
9+
job_timeout = "35m"
1110
flags = [
1211
"--ssh",
1312
]
1413
}
1514
webterminal = {
16-
wait_duration = "5m"
17-
duration = "25m"
18-
job_timeout = "30m"
19-
flags = []
15+
duration = "25m"
16+
job_timeout = "30m"
17+
flags = []
2018
}
2119
app = {
22-
wait_duration = "10m"
23-
duration = "20m"
24-
job_timeout = "25m"
20+
duration = "20m"
21+
job_timeout = "25m"
2522
flags = [
2623
"--app=wsec",
2724
]
@@ -34,19 +31,14 @@ resource "time_sleep" "wait_baseline" {
3431
kubernetes_job.create_workspaces_primary,
3532
kubernetes_job.create_workspaces_europe,
3633
kubernetes_job.create_workspaces_asia,
34+
helm_release.prometheus_chart_primary,
35+
helm_release.prometheus_chart_europe,
36+
helm_release.prometheus_chart_asia,
3737
]
3838

3939
create_duration = local.wait_baseline_duration
4040
}
4141

42-
resource "time_sleep" "wait_traffic" {
43-
for_each = local.traffic_types
44-
45-
depends_on = [time_sleep.wait_baseline]
46-
47-
create_duration = local.traffic_types[each.key].wait_duration
48-
}
49-
5042
resource "kubernetes_job" "workspace_traffic_primary" {
5143
provider = kubernetes.primary
5244

@@ -106,7 +98,7 @@ resource "kubernetes_job" "workspace_traffic_primary" {
10698
create = local.traffic_types[each.key].job_timeout
10799
}
108100

109-
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
101+
depends_on = [time_sleep.wait_baseline]
110102
}
111103

112104
resource "kubernetes_job" "workspace_traffic_europe" {
@@ -169,7 +161,7 @@ resource "kubernetes_job" "workspace_traffic_europe" {
169161
create = local.traffic_types[each.key].job_timeout
170162
}
171163

172-
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
164+
depends_on = [time_sleep.wait_baseline]
173165
}
174166

175167
resource "kubernetes_job" "workspace_traffic_asia" {
@@ -232,5 +224,5 @@ resource "kubernetes_job" "workspace_traffic_asia" {
232224
create = local.traffic_types[each.key].job_timeout
233225
}
234226

235-
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
227+
depends_on = [time_sleep.wait_baseline]
236228
}

scaletest/terraform/action/gcp_clusters.tf

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@ locals {
1010
url = "http://${var.name}-scaletest.${var.cloudflare_domain}"
1111
region = "us-east1"
1212
zone = "us-east1-c"
13-
cidr = "10.200.0.0/24"
13+
subnet = "scaletest"
1414
}
1515
europe = {
1616
subdomain = "${var.name}-europe-scaletest"
1717
url = "http://${var.name}-europe-scaletest.${var.cloudflare_domain}"
1818
region = "europe-west1"
1919
zone = "europe-west1-b"
20-
cidr = "10.201.0.0/24"
20+
subnet = "scaletest"
2121
}
2222
asia = {
2323
subdomain = "${var.name}-asia-scaletest"
2424
url = "http://${var.name}-asia-scaletest.${var.cloudflare_domain}"
2525
region = "asia-southeast1"
2626
zone = "asia-southeast1-a"
27-
cidr = "10.202.0.0/24"
27+
subnet = "scaletest"
2828
}
2929
}
3030
node_pools = {
@@ -72,8 +72,8 @@ resource "google_container_cluster" "cluster" {
7272
name = "${var.name}-${each.key}"
7373
location = each.value.zone
7474
project = var.project_id
75-
network = google_compute_network.vpc.name
76-
subnetwork = google_compute_subnetwork.subnet[each.key].name
75+
network = local.vpc_name
76+
subnetwork = local.subnet_name
7777
networking_mode = "VPC_NATIVE"
7878
default_max_pods_per_node = 256
7979
ip_allocation_policy { # Required with networking_mode=VPC_NATIVE

scaletest/terraform/action/gcp_db.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ resource "google_sql_database_instance" "db" {
2323

2424
ip_configuration {
2525
ipv4_enabled = false
26-
private_network = google_compute_network.vpc.id
26+
private_network = local.vpc_id
2727
}
2828

2929
insights_config {

scaletest/terraform/action/gcp_vpc.tf

+6-19
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,7 @@
1-
2-
resource "google_compute_network" "vpc" {
3-
project = var.project_id
4-
name = var.name
5-
auto_create_subnetworks = "false"
6-
depends_on = [
7-
google_project_service.api["compute.googleapis.com"]
8-
]
9-
}
10-
11-
resource "google_compute_subnetwork" "subnet" {
12-
for_each = local.deployments
13-
name = "${var.name}-${each.key}"
14-
project = var.project_id
15-
region = each.value.region
16-
network = google_compute_network.vpc.name
17-
ip_cidr_range = each.value.cidr
1+
locals {
2+
vpc_name = "scaletest"
3+
vpc_id = "projects/${var.project_id}/global/networks/${local.vpc_name}"
4+
subnet_name = "scaletest"
185
}
196

207
resource "google_compute_address" "coder" {
@@ -32,11 +19,11 @@ resource "google_compute_global_address" "sql_peering" {
3219
purpose = "VPC_PEERING"
3320
address_type = "INTERNAL"
3421
prefix_length = 16
35-
network = google_compute_network.vpc.id
22+
network = local.vpc_name
3623
}
3724

3825
resource "google_service_networking_connection" "private_vpc_connection" {
39-
network = google_compute_network.vpc.id
26+
network = local.vpc_id
4027
service = "servicenetworking.googleapis.com"
4128
reserved_peering_ranges = [google_compute_global_address.sql_peering.name]
4229
}
+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
locals {
2+
prometheus_helm_repo = "https://prometheus-community.github.io/helm-charts"
3+
prometheus_helm_chart = "kube-prometheus-stack"
4+
prometheus_release_name = "prometheus"
5+
prometheus_remote_write_send_interval = "15s"
6+
prometheus_remote_write_metrics_regex = ".*"
7+
}
8+
9+
resource "helm_release" "prometheus_chart_primary" {
10+
provider = helm.primary
11+
12+
repository = local.prometheus_helm_repo
13+
chart = local.prometheus_helm_chart
14+
name = local.prometheus_release_name
15+
namespace = kubernetes_namespace.coder_primary.metadata.0.name
16+
values = [templatefile("${path.module}/prometheus_helm_values.tftpl", {
17+
nodepool = google_container_node_pool.node_pool["primary_misc"].name,
18+
cluster = "primary",
19+
prometheus_remote_write_url = var.prometheus_remote_write_url,
20+
prometheus_remote_write_metrics_regex = local.prometheus_remote_write_metrics_regex,
21+
prometheus_remote_write_send_interval = local.prometheus_remote_write_send_interval,
22+
})]
23+
}
24+
25+
resource "kubectl_manifest" "pod_monitor_primary" {
26+
provider = kubectl.primary
27+
28+
yaml_body = <<YAML
29+
apiVersion: monitoring.coreos.com/v1
30+
kind: PodMonitor
31+
metadata:
32+
namespace: ${kubernetes_namespace.coder_primary.metadata.0.name}
33+
name: coder-monitoring
34+
spec:
35+
selector:
36+
matchLabels:
37+
"app.kubernetes.io/name": coder
38+
podMetricsEndpoints:
39+
- port: prometheus-http
40+
interval: 30s
41+
YAML
42+
43+
depends_on = [helm_release.prometheus_chart_primary]
44+
}
45+
46+
resource "helm_release" "prometheus_chart_europe" {
47+
provider = helm.europe
48+
49+
repository = local.prometheus_helm_repo
50+
chart = local.prometheus_helm_chart
51+
name = local.prometheus_release_name
52+
namespace = kubernetes_namespace.coder_europe.metadata.0.name
53+
values = [templatefile("${path.module}/prometheus_helm_values.tftpl", {
54+
nodepool = google_container_node_pool.node_pool["europe_misc"].name,
55+
cluster = "europe",
56+
prometheus_remote_write_url = var.prometheus_remote_write_url,
57+
prometheus_remote_write_metrics_regex = local.prometheus_remote_write_metrics_regex,
58+
prometheus_remote_write_send_interval = local.prometheus_remote_write_send_interval,
59+
})]
60+
}
61+
62+
resource "kubectl_manifest" "pod_monitor_europe" {
63+
provider = kubectl.europe
64+
65+
yaml_body = <<YAML
66+
apiVersion: monitoring.coreos.com/v1
67+
kind: PodMonitor
68+
metadata:
69+
namespace: ${kubernetes_namespace.coder_europe.metadata.0.name}
70+
name: coder-monitoring
71+
spec:
72+
selector:
73+
matchLabels:
74+
"app.kubernetes.io/name": coder
75+
podMetricsEndpoints:
76+
- port: prometheus-http
77+
interval: 30s
78+
YAML
79+
80+
depends_on = [helm_release.prometheus_chart_europe]
81+
}
82+
83+
resource "helm_release" "prometheus_chart_asia" {
84+
provider = helm.asia
85+
86+
repository = local.prometheus_helm_repo
87+
chart = local.prometheus_helm_chart
88+
name = local.prometheus_release_name
89+
namespace = kubernetes_namespace.coder_asia.metadata.0.name
90+
values = [templatefile("${path.module}/prometheus_helm_values.tftpl", {
91+
nodepool = google_container_node_pool.node_pool["asia_misc"].name,
92+
cluster = "asia",
93+
prometheus_remote_write_url = var.prometheus_remote_write_url,
94+
prometheus_remote_write_metrics_regex = local.prometheus_remote_write_metrics_regex,
95+
prometheus_remote_write_send_interval = local.prometheus_remote_write_send_interval,
96+
})]
97+
}
98+
99+
resource "kubectl_manifest" "pod_monitor_asia" {
100+
provider = kubectl.asia
101+
102+
yaml_body = <<YAML
103+
apiVersion: monitoring.coreos.com/v1
104+
kind: PodMonitor
105+
metadata:
106+
namespace: ${kubernetes_namespace.coder_asia.metadata.0.name}
107+
name: coder-monitoring
108+
spec:
109+
selector:
110+
matchLabels:
111+
"app.kubernetes.io/name": coder
112+
podMetricsEndpoints:
113+
- port: prometheus-http
114+
interval: 30s
115+
YAML
116+
117+
depends_on = [helm_release.prometheus_chart_asia]
118+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
alertmanager:
2+
enabled: false
3+
grafana:
4+
enabled: false
5+
prometheusOperator:
6+
affinity:
7+
nodeAffinity:
8+
requiredDuringSchedulingIgnoredDuringExecution:
9+
nodeSelectorTerms:
10+
- matchExpressions:
11+
- key: "cloud.google.com/gke-nodepool"
12+
operator: "In"
13+
values: ["${nodepool}"]
14+
prometheus:
15+
affinity:
16+
nodeAffinity:
17+
requiredDuringSchedulingIgnoredDuringExecution:
18+
nodeSelectorTerms:
19+
- matchExpressions:
20+
- key: "cloud.google.com/gke-nodepool"
21+
operator: "In"
22+
values: ["${nodepool}"]
23+
prometheusSpec:
24+
externalLabels:
25+
cluster: "${cluster}"
26+
podMonitorSelectorNilUsesHelmValues: false
27+
remoteWrite:
28+
- url: "${prometheus_remote_write_url}"
29+
tlsConfig:
30+
insecureSkipVerify: true
31+
writeRelabelConfigs:
32+
- sourceLabels: [__name__]
33+
regex: "${prometheus_remote_write_metrics_regex}"
34+
action: keep
35+
metadataConfig:
36+
sendInterval: "${prometheus_remote_write_send_interval}"

scaletest/terraform/action/vars.tf

+5
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,8 @@ variable "provisionerd_image_tag" {
8585
description = "Tag to use for Provisionerd image."
8686
default = "latest"
8787
}
88+
89+
// Prometheus
90+
variable "prometheus_remote_write_url" {
91+
description = "URL to push prometheus metrics to."
92+
}

0 commit comments

Comments
 (0)