diff --git a/server/metrics/metrics.go b/server/metrics/metrics.go index 8dd99c9a1b2..1a8ee2a8eb3 100644 --- a/server/metrics/metrics.go +++ b/server/metrics/metrics.go @@ -314,9 +314,9 @@ func New(config *libconfig.MetricsConfig) *Metrics { prometheus.CounterOpts{ Subsystem: collectors.Subsystem, Name: collectors.ContainersSeccompNotifierCountTotal.String(), - Help: "Amount of containers stopped because they used a forbidden syscalls by their name", + Help: "Number of forbidden syscalls by syscall and container name", }, - []string{"name", "syscalls"}, + []string{"name", "syscall"}, ), } return Instance() @@ -448,8 +448,8 @@ func (m *Metrics) MetricContainersOOMTotalInc() { m.metricContainersOOMTotal.Inc() } -func (m *Metrics) MetricContainersSeccompNotifierCountTotalInc(name, syscalls string) { - c, err := m.metricContainersSeccompNotifierCountTotal.GetMetricWithLabelValues(name, syscalls) +func (m *Metrics) MetricContainersSeccompNotifierCountTotalInc(name, syscall string) { + c, err := m.metricContainersSeccompNotifierCountTotal.GetMetricWithLabelValues(name, syscall) if err != nil { logrus.Warnf("Unable to write container seccomp notifier metric: %v", err) return diff --git a/server/server.go b/server/server.go index 08dfc30fae6..f632ff4e564 100644 --- a/server/server.go +++ b/server/server.go @@ -782,7 +782,7 @@ func (s *Server) startSeccompNotifierWatcher(ctx context.Context) error { }) } - metrics.Instance().MetricContainersSeccompNotifierCountTotalInc(ctr.Name(), usedSyscalls) + metrics.Instance().MetricContainersSeccompNotifierCountTotalInc(ctr.Name(), syscall) } }() diff --git a/test/seccomp_notifier.bats b/test/seccomp_notifier.bats index 55596ac2b00..9cfb36faa94 100644 --- a/test/seccomp_notifier.bats +++ b/test/seccomp_notifier.bats @@ -44,7 +44,7 @@ function teardown() { grep -q "Got seccomp notifier message for container ID: $CTR (syscall = swapoff)" "$CRIO_LOG" crictl inspect "$CTR" | jq -e '.status.reason == "seccomp killed"' crictl inspect "$CTR" 
| jq -e '.status.message == "Used forbidden syscalls: swapoff (3x)"' - curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscalls="swapoff (3x)"} 1' + curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscall="swapoff"} 3' } @test "seccomp notifier with runtime/default but not stop" { @@ -72,7 +72,7 @@ function teardown() { # Assert grep -q "Got seccomp notifier message for container ID: $CTR (syscall = swapoff)" "$CRIO_LOG" crictl inspect "$CTR" | jq -e '.status.state == "CONTAINER_RUNNING"' - curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscalls="swapoff (3x)"} 1' + curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscall="swapoff"} 3' } @test "seccomp notifier with custom profile" { diff --git a/tutorials/metrics.md b/tutorials/metrics.md index 65f43357a52..d7b879c2a80 100644 --- a/tutorials/metrics.md +++ b/tutorials/metrics.md @@ -53,7 +53,7 @@ Beside the [default golang based metrics][2], CRI-O provides the following addit | `crio_image_layer_reuse_total` | | Counter | Reused (not pulled) local image layer count by name. | | `crio_containers_oom_total` | | Counter | Total number of containers killed because they ran out of memory (OOM). | | `crio_containers_oom_count_total` | `name` | Counter | Containers killed because they ran out of memory (OOM) by their name.
The label `name` can have high cardinality sometimes but it is in the interest of users giving them the ease to identify which container(s) are going into OOM state. Also, ideally very few containers should OOM keeping the label cardinality of `name` reasonably low. | -| `crio_containers_seccomp_notifier_count_total` | `name`, `syscalls` | Counter | Containers stopped because they used forbidden `syscalls` by their `name`. | +| `crio_containers_seccomp_notifier_count_total` | `name`, `syscall` | Counter | Number of forbidden syscalls used by containers, by `syscall` and container `name` (counted whether or not the container gets stopped). | | `crio_processes_defunct` | | Gauge | Total number of defunct processes in the node | | `crio_operations` | every CRI-O RPC\* | Counter | (DEPRECATED: in favour of `crio_operations_total`) Cumulative number of CRI-O operations by operation type. | | `crio_operations_latency_microseconds_total` | every CRI-O RPC\*,

`network_setup_pod` (CNI pod network setup time),

`network_setup_overall` (Overall network setup time) | Summary | (DEPRECATED: in favour of `crio_operations_latency_seconds_total`) Latency in microseconds of CRI-O operations. Split-up by operation type. |