Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,9 @@ func New(config *libconfig.MetricsConfig) *Metrics {
prometheus.CounterOpts{
Subsystem: collectors.Subsystem,
Name: collectors.ContainersSeccompNotifierCountTotal.String(),
Help: "Amount of containers stopped because they used a forbidden syscalls by their name",
Help: "Number of forbidden syscalls by syscall and container name",
},
[]string{"name", "syscalls"},
[]string{"name", "syscall"},
),
}
return Instance()
Expand Down Expand Up @@ -448,8 +448,8 @@ func (m *Metrics) MetricContainersOOMTotalInc() {
m.metricContainersOOMTotal.Inc()
}

func (m *Metrics) MetricContainersSeccompNotifierCountTotalInc(name, syscalls string) {
c, err := m.metricContainersSeccompNotifierCountTotal.GetMetricWithLabelValues(name, syscalls)
func (m *Metrics) MetricContainersSeccompNotifierCountTotalInc(name, syscall string) {
c, err := m.metricContainersSeccompNotifierCountTotal.GetMetricWithLabelValues(name, syscall)
if err != nil {
logrus.Warnf("Unable to write container seccomp notifier metric: %v", err)
return
Expand Down
2 changes: 1 addition & 1 deletion server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ func (s *Server) startSeccompNotifierWatcher(ctx context.Context) error {
})
}

metrics.Instance().MetricContainersSeccompNotifierCountTotalInc(ctr.Name(), usedSyscalls)
metrics.Instance().MetricContainersSeccompNotifierCountTotalInc(ctr.Name(), syscall)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above we pull notifier.UsedSyscalls, which is what gives us usedSyscalls. I don't see us iterating over all of the syscalls that were reported. Shouldn't we call this function for each syscall?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is called in a loop, every time a seccomp notification arrives on the channel:

cri-o/server/server.go

Lines 746 to 750 in 093d680

for {
msg := <-s.seccompNotifierChan
ctx := msg.Ctx()
id := msg.ContainerID()
syscall := msg.Syscall()

usedSyscalls accumulates syscalls from each iteration in the same loop:

notifier.AddSyscall(syscall)

So we do call the function for each syscall - we just increment the metric count for the specific syscall name received in each iteration, i.e.:

| iteration | msg.Syscall() | metric values |
| --- | --- | --- |
| 0 | swapoff | {name="...", syscall="swapoff"} = 1 |
| 1 | swapoff | {name="...", syscall="swapoff"} = 2 |
| 2 | chroot | {name="...", syscall="swapoff"} = 2, {name="...", syscall="chroot"} = 1 |

}
}()

Expand Down
4 changes: 2 additions & 2 deletions test/seccomp_notifier.bats
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function teardown() {
grep -q "Got seccomp notifier message for container ID: $CTR (syscall = swapoff)" "$CRIO_LOG"
crictl inspect "$CTR" | jq -e '.status.reason == "seccomp killed"'
crictl inspect "$CTR" | jq -e '.status.message == "Used forbidden syscalls: swapoff (3x)"'
curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscalls="swapoff (3x)"} 1'
curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscall="swapoff"} 3'
}

@test "seccomp notifier with runtime/default but not stop" {
Expand Down Expand Up @@ -72,7 +72,7 @@ function teardown() {
# Assert
grep -q "Got seccomp notifier message for container ID: $CTR (syscall = swapoff)" "$CRIO_LOG"
crictl inspect "$CTR" | jq -e '.status.state == "CONTAINER_RUNNING"'
curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscalls="swapoff (3x)"} 1'
curl -sf "http://localhost:$PORT/metrics" | grep 'container_runtime_crio_containers_seccomp_notifier_count_total{name="k8s_podsandbox1-redis_podsandbox1_redhat.test.crio_redhat-test-crio_0",syscall="swapoff"} 3'
}

@test "seccomp notifier with custom profile" {
Expand Down
2 changes: 1 addition & 1 deletion tutorials/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Beside the [default golang based metrics][2], CRI-O provides the following addit
| `crio_image_layer_reuse_total` | | Counter | Reused (not pulled) local image layer count by name. |
| `crio_containers_oom_total` | | Counter | Total number of containers killed because they ran out of memory (OOM). |
| `crio_containers_oom_count_total` | `name` | Counter | Containers killed because they ran out of memory (OOM) by their name.<br>The label `name` can have high cardinality sometimes but it is in the interest of users giving them the ease to identify which container(s) are going into OOM state. Also, ideally very few containers should OOM keeping the label cardinality of `name` reasonably low. |
| `crio_containers_seccomp_notifier_count_total` | `name`, `syscalls` | Counter | Containers stopped because they used forbidden `syscalls` by their `name`. |
| `crio_containers_seccomp_notifier_count_total` | `name`, `syscall` | Counter | Forbidden `syscall` count resulting in killed containers by `name`. |
| `crio_processes_defunct` | | Gauge | Total number of defunct processes in the node |
| `crio_operations` | every CRI-O RPC\* | Counter | (DEPRECATED: in favour of `crio_operations_total`) Cumulative number of CRI-O operations by operation type. |
| `crio_operations_latency_microseconds_total` | every CRI-O RPC\*,<br><br>`network_setup_pod` (CNI pod network setup time),<br><br>`network_setup_overall` (Overall network setup time) | Summary | (DEPRECATED: in favour of `crio_operations_latency_seconds_total`) Latency in microseconds of CRI-O operations. Split-up by operation type. |
Expand Down