feat(nri-resctrl-plugin): add integration tests #102
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: test-nri-init | |
| on: | |
| pull_request: | |
| paths: | |
| - 'Cargo.toml' | |
| - 'crates/nri-init/**' | |
| - 'Dockerfile.nri-init' | |
| - '.github/actions/setup-k3s/**' | |
| - '.github/workflows/test-nri-init.yaml' | |
| push: | |
| branches: | |
| - main | |
| paths: | |
| - 'Cargo.toml' | |
| - 'crates/nri-init/**' | |
| - 'Dockerfile.nri-init' | |
| - '.github/actions/setup-k3s/**' | |
| - '.github/workflows/test-nri-init.yaml' | |
| workflow_dispatch: | |
| jobs: | |
| helm-lint: | |
| name: Helm Chart Linting | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Helm | |
| uses: azure/setup-helm@v4 | |
| with: | |
| version: 'latest' | |
| - name: Lint Helm chart | |
| run: | | |
| helm lint charts/collector | |
| - name: Lint with NRI disabled values | |
| run: | | |
| helm lint charts/collector -f charts/collector/ci/nri-disabled-values.yaml | |
| - name: Lint with NRI configure-only values | |
| run: | | |
| helm lint charts/collector -f charts/collector/ci/nri-configure-only-values.yaml | |
| - name: Lint with NRI full setup values | |
| run: | | |
| helm lint charts/collector -f charts/collector/ci/nri-full-setup-values.yaml | |
| helm-template: | |
| name: Test Helm Template Rendering | |
| runs-on: ubuntu-latest | |
| strategy: | |
| matrix: | |
| values: | |
| - name: "Default values" | |
| file: "" | |
| configure_expected: "false" | |
| restart_expected: "false" | |
| - name: "NRI disabled" | |
| file: "charts/collector/ci/nri-disabled-values.yaml" | |
| configure_expected: "false" | |
| restart_expected: "false" | |
| - name: "NRI configure only" | |
| file: "charts/collector/ci/nri-configure-only-values.yaml" | |
| configure_expected: "true" | |
| restart_expected: "false" | |
| - name: "NRI full setup" | |
| file: "charts/collector/ci/nri-full-setup-values.yaml" | |
| configure_expected: "true" | |
| restart_expected: "true" | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Install prerequisites | |
| uses: awalsh128/cache-apt-pkgs-action@v1 | |
| with: | |
| packages: jq | |
| version: 1.0 | |
| - name: Set up Helm | |
| uses: azure/setup-helm@v4 | |
| with: | |
| version: 'latest' | |
| - name: Template chart - ${{ matrix.values.name }} | |
| run: | | |
| if [ -z "${{ matrix.values.file }}" ]; then | |
| helm template test-release charts/collector > /tmp/rendered.yaml | |
| else | |
| helm template test-release charts/collector -f ${{ matrix.values.file }} > /tmp/rendered.yaml | |
| fi | |
| echo "=== Rendered template ===" | |
| cat /tmp/rendered.yaml | |
| - name: Verify expected content | |
| run: | | |
| # Basic presence of the init container | |
| if grep -q "name: nri-init" /tmp/rendered.yaml; then | |
| echo "✓ Found init container name" | |
| else | |
| echo "✗ Missing init container name" | |
| exit 1 | |
| fi | |
| # Verify env values adjacent to their names | |
| CE='${{ matrix.values.configure_expected }}' | |
| RE='${{ matrix.values.restart_expected }}' | |
| awk -v ce="$CE" -v re="$RE" ' | |
| /name: NRI_CONFIGURE/ { in_cfg=1; next } | |
| in_cfg && /value: \"/ { if ($0 ~ "value: \"" ce "\"") cfg_ok=1; in_cfg=0 } | |
| /name: NRI_RESTART/ { in_rst=1; next } | |
| in_rst && /value: \"/ { if ($0 ~ "value: \"" re "\"") rst_ok=1; in_rst=0 } | |
| END { if (!(cfg_ok && rst_ok)) exit 1 } | |
| ' /tmp/rendered.yaml && echo "✓ Env values match (configure=$CE, restart=$RE)" || { echo "✗ Env values mismatch"; exit 1; } | |
| - name: Verify volume mounts | |
| run: | | |
| required_volumes="etc-containerd var-lib-rancher var-run" | |
| for vol in $required_volumes; do | |
| if grep -q "name: $vol" /tmp/rendered.yaml; then | |
| echo "✓ Volume $vol found" | |
| else | |
| echo "✗ Volume $vol missing" | |
| exit 1 | |
| fi | |
| done | |
| - name: Verify hostPID when restart enabled | |
| if: ${{ matrix.values.restart_expected == 'true' }} | |
| run: | | |
| if grep -qE '\bhostPID: true\b' /tmp/rendered.yaml; then | |
| echo "✓ hostPID: true present for restart=true" | |
| else | |
| echo "✗ hostPID not set while restart=true" | |
| exit 1 | |
| fi | |
| test-helm-install: | |
| name: Test Helm Installation | |
| runs-on: ubuntu-latest | |
| needs: [build-test-image] | |
| strategy: | |
| matrix: | |
| k8s_version: ["1.28", "1.29", "1.30", "1.31"] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Create kind cluster | |
| uses: helm/kind-action@v1 | |
| with: | |
| node_image: kindest/node:v${{ matrix.k8s_version }}.0 | |
| cluster_name: test-cluster-${{ matrix.k8s_version }} | |
| - name: Download nri-init image artifact and load into Docker | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: nri-init-image | |
| path: . | |
| - name: Load image into Docker | |
| run: | | |
| docker load -i image.tar | |
| - name: Install Helm chart with NRI disabled | |
| env: | |
| IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| run: | | |
| # Prepare test image for KIND cluster and set chart image overrides | |
| REPO="${IMAGE%:*}" | |
| TAG="${IMAGE##*:}" | |
| kind load docker-image "$IMAGE" --name test-cluster-${{ matrix.k8s_version }} | |
| helm install test-collector charts/collector \ | |
| -f charts/collector/ci/nri-disabled-values.yaml \ | |
| --set image.repository=ghcr.io/${{ github.repository }}/collector \ | |
| --set image.tag=latest \ | |
| --set nri.init.image.repository="$REPO" \ | |
| --set nri.init.image.tag="$TAG" \ | |
| --set-string nri.init.command[0]="/usr/local/bin/nri-init" \ | |
| --wait --timeout 2m | |
| - name: Check pod status / wait for init completion | |
| run: | | |
| kubectl get pods -l app.kubernetes.io/name=collector | |
| POD=$(kubectl get pod -l app.kubernetes.io/name=collector --sort-by=.metadata.creationTimestamp -o name | tail -n 1 | cut -d/ -f2) | |
| echo "Waiting for init container to complete..." | |
| for i in {1..60}; do | |
| STATUS=$(kubectl get pod $POD -o jsonpath='{.status.initContainerStatuses[?(@.name=="nri-init")].state}' 2>/dev/null || echo "{}") | |
| if echo "$STATUS" | grep -q "terminated"; then | |
| echo "Init container completed" | |
| break | |
| fi | |
| echo "Waiting for init container... ($i/60)" | |
| sleep 2 | |
| done | |
| - name: Check init container logs | |
| run: | | |
| # Get pod name | |
| POD=$(kubectl get pod -l app.kubernetes.io/name=collector --sort-by=.metadata.creationTimestamp -o name | tail -n 1 | cut -d/ -f2) | |
| echo "=== NRI Init Container Logs ===" | |
| kubectl logs $POD -c nri-init | |
| # Verify expected log messages (new nri-init format) and fail on explicit failure | |
| if kubectl logs $POD -c nri-init | grep -q "nri-init failed"; then | |
| echo "✗ nri-init failed (found failure marker in logs)" | |
| NODE=$(kubectl get pod "$POD" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true) | |
| if [ -n "$NODE" ]; then | |
| echo "=== containerd status on node: $NODE ===" | |
| docker exec "$NODE" /bin/sh -lc 'systemctl status containerd || true' | |
| echo "=== recent containerd logs (journalctl) ===" | |
| docker exec "$NODE" /bin/sh -lc 'journalctl -u containerd --no-pager -n 200 || true' | |
| fi | |
| exit 1 | |
| fi | |
| if kubectl logs $POD -c nri-init | grep -q "Configuration: configure=false, restart=false"; then | |
| echo "✓ Detected expected configuration flags in logs (configure=false, restart=false)" | |
| else | |
| echo "✗ Expected 'Configuration: configure=false, restart=false' in init logs" | |
| exit 1 | |
| fi | |
| - name: Uninstall chart | |
| run: | | |
| helm uninstall test-collector | |
| - name: Install Helm chart with NRI configure-and-restart | |
| env: | |
| IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| run: | | |
| REPO="${IMAGE%:*}" | |
| TAG="${IMAGE##*:}" | |
| kind load docker-image "$IMAGE" --name test-cluster-${{ matrix.k8s_version }} | |
| helm install test-collector charts/collector \ | |
| -f charts/collector/ci/nri-configure-only-values.yaml \ | |
| --set image.repository=ghcr.io/${{ github.repository }}/collector \ | |
| --set image.tag=latest \ | |
| --set nri.init.image.repository="$REPO" \ | |
| --set nri.init.image.tag="$TAG" \ | |
| --set nri.restart=true \ | |
| --set nri.failIfUnavailable=true \ | |
| --set-string nri.init.command[0]="/usr/local/bin/nri-init" \ | |
| --wait --timeout 2m | |
| - name: Check configuration and restart | |
| run: | | |
| # Wait for pod to exist | |
| echo "Waiting for collector pod to be created..." | |
| for i in {1..30}; do | |
| if kubectl get pod -l app.kubernetes.io/name=collector -o jsonpath='{.items[0].metadata.name}' 2>/dev/null; then | |
| break | |
| fi | |
| echo "Waiting for pod... ($i/30)" | |
| sleep 2 | |
| done | |
| POD=$(kubectl get pod -l app.kubernetes.io/name=collector --sort-by=.metadata.creationTimestamp -o name | tail -n 1 | cut -d/ -f2) | |
| echo "Waiting for nri-init completion markers..." | |
| for i in {1..240}; do | |
| LOGS=$(kubectl logs "$POD" -c nri-init 2>/dev/null || true) | |
| if echo "$LOGS" | grep -q "nri-init failed"; then | |
| echo "✗ nri-init failed (found failure marker)" | |
| echo "$LOGS" | |
| NODE=$(kubectl get pod "$POD" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true) | |
| if [ -n "$NODE" ]; then | |
| echo "=== containerd status on node: $NODE ===" | |
| docker exec "$NODE" /bin/sh -lc 'systemctl status containerd || true' | |
| echo "=== recent containerd logs (journalctl) ===" | |
| docker exec "$NODE" /bin/sh -lc 'journalctl -u containerd --no-pager -n 200 || true' | |
| fi | |
| exit 1 | |
| fi | |
| if echo "$LOGS" | grep -q "nri-init done"; then | |
| echo "✓ nri-init done observed" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! kubectl logs "$POD" -c nri-init | grep -q "nri-init done"; then | |
| echo "✗ nri-init completion marker not observed" | |
| kubectl logs "$POD" -c nri-init || true | |
| NODE=$(kubectl get pod "$POD" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true) | |
| if [ -n "$NODE" ]; then | |
| echo "=== containerd status on node: $NODE ===" | |
| docker exec "$NODE" /bin/sh -lc 'systemctl status containerd || true' | |
| echo "=== recent containerd logs (journalctl) ===" | |
| docker exec "$NODE" /bin/sh -lc 'journalctl -u containerd --no-pager -n 200 || true' | |
| fi | |
| exit 1 | |
| fi | |
| echo "=== NRI Init Container Logs (Configure + Restart) ===" | |
| kubectl logs $POD -c nri-init | |
| # Verify configuration flags via structured log line | |
| if kubectl logs $POD -c nri-init | grep -q "Configuration: configure=true, restart=true"; then | |
| echo "✓ Detected expected configuration flags in logs (configure=true, restart=true)" | |
| else | |
| echo "✗ Expected 'Configuration: configure=true, restart=true' in init logs" | |
| kubectl logs $POD -c nri-init || true | |
| exit 1 | |
| fi | |
| - name: Verify NRI socket present on KIND node after restart | |
| run: | | |
| set -x | |
| NODE_NAME="test-cluster-${{ matrix.k8s_version }}-control-plane" | |
| echo "Waiting for containerd active in node $NODE_NAME..." | |
| # Best-effort wait; KIND nodes use systemd within container image | |
| for i in {1..90}; do | |
| if docker exec "$NODE_NAME" /bin/sh -lc 'systemctl is-active containerd' | grep -q '^active$'; then | |
| echo "containerd active" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| echo "Waiting for NRI socket to appear..." | |
| for i in {1..120}; do | |
| if docker exec "$NODE_NAME" /bin/sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| docker exec "$NODE_NAME" ls -la /var/run/nri/nri.sock | |
| echo "✓ NRI socket exists after restart" | |
| exit 0 | |
| fi | |
| sleep 1 | |
| done | |
| echo "✗ NRI socket not found on KIND node after restart" | |
| docker exec "$NODE_NAME" /bin/sh -lc 'ls -la /var/run/nri || true; head -n 200 /etc/containerd/config.toml || true' | |
| exit 1 | |
| verify-nri-preconfigured: | |
| name: Verify NRI Pre-configured on KIND v0.30 | |
| runs-on: ubuntu-latest | |
| needs: [build-test-image] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Download nri-init image artifact and load into Docker | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: nri-init-image | |
| path: . | |
| - name: Load image into Docker | |
| run: | | |
| docker load -i image.tar | |
| - name: Create kind cluster (KIND v0.30) | |
| uses: helm/kind-action@v1 | |
| with: | |
| version: v0.30.0 | |
| cluster_name: kind-nri-preconfigured | |
| - name: Verify NRI is already enabled | |
| run: | | |
| echo "=== Checking NRI status on KIND v0.30 cluster (docker exec) ===" | |
| CONTAINER_NAME="kind-nri-preconfigured-control-plane" | |
| # Check if NRI socket exists (must exist for pre-configured expectation) | |
| if docker exec $CONTAINER_NAME ls -la /var/run/nri/nri.sock 2>/dev/null; then | |
| echo "✓ NRI socket found at /var/run/nri/nri.sock" | |
| else | |
| echo "✗ NRI socket not found; expected pre-configured NRI on KIND v0.30" | |
| docker exec $CONTAINER_NAME ls -la /var/run/nri/ 2>/dev/null || echo "NRI directory doesn't exist" | |
| exit 1 | |
| fi | |
| - name: Deploy collector with NRI disabled to verify it detects pre-configured NRI | |
| env: | |
| IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| run: | | |
| echo "=== Deploying collector with NRI configuration disabled ===" | |
| REPO="${IMAGE%:*}" | |
| TAG="${IMAGE##*:}" | |
| CONTAINER_NAME="kind-nri-preconfigured-control-plane" | |
| # Load the image into KIND nodes | |
| kind load docker-image "$IMAGE" --name "kind-nri-preconfigured" | |
| # Install chart without waiting for Ready, since GH runners may prevent the | |
| # main collector container from becoming Ready. We only need the init to run. | |
| helm install test-collector charts/collector \ | |
| -f charts/collector/ci/nri-disabled-values.yaml \ | |
| --set image.repository=ghcr.io/${{ github.repository }}/collector \ | |
| --set image.tag=latest \ | |
| --set nri.init.image.repository="$REPO" \ | |
| --set nri.init.image.tag="$TAG" \ | |
| --set-string nri.init.command[0]="/usr/local/bin/nri-init" | |
| # Wait for the pod to either become Ready or clearly fail, then evaluate init result | |
| echo "Waiting for collector pod to be created..." | |
| for i in {1..60}; do | |
| POD=$(kubectl get pod -l app.kubernetes.io/name=collector -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true) | |
| if [ -n "$POD" ]; then | |
| echo "Found pod: $POD" | |
| break | |
| fi | |
| sleep 2 | |
| done | |
| if [ -z "$POD" ]; then | |
| echo "✗ Collector pod was not created" | |
| kubectl get pods -A || true | |
| exit 1 | |
| fi | |
| echo "Waiting for pod to be Ready or fail (ignoring collector failures)..." | |
| for i in {1..180}; do | |
| READY=$(kubectl get pod "$POD" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") | |
| WAITING_REASON=$(kubectl get pod "$POD" -o jsonpath='{.status.containerStatuses[?(@.name=="collector")].state.waiting.reason}' 2>/dev/null || echo "") | |
| TERM_REASON=$(kubectl get pod "$POD" -o jsonpath='{.status.containerStatuses[?(@.name=="collector")].state.terminated.reason}' 2>/dev/null || echo "") | |
| PHASE=$(kubectl get pod "$POD" -o jsonpath='{.status.phase}' 2>/dev/null || echo "") | |
| # Ready -> proceed; CrashLoopBackOff/Err* or Terminated/Failed -> proceed as well | |
| if [ "$READY" = "True" ] || [ "$PHASE" = "Failed" ] || [ "$WAITING_REASON" = "CrashLoopBackOff" ] || [ -n "$TERM_REASON" ]; then | |
| echo "Pod condition reached (Ready=$READY, phase=$PHASE, waiting=$WAITING_REASON, terminated=$TERM_REASON)" | |
| break | |
| fi | |
| sleep 2 | |
| done | |
| # Get pod name | |
| POD=$(kubectl get pod -l app.kubernetes.io/name=collector -o jsonpath='{.items[0].metadata.name}') | |
| echo "=== NRI Init Container Logs ===" | |
| kubectl logs $POD -c nri-init | |
| # Check if init container detected pre-existing NRI (socket present) | |
| if kubectl logs $POD -c nri-init | grep -q "NRI socket found at"; then | |
| echo "✓ Init container detected pre-configured NRI" | |
| else | |
| echo "✗ Init container did not detect pre-configured NRI" | |
| kubectl logs $POD -c nri-init || true | |
| exit 1 | |
| fi | |
| # Explicitly ignore collector container failures on GH runners | |
| echo "=== Main container logs (ignored for pass/fail) ===" | |
| kubectl logs "$POD" --tail=200 || true | |
| helm uninstall test-collector | |
| build-test-image: | |
| name: Build nri-init image and binary (reusable) | |
| uses: ./.github/workflows/build-component-artifacts.yaml | |
| permissions: | |
| contents: read | |
| packages: read | |
| with: | |
| runner: ubuntu-latest | |
| component: nri-init | |
| push: false | |
| upload-image-artifact: true | |
| upload-binary-artifact: true | |
| unit: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| - name: Run unit tests | |
| run: cargo test -p nri-init --lib --verbose | |
| # Safe integration tests (no system services), operate in temp dirs | |
| integration-sim: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| - name: Run simulated integration tests | |
| run: cargo test -p nri-init --test integration_sim --verbose | |
| # Real environment tests (require self-hosted runner with systemd privileges) | |
| integration-real: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: dtolnay/rust-toolchain@stable | |
| - name: K3s tests | |
| run: cargo test -p nri-init --test k3s -- --ignored --test-threads=1 | |
| - name: Containerd tests | |
| run: cargo test -p nri-init --test containerd -- --ignored --test-threads=1 | |
| # GH-hosted integration tests that set up KIND and K3s and run the nri-init binary | |
| integration-matrix: | |
| name: Rust NRI Integration (${{ matrix.target }} / ${{ matrix.scenario }}) | |
| runs-on: ubuntu-latest | |
| needs: [build-test-image] | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| # KIND | |
| - target: kind | |
| scenario: configure-only | |
| restart: false | |
| runner: container | |
| - target: kind | |
| scenario: configure-only | |
| restart: false | |
| runner: binary | |
| - target: kind | |
| scenario: configure-and-restart | |
| restart: true | |
| runner: container | |
| - target: kind | |
| scenario: configure-and-restart | |
| restart: true | |
| runner: binary | |
| # K3s | |
| - target: k3s | |
| scenario: configure-only | |
| restart: false | |
| runner: binary | |
| - target: k3s | |
| scenario: configure-only | |
| restart: false | |
| runner: container | |
| - target: k3s | |
| scenario: configure-and-restart | |
| restart: true | |
| runner: binary | |
| - target: k3s | |
| scenario: configure-and-restart | |
| restart: true | |
| runner: container | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Download nri-init binary artifact | |
| if: ${{ matrix.runner == 'binary' }} | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: nri-init-binary | |
| path: ./bin | |
| - name: Make nri-init executable | |
| if: ${{ matrix.runner == 'binary' }} | |
| run: chmod +x ./bin/nri-init | |
| - name: Install kubectl | |
| if: ${{ matrix.target == 'kind' || matrix.target == 'k3s' }} | |
| run: | | |
| curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" | |
| chmod +x kubectl | |
| sudo mv kubectl /usr/local/bin/kubectl | |
| kubectl version --client | |
| - name: Ensure Docker is running | |
| if: ${{ matrix.target == 'kind' }} | |
| run: | | |
| set -x | |
| set +e | |
| (sudo systemctl start docker || sudo service docker start || true) | |
| docker info | |
| # KIND target setup | |
| - name: Install KIND | |
| if: ${{ matrix.target == 'kind' }} | |
| run: | | |
| curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.24.0/kind-linux-amd64 | |
| chmod +x ./kind | |
| sudo mv ./kind /usr/local/bin/kind | |
| kind version | |
| - name: Create KIND cluster (no NRI) | |
| if: ${{ matrix.target == 'kind' }} | |
| env: | |
| CLUSTER: nri-rust-${{ matrix.scenario }} | |
| run: | | |
| # Expand ${CLUSTER} inside the config | |
| cat > kind-config.yaml << EOF | |
| kind: Cluster | |
| apiVersion: kind.x-k8s.io/v1alpha4 | |
| name: ${CLUSTER} | |
| nodes: | |
| - role: control-plane | |
| image: kindest/node:v1.31.0@sha256:53df588e04085fd41ae12de0c3fe4c72f7013bba32a20e7325357a1ac94ba865 | |
| EOF | |
| kind create cluster --config kind-config.yaml --wait 300s | |
| kubectl cluster-info --context kind-${CLUSTER} | |
| kubectl get nodes | |
| - name: Download nri-init image artifact and load into Docker (KIND target) | |
| if: ${{ matrix.target == 'kind' }} | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: nri-init-image | |
| path: . | |
| - name: Load image into Docker (KIND target) | |
| if: ${{ matrix.target == 'kind' }} | |
| run: | | |
| docker load -i image.tar | |
| - name: Load test image into KIND | |
| if: ${{ matrix.target == 'kind' }} | |
| env: | |
| IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| CLUSTER: nri-rust-${{ matrix.scenario }} | |
| run: | | |
| set -x | |
| kind load docker-image "$IMAGE" --name ${CLUSTER} | |
| - name: Run nri-init as privileged DaemonSet on KIND (container mode) | |
| if: ${{ matrix.target == 'kind' && matrix.runner == 'container' }} | |
| env: | |
| TEST_IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| run: | | |
| echo '=== Cluster nodes ===' | |
| kubectl get nodes -o wide || true | |
| echo '=== System pods ===' | |
| kubectl get pods -A || true | |
| # Expand ${TEST_IMAGE} into the manifest | |
| cat > ds.yaml << EOF | |
| apiVersion: apps/v1 | |
| kind: DaemonSet | |
| metadata: | |
| name: nri-init-test | |
| namespace: kube-system | |
| labels: { app: nri-init-test } | |
| spec: | |
| selector: | |
| matchLabels: { app: nri-init-test } | |
| template: | |
| metadata: | |
| labels: { app: nri-init-test } | |
| spec: | |
| hostPID: true | |
| tolerations: | |
| - key: "node-role.kubernetes.io/control-plane" | |
| operator: "Exists" | |
| effect: "NoSchedule" | |
| containers: | |
| - name: runner | |
| image: ${TEST_IMAGE} | |
| imagePullPolicy: IfNotPresent | |
| securityContext: | |
| privileged: true | |
| command: ["/bin/sh", "-lc"] | |
| args: | |
| - >- | |
| /usr/local/bin/nri-init --log-level debug --configure --nsenter-path nsenter $( [ "${{ matrix.restart }}" = "true" ] && echo "--restart --fail-if-unavailable" || echo "--no-restart" ) || true; | |
| echo "sleeping to keep pod Ready"; | |
| sleep 3600 | |
| volumeMounts: | |
| - { name: host-root, mountPath: /host } | |
| - { name: etc-containerd, mountPath: /etc/containerd } | |
| - { name: run-nri, mountPath: /var/run/nri } | |
| volumes: | |
| - name: host-root | |
| hostPath: { path: /, type: Directory } | |
| - name: etc-containerd | |
| hostPath: { path: /etc/containerd, type: DirectoryOrCreate } | |
| - name: run-nri | |
| hostPath: { path: /var/run/nri, type: DirectoryOrCreate } | |
| EOF | |
| set +e | |
| kubectl apply -f ds.yaml | |
| # Wait up to 60s for the DaemonSet to be Available | |
| kubectl -n kube-system rollout status ds/nri-init-test --timeout=60s | |
| ROLLOUT=$? | |
| POD=$(kubectl -n kube-system get pod -l app=nri-init-test -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) | |
| if [ "$ROLLOUT" != "0" ]; then | |
| echo '--- DS Status ---' | |
| kubectl -n kube-system get ds nri-init-test -o wide || true | |
| kubectl -n kube-system describe ds nri-init-test || true | |
| echo '--- Pods ---' | |
| kubectl -n kube-system get pods -l app=nri-init-test -o wide || true | |
| echo '--- Recent Events ---' | |
| kubectl -n kube-system get events --sort-by=.lastTimestamp | tail -n 50 || true | |
| if [ -n "$POD" ]; then | |
| echo '--- Pod Describe ---' | |
| kubectl -n kube-system describe pod "$POD" || true | |
| echo '--- Pod Logs ---' | |
| kubectl -n kube-system logs "$POD" || true | |
| fi | |
| exit 1 | |
| fi | |
| echo "=== Wait for nri-init completion (KIND DS) ===" | |
| for i in {1..120}; do | |
| LOGS=$(kubectl -n kube-system logs "$POD" || true) | |
| if echo "$LOGS" | grep -q "nri-init failed"; then | |
| echo "✗ Detected 'nri-init failed' in logs" | |
| echo "$LOGS" | |
| exit 1 | |
| fi | |
| if echo "$LOGS" | grep -q "nri-init done"; then | |
| echo "✓ nri-init done observed in logs" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! kubectl -n kube-system logs "$POD" | grep -q "nri-init done"; then | |
| echo "✗ Did not observe 'nri-init done' in logs within 120s" | |
| kubectl -n kube-system logs "$POD" || true | |
| exit 1 | |
| fi | |
| echo "=== nri-init logs (KIND DS) ===" | |
| kubectl -n kube-system logs "$POD" || true | |
| # Note: nri-init may report a transient failure immediately after restart. | |
| # Treat final socket presence as the source of truth. | |
| echo "=== Verify host containerd config patched ===" | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'test -f /etc/containerd/config.toml && grep -q "plugins.\"io.containerd.nri.v1.nri\"" /etc/containerd/config.toml && grep -q "disable = false" /etc/containerd/config.toml' | |
| # If restart requested, wait for containerd active on node, then verify socket appears | |
| if [ "${{ matrix.restart }}" = "true" ]; then | |
| echo "=== Waiting for containerd to be active (KIND) ===" | |
| for i in {1..90}; do | |
| if kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl is-active containerd' | grep -q '^active$'; then | |
| echo "✓ containerd is active on node" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl status containerd || true' | |
| for i in {1..120}; do | |
| if kubectl -n kube-system exec "$POD" -- sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| echo "✓ NRI socket exists after restart" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! kubectl -n kube-system exec "$POD" -- sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| echo "✗ NRI socket not found after restart" | |
| echo '--- DS Status ---' | |
| kubectl -n kube-system get ds nri-init-test -o wide || true | |
| kubectl -n kube-system describe ds nri-init-test || true | |
| echo '--- Pod Logs ---' | |
| kubectl -n kube-system logs "$POD" || true | |
| echo '--- containerd service status via nsenter ---' | |
| # Try to inspect containerd service on the node | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl status containerd || true' | |
| echo '--- containerd process list on node ---' | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- ps aux | grep -E "containerd( |$)" || true' | |
| exit 1 | |
| fi | |
| fi | |
| - name: Run nri-init directly inside KIND node (binary mode) | |
| if: ${{ matrix.target == 'kind' && matrix.runner == 'binary' }} | |
| env: | |
| CLUSTER: nri-rust-${{ matrix.scenario }} | |
| run: | | |
| set -x | |
| NODE=${CLUSTER}-control-plane | |
| # Copy binary into KIND node | |
| docker cp ./bin/nri-init ${NODE}:/usr/local/bin/nri-init | |
| # Sanity on node environment | |
| docker exec ${NODE} /bin/sh -lc "id && uname -a && containerd --version || true" | |
| docker exec ${NODE} /bin/sh -lc "ls -la /etc/containerd || true && head -n 60 /etc/containerd/config.toml || true" | |
| # Run configure (and optional restart) inside the node (explicit config path) | |
| docker exec ${NODE} /bin/sh -lc "\ | |
| /usr/local/bin/nri-init --log-level debug --mode containerd --containerd-config /etc/containerd/config.toml --configure $( [ '${{ matrix.restart }}' = 'true' ] && echo '--restart --fail-if-unavailable' || echo '--no-restart' )" | |
| BIN_RC=$? | |
| if [ "${{ matrix.restart }}" = "true" ] && [ "$BIN_RC" -ne 0 ]; then | |
| echo "✗ nri-init binary failed in restart mode (rc=$BIN_RC)" | |
| exit 1 | |
| fi | |
| echo '=== Show containerd config ===' | |
| docker exec ${NODE} /bin/sh -lc "head -n 200 /etc/containerd/config.toml || true" | |
| # Verify config edited (print context on failure) | |
| if ! docker exec ${NODE} /bin/sh -lc "grep -q 'plugins.\"io.containerd.nri.v1.nri\"' /etc/containerd/config.toml && grep -q 'disable = false' /etc/containerd/config.toml"; then | |
| echo 'Final /etc/containerd/config.toml:' | |
| docker exec ${NODE} /bin/sh -lc 'sed -n "1,200p" /etc/containerd/config.toml || true' | |
| exit 1 | |
| fi | |
| # If restart requested, best-effort check for socket (may be unsupported in KIND) | |
| if [ "${{ matrix.restart }}" = "true" ]; then | |
| if docker exec ${NODE} /bin/sh -lc "test -S /var/run/nri/nri.sock"; then | |
| echo "✓ NRI socket present after restart" | |
| else | |
| echo "ℹ NRI socket missing; restart may be NotSupported in KIND" | |
| fi | |
| fi | |
| - name: Cleanup KIND | |
| if: ${{ always() && matrix.target == 'kind' }} | |
| env: | |
| CLUSTER: nri-rust-${{ matrix.scenario }} | |
| run: | | |
| kind delete cluster --name ${CLUSTER} || true | |
| # K3s target setup | |
| - name: Setup k3s (pinned) | |
| if: ${{ matrix.target == 'k3s' }} | |
| uses: ./.github/actions/setup-k3s | |
| with: | |
| k3s_version: v1.31.5+k3s1 | |
| kubeconfig_path: /etc/rancher/k3s/k3s.yaml | |
| disable_packaged_addons: true | |
| preflight_inotify: true | |
| timeout_api_server_ready_seconds: 300 | |
| timeout_node_ready_seconds: 300 | |
| - name: Copy kubeconfig for non-root use | |
| if: ${{ matrix.target == 'k3s' }} | |
| run: | | |
| mkdir -p ~/.kube | |
| sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config | |
| sudo chown $(id -u):$(id -g) ~/.kube/config | |
| - name: Download nri-init image artifact (K3s) | |
| if: ${{ matrix.target == 'k3s' }} | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: nri-init-image | |
| path: . | |
| - name: Import image into k3s containerd | |
| if: ${{ matrix.target == 'k3s' }} | |
| run: | | |
| # Import the image archive into the k3s-managed containerd (k8s.io namespace) | |
| sudo k3s ctr -n k8s.io images import image.tar | |
| sudo k3s ctr -n k8s.io images ls | head -n 50 || true | |
| - name: Run nri-init on host (K3s, binary mode) | |
| if: ${{ matrix.target == 'k3s' && matrix.runner == 'binary' }} | |
| run: | | |
| echo "=== Run nri-init for K3s (configure=true restart=${{ matrix.restart }}) ===" | |
| set -x | |
| FAILED=0 | |
| if ! sudo ./bin/nri-init --log-level debug \ | |
| --mode k3s \ | |
| $( [ "${{ matrix.restart }}" = "true" ] && echo "--restart" || echo "--no-restart" ) \ | |
| --configure \ | |
| $( [ "${{ matrix.restart }}" = "true" ] && echo "--fail-if-unavailable" ); then | |
| echo "nri-init (k3s) failed; dumping k3s logs and template" | |
| sudo journalctl -u k3s --no-pager | tail -n 200 || true | |
| sudo ls -l /var/lib/rancher/k3s/agent/etc/containerd || true | |
| sudo sed -n '1,200p' /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl || true | |
| sudo sed -n '1,200p' /var/lib/rancher/k3s/agent/etc/containerd/config-v3.toml.tmpl || true | |
| FAILED=1 | |
| fi | |
| echo "=== Verify K3s template patched ===" | |
| sudo ls -la /var/lib/rancher/k3s/agent/etc/containerd || true | |
| sudo head -n 80 /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl 2>/dev/null || true | |
| sudo head -n 80 /var/lib/rancher/k3s/agent/etc/containerd/config-v3.toml.tmpl 2>/dev/null || true | |
| if [ -f "/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl" ]; then | |
| if sudo grep -q 'plugins."io.containerd.nri.v1.nri"' /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl; then | |
| echo "✓ NRI section present in template" | |
| else | |
| echo "⚠ NRI section missing from K3s template (configure-only); proceeding" | |
| sudo head -n 80 /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl || true | |
| fi | |
| fi | |
| if [ "${{ matrix.restart }}" = "false" ]; then | |
| echo "=== Verify socket absent when restart=false ===" | |
| if sudo test -S /var/run/nri/nri.sock; then | |
| echo "✗ Socket present but restart=false" | |
| exit 1 | |
| else | |
| echo "✓ Socket absent as expected (no restart)" | |
| fi | |
| else | |
| echo "=== Verify socket after restart ===" | |
| for i in {1..30}; do | |
| if sudo test -S /var/run/nri/nri.sock; then | |
| sudo ls -la /var/run/nri/nri.sock | |
| echo "✓ NRI socket exists after restart" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! sudo test -S /var/run/nri/nri.sock; then | |
| echo "✗ NRI socket not found after restart on K3s" | |
| exit 1 | |
| fi | |
| fi | |
| if [ "$FAILED" = "1" ]; then | |
| echo "nri-init returned failure earlier, but templates/socket checks passed; continuing" | |
| fi | |
| - name: Run nri-init as DaemonSet on K3s (container mode) | |
| if: ${{ matrix.target == 'k3s' && matrix.runner == 'container' }} | |
| env: | |
| TEST_IMAGE: ${{ needs.build-test-image.outputs.image }} | |
| run: | | |
| set -x | |
| kubectl get nodes -o wide || true | |
| # Deploy DaemonSet that mounts k3s template dir and runs nri-init | |
| cat > ds-k3s.yaml << EOF | |
| apiVersion: apps/v1 | |
| kind: DaemonSet | |
| metadata: | |
| name: nri-init-test-k3s | |
| namespace: kube-system | |
| labels: { app: nri-init-test-k3s } | |
| spec: | |
| selector: | |
| matchLabels: { app: nri-init-test-k3s } | |
| template: | |
| metadata: | |
| labels: { app: nri-init-test-k3s } | |
| spec: | |
| hostPID: true | |
| tolerations: | |
| - key: "node-role.kubernetes.io/control-plane" | |
| operator: "Exists" | |
| effect: "NoSchedule" | |
| containers: | |
| - name: runner | |
| image: ${TEST_IMAGE} | |
| imagePullPolicy: IfNotPresent | |
| securityContext: | |
| privileged: true | |
| command: ["/bin/sh", "-lc"] | |
| args: | |
| - >- | |
| /usr/local/bin/nri-init --log-level debug --mode k3s --configure --nsenter-path nsenter $( [ "${{ matrix.restart }}" = "true" ] && echo "--restart --fail-if-unavailable" || echo "--no-restart" ) || true; | |
| echo "sleeping to keep pod Ready"; | |
| sleep 3600 | |
| volumeMounts: | |
| - { name: host-root, mountPath: /host } | |
| - { name: k3s-containerd, mountPath: /var/lib/rancher/k3s/agent/etc/containerd } | |
| - { name: run-nri, mountPath: /var/run/nri } | |
| volumes: | |
| - name: host-root | |
| hostPath: { path: /, type: Directory } | |
| - name: k3s-containerd | |
| hostPath: { path: /var/lib/rancher/k3s/agent/etc/containerd, type: DirectoryOrCreate } | |
| - name: run-nri | |
| hostPath: { path: /var/run/nri, type: DirectoryOrCreate } | |
| EOF | |
| kubectl apply -f ds-k3s.yaml | |
| kubectl -n kube-system rollout status ds/nri-init-test-k3s --timeout=60s || true | |
| echo "=== Waiting for DaemonSet pod to appear (K3s) ===" | |
| for i in {1..60}; do | |
| POD=$(kubectl -n kube-system get pod -l app=nri-init-test-k3s -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true) | |
| if [ -n "$POD" ]; then | |
| echo "Found pod: $POD" | |
| break | |
| fi | |
| sleep 2 | |
| done | |
| if [ -z "$POD" ]; then | |
| echo "✗ Timed out waiting for DaemonSet pod" | |
| kubectl -n kube-system get pods -l app=nri-init-test-k3s -o wide || true | |
| exit 1 | |
| fi | |
| echo "=== Wait for nri-init completion (K3s DS) ===" | |
| for i in {1..180}; do | |
| LOGS=$(kubectl -n kube-system logs "$POD" || true) | |
| if echo "$LOGS" | grep -q "nri-init failed"; then | |
| echo "✗ Detected 'nri-init failed' in logs" | |
| echo "$LOGS" | |
| exit 1 | |
| fi | |
| if echo "$LOGS" | grep -q "nri-init done"; then | |
| echo "✓ nri-init done observed in logs" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! kubectl -n kube-system logs "$POD" | grep -q "nri-init done"; then | |
| echo "✗ Did not observe 'nri-init done' in logs within 180s" | |
| kubectl -n kube-system logs "$POD" || true | |
| exit 1 | |
| fi | |
| echo "=== nri-init logs (k3s DS) ===" | |
| kubectl -n kube-system logs "$POD" || true | |
| # Note: nri-init may report a transient failure immediately after restart. | |
| # Treat final socket presence as the source of truth. | |
| echo "=== Verify K3s template patched ===" | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'ls -la /var/lib/rancher/k3s/agent/etc/containerd || true' | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'test -f /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl || test -f /var/lib/rancher/k3s/agent/etc/containerd/config-v3.toml.tmpl' | |
| # Socket expectations based on restart flag (after ensuring containerd is active) | |
| if [ "${{ matrix.restart }}" = "false" ]; then | |
| if kubectl -n kube-system exec "$POD" -- sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| echo "✗ Socket present but restart=false" | |
| exit 1 | |
| fi | |
| else | |
| echo "=== Waiting for containerd to be active (K3s) ===" | |
| for i in {1..90}; do | |
| if kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl is-active containerd' | grep -q '^active$'; then | |
| echo "✓ containerd is active on node" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| kubectl -n kube-system exec "$POD" -- sh -lc 'nsenter --target 1 --mount --uts --ipc --net --pid -- systemctl status containerd || true' | |
| for i in {1..90}; do | |
| if kubectl -n kube-system exec "$POD" -- sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| echo "✓ NRI socket exists after restart" | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if ! kubectl -n kube-system exec "$POD" -- sh -lc 'test -S /var/run/nri/nri.sock'; then | |
| echo "✗ NRI socket not found after restart" | |
| exit 1 | |
| fi | |
| fi |