Thanks to visit codestin.com
Credit goes to github.com

Skip to content

E2E Test Agents

E2E Test Agents #119

Workflow file for this run

# This workflow tests enrolling of agents on the supported platforms.
#
# It starts the latest release of fleet with the "fleetctl preview" command.
# It generates the installers for the latest version of fleetd with the
# "fleetctl package" command.
#
# It tests across a matrix of configurations:
# OS: mac/Linux/Windows
# Updates: enabled/disabled
# Channels (for each of orbit/osquery/desktop): edge/stable
# Arch: arm/x86
#
# Troubleshooting
# The top two errors seen while developing this:
# 1) Jobs are queued waiting for runners long enough for the entire workflow to fail. Scheduling for the middle of the night attempts to mitigate this. Timeouts have been tuned to try to manage it as well.
# 2) Network issues (commonly related to Cloudflare tunnels) cause some requests to fail.
#
# Upon failure, the workflow will automatically retry up to 3 times. A Slack notification is sent only if all retries are exhausted.
name: E2E Test Agents
on:
  # Manual trigger. The `retry` input carries the attempt counter when a failed run re-dispatches this workflow.
  workflow_dispatch:
    inputs:
      retry:
        description: 'Number of retries attempted so far'
        type: number
        default: 0
  schedule:
    - cron: '0 5 * * *' # Nightly 5AM UTC
  # Only PRs that touch this workflow file trigger it.
  pull_request:
    paths:
      - '.github/workflows/e2e-agent.yml'
permissions:
  contents: read
  actions: read
# The group key ends with the cron expression for scheduled runs and with the event name otherwise,
# so every cron schedule, workflow_dispatch and pull_request each land in their own concurrency group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id}}-${{ github.event.schedule || github.event_name }}
  cancel-in-progress: true
defaults:
  run:
    # Naming bash explicitly gives fail-fast behaviour (bash -eo pipefail).
    # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
    shell: bash
env:
  # Supply-chain guard: refuse npm packages younger than 12 hours. See .npmrc.
  NPM_CONFIG_MIN_RELEASE_AGE: 0.5
jobs:
# Picks a random UUID-based Cloudflare tunnel subdomain and exposes it, together with the
# matching public https address, as job outputs for the jobs that declare `needs: gen`.
gen:
  runs-on: ubuntu-latest
  outputs:
    subdomain: ${{ steps.gen.outputs.subdomain }}
    address: ${{ steps.gen.outputs.address }}
  steps:
    - name: Harden Runner
      uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0
      with:
        egress-policy: audit
    - id: gen
      run: |
        # One UUID feeds both outputs so the subdomain and the address always agree.
        TUNNEL_ID=$(uuidgen)
        {
          echo "subdomain=fleet-test-$TUNNEL_ID"
          echo "address=https://fleet-test-$TUNNEL_ID.fleetuem.com"
        } >> $GITHUB_OUTPUT
# Runs the Fleet preview server behind a Cloudflare tunnel and keeps it alive until the
# `summary` job reports completion (bounded by timeout-minutes). If any step fails, the
# last step cancels the whole workflow run.
run-server:
  timeout-minutes: 240
  runs-on: ubuntu-8core
  needs: gen
  steps:
    - name: Harden Runner
      uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0
      with:
        egress-policy: audit
    - name: Start tunnel
      env:
        CERT_PEM: ${{ secrets.CLOUDFLARE_TUNNEL_FLEETUEM_CERT_B64 }}
      run: |
        # Increase maximum receive buffer size to roughly 2.5 MB.
        # Cloudflared uses quic-go. This buffer holds packets that have been received by the kernel,
        # but not yet read by the application (quic-go in this case). Once this buffer fills up, the
        # kernel will drop any new incoming packet.
        # See https://github.com/quic-go/quic-go/wiki/UDP-Receive-Buffer-Size.
        sudo sysctl -w net.core.rmem_max=2500000
        # Install cloudflared and run tunnel
        wget https://github.com/cloudflare/cloudflared/releases/download/2026.3.0/cloudflared-linux-amd64.deb
        sudo dpkg -i cloudflared-linux-amd64.deb
        echo "$CERT_PEM" | base64 -d > cert.pem
        cloudflared tunnel --origincert cert.pem create ${{ needs.gen.outputs.subdomain }}
        cloudflared tunnel --origincert cert.pem route dns ${{ needs.gen.outputs.subdomain }} ${{ needs.gen.outputs.subdomain }}
        # The tunnel runs in the background and forwards to the Fleet server port used below.
        cloudflared tunnel --origincert cert.pem --url http://localhost:1337 --logfile cloudflared.log run ${{ needs.gen.outputs.subdomain }} &
        # Block until the first connection of the tunnel is no longer pending a reconnect.
        until [[ $(cloudflared tunnel --origincert cert.pem info -o json ${{ needs.gen.outputs.subdomain }} | jq '.conns[0].conns[0].is_pending_reconnect') = false ]]; do
          echo "Awaiting tunnel ready..."
          sleep 1
        done
    - name: Run Fleet server
      run: |
        npm install -g fleetctl
        fleetctl preview --no-hosts --disable-open-browser
        fleetctl config set --address ${{ needs.gen.outputs.address }}
        fleetctl get enroll-secret
        docker compose -f ~/.fleet/preview/docker-compose.yml logs --follow fleet01 fleet02 &
        # Ensure Fleet server is responding before waiting for enrollments
        echo "Checking Fleet server health..."
        HEALTH_CHECK_COUNT=0
        until HTTP_CODE=$(curl -sS -o /dev/null -w "%{http_code}" http://localhost:1337/healthz) && [[ "$HTTP_CODE" == "200" ]]; do
          HEALTH_CHECK_COUNT=$((HEALTH_CHECK_COUNT + 1))
          # 30 attempts x 5 second sleep = 150 seconds.
          if [ $HEALTH_CHECK_COUNT -ge 30 ]; then
            echo "ERROR: Fleet server not responding after 150 seconds"
            docker ps -a --filter "name=fleet"
            exit 1
          fi
          echo "Health check ${HEALTH_CHECK_COUNT}/30 (HTTP status: ${HTTP_CODE:-connection failed})"
          sleep 5
        done
        echo "Fleet server is responding"
        # Report enrollment progress and keep the server alive until the summary job completes.
        # The step runs under `bash -eo pipefail`, so every command in this loop is guarded:
        # a transient fleetctl/GitHub API failure must not take the server down mid-test.
        EXPECTED=96 # Display only. This needs to be updated when the matrix strategies are updated.
        START=$(date +%s)
        while true; do
          ELAPSED=$(( $(date +%s) - START ))
          # Check and display enrollment status
          fleetctl get hosts || true
          HOST_COUNT=$( (fleetctl get hosts --json || true) | (grep -v "No hosts found" || true) | wc -l | tr -d ' ')
          echo "Hosts enrolled: ${HOST_COUNT} / $EXPECTED (${ELAPSED}s)"
          # Check summary job status
          # NOTE(review): per_page=100 is the largest page the jobs endpoint returns, and this run has
          # 96 matrix jobs plus gen, run-server, login and summary. Growing the matrix requires pagination here.
          if JOBS_JSON=$(gh api "/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100"); then
            SUMMARY_STATUS=$(echo "$JOBS_JSON" | jq -r '[.jobs[] | select(.name == "summary")] | if length > 0 then .[0].status else "not_started" end') || SUMMARY_STATUS="unknown"
          else
            SUMMARY_STATUS="unknown"
          fi
          echo "Summary job status: $SUMMARY_STATUS"
          if [ "$SUMMARY_STATUS" = "completed" ]; then
            echo "Summary job completed, exiting."
            break
          fi
          sleep 10
        done
      env:
        GH_TOKEN: ${{ github.token }}
    - name: Show enrolled hosts
      if: always()
      run: |
        fleetctl get hosts
        fleetctl get hosts --json | jq
    - name: Cleanup tunnel
      if: always()
      run: cloudflared tunnel --origincert cert.pem delete --force ${{ needs.gen.outputs.subdomain }} || true
    - name: Print cloudflared logs
      if: always()
      run: cat cloudflared.log || true
    - name: Cancel workflow if run-server fails
      if: failure()
      # Uses the repository of the current run, like the other gh calls in this workflow.
      run: gh run cancel ${{ github.run_id }} --repo ${{ github.repository }}
      env:
        GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }}
# Logs in to the Fleet server once and publishes the API token as a job output.
# Login only happens here and the token is shared because otherwise we could hit rate limits.
login:
  timeout-minutes: 15
  runs-on: ubuntu-latest
  needs: gen
  outputs:
    token: ${{ steps.login.outputs.token }}
  steps:
    - name: Harden Runner
      uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0
      with:
        egress-policy: audit
    - name: Set Cloudflare DNS
      run: |
        # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed
        # by Cloudflare. Apply to all non-loopback interfaces in case traffic routes
        # through one other than eth0. Only the interface named exactly "lo" is skipped.
        for iface in $(ip -o link show | awk -F': ' '$2 != "lo" {print $2}'); do
          sudo resolvectl dns "$iface" 1.1.1.1 2>/dev/null || true
        done
    - id: login
      name: Attempt login
      run: |
        npm install -g fleetctl
        fleetctl config set --address ${{ needs.gen.outputs.address }}
        # Wait for DNS to propagate by querying Cloudflare's DoH endpoint over HTTPS.
        # This avoids relying on UDP/53 to 1.1.1.1, which may be blocked on runners.
        HOSTNAME=$(echo "${{ needs.gen.outputs.address }}" | sed 's|https://||')
        echo "Waiting for DNS propagation..."
        DNS_START=$(date +%s)
        until curl -sf "https://1.1.1.1/dns-query?name=${HOSTNAME}&type=A" \
          -H 'accept: application/dns-json' | jq -e '.Status == 0' > /dev/null; do
          ELAPSED=$(( $(date +%s) - DNS_START ))
          echo "DNS not yet propagated... (${ELAPSED}s)"
          sleep 2
        done
        echo "DNS propagated."
        # Wait for Fleet server to be reachable
        echo "Waiting for Fleet server to pass health check..."
        HEALTH_CHECK_START=$(date +%s)
        until curl -s -o /dev/null -w "%{http_code}" ${{ needs.gen.outputs.address }}/healthz | grep -q "200"; do
          ELAPSED=$(( $(date +%s) - HEALTH_CHECK_START ))
          echo "Health check failed... (${ELAPSED}s)"
          sleep 1
        done
        echo "Fleet server is responding, attempting login..."
        LOGIN_START=$(date +%s)
        # NOTE(review): these are taken to be the admin credentials created by `fleetctl preview`; confirm.
        # The password is quoted so the trailing '#' is always passed literally.
        until fleetctl login --email admin@example.com --password 'preview1337#'; do
          ELAPSED=$(( $(date +%s) - LOGIN_START ))
          echo "Login attempt failed... (${ELAPSED}s)"
          sleep 1
        done
        # The token is read from the third whitespace-separated field of the config output.
        TOKEN=$(fleetctl config get token | awk '{print $3}')
        echo "token=$TOKEN" >> "$GITHUB_OUTPUT"
# Builds a fleetd .pkg for every channel/update combination, installs it on arm and
# Intel macOS runners, waits for the host to enroll, checks the agent processes, then uninstalls.
fleetd-macos:
  timeout-minutes: 10
  strategy:
    matrix:
      runner: [ 'macos-15', 'macos-15-intel' ]
      orbit-channel: [ 'stable', 'edge' ]
      osqueryd-channel: [ 'stable', 'edge' ]
      desktop-channel: [ 'stable', 'edge' ]
      disable-updates: [ true, false ]
  runs-on: ${{ matrix.runner }}
  needs: [gen, login]
  steps:
    - name: Install fleetctl
      run: |
        npm install -g fleetctl
        fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }}
    - name: Set Cloudflare DNS
      run: |
        # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed
        # by Cloudflare — their resolver sees the new record immediately.
        # Read the listing line by line so service names containing spaces stay intact
        # (the first line is dropped by `tail -n +2`; presumably it is a header).
        while IFS= read -r svc; do
          # stdin is detached so the command cannot consume the remaining service names.
          sudo networksetup -setdnsservers "$svc" 1.1.1.1 < /dev/null 2>/dev/null || true
        done < <(networksetup -listallnetworkservices | tail -n +2)
        sudo dscacheutil -flushcache
        sudo killall -HUP mDNSResponder || true
    - name: Install fleetd
      run: |
        ARCH=$(uname -m)
        # The hostname encodes the matrix cell so the enrollment check below can find this host.
        sudo hostname macos-${ARCH}-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-${{ matrix.disable-updates }}
        SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
        echo $SECRET_JSON
        SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret')
        echo "Secret: $SECRET"
        echo "Hostname: $(hostname -s)"
        # Instance identifier is needed because macOS runners share UUIDs
        fleetctl package --type pkg --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --host-identifier=instance --disable-updates=${{ matrix.disable-updates }}
        sudo installer -pkg fleet-osquery.pkg -target /
        ENROLLMENT_START=$(date +%s)
        # Poll until the host list contains this hostname; the job timeout bounds the wait.
        until fleetctl get hosts | grep -iF "$(hostname -s)"; do
          CURRENT_TIME=$(date +%s)
          ELAPSED=$((CURRENT_TIME - ENROLLMENT_START))
          echo "Awaiting enrollment... (${ELAPSED}s)"
          sleep 1
        done
    - name: Check processes
      run: |
        sleep 30
        sudo tail -60 /var/log/orbit/orbit.stderr.log
        echo "Checking if osqueryd is running..."
        pgrep -x osqueryd || (echo "ERROR: osqueryd is not running" && exit 1)
        echo "Checking if orbit is running..."
        pgrep -x orbit || (echo "ERROR: orbit is not running" && exit 1)
        echo "Checking if fleet-desktop is running..."
        pgrep -x fleet-desktop || (echo "ERROR: fleet-desktop is not running" && exit 1)
        echo "All processes are running."
    - name: Print orbit logs
      if: always()
      run: |
        sudo cat /var/log/orbit/*
    # Only the uninstall script is checked out.
    - name: Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 1
        sparse-checkout: |
          it-and-security/lib/macos/scripts/uninstall-fleetd-macos.sh
    - name: Uninstall Orbit
      run: |
        sudo ./it-and-security/lib/macos/scripts/uninstall-fleetd-macos.sh
# Builds a fleetd .deb for every channel/update combination, installs it on x86 and arm
# Ubuntu runners, waits for the host to enroll, checks the agent processes, then removes the package.
fleetd-ubuntu:
  timeout-minutes: 10
  strategy:
    matrix:
      runner: [ 'ubuntu-24.04', 'ubuntu-24.04-arm' ]
      orbit-channel: [ 'stable', 'edge' ]
      osqueryd-channel: [ 'stable', 'edge' ]
      desktop-channel: [ 'stable', 'edge' ]
      disable-updates: [ true, false ]
  runs-on: ${{ matrix.runner }}
  needs: [gen, login]
  steps:
    - name: Harden Runner
      uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0
      with:
        egress-policy: audit
    - name: Install fleetctl
      run: |
        npm install -g fleetctl
        fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }}
    - name: Set Cloudflare DNS
      run: |
        # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed
        # by Cloudflare. Apply to all non-loopback interfaces in case traffic routes
        # through one other than eth0. Only the interface named exactly "lo" is skipped.
        for iface in $(ip -o link show | awk -F': ' '$2 != "lo" {print $2}'); do
          sudo resolvectl dns "$iface" 1.1.1.1 2>/dev/null || true
        done
    - name: Install Orbit
      run: |
        ARCH=$(uname -m)
        # Map the kernel architecture name to the value passed to `fleetctl package --arch`.
        if [ "$ARCH" = "x86_64" ]; then FLEET_ARCH="amd64"; else FLEET_ARCH="arm64"; fi
        # The hostname encodes the matrix cell so the enrollment check below can find this host.
        sudo hostnamectl set-hostname ubuntu-${ARCH}-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-${{ matrix.disable-updates }}
        SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
        echo $SECRET_JSON
        SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret')
        echo "Secret: $SECRET"
        echo "Hostname: $(hostname -s)"
        fleetctl package --type deb --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --arch=$FLEET_ARCH --disable-updates=${{ matrix.disable-updates }}
        sudo dpkg -i fleet-osquery*
        ENROLLMENT_START=$(date +%s)
        # Poll until the host list contains this hostname; the job timeout bounds the wait.
        until fleetctl get hosts | grep -iF "$(hostname -s)"; do
          CURRENT_TIME=$(date +%s)
          ELAPSED=$((CURRENT_TIME - ENROLLMENT_START))
          echo "Waiting for enrollment... (${ELAPSED}s)"
          sudo systemctl status orbit.service || true
          sleep 1
        done
    - name: Check processes
      run: |
        sudo systemctl status orbit.service
        sleep 30
        sudo systemctl status orbit.service
        echo "Checking if osqueryd is running..."
        pgrep -x osqueryd || (echo "ERROR: osqueryd is not running" && exit 1)
        echo "Checking if orbit is running..."
        pgrep -x orbit || (echo "ERROR: orbit is not running" && exit 1)
        # Don't check for Fleet Desktop as it doesn't run in the windowless CI environment.
        echo "All processes are running."
    - name: Print orbit logs
      if: always()
      run: |
        sudo journalctl -u orbit.service --no-pager
    - name: Uninstall Orbit
      run: |
        sudo apt remove fleet-osquery -y
# Builds a fleetd .msi for every channel/update combination, installs it on x86 and arm
# Windows runners, waits for the host to enroll, checks the agent processes, then uninstalls.
fleetd-windows:
  timeout-minutes: 10
  strategy:
    matrix:
      runner: [ 'windows-2025', 'windows-11-arm' ]
      orbit-channel: [ 'stable', 'edge' ]
      osqueryd-channel: [ 'stable', 'edge' ]
      desktop-channel: [ 'stable', 'edge' ]
      disable-updates: [ true, false ]
  needs: [gen, login]
  runs-on: ${{ matrix.runner }}
  steps:
    # The computer is renamed through the registry so no restart is required. Windows computer names
    # should stay at 15 characters or fewer (longer names were confirmed to break networking), which is
    # why the name is built from single-letter abbreviations of the matrix values.
    - name: Rename computer
      shell: powershell
      run: |
        $orbit = "${{ matrix.orbit-channel }}"
        $osqueryd = "${{ matrix.osqueryd-channel }}"
        $desktop = "${{ matrix.desktop-channel }}"
        $arch = if ($env:PROCESSOR_ARCHITECTURE -eq 'ARM64') { 'a' } else { 'x' }
        $disableUpdates = if ("${{ matrix.disable-updates }}" -eq "true") { "t" } else { "f" }
        # win-<arch>-<first letter of each channel>-<t|f>; the enrollment step rebuilds the same name.
        $ComputerName = "win-$arch-$($orbit[0])-$($osqueryd[0])-$($desktop[0])-$disableUpdates"
        echo "Setting computer name to $ComputerName"
        # Drop the current TCP/IP hostname values before writing the new name everywhere it is stored.
        Remove-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "Hostname"
        Remove-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "NV Hostname"
        Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\Computername\Computername" -name "Computername" -value $ComputerName
        Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\Computername\ActiveComputername" -name "Computername" -value $ComputerName
        Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "Hostname" -value $ComputerName
        Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "NV Hostname" -value $ComputerName
        Set-ItemProperty -path "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon" -name "AltDefaultDomainName" -value $ComputerName
        Set-ItemProperty -path "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon" -name "DefaultDomainName" -value $ComputerName
    - name: Set Cloudflare DNS
      shell: powershell
      run: |
        # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed
        # by Cloudflare — their resolver sees the new record immediately.
        # -ErrorAction SilentlyContinue skips adapters (e.g. Hyper-V virtual/internal)
        # that have no associated DNS client address object.
        Get-NetAdapter | ForEach-Object { Set-DnsClientServerAddress -InterfaceIndex $_.InterfaceIndex -ServerAddresses "1.1.1.1" -ErrorAction SilentlyContinue }
        Clear-DnsClientCache
    - name: Install fleetctl
      shell: bash
      # On Windows we need to set rootca or tls-skip verify. Since this is a test environment we can skip TLS verification.
      run: |
        npm install -g fleetctl
        fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }} --tls-skip-verify
    - name: Install WiX toolset (arm runner only)
      if: matrix.runner == 'windows-11-arm'
      shell: powershell
      run: |
        Invoke-WebRequest -Uri "https://github.com/wixtoolset/wix3/releases/download/wix3141rtm/wix314.exe" -OutFile wix314.exe
        Start-Process -Wait -FilePath .\wix314.exe -ArgumentList "/quiet"
        # Export WIX for later steps; "Build MSI" appends "bin" to it.
        "WIX=C:\Program Files (x86)\WiX Toolset v3.14\" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
    - name: Build MSI
      shell: bash
      run: |
        SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
        echo "$SECRET_JSON"
        # Strip any prefix before the JSON (e.g. "Installing fleetctl... Install completed. ") in case
        # fleetctl auto-updates and writes an install message to stdout on the same line as the JSON.
        SECRET=$(echo "$SECRET_JSON" | sed 's/^[^{]*//' | jq -r '.spec.secrets[0].secret')
        echo "Secret: $SECRET"
        # PROCESSOR_ARCHITECTURE is lower-cased before being passed to --arch.
        ARCH=$(echo "$PROCESSOR_ARCHITECTURE" | tr '[:upper:]' '[:lower:]')
        # WIX env var points to the WiX Toolset install dir (installed above on windows-11-arm;
        # NOTE(review): presumably pre-installed on the windows-2025 runner — confirm).
        fleetctl package --type msi --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --local-wix-dir="${WIX}bin" --arch=$ARCH --disable-updates=${{ matrix.disable-updates }}
    - name: Install Orbit
      shell: cmd
      run: |
        msiexec /i fleet-osquery.msi /quiet /passive /lv log.txt
    - name: Wait for enrollment
      shell: powershell
      run: |
        # Rebuild the same computer name that the "Rename computer" step assigned.
        $orbit = "${{ matrix.orbit-channel }}"
        $osqueryd = "${{ matrix.osqueryd-channel }}"
        $desktop = "${{ matrix.desktop-channel }}"
        $arch = if ($env:PROCESSOR_ARCHITECTURE -eq 'ARM64') { 'a' } else { 'x' }
        $disableUpdates = if ("${{ matrix.disable-updates }}" -eq "true") { "t" } else { "f" }
        $ComputerName = "win-$arch-$($orbit[0])-$($osqueryd[0])-$($desktop[0])-$disableUpdates"
        $waitStart = Get-Date
        # Poll once per second until the host list mentions this computer; the job timeout bounds the wait.
        while ($true) {
          $enrolledHosts = fleetctl get hosts
          if ($enrolledHosts -match $ComputerName) {
            Write-Host "Success! $ComputerName enrolled."
            break
          }
          $Elapsed = [math]::Round(((Get-Date) - $waitStart).TotalSeconds)
          Write-Host "Waiting for enrollment... (${Elapsed}s)"
          Start-Sleep -Seconds 1
        }
    - name: Check processes
      shell: powershell
      run: |
        Start-Sleep -Seconds 30
        # Checked in this order; the first missing process fails the step.
        foreach ($processName in "osqueryd", "orbit", "fleet-desktop") {
          Write-Host "Checking if $processName is running..."
          if (-not (Get-Process -Name $processName -ErrorAction SilentlyContinue)) {
            Write-Host "ERROR: $processName is not running"
            exit 1
          }
        }
        Write-Host "All processes are running."
    - name: Print orbit install log
      if: always()
      shell: powershell
      run: Get-Content log.txt -ErrorAction SilentlyContinue
    - name: Print Orbit logs
      if: always()
      shell: powershell
      run: Get-Content "C:\Windows\system32\config\systemprofile\AppData\Local\FleetDM\Orbit\Logs\orbit-osquery.log" -ErrorAction SilentlyContinue
    # Only the uninstall script is checked out.
    - name: Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 1
        sparse-checkout: |
          it-and-security/lib/windows/scripts/uninstall-fleetd-windows.ps1
    - name: Uninstall Orbit
      shell: powershell
      run: |
        .\it-and-security\lib\windows\scripts\uninstall-fleetd-windows.ps1
# Final job: always runs, inspects the results of the jobs it needs, and then pings Cronitor on
# success, re-dispatches the workflow as a retry, or alerts Slack once the retries are exhausted.
# The run-server job polls for this job (by the name "summary") to know when it may shut down.
summary:
  # gen and login are listed explicitly: if either fails, the matrix jobs are only "skipped",
  # which the failure/cancelled checks below would not notice on their own.
  needs: [gen, login, fleetd-macos, fleetd-ubuntu, fleetd-windows]
  runs-on: ubuntu-latest
  if: always()
  steps:
    - name: Compute next retry
      id: next-retry
      run: echo "value=$(( ${{ inputs.retry || 0 }} + 1 ))" >> "$GITHUB_OUTPUT"
    - name: Slack Notification (all retries exhausted)
      # retry counts 0..3, so reaching 3 means the initial run plus three retries all failed.
      if: (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) && (inputs.retry || 0) >= 3
      uses: slackapi/slack-github-action@af78098f536edbc4de71162a307590698245be95 # v3.0.1
      with:
        webhook: ${{ secrets.SLACK_G_HELP_ENGINEERING_WEBHOOK_URL }}
        webhook-type: incoming-webhook
        payload: |
          blocks:
            - type: "header"
              text:
                type: "plain_text"
                text: "🚨 ALL RETRIES EXHAUSTED — MANUAL INVESTIGATION REQUIRED 🚨"
            - type: "section"
              text:
                type: "mrkdwn"
                text: "*Agent E2E test FAILED all 4 attempts* :rotating_light:\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View workflow run>"
    - name: Ping Cronitor on success
      # Pull-request runs never ping Cronitor.
      if: ${{ (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
      run: curl -sf "${{ secrets.CRONITOR_E2E_AGENT_PING_URL }}" > /dev/null
    - name: Retry workflow on failure
      # Only retry scheduled runs or manual runs that are retries for scheduled runs (inputs.retry > 0)
      if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && (inputs.retry || 0) > 0)) && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) && (inputs.retry || 0) < 3 }}
      run: |
        gh workflow run e2e-agent.yml --repo ${{ github.repository }} --ref ${{ github.head_ref || github.ref_name }} -f retry=${{ steps.next-retry.outputs.value }}
      env:
        GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }}
    - name: Cancel workflow if any job failed
      if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
      # Uses the repository of the current run, like the retry step above.
      run: gh run cancel ${{ github.run_id }} --repo ${{ github.repository }}
      env:
        GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }}