E2E Test Agents #119
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This workflow tests enrolling of agents on the supported platforms. | |
| # | |
| # It starts the latest release of fleet with the "fleetctl preview" command. | |
| # It generates the installers for the latest version of fleetd with the | |
| # "fleetctl package" command. | |
| # | |
| # It tests across a matrix of configurations: | |
| # OS: mac/Linux/Windows | |
| # Updates: enabled/disabled | |
| # Channels (for each of orbit/osquery/desktop): edge/stable | |
| # Arch: arm/x86 | |
| # | |
| # Troubleshooting | |
| # The top two errors seen while developing this: | |
| # 1) Jobs are queued waiting for runners long enough for the entire workflow to fail. Scheduling for the middle of the night attempts to mitigate this. Timeouts have been tuned to try to manage it as well. | |
| # 2) Network issues (commonly related to Cloudflare tunnels) cause some requests to fail. | |
| # | |
| # Upon failure, the workflow will automatically retry up to 3 times. A Slack notification is sent only if all retries are exhausted. | |
| name: E2E Test Agents | |
| # Triggers: manual dispatch (the summary job also uses this to re-launch a failed run, | |
| # passing the attempt count in the `retry` input), a nightly schedule, and pull requests | |
| # that modify this workflow file. | |
| on: | |
| workflow_dispatch: # Manual | |
| inputs: | |
| retry: | |
| description: 'Number of retries attempted so far' | |
| type: number | |
| default: 0 | |
| schedule: | |
| - cron: '0 5 * * *' # Nightly 5AM UTC | |
| pull_request: | |
| paths: | |
| - '.github/workflows/e2e-agent.yml' | |
| # Read-only scopes for the default token. Steps that dispatch or cancel runs pass a | |
| # separate token through GH_TOKEN. | |
| permissions: | |
| contents: read | |
| actions: read | |
| # Each cron schedule gets its own concurrency group. workflow_dispatch and pull_request also get their own. | |
| # The middle segment is the PR head branch when present, otherwise the run id, so only | |
| # runs for the same PR branch can cancel each other. | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id}}-${{ github.event.schedule || github.event_name }} | |
| cancel-in-progress: true | |
| defaults: | |
| run: | |
| # fail-fast using bash -eo pipefail. See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference | |
| shell: bash | |
| env: | |
| # Supply-chain guard: refuse npm packages younger than 12 hours. See .npmrc. | |
| # NOTE(review): 0.5 matches the 12-hour figure only if the unit is days - confirm against .npmrc. | |
| NPM_CONFIG_MIN_RELEASE_AGE: 0.5 | |
| jobs: | |
| # Produce a unique per-run name for the Cloudflare tunnel and publish it, together with | |
| # the matching public URL, as job outputs for the jobs that depend on this one. | |
| gen: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| subdomain: ${{ steps.gen.outputs.subdomain }} | |
| address: ${{ steps.gen.outputs.address }} | |
| steps: | |
| - name: Harden Runner | |
| uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 | |
| with: | |
| egress-policy: audit | |
| - id: gen | |
| run: | | |
| # Build the tunnel name once, then emit both outputs through a single redirect. | |
| TUNNEL_NAME="fleet-test-$(uuidgen)" | |
| { | |
| echo "subdomain=${TUNNEL_NAME}" | |
| echo "address=https://${TUNNEL_NAME}.fleetuem.com" | |
| } >> $GITHUB_OUTPUT | |
| run-server: | |
| # Hosts the Fleet server under test for the whole run: opens a Cloudflare tunnel to | |
| # localhost:1337, starts Fleet with "fleetctl preview", then stays alive (reporting | |
| # enrollment progress) until the summary job has completed. | |
| timeout-minutes: 240 | |
| runs-on: ubuntu-8core | |
| needs: gen | |
| steps: | |
| - name: Harden Runner | |
| uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 | |
| with: | |
| egress-policy: audit | |
| - name: Start tunnel | |
| env: | |
| CERT_PEM: ${{ secrets.CLOUDFLARE_TUNNEL_FLEETUEM_CERT_B64 }} | |
| run: | | |
| # Increase maximum receive buffer size to roughly 2.5 MB. | |
| # Cloudflared uses quic-go. This buffer holds packets that have been received by the kernel, | |
| # but not yet read by the application (quic-go in this case). Once this buffer fills up, the | |
| # kernel will drop any new incoming packet. | |
| # See https://github.com/quic-go/quic-go/wiki/UDP-Receive-Buffer-Size. | |
| sudo sysctl -w net.core.rmem_max=2500000 | |
| # Install cloudflared and run tunnel | |
| wget https://github.com/cloudflare/cloudflared/releases/download/2026.3.0/cloudflared-linux-amd64.deb | |
| sudo dpkg -i cloudflared-linux-amd64.deb | |
| echo "$CERT_PEM" | base64 -d > cert.pem | |
| cloudflared tunnel --origincert cert.pem create ${{ needs.gen.outputs.subdomain }} | |
| cloudflared tunnel --origincert cert.pem route dns ${{ needs.gen.outputs.subdomain }} ${{ needs.gen.outputs.subdomain }} | |
| cloudflared tunnel --origincert cert.pem --url http://localhost:1337 --logfile cloudflared.log run ${{ needs.gen.outputs.subdomain }} & | |
| # Poll until the first connection reports is_pending_reconnect == false. | |
| # NOTE(review): this loop has no bound of its own; only the job timeout stops it. | |
| until [[ $(cloudflared tunnel --origincert cert.pem info -o json ${{ needs.gen.outputs.subdomain }} | jq '.conns[0].conns[0].is_pending_reconnect') = false ]]; do | |
| echo "Awaiting tunnel ready..." | |
| sleep 1 | |
| done | |
| - name: Run Fleet server | |
| run: | | |
| npm install -g fleetctl | |
| fleetctl preview --no-hosts --disable-open-browser | |
| # From here on fleetctl talks to the server through the public tunnel address. | |
| fleetctl config set --address ${{ needs.gen.outputs.address }} | |
| fleetctl get enroll-secret | |
| docker compose -f ~/.fleet/preview/docker-compose.yml logs --follow fleet01 fleet02 & | |
| # Ensure Fleet server is responding before waiting for enrollments | |
| echo "Checking Fleet server health..." | |
| HEALTH_CHECK_COUNT=0 | |
| until HTTP_CODE=$(curl -sS -o /dev/null -w "%{http_code}" http://localhost:1337/healthz) && [[ "$HTTP_CODE" == "200" ]]; do | |
| HEALTH_CHECK_COUNT=$((HEALTH_CHECK_COUNT + 1)) | |
| if [ $HEALTH_CHECK_COUNT -ge 30 ]; then | |
| echo "ERROR: Fleet server not responding after 150 seconds" | |
| docker ps -a --filter "name=fleet" | |
| exit 1 | |
| fi | |
| echo "Health check ${HEALTH_CHECK_COUNT}/30 (HTTP status: ${HTTP_CODE:-connection failed})" | |
| sleep 5 | |
| done | |
| echo "Fleet server is responding" | |
| # Report enrollment progress and keep the server alive until the summary job completes. | |
| # EXPECTED is informational only: the loop exits solely on the summary job's status. | |
| EXPECTED=96 # This needs to be updated when the matrix strategies are updated. | |
| START=$(date +%s) | |
| while true; do | |
| ELAPSED=$(( $(date +%s) - START )) | |
| # Check and display enrollment status | |
| fleetctl get hosts || true | |
| # Under "bash -eo pipefail" an unguarded failure of this pipeline would abort the step | |
| # (and cancel the run), so a failed request is reported as "unknown" instead. | |
| HOST_COUNT=$(fleetctl get hosts --json | (grep -v "No hosts found" || true) | wc -l | tr -d ' ') || HOST_COUNT="unknown" | |
| echo "Hosts enrolled: ${HOST_COUNT} / $EXPECTED (${ELAPSED}s)" | |
| # Check summary job status. A failed API call or an unparsable response is treated as | |
| # "unknown" and retried on the next iteration rather than aborting the step. | |
| # NOTE(review): only the first page (100 jobs) is inspected; the jobs visible in this | |
| # file add up to exactly 100, so adding jobs would require pagination. | |
| if JOBS_JSON=$(gh api "/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100"); then | |
| SUMMARY_STATUS=$(echo "$JOBS_JSON" | jq -r '[.jobs[] | select(.name == "summary")] | if length > 0 then .[0].status else "not_started" end') || SUMMARY_STATUS="unknown" | |
| else | |
| echo "WARNING: could not query job status, will retry" | |
| SUMMARY_STATUS="unknown" | |
| fi | |
| echo "Summary job status: $SUMMARY_STATUS" | |
| if [ "$SUMMARY_STATUS" = "completed" ]; then | |
| echo "Summary job completed, exiting." | |
| break | |
| fi | |
| sleep 10 | |
| done | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| - name: Show enrolled hosts | |
| if: always() | |
| run: | | |
| fleetctl get hosts | |
| fleetctl get hosts --json | jq | |
| - name: Cleanup tunnel | |
| if: always() | |
| run: cloudflared tunnel --origincert cert.pem delete --force ${{ needs.gen.outputs.subdomain }} || true | |
| - name: Print cloudflared logs | |
| if: always() | |
| run: cat cloudflared.log || true | |
| # Without a server the agent jobs cannot succeed, so stop the whole run. | |
| - name: Cancel workflow if run-server fails | |
| if: failure() | |
| run: gh run cancel ${{ github.run_id }} --repo ${{ github.repository }} | |
| env: | |
| GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }} | |
| login: | |
| # Waits for the tunnel hostname to resolve and the Fleet server to answer, logs in once, | |
| # and publishes the session token as a job output for the agent jobs. | |
| timeout-minutes: 15 | |
| runs-on: ubuntu-latest | |
| needs: gen | |
| outputs: | |
| token: ${{ steps.login.outputs.token }} | |
| steps: | |
| - name: Harden Runner | |
| uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 | |
| with: | |
| egress-policy: audit | |
| # Login only here and share the token because otherwise we could hit rate limits. | |
| - name: Set Cloudflare DNS | |
| run: | | |
| # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed | |
| # by Cloudflare. Apply to all non-loopback interfaces in case traffic routes | |
| # through one other than eth0. | |
| for iface in $(ip -o link show | awk -F': ' '{print $2}' | grep -v lo); do | |
| sudo resolvectl dns "$iface" 1.1.1.1 2>/dev/null || true | |
| done | |
| - id: login | |
| name: Attempt login | |
| run: | | |
| npm install -g fleetctl | |
| fleetctl config set --address ${{ needs.gen.outputs.address }} | |
| # Wait for DNS to propagate by querying Cloudflare's DoH endpoint over HTTPS. | |
| # This avoids relying on UDP/53 to 1.1.1.1, which may be blocked on runners. | |
| HOSTNAME=$(echo "${{ needs.gen.outputs.address }}" | sed 's|https://||') | |
| echo "Waiting for DNS propagation..." | |
| DNS_START=$(date +%s) | |
| until curl -sf "https://1.1.1.1/dns-query?name=${HOSTNAME}&type=A" \ | |
| -H 'accept: application/dns-json' | jq -e '.Status == 0' > /dev/null; do | |
| ELAPSED=$(( $(date +%s) - DNS_START )) | |
| echo "DNS not yet propagated... (${ELAPSED}s)" | |
| sleep 2 | |
| done | |
| echo "DNS propagated." | |
| # Wait for Fleet server to be reachable | |
| # None of the wait loops in this step has its own bound; the job's timeout-minutes is the limit. | |
| echo "Waiting for Fleet server to pass health check..." | |
| HEALTH_CHECK_START=$(date +%s) | |
| until curl -s -o /dev/null -w "%{http_code}" ${{ needs.gen.outputs.address }}/healthz | grep -q "200"; do | |
| ELAPSED=$(( $(date +%s) - HEALTH_CHECK_START )) | |
| echo "Health check failed... (${ELAPSED}s)" | |
| sleep 1 | |
| done | |
| echo "Fleet server is responding, attempting login..." | |
| LOGIN_START=$(date +%s) | |
| # Credentials of the default admin account created by "fleetctl preview". | |
| until fleetctl login --email admin@example.com --password preview1337#; do | |
| ELAPSED=$(( $(date +%s) - LOGIN_START )) | |
| echo "Login attempt failed... (${ELAPSED}s)" | |
| sleep 1 | |
| done | |
| # Take the third whitespace-separated field of the "config get token" output as the token. | |
| TOKEN=$(fleetctl config get token | awk '{print $3}') | |
| echo "token=$TOKEN" >> $GITHUB_OUTPUT | |
| fleetd-macos: | |
| # Builds, installs and enrolls fleetd on macOS for every combination of runner, | |
| # orbit/osqueryd/desktop channel and the disable-updates flag (2^5 = 32 jobs), | |
| # checks that the agent processes are running, then uninstalls. | |
| timeout-minutes: 10 | |
| strategy: | |
| matrix: | |
| runner: [ 'macos-15', 'macos-15-intel' ] | |
| orbit-channel: [ 'stable', 'edge' ] | |
| osqueryd-channel: [ 'stable', 'edge' ] | |
| desktop-channel: [ 'stable', 'edge' ] | |
| disable-updates: [ true, false ] | |
| runs-on: ${{ matrix.runner }} | |
| needs: [gen, login] | |
| steps: | |
| - name: Install fleetctl | |
| run: | | |
| npm install -g fleetctl | |
| # Reuse the session token produced by the login job instead of logging in again. | |
| fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }} | |
| - name: Set Cloudflare DNS | |
| run: | | |
| # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed | |
| # by Cloudflare — their resolver sees the new record immediately. | |
| for svc in $(networksetup -listallnetworkservices | tail -n +2); do | |
| sudo networksetup -setdnsservers "$svc" 1.1.1.1 2>/dev/null || true | |
| done | |
| sudo dscacheutil -flushcache | |
| sudo killall -HUP mDNSResponder || true | |
| - name: Install fleetd | |
| run: | | |
| ARCH=$(uname -m) | |
| # The hostname encodes the matrix combination; the enrollment loop below looks for it | |
| # in the server's host list. | |
| sudo hostname macos-${ARCH}-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-${{ matrix.disable-updates }} | |
| SECRET_JSON=$(fleetctl get enroll_secret --json --debug) | |
| echo $SECRET_JSON | |
| SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret') | |
| echo "Secret: $SECRET" | |
| echo "Hostname: $(hostname -s)" | |
| # Instance identifier is needed because macOS runners share UUIDs | |
| fleetctl package --type pkg --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --host-identifier=instance --disable-updates=${{ matrix.disable-updates }} | |
| sudo installer -pkg fleet-osquery.pkg -target / | |
| # Poll the host list until this hostname appears. The loop has no bound of its own; | |
| # the job's timeout-minutes is the limit. | |
| ENROLLMENT_START=$(date +%s) | |
| until fleetctl get hosts | grep -iF $(hostname -s); | |
| do | |
| CURRENT_TIME=$(date +%s) | |
| ELAPSED=$((CURRENT_TIME - ENROLLMENT_START)) | |
| echo "Awaiting enrollment... (${ELAPSED}s)" | |
| sleep 1 | |
| done | |
| - name: Check processes | |
| run: | | |
| sleep 30 | |
| sudo tail -60 /var/log/orbit/orbit.stderr.log | |
| # Each check fails the step when the process is missing: "exit 1" ends the subshell | |
| # with a non-zero status, which bash -e then treats as a failed command. | |
| echo "Checking if osqueryd is running..." | |
| pgrep -x osqueryd || (echo "ERROR: osqueryd is not running" && exit 1) | |
| echo "Checking if orbit is running..." | |
| pgrep -x orbit || (echo "ERROR: orbit is not running" && exit 1) | |
| echo "Checking if fleet-desktop is running..." | |
| pgrep -x fleet-desktop || (echo "ERROR: fleet-desktop is not running" && exit 1) | |
| echo "All processes are running." | |
| - name: Print orbit logs | |
| if: always() | |
| run: | | |
| sudo cat /var/log/orbit/* | |
| # Only the uninstall script is fetched from the repository. | |
| - name: Checkout | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| fetch-depth: 1 | |
| sparse-checkout: | | |
| it-and-security/lib/macos/scripts/uninstall-fleetd-macos.sh | |
| - name: Uninstall Orbit | |
| run: | | |
| sudo ./it-and-security/lib/macos/scripts/uninstall-fleetd-macos.sh | |
| fleetd-ubuntu: | |
| # Builds, installs and enrolls fleetd on Ubuntu for every combination of runner, | |
| # orbit/osqueryd/desktop channel and the disable-updates flag (2^5 = 32 jobs), | |
| # checks that the agent processes are running, then removes the package. | |
| timeout-minutes: 10 | |
| strategy: | |
| matrix: | |
| runner: [ 'ubuntu-24.04', 'ubuntu-24.04-arm' ] | |
| orbit-channel: [ 'stable', 'edge' ] | |
| osqueryd-channel: [ 'stable', 'edge' ] | |
| desktop-channel: [ 'stable', 'edge' ] | |
| disable-updates: [ true, false ] | |
| runs-on: ${{ matrix.runner }} | |
| needs: [gen, login] | |
| steps: | |
| - name: Harden Runner | |
| uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 | |
| with: | |
| egress-policy: audit | |
| - name: Install fleetctl | |
| run: | | |
| npm install -g fleetctl | |
| # Reuse the session token produced by the login job instead of logging in again. | |
| fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }} | |
| - name: Set Cloudflare DNS | |
| run: | | |
| # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed | |
| # by Cloudflare. Apply to all non-loopback interfaces in case traffic routes | |
| # through one other than eth0. | |
| for iface in $(ip -o link show | awk -F': ' '{print $2}' | grep -v lo); do | |
| sudo resolvectl dns "$iface" 1.1.1.1 2>/dev/null || true | |
| done | |
| - name: Install Orbit | |
| run: | | |
| ARCH=$(uname -m) | |
| # Map the kernel architecture name to the value passed to "fleetctl package --arch"; | |
| # anything other than x86_64 is treated as arm64. | |
| if [ "$ARCH" = "x86_64" ]; then FLEET_ARCH="amd64"; else FLEET_ARCH="arm64"; fi | |
| # The hostname encodes the matrix combination; the enrollment loop below looks for it | |
| # in the server's host list. | |
| sudo hostnamectl set-hostname ubuntu-${ARCH}-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-${{ matrix.disable-updates }} | |
| SECRET_JSON=$(fleetctl get enroll_secret --json --debug) | |
| echo $SECRET_JSON | |
| SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret') | |
| echo "Secret: $SECRET" | |
| echo "Hostname: $(hostname -s)" | |
| fleetctl package --type deb --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --arch=$FLEET_ARCH --disable-updates=${{ matrix.disable-updates }} | |
| sudo dpkg -i fleet-osquery* | |
| # Poll the host list until this hostname appears. The loop has no bound of its own; | |
| # the job's timeout-minutes is the limit. | |
| ENROLLMENT_START=$(date +%s) | |
| until fleetctl get hosts | grep -iF $(hostname -s); do | |
| CURRENT_TIME=$(date +%s) | |
| ELAPSED=$((CURRENT_TIME - ENROLLMENT_START)) | |
| echo "Waiting for enrollment... (${ELAPSED}s)" | |
| sudo systemctl status orbit.service || true | |
| sleep 1 | |
| done | |
| - name: Check processes | |
| run: | | |
| # Unlike the status call in the enrollment loop, these are not guarded with "|| true", | |
| # so a non-zero exit from systemctl fails the step. | |
| sudo systemctl status orbit.service | |
| sleep 30 | |
| sudo systemctl status orbit.service | |
| echo "Checking if osqueryd is running..." | |
| pgrep -x osqueryd || (echo "ERROR: osqueryd is not running" && exit 1) | |
| echo "Checking if orbit is running..." | |
| pgrep -x orbit || (echo "ERROR: orbit is not running" && exit 1) | |
| # Don't check for Fleet Desktop as it doesn't run in the windowless CI environment. | |
| echo "All processes are running." | |
| - name: Print orbit logs | |
| if: always() | |
| run: | | |
| sudo journalctl -u orbit.service --no-pager | |
| - name: Uninstall Orbit | |
| run: | | |
| sudo apt remove fleet-osquery -y | |
| fleetd-windows: | |
| # Builds, installs and enrolls fleetd on Windows for every combination of runner, | |
| # orbit/osqueryd/desktop channel and the disable-updates flag (2^5 = 32 jobs), | |
| # checks that the agent processes are running, then uninstalls. | |
| timeout-minutes: 10 | |
| strategy: | |
| matrix: | |
| runner: [ 'windows-2025', 'windows-11-arm' ] | |
| orbit-channel: [ 'stable', 'edge' ] | |
| osqueryd-channel: [ 'stable', 'edge' ] | |
| desktop-channel: [ 'stable', 'edge' ] | |
| disable-updates: [ true, false ] | |
| needs: [gen, login] | |
| runs-on: ${{ matrix.runner }} | |
| steps: | |
| # We need to use some shenanigans to rename the Windows computer without restarting. Note: Windows computers should not get names longer than 15 characters (confirmed this breaks networking). | |
| - name: Rename computer | |
| shell: powershell | |
| run: | | |
| $orbit = "${{ matrix.orbit-channel }}" | |
| $osqueryd = "${{ matrix.osqueryd-channel }}" | |
| $desktop = "${{ matrix.desktop-channel }}" | |
| $arch = if ($env:PROCESSOR_ARCHITECTURE -eq 'ARM64') { 'a' } else { 'x' } | |
| $disableUpdates = if ("${{ matrix.disable-updates }}" -eq "true") { "t" } else { "f" } | |
| # Each matrix value is abbreviated to one character so the name stays short, | |
| # e.g. win-x-s-e-s-t. The "Wait for enrollment" step rebuilds the same name. | |
| $ComputerName = "win-$arch-$($orbit[0])-$($osqueryd[0])-$($desktop[0])-$disableUpdates" | |
| echo "Setting computer name to $ComputerName" | |
| Remove-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "Hostname" | |
| Remove-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "NV Hostname" | |
| Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\Computername\Computername" -name "Computername" -value $ComputerName | |
| Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\Computername\ActiveComputername" -name "Computername" -value $ComputerName | |
| Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "Hostname" -value $ComputerName | |
| Set-ItemProperty -path "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters" -name "NV Hostname" -value $ComputerName | |
| Set-ItemProperty -path "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon" -name "AltDefaultDomainName" -value $ComputerName | |
| Set-ItemProperty -path "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon" -name "DefaultDomainName" -value $ComputerName | |
| - name: Set Cloudflare DNS | |
| shell: powershell | |
| run: | | |
| # Use Cloudflare's DNS resolver (1.1.1.1) since the tunnel DNS record is managed | |
| # by Cloudflare — their resolver sees the new record immediately. | |
| # -ErrorAction SilentlyContinue skips adapters (e.g. Hyper-V virtual/internal) | |
| # that have no associated DNS client address object. | |
| Get-NetAdapter | ForEach-Object { Set-DnsClientServerAddress -InterfaceIndex $_.InterfaceIndex -ServerAddresses "1.1.1.1" -ErrorAction SilentlyContinue } | |
| Clear-DnsClientCache | |
| - name: Install fleetctl | |
| shell: bash | |
| # On Windows we need to set rootca or tls-skip verify. Since this is a test environment we can skip TLS verification. | |
| run: | | |
| npm install -g fleetctl | |
| fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }} --tls-skip-verify | |
| - name: Install WiX toolset (arm runner only) | |
| if: matrix.runner == 'windows-11-arm' | |
| shell: powershell | |
| run: | | |
| Invoke-WebRequest -Uri "https://github.com/wixtoolset/wix3/releases/download/wix3141rtm/wix314.exe" -OutFile wix314.exe | |
| Start-Process -Wait -FilePath .\wix314.exe -ArgumentList "/quiet" | |
| # Export WIX for later steps; the "Build MSI" step reads it as "${WIX}bin". | |
| "WIX=C:\Program Files (x86)\WiX Toolset v3.14\" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append | |
| - name: Build MSI | |
| shell: bash | |
| run: | | |
| SECRET_JSON=$(fleetctl get enroll_secret --json --debug) | |
| echo "$SECRET_JSON" | |
| # Strip any prefix before the JSON (e.g. "Installing fleetctl... Install completed. ") in case | |
| # fleetctl auto-updates and writes an install message to stdout on the same line as the JSON. | |
| SECRET=$(echo "$SECRET_JSON" | sed 's/^[^{]*//' | jq -r '.spec.secrets[0].secret') | |
| echo "Secret: $SECRET" | |
| # Lower-case PROCESSOR_ARCHITECTURE and pass it as the --arch value. | |
| ARCH=$(echo "$PROCESSOR_ARCHITECTURE" | tr '[:upper:]' '[:lower:]') | |
| # WIX env var points to the WiX Toolset install dir (pre-installed on the windows-2025 runner, installed above on windows-11-arm) | |
| fleetctl package --type msi --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug --local-wix-dir="${WIX}bin" --arch=$ARCH --disable-updates=${{ matrix.disable-updates }} | |
| - name: Install Orbit | |
| shell: cmd | |
| run: | | |
| msiexec /i fleet-osquery.msi /quiet /passive /lv log.txt | |
| - name: Wait for enrollment | |
| shell: powershell | |
| run: | | |
| # Rebuild the computer name exactly as the "Rename computer" step does. | |
| $orbit = "${{ matrix.orbit-channel }}" | |
| $osqueryd = "${{ matrix.osqueryd-channel }}" | |
| $desktop = "${{ matrix.desktop-channel }}" | |
| $arch = if ($env:PROCESSOR_ARCHITECTURE -eq 'ARM64') { 'a' } else { 'x' } | |
| $disableUpdates = if ("${{ matrix.disable-updates }}" -eq "true") { "t" } else { "f" } | |
| $ComputerName = "win-$arch-$($orbit[0])-$($osqueryd[0])-$($desktop[0])-$disableUpdates" | |
| $StartTime = Get-Date | |
| # Poll the host list until the name appears. The loop has no bound of its own; | |
| # the job's timeout-minutes is the limit. | |
| do { | |
| $hosts = fleetctl get hosts | |
| if ($hosts -match $ComputerName) { | |
| Write-Host "Success! $ComputerName enrolled." | |
| break | |
| } | |
| $Elapsed = [math]::Round(((Get-Date) - $StartTime).TotalSeconds) | |
| Write-Host "Waiting for enrollment... (${Elapsed}s)" | |
| Start-Sleep -Seconds 1 | |
| } while ($true) | |
| - name: Check processes | |
| shell: powershell | |
| run: | | |
| Start-Sleep -Seconds 30 | |
| # Fail the step with exit code 1 as soon as one expected process is missing. | |
| Write-Host "Checking if osqueryd is running..." | |
| if (-not (Get-Process -Name "osqueryd" -ErrorAction SilentlyContinue)) { | |
| Write-Host "ERROR: osqueryd is not running" | |
| exit 1 | |
| } | |
| Write-Host "Checking if orbit is running..." | |
| if (-not (Get-Process -Name "orbit" -ErrorAction SilentlyContinue)) { | |
| Write-Host "ERROR: orbit is not running" | |
| exit 1 | |
| } | |
| Write-Host "Checking if fleet-desktop is running..." | |
| if (-not (Get-Process -Name "fleet-desktop" -ErrorAction SilentlyContinue)) { | |
| Write-Host "ERROR: fleet-desktop is not running" | |
| exit 1 | |
| } | |
| Write-Host "All processes are running." | |
| # log.txt is the verbose log written by msiexec in the "Install Orbit" step. | |
| - name: Print orbit install log | |
| if: always() | |
| shell: powershell | |
| run: Get-Content log.txt -ErrorAction SilentlyContinue | |
| - name: Print Orbit logs | |
| if: always() | |
| shell: powershell | |
| run: Get-Content "C:\Windows\system32\config\systemprofile\AppData\Local\FleetDM\Orbit\Logs\orbit-osquery.log" -ErrorAction SilentlyContinue | |
| # Only the uninstall script is fetched from the repository. | |
| - name: Checkout | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| fetch-depth: 1 | |
| sparse-checkout: | | |
| it-and-security/lib/windows/scripts/uninstall-fleetd-windows.ps1 | |
| - name: Uninstall Orbit | |
| shell: powershell | |
| run: | | |
| .\it-and-security\lib\windows\scripts\uninstall-fleetd-windows.ps1 | |
| summary: | |
| # Runs after the three agent jobs regardless of their outcome and decides what happens next: | |
| # ping Cronitor on success, re-dispatch the workflow on failure (up to 3 retries), | |
| # notify Slack once retries are exhausted, and cancel the run so run-server stops. | |
| # | |
| # A needed job counts as failed when its result is 'failure', 'cancelled' or 'skipped'. | |
| # The agent jobs have no `if:` of their own, so 'skipped' can only mean that a job they | |
| # depend on (gen or login) did not succeed. | |
| needs: [fleetd-macos, fleetd-ubuntu, fleetd-windows] | |
| runs-on: ubuntu-latest | |
| if: always() | |
| steps: | |
| - name: Compute next retry | |
| id: next-retry | |
| run: echo "value=$(( ${{ inputs.retry || 0 }} + 1 ))" >> $GITHUB_OUTPUT | |
| - name: Slack Notification (all retries exhausted) | |
| if: (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped')) && (inputs.retry || 0) >= 3 | |
| uses: slackapi/slack-github-action@af78098f536edbc4de71162a307590698245be95 # v3.0.1 | |
| with: | |
| webhook: ${{ secrets.SLACK_G_HELP_ENGINEERING_WEBHOOK_URL }} | |
| webhook-type: incoming-webhook | |
| payload: | | |
| blocks: | |
| - type: "header" | |
| text: | |
| type: "plain_text" | |
| text: "🚨 ALL RETRIES EXHAUSTED — MANUAL INVESTIGATION REQUIRED 🚨" | |
| - type: "section" | |
| text: | |
| type: "mrkdwn" | |
| text: "*Agent E2E test FAILED all 4 attempts* :rotating_light:\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View workflow run>" | |
| - name: Ping Cronitor on success | |
| if: ${{ (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }} | |
| run: curl -sf "${{ secrets.CRONITOR_E2E_AGENT_PING_URL }}" > /dev/null | |
| - name: Retry workflow on failure | |
| # Only retry scheduled runs or manual runs that are retries for scheduled runs (inputs.retry > 0) | |
| if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && (inputs.retry || 0) > 0)) && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped')) && (inputs.retry || 0) < 3 }} | |
| run: | | |
| gh workflow run e2e-agent.yml --repo ${{ github.repository }} --ref ${{ github.head_ref || github.ref_name }} -f retry=${{ steps.next-retry.outputs.value }} | |
| env: | |
| GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }} | |
| - name: Cancel workflow if any job failed | |
| if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }} | |
| run: gh run cancel ${{ github.run_id }} --repo ${{ github.repository }} | |
| env: | |
| GH_TOKEN: ${{ secrets.FLEET_RELEASE_GITHUB_PAT }} | |