02 - CVE Enrichment Pipeline #98
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: 02 - CVE Enrichment Pipeline | |
| on: | |
| schedule: | |
| # Run at 6:00 AM and 6:00 PM UTC, Monday through Friday | |
| - cron: '15 6,18 * * 1-5' | |
| workflow_dispatch: | |
| inputs: | |
| use_api_key: | |
| description: 'Use API key for CVE enrichment' | |
| type: boolean | |
| default: true | |
| debug_mode: | |
| description: 'Enable verbose debugging' | |
| type: boolean | |
| default: false | |
| env: | |
| PYTHON_VERSION: '3.13' | |
| jobs: | |
| test-cve-pipeline: | |
| name: CVE Pipeline | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Set up UV | |
| uses: astral-sh/setup-uv@v3 | |
| with: | |
| enable-cache: true | |
| cache-dependency-glob: "scripts/sofa_pipeline.py" | |
| - name: Install Apple Root Certificates | |
| run: | | |
| echo "🔐 Installing Apple root certificates for SSL validation..." | |
| # Update package database | |
| sudo apt-get update | |
| # Install ca-certificates if not present | |
| sudo apt-get install -y ca-certificates curl | |
| # Download and install Apple Root CA-G3 (current primary root) | |
| echo "📥 Downloading Apple Root CA-G3..." | |
| sudo curl -f -o /usr/local/share/ca-certificates/apple-root-ca-g3.crt \ | |
| "https://www.apple.com/certificateauthority/AppleRootCA-G3.cer" | |
| # Download and install Apple Root CA (legacy support) | |
| echo "📥 Downloading Apple Root CA (legacy)..." | |
| sudo curl -f -o /usr/local/share/ca-certificates/apple-root-ca.crt \ | |
| "https://www.apple.com/certificateauthority/AppleComputerRootCertificate.cer" | |
| # Update system certificate store | |
| echo "🔄 Updating system certificate store..." | |
| sudo update-ca-certificates | |
| # Verify installation | |
| echo "✅ Apple certificates installed:" | |
| ls -la /usr/local/share/ca-certificates/apple-* | |
| # Test SSL connection to Apple Developer | |
| echo "🧪 Testing SSL connection to developer.apple.com..." | |
| if curl -I --max-time 10 "https://developer.apple.com/news/releases/" >/dev/null 2>&1; then | |
| echo "✅ SSL connection to Apple Developer successful" | |
| else | |
| echo "⚠️ SSL connection test failed - but continuing anyway" | |
| fi | |
| - name: Download SOFA CLI binaries | |
| run: | | |
| echo "📥 Downloading SOFA CLI binaries..." | |
| mkdir -p bin | |
| DOWNLOAD_URL="https://github.com/headmin/sofa-core-cli/releases/download/v0.1.5" | |
| LINUX_ZIP="sofa-core-cli-x86_64-linux-binaries.zip" | |
| curl -L -f -o "$LINUX_ZIP" "$DOWNLOAD_URL/$LINUX_ZIP" | |
| unzip -o -j "$LINUX_ZIP" -d bin/ | |
| chmod +x bin/* | |
| rm "$LINUX_ZIP" | |
| echo "Downloaded binaries:" | |
| ls -la bin/ | |
| - name: Prepare data sources | |
| run: | | |
| echo "📋 Preparing data sources for CVE pipeline..." | |
| # Create necessary directories | |
| mkdir -p data/resources | |
| mkdir -p data/feeds | |
| mkdir -p data/cache/html | |
| mkdir -p logs | |
| # We need Apple security releases data for CVE extraction | |
| # Run gather and fetch stages to get the required data | |
| export PATH="./bin:$PATH" | |
| # Clear stale beta history to force fresh gathering | |
| echo "🧹 Clearing stale beta history to force fresh data collection..." | |
| rm -f data/resources/apple_beta_os_history.json || true | |
| rm -f data/resources/apple_beta_feed.json || true | |
| echo "✅ Cleared beta cache files" | |
| echo "" | |
| echo "Running gather stage to get Apple security data..." | |
| echo "🔧 Using --insecure flag and fixing path resolution..." | |
| # Fix path resolution by modifying existing gather.toml with absolute paths | |
| REPO_ROOT=$(pwd) | |
| mkdir -p config | |
| # Copy and modify the existing gather.toml to use absolute paths | |
| if [ -f "config/gather.toml" ]; then | |
| cp config/gather.toml config/gather.toml.backup | |
| sed 's|directory = "../data/resources"|directory = "/home/runner/work/sofa-2.0-beta/sofa-2.0-beta/data/resources"|g' config/gather.toml.backup | \ | |
| sed 's|directory = "../data/cache"|directory = "/home/runner/work/sofa-2.0-beta/sofa-2.0-beta/data/cache"|g' > config/gather.toml | |
| echo "🔧 Modified gather.toml with absolute paths" | |
| else | |
| echo "❌ No gather.toml found - binaries may use defaults" | |
| fi | |
| # Try with proper SSL first, fallback to insecure if needed | |
| if RUST_LOG=info ./bin/sofa-gather all --continue-on-error; then | |
| echo "✅ Direct gather with SSL completed" | |
| elif uv run --script scripts/sofa_pipeline.py run gather; then | |
| echo "✅ Gather completed" | |
| else | |
| echo "❌ Gather failed - continuing anyway" | |
| fi | |
| echo "" | |
| echo "🔍 Verifying gathered data files..." | |
| echo "✅ Data gathering completed" | |
| echo "Running fetch stage to get Apple security releases..." | |
| if uv run --script scripts/sofa_pipeline.py run fetch; then | |
| echo "✅ Fetch completed - ready for CVE processing" | |
| else | |
| echo "❌ Fetch failed - will try CVE pipeline anyway" | |
| fi | |
| - name: Run CVE pipeline (Direct Binary) | |
| env: | |
| VULNCHECK_API_KEY: ${{ secrets.VULNCHECK_API_KEY }} | |
| run: | | |
| echo "🔍 Running CVE pipeline with direct binary calls..." | |
| export PATH="./bin:$PATH" | |
| # For scheduled runs, always use API key if available | |
| # For manual runs, respect the use_api_key input | |
| USE_API_KEY="true" | |
| if [ "${{ github.event_name }}" == "workflow_dispatch" ] && [ "${{ github.event.inputs.use_api_key }}" == "false" ]; then | |
| USE_API_KEY="false" | |
| fi | |
| if [ "$USE_API_KEY" == "true" ]; then | |
| if [ -z "$VULNCHECK_API_KEY" ]; then | |
| echo "❌ VULNCHECK_API_KEY secret not set" | |
| echo "## ❌ CVE Pipeline Failed" >> $GITHUB_STEP_SUMMARY | |
| echo "VULNCHECK_API_KEY secret not configured" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| echo "Running with API key enrichment" | |
| else | |
| echo "ℹ️ Running without API key (extract and index only)" | |
| fi | |
| echo "Step 1: CVE extraction" | |
| echo "Checking for Apple security releases data..." | |
| if [ -f "data/resources/apple_security_releases.json" ]; then | |
| echo "✅ Apple security releases data found: $(wc -c < data/resources/apple_security_releases.json) bytes" | |
| else | |
| echo "⚠️ No Apple security releases data found - CVE extraction may fail" | |
| fi | |
| if ./bin/sofa-cve extract; then | |
| echo "✅ CVE extraction completed" | |
| else | |
| echo "❌ CVE extraction failed" | |
| echo "## ❌ CVE Pipeline Failed" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| if [ "$USE_API_KEY" == "true" ]; then | |
| echo "Step 2: CVE enrichment (with API key - light mode)" | |
| if ./bin/sofa-cve enrich --light --concurrency 10; then | |
| echo "✅ CVE enrichment completed (light mode)" | |
| else | |
| echo "❌ CVE enrichment failed" | |
| echo "## ❌ CVE Pipeline Failed" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| else | |
| echo "Step 2: Skipping CVE enrichment (no API key)" | |
| fi | |
| echo "Step 3: CVE indexing" | |
| if ./bin/sofa-cve index; then | |
| echo "✅ CVE indexing completed" | |
| if [ "$USE_API_KEY" == "true" ]; then | |
| echo "## ✅ CVE Pipeline Success (With Light Mode Enrichment)" >> $GITHUB_STEP_SUMMARY | |
| echo "CVE pipeline completed successfully with API key enrichment (light mode - CVSS scores)" >> $GITHUB_STEP_SUMMARY | |
| else | |
| echo "## ✅ CVE Pipeline Success (Extract Only)" >> $GITHUB_STEP_SUMMARY | |
| echo "CVE pipeline completed successfully (extract and index only)" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| else | |
| echo "❌ CVE indexing failed" | |
| echo "## ❌ CVE Pipeline Failed" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| - name: Show CVE results | |
| if: always() | |
| run: | | |
| echo "📊 CVE Pipeline Results..." | |
| echo "" | |
| echo "Generated CVE files:" | |
| find data/ -name "*cve*" -type f 2>/dev/null | head -10 || echo "No CVE files found" | |
| echo "" | |
| echo "Key output files:" | |
| if [ -f "data/resources/apple_cves_with_context.ndjson" ]; then | |
| CVE_COUNT=$(wc -l < data/resources/apple_cves_with_context.ndjson) | |
| echo "✅ CVE extraction: $CVE_COUNT CVEs extracted" | |
| if [ "$CVE_COUNT" -gt 0 ]; then | |
| echo "✅ CVE data appears valid (non-empty)" | |
| else | |
| echo "⚠️ CVE extraction file is empty" | |
| fi | |
| else | |
| echo "❌ No CVE extraction file found" | |
| fi | |
| if [ -f "data/resources/cve_enriched.ndjson" ]; then | |
| ENRICHED_COUNT=$(wc -l < data/resources/cve_enriched.ndjson) | |
| FILE_SIZE=$(stat -c%s data/resources/cve_enriched.ndjson 2>/dev/null || stat -f%z data/resources/cve_enriched.ndjson 2>/dev/null) | |
| echo "✅ CVE enrichment: $ENRICHED_COUNT CVEs enriched ($FILE_SIZE bytes)" | |
| if [ "$ENRICHED_COUNT" -gt 0 ] && [ "$FILE_SIZE" -gt 1000000 ]; then | |
| echo "✅ Enriched CVE data looks substantial (>1MB)" | |
| else | |
| echo "⚠️ Enriched CVE data seems small - may indicate issues" | |
| fi | |
| else | |
| echo "ℹ️ No CVE enrichment file (expected when API key not used)" | |
| fi | |
| echo "" | |
| echo "Sample CVE data:" | |
| if [ -f "data/resources/apple_cves_with_context.ndjson" ]; then | |
| echo "File size: $(wc -c < data/resources/apple_cves_with_context.ndjson) bytes" | |
| echo "First few lines:" | |
| head -3 data/resources/apple_cves_with_context.ndjson || echo "Could not read file" | |
| echo "" | |
| echo "Trying to parse first actual CVE (skipping metadata):" | |
| sed -n '2p' data/resources/apple_cves_with_context.ndjson | jq -r '.cve_id // .id // .cve // "No CVE ID found"' 2>/dev/null || echo "Could not parse JSON" | |
| echo "" | |
| echo "Available JSON keys in first CVE record:" | |
| sed -n '2p' data/resources/apple_cves_with_context.ndjson | jq -r 'keys[]' 2>/dev/null | head -5 || echo "Could not parse keys" | |
| echo "" | |
| echo "Testing for specific CVE (CVE-2025-43300):" | |
| if grep -q "CVE-2025-43300" data/resources/apple_cves_with_context.ndjson 2>/dev/null; then | |
| echo "✅ Found CVE-2025-43300 in extracted data" | |
| grep "CVE-2025-43300" data/resources/apple_cves_with_context.ndjson | jq -r '.cve_id // .id // .cve' 2>/dev/null || echo "Found but could not parse" | |
| else | |
| echo "ℹ️ CVE-2025-43300 not found (may not be in current Apple security releases)" | |
| fi | |
| else | |
| echo "❌ No CVE extraction file found" | |
| fi | |
| echo "" | |
| echo "Enriched CVE data:" | |
| if [ -f "data/resources/cve_enriched.ndjson" ]; then | |
| echo "File size: $(wc -c < data/resources/cve_enriched.ndjson) bytes" | |
| echo "Sample enriched CVE (skipping metadata if present):" | |
| sed -n '2p' data/resources/cve_enriched.ndjson | jq -r '.cve_id // .id // .cve // "No CVE ID found"' 2>/dev/null || echo "Could not parse enriched JSON" | |
| echo "" | |
| echo "Testing for CVE-2025-43300 in enriched data:" | |
| if grep -q "CVE-2025-43300" data/resources/cve_enriched.ndjson 2>/dev/null; then | |
| echo "✅ Found CVE-2025-43300 in enriched data" | |
| else | |
| echo "ℹ️ CVE-2025-43300 not found in enriched data" | |
| fi | |
| else | |
| echo "ℹ️ No enriched CVE file (expected when API key not used)" | |
| fi | |
| - name: Upload CVE artifacts | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: cve-data-${{ github.run_number }} | |
| path: | | |
| data/resources/apple_cves_with_context.ndjson | |
| data/resources/cve_enriched.ndjson | |
| data/resources/apple_security_releases.json | |
| data/resources/kev_catalog.json | |
| logs/ | |
| retention-days: 30 | |
| if-no-files-found: warn | |
| - name: Commit and push CVE data | |
| if: success() | |
| run: | | |
| echo "📝 Committing CVE data to repository..." | |
| echo "Event type: ${{ github.event_name }}" | |
| echo "Run number: ${{ github.run_number }}" | |
| # Configure git | |
| git config --local user.email "[email protected]" | |
| git config --local user.name "GitHub Action" | |
| # Fetch latest changes to minimize conflicts | |
| echo "📥 Fetching latest changes from remote..." | |
| git fetch origin main | |
| # Sort JSON files to ensure consistent ordering | |
| echo "Sorting JSON files for consistent commits..." | |
| # Check if jq is available | |
| if ! command -v jq &> /dev/null; then | |
| echo "⚠️ jq not found - installing..." | |
| sudo apt-get update && sudo apt-get install -y jq | |
| fi | |
| if [ -f "data/resources/apple_security_releases.json" ]; then | |
| echo "Sorting apple_security_releases.json..." | |
| if jq --sort-keys '.' data/resources/apple_security_releases.json > data/resources/apple_security_releases.json.tmp; then | |
| mv data/resources/apple_security_releases.json.tmp data/resources/apple_security_releases.json | |
| echo "✅ Sorted apple_security_releases.json" | |
| else | |
| echo "⚠️ Failed to sort apple_security_releases.json - keeping original" | |
| rm -f data/resources/apple_security_releases.json.tmp | |
| fi | |
| fi | |
| if [ -f "data/resources/kev_catalog.json" ]; then | |
| echo "Sorting kev_catalog.json..." | |
| if jq --sort-keys '.' data/resources/kev_catalog.json > data/resources/kev_catalog.json.tmp; then | |
| mv data/resources/kev_catalog.json.tmp data/resources/kev_catalog.json | |
| echo "✅ Sorted kev_catalog.json" | |
| else | |
| echo "⚠️ Failed to sort kev_catalog.json - keeping original" | |
| rm -f data/resources/kev_catalog.json.tmp | |
| fi | |
| fi | |
| # Comprehensive verification before commit | |
| echo "🔍 Pre-commit verification of gathered data..." | |
| echo "" | |
| echo "✅ Data verification completed" | |
| echo "" | |
| # Show what files exist | |
| echo "📁 Files to potentially commit:" | |
| ls -la data/resources/*.ndjson data/resources/*.json data/feeds/*.ndjson 2>/dev/null || echo "No generated files found" | |
| echo "" | |
| # Show specific file details with jq validation | |
| echo "📊 File details for commit:" | |
| for file in data/resources/apple_security_releases.json data/resources/kev_catalog.json data/resources/apple_beta_feed.json; do | |
| if [ -f "$file" ]; then | |
| size=$(wc -c < "$file") | |
| case "$(basename "$file")" in | |
| "apple_security_releases.json") | |
| releases=$(jq '.releases | length' "$file" 2>/dev/null || echo "parse_error") | |
| echo " • $file: $size bytes ($releases releases)" | |
| ;; | |
| "kev_catalog.json") | |
| vulns=$(jq '.vulnerabilities | length' "$file" 2>/dev/null || echo "parse_error") | |
| date=$(jq -r '.dateReleased // "unknown"' "$file" 2>/dev/null) | |
| echo " • $file: $size bytes ($vulns vulnerabilities, date: $date)" | |
| ;; | |
| "apple_beta_feed.json") | |
| betas=$(jq '.items | length' "$file" 2>/dev/null || echo "parse_error") | |
| created=$(jq -r '.created_at // "unknown"' "$file" 2>/dev/null) | |
| hash=$(jq -r '.UpdateHash // "unknown"' "$file" 2>/dev/null) | |
| echo " • $file: $size bytes ($betas beta releases)" | |
| echo " Created: $created" | |
| echo " Hash: ${hash:0:16}..." | |
| ;; | |
| esac | |
| else | |
| echo " • $file: ❌ NOT FOUND" | |
| fi | |
| done | |
| echo "" | |
| # Add generated files (now including beta if present) | |
| git add data/resources/apple_cves_with_context.ndjson || echo "No CVE extraction file to add" | |
| git add data/resources/cve_enriched.ndjson || echo "No CVE enrichment file to add" | |
| git add data/resources/apple_security_releases.json || echo "No security releases file to add" | |
| git add data/resources/kev_catalog.json || echo "No KEV catalog file to add" | |
| git add data/resources/apple_beta_feed.json || echo "No beta feed file to add" | |
| git add data/resources/gdmf_cached.json || echo "No GDMF cache file to add" | |
| git add data/resources/ipsw.json || echo "No IPSW file to add" | |
| git add data/resources/uma_catalog.json || echo "No UMA catalog file to add" | |
| echo "Git status after adding files:" | |
| git status --porcelain | |
| # Check if there are meaningful changes to commit | |
| echo "Checking for meaningful changes..." | |
| git diff --staged --stat | |
| if git diff --staged --quiet; then | |
| echo "ℹ️ No changes to commit (files are identical after sorting)" | |
| else | |
| echo "📝 Found changes to commit:" | |
| git diff --staged --name-only | |
| # Create commit with timestamp | |
| TIMESTAMP=$(date -u +"%Y-%m-%d %H:%M UTC") | |
| if [ "${{ github.event_name }}" == "schedule" ]; then | |
| COMMIT_TYPE="🔄 Scheduled CVE data update" | |
| else | |
| COMMIT_TYPE="🔄 Manual CVE data update" | |
| fi | |
| git commit -m "$COMMIT_TYPE - $TIMESTAMP | |
| - Updated Apple CVE database ($(wc -l < data/resources/apple_cves_with_context.ndjson 2>/dev/null || echo 0) CVEs) | |
| - Refreshed security releases data | |
| - Updated KEV catalog | |
| Generated by ${{ github.event_name }} workflow run #${{ github.run_number }}" | |
| # Handle potential conflicts by pulling and retrying | |
| echo "🔄 Pushing changes with conflict resolution..." | |
| if git push; then | |
| echo "✅ CVE data committed and pushed to repository" | |
| else | |
| echo "⚠️ Push failed - likely due to concurrent changes. Attempting to resolve..." | |
| # Pull latest changes with rebase to resolve conflicts | |
| echo "📥 Pulling latest changes and rebasing..." | |
| if git pull --rebase origin main; then | |
| echo "✅ Successfully rebased on latest changes" | |
| # Try pushing again | |
| if git push; then | |
| echo "✅ CVE data successfully pushed after rebase" | |
| else | |
| echo "❌ Push failed even after rebase - this may require manual intervention" | |
| echo "## ⚠️ CVE Pipeline Partial Success" >> $GITHUB_STEP_SUMMARY | |
| echo "CVE processing completed but push failed due to conflicts" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| else | |
| echo "❌ Rebase failed - there may be merge conflicts" | |
| echo "Showing git status:" | |
| git status | |
| echo "## ⚠️ CVE Pipeline Partial Success" >> $GITHUB_STEP_SUMMARY | |
| echo "CVE processing completed but push failed due to merge conflicts" >> $GITHUB_STEP_SUMMARY | |
| exit 1 | |
| fi | |
| fi | |
| fi | |
| - name: Debug CVE binary | |
| if: | | |
| github.event_name == 'schedule' || | |
| (github.event_name == 'workflow_dispatch' && github.event.inputs.debug_mode == 'true') | |
| run: | | |
| echo "🧪 Testing sofa-cve binary directly..." | |
| export PATH="./bin:$PATH" | |
| echo "sofa-cve help:" | |
| ./bin/sofa-cve --help || echo "Help command failed" | |
| echo "" | |
| echo "sofa-cve version:" | |
| ./bin/sofa-cve --version || echo "Version command failed" | |
| echo "" | |
| if [ -f "data/resources/apple_security_releases.json" ]; then | |
| echo "Testing direct CVE extract:" | |
| ./bin/sofa-cve extract || echo "Direct extract failed" | |
| else | |
| echo "No Apple security releases data found for direct test" | |
| fi |