Thanks for visiting codestin.com
Credit goes to GitHub.com

Skip to content

02 - CVE Enrichment Pipeline #98

02 - CVE Enrichment Pipeline

02 - CVE Enrichment Pipeline #98

name: 02 - CVE Enrichment Pipeline

on:
  schedule:
    # Run at 06:15 and 18:15 UTC, Monday through Friday
    - cron: '15 6,18 * * 1-5'
  workflow_dispatch:
    inputs:
      use_api_key:
        description: 'Use API key for CVE enrichment'
        type: boolean
        default: true
      debug_mode:
        description: 'Enable verbose debugging'
        type: boolean
        default: false

# The job commits and pushes refreshed data files, so the workflow token
# needs write access to repository contents (and nothing else).
permissions:
  contents: write

# Serialize runs on the same ref so two runs do not race each other when
# pushing data commits. A running pipeline is never cancelled mid-way.
concurrency:
  group: cve-enrichment-${{ github.ref }}
  cancel-in-progress: false

env:
  PYTHON_VERSION: '3.13'

jobs:
  test-cve-pipeline:
    name: CVE Pipeline
    runs-on: ubuntu-latest
    steps:
# Check out the repository so later steps can use scripts/, config/ and data/.
- name: Checkout repository
  uses: actions/checkout@v4
# Install uv; later steps invoke `uv run --script scripts/sofa_pipeline.py`.
- name: Set up UV
  uses: astral-sh/setup-uv@v3
  with:
    enable-cache: true
    # The cache is tied to the pipeline script named by this glob.
    cache-dependency-glob: 'scripts/sofa_pipeline.py'
# Add Apple's root CAs to the runner's system trust store and smoke-test a
# TLS connection to developer.apple.com. A failing smoke test is reported
# but does not fail the step.
- name: Install Apple Root Certificates
  run: |
    echo "🔐 Installing Apple root certificates for SSL validation..."

    # Update package database
    sudo apt-get update
    # ca-certificates and curl as before; openssl is required for the
    # certificate format conversion below.
    sudo apt-get install -y ca-certificates curl openssl

    CA_DIR="/usr/local/share/ca-certificates"
    WORK_DIR="$(mktemp -d)"

    # install_apple_ca LABEL URL TARGET_FILENAME
    # Downloads one certificate and installs it into $CA_DIR as PEM.
    # update-ca-certificates only trusts PEM-encoded *.crt files, whereas
    # Apple's .cer downloads are normally DER-encoded, so the file is
    # converted first. If DER parsing fails the input is re-read as PEM,
    # which also validates that the download really is a certificate.
    install_apple_ca() {
      local label="$1" url="$2" target="$3"
      local raw="$WORK_DIR/$target.cer"
      local pem="$WORK_DIR/$target.pem"

      echo "📥 Downloading $label..."
      curl -f -o "$raw" "$url"

      if ! openssl x509 -inform DER -in "$raw" -out "$pem" 2>/dev/null; then
        openssl x509 -inform PEM -in "$raw" -out "$pem"
      fi
      sudo install -m 0644 "$pem" "$CA_DIR/$target"
    }

    # Apple Root CA-G3 (current primary root)
    install_apple_ca "Apple Root CA-G3" \
      "https://www.apple.com/certificateauthority/AppleRootCA-G3.cer" \
      "apple-root-ca-g3.crt"

    # Apple Root CA (legacy support)
    install_apple_ca "Apple Root CA (legacy)" \
      "https://www.apple.com/certificateauthority/AppleComputerRootCertificate.cer" \
      "apple-root-ca.crt"

    rm -rf "$WORK_DIR"

    # Update system certificate store
    echo "🔄 Updating system certificate store..."
    sudo update-ca-certificates

    # Verify installation
    echo "✅ Apple certificates installed:"
    ls -la "$CA_DIR"/apple-*

    # Test SSL connection to Apple Developer (informational only)
    echo "🧪 Testing SSL connection to developer.apple.com..."
    if curl -I --max-time 10 "https://developer.apple.com/news/releases/" >/dev/null 2>&1; then
      echo "✅ SSL connection to Apple Developer successful"
    else
      echo "⚠️ SSL connection test failed - but continuing anyway"
    fi
# Fetch the pinned sofa-core-cli release archive and unpack its executables
# (flattened, without directory structure) into ./bin.
- name: Download SOFA CLI binaries
  run: |
    echo "📥 Downloading SOFA CLI binaries..."
    mkdir -p bin

    RELEASE_BASE="https://github.com/headmin/sofa-core-cli/releases/download/v0.1.5"
    ARCHIVE="sofa-core-cli-x86_64-linux-binaries.zip"

    # -f makes curl fail on HTTP errors instead of saving an error page.
    curl -L -f -o "$ARCHIVE" "$RELEASE_BASE/$ARCHIVE"
    # -j drops archive paths, -o overwrites existing files.
    unzip -o -j "$ARCHIVE" -d bin/
    chmod +x bin/*
    rm "$ARCHIVE"

    echo "Downloaded binaries:"
    ls -la bin/
# Run the gather and fetch stages so the CVE step has Apple security
# release data to work on. Both stages are best-effort: failures are
# logged and the job continues.
- name: Prepare data sources
  run: |
    echo "📋 Preparing data sources for CVE pipeline..."

    # Create necessary directories
    mkdir -p data/resources data/feeds data/cache/html logs

    # We need Apple security releases data for CVE extraction
    # Run gather and fetch stages to get the required data
    export PATH="./bin:$PATH"

    # Clear stale beta history to force fresh gathering
    echo "🧹 Clearing stale beta history to force fresh data collection..."
    rm -f data/resources/apple_beta_os_history.json || true
    rm -f data/resources/apple_beta_feed.json || true
    echo "✅ Cleared beta cache files"
    echo ""

    echo "Running gather stage to get Apple security data..."
    echo "🔧 Fixing path resolution in gather.toml..."

    # Rewrite the relative directories in gather.toml to absolute paths
    # rooted at the actual checkout, so the workflow keeps working in
    # forks and renamed repositories.
    REPO_ROOT=$(pwd)
    mkdir -p config
    if [ -f "config/gather.toml" ]; then
      cp config/gather.toml config/gather.toml.backup
      sed \
        -e "s|directory = \"../data/resources\"|directory = \"$REPO_ROOT/data/resources\"|g" \
        -e "s|directory = \"../data/cache\"|directory = \"$REPO_ROOT/data/cache\"|g" \
        config/gather.toml.backup > config/gather.toml
      echo "🔧 Modified gather.toml with absolute paths"
    else
      echo "❌ No gather.toml found - binaries may use defaults"
    fi

    # Try the gather binary directly first; fall back to the Python
    # pipeline wrapper if the direct call fails.
    GATHER_OK="false"
    if RUST_LOG=info ./bin/sofa-gather all --continue-on-error; then
      echo "✅ Direct gather with SSL completed"
      GATHER_OK="true"
    elif uv run --script scripts/sofa_pipeline.py run gather; then
      echo "✅ Gather completed"
      GATHER_OK="true"
    else
      echo "❌ Gather failed - continuing anyway"
    fi
    echo ""

    echo "🔍 Verifying gathered data files..."
    ls -la data/resources/ 2>/dev/null || echo "No gathered files found"
    # Only report success when one of the gather attempts succeeded.
    if [ "$GATHER_OK" = "true" ]; then
      echo "✅ Data gathering completed"
    else
      echo "⚠️ Data gathering did not complete - later stages may lack data"
    fi

    echo "Running fetch stage to get Apple security releases..."
    if uv run --script scripts/sofa_pipeline.py run fetch; then
      echo "✅ Fetch completed - ready for CVE processing"
    else
      echo "❌ Fetch failed - will try CVE pipeline anyway"
    fi
# Run sofa-cve extract -> (optional) enrich -> index. Any failing stage
# writes a failure heading to the step summary and fails the step.
- name: Run CVE pipeline (Direct Binary)
  env:
    VULNCHECK_API_KEY: ${{ secrets.VULNCHECK_API_KEY }}
    # Passed through the environment instead of being interpolated into the
    # script text; empty for scheduled runs.
    USE_API_KEY_INPUT: ${{ github.event.inputs.use_api_key }}
  run: |
    echo "🔍 Running CVE pipeline with direct binary calls..."
    export PATH="./bin:$PATH"

    # fail_pipeline [DETAIL]
    # Record the failure (plus an optional detail line) in the step summary
    # and abort the step.
    fail_pipeline() {
      echo "## ❌ CVE Pipeline Failed" >> "$GITHUB_STEP_SUMMARY"
      if [ -n "${1:-}" ]; then
        echo "$1" >> "$GITHUB_STEP_SUMMARY"
      fi
      exit 1
    }

    # Scheduled runs always use the API key (and fail if it is missing).
    # Manual runs respect the use_api_key input.
    USE_API_KEY="true"
    if [ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ] && [ "$USE_API_KEY_INPUT" == "false" ]; then
      USE_API_KEY="false"
    fi

    if [ "$USE_API_KEY" == "true" ]; then
      if [ -z "$VULNCHECK_API_KEY" ]; then
        echo "❌ VULNCHECK_API_KEY secret not set"
        fail_pipeline "VULNCHECK_API_KEY secret not configured"
      fi
      echo "Running with API key enrichment"
    else
      echo "ℹ️ Running without API key (extract and index only)"
    fi

    echo "Step 1: CVE extraction"
    echo "Checking for Apple security releases data..."
    if [ -f "data/resources/apple_security_releases.json" ]; then
      echo "✅ Apple security releases data found: $(wc -c < data/resources/apple_security_releases.json) bytes"
    else
      echo "⚠️ No Apple security releases data found - CVE extraction may fail"
    fi
    if ./bin/sofa-cve extract; then
      echo "✅ CVE extraction completed"
    else
      echo "❌ CVE extraction failed"
      fail_pipeline
    fi

    if [ "$USE_API_KEY" == "true" ]; then
      echo "Step 2: CVE enrichment (with API key - light mode)"
      if ./bin/sofa-cve enrich --light --concurrency 10; then
        echo "✅ CVE enrichment completed (light mode)"
      else
        echo "❌ CVE enrichment failed"
        fail_pipeline
      fi
    else
      echo "Step 2: Skipping CVE enrichment (no API key)"
    fi

    echo "Step 3: CVE indexing"
    if ./bin/sofa-cve index; then
      echo "✅ CVE indexing completed"
      if [ "$USE_API_KEY" == "true" ]; then
        echo "## ✅ CVE Pipeline Success (With Light Mode Enrichment)" >> "$GITHUB_STEP_SUMMARY"
        echo "CVE pipeline completed successfully with API key enrichment (light mode - CVSS scores)" >> "$GITHUB_STEP_SUMMARY"
      else
        echo "## ✅ CVE Pipeline Success (Extract Only)" >> "$GITHUB_STEP_SUMMARY"
        echo "CVE pipeline completed successfully (extract and index only)" >> "$GITHUB_STEP_SUMMARY"
      fi
    else
      echo "❌ CVE indexing failed"
      fail_pipeline
    fi
# Print a human-readable report about the extraction and enrichment output
# files. Runs even when earlier steps failed; purely informational.
- name: Show CVE results
  if: always()
  run: |
    EXTRACTED="data/resources/apple_cves_with_context.ndjson"
    ENRICHED="data/resources/cve_enriched.ndjson"
    PROBE_CVE="CVE-2025-43300"

    echo "📊 CVE Pipeline Results..."
    echo ""
    echo "Generated CVE files:"
    # Capture the listing first: the exit status of `find | head` is
    # head's, so an `|| echo` fallback on the pipeline would never run.
    CVE_FILES=$(find data/ -name "*cve*" -type f 2>/dev/null | head -10)
    if [ -n "$CVE_FILES" ]; then
      echo "$CVE_FILES"
    else
      echo "No CVE files found"
    fi
    echo ""

    echo "Key output files:"
    if [ -f "$EXTRACTED" ]; then
      CVE_COUNT=$(wc -l < "$EXTRACTED")
      echo "✅ CVE extraction: $CVE_COUNT CVEs extracted"
      if [ "$CVE_COUNT" -gt 0 ]; then
        echo "✅ CVE data appears valid (non-empty)"
      else
        echo "⚠️ CVE extraction file is empty"
      fi
    else
      echo "❌ No CVE extraction file found"
    fi

    if [ -f "$ENRICHED" ]; then
      ENRICHED_COUNT=$(wc -l < "$ENRICHED")
      # wc -c always yields a number here, so the numeric test below
      # cannot break on an empty value.
      FILE_SIZE=$(wc -c < "$ENRICHED")
      echo "✅ CVE enrichment: $ENRICHED_COUNT CVEs enriched ($FILE_SIZE bytes)"
      if [ "$ENRICHED_COUNT" -gt 0 ] && [ "$FILE_SIZE" -gt 1000000 ]; then
        echo "✅ Enriched CVE data looks substantial (>1MB)"
      else
        echo "⚠️ Enriched CVE data seems small - may indicate issues"
      fi
    else
      echo "ℹ️ No CVE enrichment file (expected when API key not used)"
    fi
    echo ""

    echo "Sample CVE data:"
    if [ -f "$EXTRACTED" ]; then
      echo "File size: $(wc -c < "$EXTRACTED") bytes"
      echo "First few lines:"
      head -3 "$EXTRACTED" || echo "Could not read file"
      echo ""

      # Line 2 is sampled because line 1 is treated as metadata.
      echo "Trying to parse first actual CVE (skipping metadata):"
      sed -n '2p' "$EXTRACTED" | jq -r '.cve_id // .id // .cve // "No CVE ID found"' 2>/dev/null || echo "Could not parse JSON"
      echo ""

      echo "Available JSON keys in first CVE record:"
      RECORD_KEYS=$(sed -n '2p' "$EXTRACTED" | jq -r 'keys[]' 2>/dev/null | head -5)
      if [ -n "$RECORD_KEYS" ]; then
        echo "$RECORD_KEYS"
      else
        echo "Could not parse keys"
      fi
      echo ""

      echo "Testing for specific CVE ($PROBE_CVE):"
      if grep -q "$PROBE_CVE" "$EXTRACTED" 2>/dev/null; then
        echo "✅ Found $PROBE_CVE in extracted data"
        grep "$PROBE_CVE" "$EXTRACTED" | jq -r '.cve_id // .id // .cve' 2>/dev/null || echo "Found but could not parse"
      else
        echo "ℹ️ $PROBE_CVE not found (may not be in current Apple security releases)"
      fi
    else
      echo "❌ No CVE extraction file found"
    fi
    echo ""

    echo "Enriched CVE data:"
    if [ -f "$ENRICHED" ]; then
      echo "File size: $(wc -c < "$ENRICHED") bytes"
      echo "Sample enriched CVE (skipping metadata if present):"
      sed -n '2p' "$ENRICHED" | jq -r '.cve_id // .id // .cve // "No CVE ID found"' 2>/dev/null || echo "Could not parse enriched JSON"
      echo ""
      echo "Testing for $PROBE_CVE in enriched data:"
      if grep -q "$PROBE_CVE" "$ENRICHED" 2>/dev/null; then
        echo "✅ Found $PROBE_CVE in enriched data"
      else
        echo "ℹ️ $PROBE_CVE not found in enriched data"
      fi
    else
      echo "ℹ️ No enriched CVE file (expected when API key not used)"
    fi
# Keep the pipeline outputs and logs as a run artifact, even for failed
# runs. Missing files only produce a warning.
- name: Upload CVE artifacts
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: cve-data-${{ github.run_number }}
    path: |
      data/resources/apple_cves_with_context.ndjson
      data/resources/cve_enriched.ndjson
      data/resources/apple_security_releases.json
      data/resources/kev_catalog.json
      logs/
    retention-days: 30
    if-no-files-found: warn
# Normalize the generated JSON, stage the data files and, if anything
# changed, commit and push them. A rejected push is retried once after
# rebasing onto the remote branch; if that also fails the step fails.
- name: Commit and push CVE data
  if: success()
  run: |
    echo "📝 Committing CVE data to repository..."
    echo "Event type: $GITHUB_EVENT_NAME"
    echo "Run number: $GITHUB_RUN_NUMBER"

    RES_DIR="data/resources"
    # Sync against the branch this run was started from instead of
    # assuming it is always "main".
    TARGET_BRANCH="${GITHUB_REF_NAME:-main}"

    # Configure git
    # NOTE(review): the address in the scraped source was mangled by email
    # obfuscation; this is the address conventionally paired with the
    # "GitHub Action" committer name - confirm against the repository.
    git config --local user.email "action@github.com"
    git config --local user.name "GitHub Action"

    # Fetch latest changes to minimize conflicts
    echo "📥 Fetching latest changes from remote..."
    git fetch origin "$TARGET_BRANCH"

    # Sort JSON files to ensure consistent ordering
    echo "Sorting JSON files for consistent commits..."
    if ! command -v jq &> /dev/null; then
      echo "⚠️ jq not found - installing..."
      sudo apt-get update && sudo apt-get install -y jq
    fi

    # sort_json FILE
    # Rewrite FILE with sorted keys; on failure keep the original file.
    sort_json() {
      local file="$1" label
      label="$(basename "$file")"
      [ -f "$file" ] || return 0
      echo "Sorting $label..."
      if jq --sort-keys '.' "$file" > "$file.tmp"; then
        mv "$file.tmp" "$file"
        echo "✅ Sorted $label"
      else
        echo "⚠️ Failed to sort $label - keeping original"
        rm -f "$file.tmp"
      fi
    }
    sort_json "$RES_DIR/apple_security_releases.json"
    sort_json "$RES_DIR/kev_catalog.json"

    echo "🔍 Pre-commit verification of gathered data..."
    echo ""

    # Show what files exist
    echo "📁 Files to potentially commit:"
    ls -la data/resources/*.ndjson data/resources/*.json data/feeds/*.ndjson 2>/dev/null || echo "No generated files found"
    echo ""

    # Show specific file details with jq validation
    echo "📊 File details for commit:"
    for file in "$RES_DIR/apple_security_releases.json" "$RES_DIR/kev_catalog.json" "$RES_DIR/apple_beta_feed.json"; do
      if [ -f "$file" ]; then
        size=$(wc -c < "$file")
        case "$(basename "$file")" in
          "apple_security_releases.json")
            releases=$(jq '.releases | length' "$file" 2>/dev/null || echo "parse_error")
            echo " • $file: $size bytes ($releases releases)"
            ;;
          "kev_catalog.json")
            vulns=$(jq '.vulnerabilities | length' "$file" 2>/dev/null || echo "parse_error")
            date=$(jq -r '.dateReleased // "unknown"' "$file" 2>/dev/null)
            echo " • $file: $size bytes ($vulns vulnerabilities, date: $date)"
            ;;
          "apple_beta_feed.json")
            betas=$(jq '.items | length' "$file" 2>/dev/null || echo "parse_error")
            created=$(jq -r '.created_at // "unknown"' "$file" 2>/dev/null)
            hash=$(jq -r '.UpdateHash // "unknown"' "$file" 2>/dev/null)
            echo " • $file: $size bytes ($betas beta releases)"
            echo " Created: $created"
            echo " Hash: ${hash:0:16}..."
            ;;
        esac
      else
        echo " • $file: ❌ NOT FOUND"
      fi
    done
    echo "✅ Data verification completed"
    echo ""

    # stage FILE MESSAGE
    # Stage FILE if possible; otherwise print MESSAGE and carry on.
    stage() {
      git add "$1" || echo "$2"
    }
    stage "$RES_DIR/apple_cves_with_context.ndjson" "No CVE extraction file to add"
    stage "$RES_DIR/cve_enriched.ndjson" "No CVE enrichment file to add"
    stage "$RES_DIR/apple_security_releases.json" "No security releases file to add"
    stage "$RES_DIR/kev_catalog.json" "No KEV catalog file to add"
    stage "$RES_DIR/apple_beta_feed.json" "No beta feed file to add"
    stage "$RES_DIR/gdmf_cached.json" "No GDMF cache file to add"
    stage "$RES_DIR/ipsw.json" "No IPSW file to add"
    stage "$RES_DIR/uma_catalog.json" "No UMA catalog file to add"

    echo "Git status after adding files:"
    git status --porcelain

    # Check if there are meaningful changes to commit
    echo "Checking for meaningful changes..."
    git diff --staged --stat
    if git diff --staged --quiet; then
      echo "ℹ️ No changes to commit (files are identical after sorting)"
      exit 0
    fi

    echo "📝 Found changes to commit:"
    git diff --staged --name-only

    # Create commit with timestamp
    TIMESTAMP=$(date -u +"%Y-%m-%d %H:%M UTC")
    if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then
      COMMIT_TYPE="🔄 Scheduled CVE data update"
    else
      COMMIT_TYPE="🔄 Manual CVE data update"
    fi
    CVE_TOTAL=$(wc -l < "$RES_DIR/apple_cves_with_context.ndjson" 2>/dev/null || echo 0)
    COMMIT_BODY=$(printf '%s\n' \
      "- Updated Apple CVE database ($CVE_TOTAL CVEs)" \
      "- Refreshed security releases data" \
      "- Updated KEV catalog" \
      "Generated by $GITHUB_EVENT_NAME workflow run #$GITHUB_RUN_NUMBER")
    # Two -m flags give a subject line, a blank line, then the body.
    git commit -m "$COMMIT_TYPE - $TIMESTAMP" -m "$COMMIT_BODY"

    # partial_failure DETAIL
    # Record that processing worked but publishing did not, then fail.
    partial_failure() {
      echo "## ⚠️ CVE Pipeline Partial Success" >> "$GITHUB_STEP_SUMMARY"
      echo "$1" >> "$GITHUB_STEP_SUMMARY"
      exit 1
    }

    # Handle potential conflicts by pulling and retrying
    echo "🔄 Pushing changes with conflict resolution..."
    if git push; then
      echo "✅ CVE data committed and pushed to repository"
      exit 0
    fi

    echo "⚠️ Push failed - likely due to concurrent changes. Attempting to resolve..."
    echo "📥 Pulling latest changes and rebasing..."
    # --autostash: earlier steps leave unstaged modifications behind (for
    # example the rewritten config/gather.toml), and a plain rebase
    # refuses to start on a dirty work tree.
    if git pull --rebase --autostash origin "$TARGET_BRANCH"; then
      echo "✅ Successfully rebased on latest changes"
      if git push; then
        echo "✅ CVE data successfully pushed after rebase"
      else
        echo "❌ Push failed even after rebase - this may require manual intervention"
        partial_failure "CVE processing completed but push failed due to conflicts"
      fi
    else
      echo "❌ Rebase failed - there may be merge conflicts"
      echo "Showing git status:"
      git status
      partial_failure "CVE processing completed but push failed due to merge conflicts"
    fi
# Diagnostic step: runs on every scheduled run, and on manual runs only
# when debug_mode is enabled. Command failures are reported, not fatal.
- name: Debug CVE binary
  if: >-
    github.event_name == 'schedule' ||
    (github.event_name == 'workflow_dispatch' && github.event.inputs.debug_mode == 'true')
  run: |
    echo "🧪 Testing sofa-cve binary directly..."
    export PATH="./bin:$PATH"
    SOFA_CVE="./bin/sofa-cve"

    echo "sofa-cve help:"
    "$SOFA_CVE" --help || echo "Help command failed"
    echo ""

    echo "sofa-cve version:"
    "$SOFA_CVE" --version || echo "Version command failed"
    echo ""

    # Only attempt an extraction when its input data is present.
    if [ ! -f "data/resources/apple_security_releases.json" ]; then
      echo "No Apple security releases data found for direct test"
    else
      echo "Testing direct CVE extract:"
      "$SOFA_CVE" extract || echo "Direct extract failed"
    fi