# Dependency Security Check Workflow
# Checks Python dependencies for security vulnerabilities and supply chain risks
#
# Flow: pip-audit scan -> diff requirements_all.txt and manifest.json files
# against the PR base branch -> run scripts/check_package_safety.py on added
# requirement lines -> post/update one combined PR comment -> gate the PR on
# the scan results and the `dependencies-reviewed` label.

name: Dependency Security Check

on:
  pull_request:
    # "labeled"/"unlabeled" are listed explicitly so that adding or removing the
    # `dependencies-reviewed` label re-runs the approval gate. With the default
    # activity types (opened, synchronize, reopened) the check would stay red
    # until the next push.
    types:
      - opened
      - synchronize
      - reopened
      - labeled
      - unlabeled
    paths:
      - 'requirements_all.txt'
      - '**/manifest.json'
    branches:
      - stable
      - dev

permissions:
  contents: read
  pull-requests: write # Needed to post comments

jobs:
  security-check:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code from GitHub
        uses: actions/checkout@v6
        with:
          fetch-depth: 0 # Need full history for diff

      - name: Set up Python
        uses: actions/setup-python@v6.2.0
        with:
          python-version: "3.12"

      - name: Install security tools
        run: |
          pip install pip-audit

      # Step 1: Run pip-audit for known vulnerabilities
      # The scan result is recorded in the `status` output; the step itself
      # always succeeds so that the report can still be posted.
      - name: Run pip-audit on all requirements
        id: pip_audit
        continue-on-error: true
        run: |
          echo "## 🔍 Vulnerability Scan Results" > audit_report.md
          echo "" >> audit_report.md

          if pip-audit -r requirements_all.txt --desc --format=markdown >> audit_report.md 2>&1; then
            echo "status=pass" >> "$GITHUB_OUTPUT"
            echo "✅ No known vulnerabilities found" >> audit_report.md
          else
            echo "status=fail" >> "$GITHUB_OUTPUT"
            echo "" >> audit_report.md
            echo "⚠️ **Vulnerabilities detected! Please review the findings above.**" >> audit_report.md
          fi

          cat audit_report.md

      # Step 2: Detect new or changed dependencies
      - name: Detect dependency changes
        id: deps_check
        env:
          # Base branch (dev or stable). Passed through the environment instead
          # of being interpolated into the script text.
          BASE_BRANCH: ${{ github.base_ref }}
        run: |
          # Check for changes in requirements_all.txt
          if git diff "origin/${BASE_BRANCH}...HEAD" -- requirements_all.txt > /dev/null 2>&1; then
            # Extract added lines (new or modified dependencies)
            git diff "origin/${BASE_BRANCH}...HEAD" -- requirements_all.txt | \
              grep "^+" | grep -v "^+++" | sed 's/^+//' > new_deps_raw.txt || true

            # Also check for version changes (lines that were modified)
            git diff "origin/${BASE_BRANCH}...HEAD" -- requirements_all.txt | \
              grep "^-" | grep -v "^---" | sed 's/^-//' > old_deps_raw.txt || true

            if [ -s new_deps_raw.txt ]; then
              echo "has_changes=true" >> "$GITHUB_OUTPUT"
              echo "## 📦 Dependency Changes Detected" > deps_report.md
              echo "" >> deps_report.md
              echo "The following dependencies were added or modified:" >> deps_report.md
              echo "" >> deps_report.md
              echo '```diff' >> deps_report.md
              git diff "origin/${BASE_BRANCH}...HEAD" -- requirements_all.txt >> deps_report.md
              echo '```' >> deps_report.md
              echo "" >> deps_report.md

              # Extract just package names for safety check
              grep -v "^#" new_deps_raw.txt | grep -v "^$" > new_deps.txt || true

              if [ -s new_deps.txt ]; then
                echo "New/modified packages to review:" >> deps_report.md
                # -r keeps backslashes in requirement lines literal
                while IFS= read -r line; do
                  echo "- \`$line\`" >> deps_report.md
                done < new_deps.txt
              fi
            else
              echo "has_changes=false" >> "$GITHUB_OUTPUT"
              echo "No dependency changes detected in requirements_all.txt" > deps_report.md
            fi
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "No dependency changes detected" > deps_report.md
          fi

          cat deps_report.md

      # Step 3: Check manifest.json changes
      - name: Check provider manifest changes
        id: manifest_check
        env:
          BASE_BRANCH: ${{ github.base_ref }}
        run: |
          # Find all changed manifest.json files
          CHANGED_MANIFESTS=$(git diff --name-only "origin/${BASE_BRANCH}...HEAD" | grep "manifest.json" || true)

          if [ -n "$CHANGED_MANIFESTS" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "## 📋 Provider Manifest Changes" > manifest_report.md
            echo "" >> manifest_report.md
            echo "The following provider manifests were modified:" >> manifest_report.md
            echo "" >> manifest_report.md

            for manifest in $CHANGED_MANIFESTS; do
              echo "### \`$manifest\`" >> manifest_report.md
              echo "" >> manifest_report.md

              # Check if there are requirements changes in the manifest
              if git diff "origin/${BASE_BRANCH}...HEAD" -- "$manifest" | grep -i "requirements" > /dev/null 2>&1; then
                echo "Requirements section modified:" >> manifest_report.md
                echo '```diff' >> manifest_report.md
                git diff "origin/${BASE_BRANCH}...HEAD" -- "$manifest" | grep -A 10 -B 2 "requirements" >> manifest_report.md || true
                echo '```' >> manifest_report.md
              else
                echo "No requirements changes detected" >> manifest_report.md
              fi
              echo "" >> manifest_report.md
            done
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "No provider manifest changes detected" > manifest_report.md
          fi

          cat manifest_report.md

      # Step 4: Run package safety check on new dependencies
      - name: Check new package safety
        id: safety_check
        if: steps.deps_check.outputs.has_changes == 'true'
        continue-on-error: true
        run: |
          echo "## 🛡️ Supply Chain Security Check" > safety_report.md
          echo "" >> safety_report.md

          if [ -f new_deps.txt ] && [ -s new_deps.txt ]; then
            # Run our custom safety check script.
            # `run:` steps execute under `bash -e`, so a bare failing command
            # would abort the step before the exit code could be inspected and
            # `status` would never be written. Capture it with `||` instead.
            # Script exit codes: 0 = pass, 1 = medium risk, 2 = high risk.
            SAFETY_EXIT=0
            python scripts/check_package_safety.py new_deps.txt > safety_output.txt 2>&1 || SAFETY_EXIT=$?

            cat safety_output.txt >> safety_report.md
            echo "" >> safety_report.md

            if [ "$SAFETY_EXIT" -eq 2 ]; then
              echo "status=high_risk" >> "$GITHUB_OUTPUT"
              echo "" >> safety_report.md
              echo "⚠️ **HIGH RISK PACKAGES DETECTED**" >> safety_report.md
              echo "Manual security review is **required** before merging this PR." >> safety_report.md
            elif [ "$SAFETY_EXIT" -eq 1 ]; then
              echo "status=medium_risk" >> "$GITHUB_OUTPUT"
              echo "" >> safety_report.md
              echo "⚠️ **MEDIUM RISK PACKAGES DETECTED**" >> safety_report.md
              echo "Please review the warnings above before merging." >> safety_report.md
            else
              echo "status=pass" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "No new dependencies to check" >> safety_report.md
            echo "status=pass" >> "$GITHUB_OUTPUT"
          fi

          cat safety_report.md

      # Step 5: Combine all reports and post as PR comment
      - name: Create combined security report
        id: report
        run: |
          echo "# 🔒 Dependency Security Report" > security_report.md
          echo "" >> security_report.md
          echo "Automated security check for dependency changes in this PR." >> security_report.md
          echo "" >> security_report.md
          echo "---" >> security_report.md
          echo "" >> security_report.md

          # Add all report sections
          cat audit_report.md >> security_report.md
          echo "" >> security_report.md
          echo "---" >> security_report.md
          echo "" >> security_report.md

          cat deps_report.md >> security_report.md
          echo "" >> security_report.md

          if [ -f manifest_report.md ]; then
            echo "---" >> security_report.md
            echo "" >> security_report.md
            cat manifest_report.md >> security_report.md
            echo "" >> security_report.md
          fi

          # safety_report.md only exists when Step 4 ran
          if [ -f safety_report.md ]; then
            echo "---" >> security_report.md
            echo "" >> security_report.md
            cat safety_report.md >> security_report.md
            echo "" >> security_report.md
          fi

          echo "---" >> security_report.md
          echo "" >> security_report.md
          echo "## 📋 Review Checklist" >> security_report.md
          echo "" >> security_report.md

          if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then
            echo "Before merging this PR, please ensure:" >> security_report.md
            echo "" >> security_report.md
            echo "- [ ] All new dependencies are from trusted sources" >> security_report.md
            echo "- [ ] Package names are spelled correctly (check for typosquatting)" >> security_report.md
            echo "- [ ] Dependencies have active maintenance and community" >> security_report.md
            echo "- [ ] No known vulnerabilities are present" >> security_report.md
            echo "- [ ] Licenses are compatible with the project" >> security_report.md
            echo "" >> security_report.md
            echo "Once reviewed, a maintainer should add the **\`dependencies-reviewed\`** label to this PR." >> security_report.md
          else
            echo "✅ No dependency changes detected in this PR." >> security_report.md
          fi

          cat security_report.md

      # Step 6: Post comment to PR
      - name: Post security report to PR
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const report = fs.readFileSync('security_report.md', 'utf8');

            // Find existing bot comment. Paginate: a single listComments call
            // only returns the first page, so on long threads the earlier
            // report would be missed and a duplicate comment posted.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('🔒 Dependency Security Report')
            );

            if (botComment) {
              // Update existing comment
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: report
              });
            } else {
              // Create new comment
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: report
              });
            }

      # Step 7: Check for approval label (if dependencies changed)
      - name: Check for security review approval
        if: |
          (steps.deps_check.outputs.has_changes == 'true' ||
           steps.manifest_check.outputs.has_changes == 'true')
        uses: actions/github-script@v7
        with:
          script: |
            const labels = context.payload.pull_request.labels.map(l => l.name);
            const hasReviewLabel = labels.includes('dependencies-reviewed');
            const isHighRisk = '${{ steps.safety_check.outputs.status }}' === 'high_risk';
            const hasFailed = '${{ steps.pip_audit.outputs.status }}' === 'fail';

            if (isHighRisk) {
              core.setFailed('🔴 HIGH RISK dependencies detected! This PR requires thorough security review before merging.');
            } else if (hasFailed) {
              core.setFailed('🔴 Known vulnerabilities detected! Please address the security issues above.');
            } else if (!hasReviewLabel) {
              core.setFailed('⚠️ Dependency changes detected. A maintainer must add the "dependencies-reviewed" label after security review.');
            } else {
              core.info('✅ Security review approved via "dependencies-reviewed" label');
            }

      # Step 8: Fail the check if high-risk or vulnerabilities found
      - name: Final security status
        if: always()
        run: |
          if [ "${{ steps.pip_audit.outputs.status }}" == "fail" ]; then
            echo "❌ Known vulnerabilities found!"
            exit 1
          fi

          if [ "${{ steps.safety_check.outputs.status }}" == "high_risk" ]; then
            echo "❌ High-risk packages detected!"
            exit 1
          fi

          if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then
            echo "⚠️ Dependency changes require review"
            # Don't fail here - the label check above will handle it
          fi

          echo "✅ Security checks completed"
A maintainer must add the "dependencies-reviewed" label after security review.'); + } else { + core.info('✅ Security review approved via "dependencies-reviewed" label'); + } + + # Step 8: Fail the check if high-risk or vulnerabilities found + - name: Final security status + if: always() + run: | + if [ "${{ steps.pip_audit.outputs.status }}" == "fail" ]; then + echo "❌ Known vulnerabilities found!" + exit 1 + fi + + if [ "${{ steps.safety_check.outputs.status }}" == "high_risk" ]; then + echo "❌ High-risk packages detected!" + exit 1 + fi + + if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then + echo "⚠️ Dependency changes require review" + # Don't fail here - the label check above will handle it + fi + + echo "✅ Security checks completed" diff --git a/scripts/check_package_safety.py b/scripts/check_package_safety.py new file mode 100755 index 00000000..6fb88fe5 --- /dev/null +++ b/scripts/check_package_safety.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +"""Check PyPI package metadata for security and supply chain concerns. + +This script checks new or updated Python dependencies for suspicious indicators +that might suggest supply chain attacks or unmaintained packages. +""" + +# ruff: noqa: T201, S310, RUF001, PLR0915 +import json +import re +import sys +import urllib.request +from datetime import datetime +from typing import Any + + +def parse_requirement(line: str) -> str | None: + """Extract package name from a requirement line. + + :param line: A line from requirements.txt (e.g., "package==1.0.0" or "package>=1.0") + """ + line = line.strip() + if not line or line.startswith("#"): + return None + + # Handle various requirement formats + # package==1.0.0, package>=1.0, package[extra]>=1.0, etc. 
+ match = re.match(r"^([a-zA-Z0-9_-]+)", line) + if match: + return match.group(1).lower() + return None + + +def get_pypi_metadata(package_name: str) -> dict[str, Any] | None: + """Fetch package metadata from PyPI JSON API. + + :param package_name: The name of the package to check. + """ + url = f"https://pypi.org/pypi/{package_name}/json" + + try: + with urllib.request.urlopen(url, timeout=10) as response: + return json.loads(response.read()) + except urllib.error.HTTPError as err: + if err.code == 404: + print(f"❌ Package '{package_name}' not found on PyPI") + else: + print(f"⚠️ Error fetching metadata for '{package_name}': {err}") + return None + except Exception as err: + print(f"⚠️ Error fetching metadata for '{package_name}': {err}") + return None + + +def check_package(package_name: str) -> dict[str, Any]: + """Check a single package for security concerns. + + :param package_name: The name of the package to check. + """ + data = get_pypi_metadata(package_name) + + if not data: + return { + "name": package_name, + "error": "Could not fetch package metadata", + "risk_level": "unknown", + "warnings": [], + } + + info = data.get("info", {}) + releases = data.get("releases", {}) + + # Get package age + upload_times = [] + for release_files in releases.values(): + if release_files: + for file_info in release_files: + if "upload_time" in file_info: + try: + upload_time_str = file_info["upload_time"] + # Handle both formats: with 'Z' suffix or with timezone + if upload_time_str.endswith("Z"): + upload_time_str = upload_time_str[:-1] + "+00:00" + upload_time = datetime.fromisoformat(upload_time_str) + upload_times.append(upload_time) + except (ValueError, AttributeError): + continue + + first_upload = min(upload_times) if upload_times else None + age_days = (datetime.now(first_upload.tzinfo) - first_upload).days if first_upload else 0 + + # Extract metadata + project_urls = info.get("project_urls") or {} + homepage = info.get("home_page") or 
project_urls.get("Homepage") + source = project_urls.get("Source") or project_urls.get("Repository") + + checks = { + "name": package_name, + "version": info.get("version", "unknown"), + "age_days": age_days, + "total_releases": len(releases), + "has_homepage": bool(homepage), + "has_source": bool(source), + "author": info.get("author") or info.get("maintainer") or "Unknown", + "license": info.get("license") or "Unknown", + "summary": info.get("summary", "No description"), + "warnings": [], + "info_items": [], + "risk_level": "low", + } + + # Check for suspicious indicators + risk_score = 0 + + if age_days < 30: + checks["warnings"].append(f"Very new package (only {age_days} days old)") + risk_score += 3 + elif age_days < 90: + checks["warnings"].append(f"Relatively new package ({age_days} days old)") + risk_score += 1 + + if checks["total_releases"] < 3: + checks["warnings"].append(f"Very few releases (only {checks['total_releases']})") + risk_score += 2 + + if not source: + checks["warnings"].append("No source repository linked") + risk_score += 2 + + if not homepage and not source: + checks["warnings"].append("No homepage or source repository") + risk_score += 1 + + if checks["author"] == "Unknown": + checks["warnings"].append("No author information available") + risk_score += 1 + + # Add informational items + checks["info_items"].append(f"Age: {age_days} days") + checks["info_items"].append(f"Releases: {checks['total_releases']}") + checks["info_items"].append(f"Author: {checks['author']}") + checks["info_items"].append(f"License: {checks['license']}") + if source: + checks["info_items"].append(f"Source: {source}") + + # Determine risk level + if risk_score >= 5: + checks["risk_level"] = "high" + elif risk_score >= 3: + checks["risk_level"] = "medium" + else: + checks["risk_level"] = "low" + + return checks + + +def format_check_result(result: dict[str, Any]) -> str: + """Format a check result for display. + + :param result: The check result dictionary. 
+ """ + risk_emoji = {"high": "🔴", "medium": "🟡", "low": "🟢", "unknown": "⚪"} + version = result.get("version", "unknown") + + lines = [f"\n{risk_emoji[result['risk_level']]} **{result['name']}** (v{version})"] + + if result.get("error"): + lines.append(f" ❌ {result['error']}") + return "\n".join(lines) + + if result.get("summary"): + lines.append(f" 📝 {result['summary']}") + + if result.get("info_items"): + for item in result["info_items"]: + lines.append(f" ℹ️ {item}") + + if result.get("warnings"): + for warning in result["warnings"]: + lines.append(f" ⚠️ {warning}") + + return "\n".join(lines) + + +def main() -> int: + """Run the package safety check.""" + if len(sys.argv) < 2: + print("Usage: check_package_safety.py ") + print(" Or: check_package_safety.py package1 package2 package3") + return 1 + + packages = [] + + # Check if first argument is a file + if len(sys.argv) == 2 and sys.argv[1].endswith(".txt"): + try: + with open(sys.argv[1]) as f: + for line in f: + package = parse_requirement(line) + if package: + packages.append(package) + except FileNotFoundError: + print(f"Error: File '{sys.argv[1]}' not found") + return 1 + else: + # Treat arguments as package names + packages = [arg.lower() for arg in sys.argv[1:]] + + if not packages: + print("No packages to check") + return 0 + + print(f"Checking {len(packages)} package(s)...\n") + print("=" * 80) + + results = [] + for package in packages: + result = check_package(package) + results.append(result) + print(format_check_result(result)) + + print("\n" + "=" * 80) + + # Summary + high_risk = sum(1 for r in results if r["risk_level"] == "high") + medium_risk = sum(1 for r in results if r["risk_level"] == "medium") + low_risk = sum(1 for r in results if r["risk_level"] == "low") + + print(f"\n📊 Summary: {len(results)} packages checked") + if high_risk: + print(f" 🔴 High risk: {high_risk}") + if medium_risk: + print(f" 🟡 Medium risk: {medium_risk}") + print(f" 🟢 Low risk: {low_risk}") + + if high_risk > 0: + 
print("\n⚠️ High-risk packages detected! Manual review strongly recommended.") + return 2 + if medium_risk > 0: + print("\n⚠️ Medium-risk packages detected. Please review before merging.") + return 1 + + print("\n✅ All packages passed basic safety checks.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) -- 2.34.1