Improve dependency check workflow

author Marcel van der Veldt <m.vanderveldt@outlook.com>

Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)

committer Marcel van der Veldt <m.vanderveldt@outlook.com>

Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
author Marcel van der Veldt <m.vanderveldt@outlook.com>
Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
committer Marcel van der Veldt <m.vanderveldt@outlook.com>
Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
diff --git a/.github/workflows/dependency-security.yml b/.github/workflows/dependency-security.yml

index f602170d7d1527bfa2cd5b779f0eb44869f92a36..71bd1d347ffeefb2783a6b8ebdf86f26658eea73 100644 (file)
--- a/.github/workflows/dependency-security.yml
+++ b/.github/workflows/dependency-security.yml
@@ -27,6 +27,22 @@ jobs:
            ref: ${{ github.event.pull_request.head.sha }}
            fetch-depth: 0 # Need full history for diff
  
+      - name: Detect automated dependency PRs
+        id: pr_type
+        run: |
+          PR_AUTHOR="${{ github.event.pull_request.user.login }}"
+          PR_LABELS="${{ toJson(github.event.pull_request.labels.*.name) }}"
+
+          # Check if PR is from dependabot, renovate, or has auto-merge label
+          if [[ "$PR_AUTHOR" == "dependabot[bot]" ]] || \
+             [[ "$PR_AUTHOR" == "renovate[bot]" ]] || \
+             echo "$PR_LABELS" | grep -q "auto-merge"; then
+            echo "is_automated=true" >> $GITHUB_OUTPUT
+            echo "✅ Detected automated dependency update PR - will auto-approve security checks"
+          else
+            echo "is_automated=false" >> $GITHUB_OUTPUT
+          fi
+
        - name: Set up Python
          uses: actions/setup-python@v6.2.0
          with:
@@ -113,27 +129,37 @@ jobs:
            CHANGED_MANIFESTS=$(git diff --name-only origin/$BASE_BRANCH...HEAD | grep "manifest.json" || true)
  
            if [ -n "$CHANGED_MANIFESTS" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
              echo "## 📋 Provider Manifest Changes" > manifest_report.md
              echo "" >> manifest_report.md
-            echo "The following provider manifests were modified:" >> manifest_report.md
-            echo "" >> manifest_report.md
+
+            HAS_REQ_CHANGES=false
  
              for manifest in $CHANGED_MANIFESTS; do
-              echo "### \`$manifest\`" >> manifest_report.md
-              echo "" >> manifest_report.md
-
-              # Check if there are requirements changes in the manifest
-              if git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -i "requirements" > /dev/null 2>&1; then
-                echo "Requirements section modified:" >> manifest_report.md
-                echo '```diff' >> manifest_report.md
-                git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -A 10 -B 2 "requirements" >> manifest_report.md || true
-                echo '```' >> manifest_report.md
-              else
-                echo "No requirements changes detected" >> manifest_report.md
+              # Check if requirements actually changed
+              OLD_REQS=$(git show origin/$BASE_BRANCH:$manifest 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "")
+              NEW_REQS=$(cat $manifest | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "")
+
+              if [ "$OLD_REQS" != "$NEW_REQS" ]; then
+                HAS_REQ_CHANGES=true
+                echo "### \`$manifest\`" >> manifest_report.md
+                echo "" >> manifest_report.md
+
+                # Save old and new versions for comparison
+                git show origin/$BASE_BRANCH:$manifest > /tmp/old_manifest.json 2>/dev/null || echo '{"requirements":[]}' > /tmp/old_manifest.json
+                cp $manifest /tmp/new_manifest.json
+
+                # Use Python script to parse dependency changes
+                python3 scripts/parse_manifest_deps.py /tmp/old_manifest.json /tmp/new_manifest.json >> manifest_report.md
+                echo "" >> manifest_report.md
                fi
-              echo "" >> manifest_report.md
              done
+
+            if [ "$HAS_REQ_CHANGES" = "true" ]; then
+              echo "has_changes=true" >> $GITHUB_OUTPUT
+            else
+              echo "has_changes=false" >> $GITHUB_OUTPUT
+              echo "Manifest files changed but no dependency changes detected" > manifest_report.md
+            fi
            else
              echo "has_changes=false" >> $GITHUB_OUTPUT
              echo "No provider manifest changes detected" > manifest_report.md
@@ -158,6 +184,25 @@ jobs:
              cat safety_output.txt >> safety_report.md
              echo "" >> safety_report.md
  
+            # Parse automated check results
+            if grep -q "✅.*Trusted Sources.*All packages" safety_output.txt; then
+              echo "trusted_sources=pass" >> $GITHUB_OUTPUT
+            else
+              echo "trusted_sources=fail" >> $GITHUB_OUTPUT
+            fi
+
+            if grep -q "✅.*Typosquatting.*No suspicious" safety_output.txt; then
+              echo "typosquatting=pass" >> $GITHUB_OUTPUT
+            else
+              echo "typosquatting=fail" >> $GITHUB_OUTPUT
+            fi
+
+            if grep -q "✅.*License.*All licenses" safety_output.txt; then
+              echo "license=pass" >> $GITHUB_OUTPUT
+            else
+              echo "license=fail" >> $GITHUB_OUTPUT
+            fi
+
              if [ $SAFETY_EXIT -eq 2 ]; then
                echo "status=high_risk" >> $GITHUB_OUTPUT
                echo "" >> safety_report.md
@@ -174,6 +219,9 @@ jobs:
            else
              echo "No new dependencies to check" >> safety_report.md
              echo "status=pass" >> $GITHUB_OUTPUT
+            echo "trusted_sources=pass" >> $GITHUB_OUTPUT
+            echo "typosquatting=pass" >> $GITHUB_OUTPUT
+            echo "license=pass" >> $GITHUB_OUTPUT
            fi
  
            cat safety_report.md
@@ -214,19 +262,70 @@ jobs:
  
            echo "---" >> security_report.md
            echo "" >> security_report.md
-          echo "## 📋 Review Checklist" >> security_report.md
+          echo "## 📋 Security Checks" >> security_report.md
            echo "" >> security_report.md
  
            if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then
-            echo "Before merging this PR, please ensure:" >> security_report.md
+            echo "### Automated Security Checks" >> security_report.md
+            echo "" >> security_report.md
+
+            # Vulnerability scan check
+            if [ "${{ steps.pip_audit.outputs.status }}" == "fail" ]; then
+              echo "- ❌ **Vulnerability Scan**: Failed - Known vulnerabilities detected" >> security_report.md
+            else
+              echo "- ✅ **Vulnerability Scan**: Passed - No known vulnerabilities" >> security_report.md
+            fi
+
+            # Trusted sources check
+            if [ "${{ steps.safety_check.outputs.trusted_sources }}" == "fail" ]; then
+              echo "- ❌ **Trusted Sources**: Some packages missing source repository" >> security_report.md
+            else
+              echo "- ✅ **Trusted Sources**: All packages have verified source repositories" >> security_report.md
+            fi
+
+            # Typosquatting check
+            if [ "${{ steps.safety_check.outputs.typosquatting }}" == "fail" ]; then
+              echo "- ❌ **Typosquatting Check**: Suspicious package names detected!" >> security_report.md
+            else
+              echo "- ✅ **Typosquatting Check**: No suspicious package names detected" >> security_report.md
+            fi
+
+            # License compatibility check
+            if [ "${{ steps.safety_check.outputs.license }}" == "fail" ]; then
+              echo "- ⚠️  **License Compatibility**: Some licenses may not be compatible" >> security_report.md
+            else
+              echo "- ✅ **License Compatibility**: All licenses are OSI-approved and compatible" >> security_report.md
+            fi
+
+            # Supply chain risk check
+            if [ "${{ steps.safety_check.outputs.status }}" == "high_risk" ]; then
+              echo "- ❌ **Supply Chain Risk**: High risk packages detected" >> security_report.md
+            elif [ "${{ steps.safety_check.outputs.status }}" == "medium_risk" ]; then
+              echo "- ⚠️  **Supply Chain Risk**: Medium risk - review recommended" >> security_report.md
+            else
+              echo "- ✅ **Supply Chain Risk**: Passed - packages appear mature and maintained" >> security_report.md
+            fi
+
              echo "" >> security_report.md
-            echo "- [ ] All new dependencies are from trusted sources" >> security_report.md
-            echo "- [ ] Package names are spelled correctly (check for typosquatting)" >> security_report.md
-            echo "- [ ] Dependencies have active maintenance and community" >> security_report.md
-            echo "- [ ] No known vulnerabilities are present" >> security_report.md
-            echo "- [ ] Licenses are compatible with the project" >> security_report.md
+
+            # Check if automated PR
+            if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then
+              echo "> 🤖 **Automated dependency update** - This PR is from a trusted source (dependabot/renovate) and will be auto-approved if all checks pass." >> security_report.md
+              echo "" >> security_report.md
+            fi
+
+            echo "### Manual Review" >> security_report.md
+            echo "" >> security_report.md
+            echo "**Maintainer approval required:**" >> security_report.md
              echo "" >> security_report.md
-            echo "Once reviewed, a maintainer should add the **\`dependencies-reviewed\`** label to this PR." >> security_report.md
+            echo "- [ ] **I have reviewed the changes above and approve these dependency updates**" >> security_report.md
+            echo "" >> security_report.md
+
+            if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then
+              echo "_Automated PRs with all checks passing will be auto-approved._" >> security_report.md
+            else
+              echo "_After review, add the **\`dependencies-reviewed\`** label to approve this PR._" >> security_report.md
+            fi
            else
              echo "✅ No dependency changes detected in this PR." >> security_report.md
            fi
@@ -284,6 +383,7 @@ jobs:
            script: |
              const labels = context.payload.pull_request.labels.map(l => l.name);
              const hasReviewLabel = labels.includes('dependencies-reviewed');
+            const isAutomated = '${{ steps.pr_type.outputs.is_automated }}' === 'true';
              const isHighRisk = '${{ steps.safety_check.outputs.status }}' === 'high_risk';
              const hasFailed = '${{ steps.pip_audit.outputs.status }}' === 'fail';
  
@@ -291,6 +391,17 @@ jobs:
                core.setFailed('🔴 HIGH RISK dependencies detected! This PR requires thorough security review before merging.');
              } else if (hasFailed) {
                core.setFailed('🔴 Known vulnerabilities detected! Please address the security issues above.');
+            } else if (isAutomated) {
+              // Auto-approve automated PRs if security checks passed
+              core.info('✅ Automated dependency update with passing security checks - auto-approved');
+
+              // Optionally add the label automatically
+              await github.rest.issues.addLabels({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                labels: ['dependencies-reviewed']
+              });
              } else if (!hasReviewLabel) {
                core.setFailed('⚠️  Dependency changes detected. A maintainer must add the "dependencies-reviewed" label after security review.');
              } else {
diff --git a/scripts/check_package_safety.py b/scripts/check_package_safety.py

index 6fb88fe574be54e3ed58cf83ea4383c1b375a9eb..0ff64873b79cf29536e085183bc3a180ec70f20a 100755 (executable)
--- a/scripts/check_package_safety.py
+++ b/scripts/check_package_safety.py
@@ -13,6 +13,106 @@ import urllib.request
  from datetime import datetime
  from typing import Any
  
+# OSI-approved and common compatible licenses
+COMPATIBLE_LICENSES = {
+    "MIT",
+    "Apache-2.0",
+    "Apache Software License",
+    "BSD",
+    "BSD-3-Clause",
+    "BSD-2-Clause",
+    "ISC",
+    "Python Software Foundation License",
+    "PSF",
+    "LGPL",
+    "MPL-2.0",
+    "Unlicense",
+    "CC0",
+}
+
+# Common packages to check for typosquatting (popular Python packages)
+POPULAR_PACKAGES = {
+    "requests",
+    "urllib3",
+    "setuptools",
+    "certifi",
+    "pip",
+    "numpy",
+    "pandas",
+    "boto3",
+    "botocore",
+    "awscli",
+    "django",
+    "flask",
+    "sqlalchemy",
+    "pytest",
+    "pydantic",
+    "aiohttp",
+    "fastapi",
+}
+
+
+def check_typosquatting(package_name: str) -> str | None:
+    """Check if package name might be typosquatting a popular package.
+
+    :param package_name: The package name to check.
+    """
+    package_lower = package_name.lower().replace("-", "").replace("_", "")
+
+    for popular in POPULAR_PACKAGES:
+        popular_normalized = popular.lower().replace("-", "").replace("_", "")
+
+        # Check for common typosquatting techniques
+        if package_lower == popular_normalized:
+            continue  # Exact match is fine
+
+        # Check edit distance (1-2 character changes)
+        if len(package_lower) == len(popular_normalized):
+            differences = sum(
+                c1 != c2 for c1, c2 in zip(package_lower, popular_normalized, strict=True)
+            )
+            if differences == 1:
+                return f"Suspicious: Very similar to popular package '{popular}'"
+
+        # Check for common substitutions
+        substitutions = [
+            ("0", "o"),
+            ("1", "l"),
+            ("1", "i"),
+        ]
+        for old, new in substitutions:
+            if old in package_lower:
+                test_name = package_lower.replace(old, new)
+                if test_name == popular_normalized:
+                    return f"Suspicious: Character substitution of popular package '{popular}'"
+
+    return None
+
+
+def check_license_compatibility(license_str: str) -> tuple[bool, str]:
+    """Check if license is compatible with the project.
+
+    :param license_str: The license string from PyPI.
+    """
+    if not license_str or license_str == "Unknown":
+        return False, "No license information"
+
+    license_upper = license_str.upper()
+
+    # Check against compatible licenses
+    for compatible in COMPATIBLE_LICENSES:
+        if compatible.upper() in license_upper:
+            return True, f"Compatible ({license_str})"
+
+    # Check for problematic licenses
+    problematic = ["GPL", "AGPL", "SSPL"]
+    for problem in problematic:
+        if problem in license_upper and "LGPL" not in license_upper:
+            return False, f"Incompatible copyleft license ({license_str})"
+
+    # Unknown license
+    return False, f"Unknown/unverified license ({license_str})"
+
  
  def parse_requirement(line: str) -> str | None:
      """Extract package name from a requirement line.
@@ -94,6 +194,10 @@ def check_package(package_name: str) -> dict[str, Any]:
      homepage = info.get("home_page") or project_urls.get("Homepage")
      source = project_urls.get("Source") or project_urls.get("Repository")
  
+    # Run automated security checks
+    typosquat_check = check_typosquatting(package_name)
+    license_compatible, license_status = check_license_compatibility(info.get("license", "Unknown"))
+
      checks = {
          "name": package_name,
          "version": info.get("version", "unknown"),
@@ -107,11 +211,30 @@ def check_package(package_name: str) -> dict[str, Any]:
          "warnings": [],
          "info_items": [],
          "risk_level": "low",
+        "automated_checks": {
+            "trusted_source": bool(source),
+            "typosquatting": typosquat_check is None,
+            "license_compatible": license_compatible,
+        },
+        "check_details": {
+            "typosquatting": typosquat_check or "✓ No typosquatting detected",
+            "license": license_status,
+        },
      }
  
      # Check for suspicious indicators
      risk_score = 0
  
+    # Typosquatting check
+    if typosquat_check:
+        checks["warnings"].append(typosquat_check)
+        risk_score += 5  # High risk
+
+    # License check
+    if not license_compatible:
+        checks["warnings"].append(f"License issue: {license_status}")
+        risk_score += 2
+
      if age_days < 30:
          checks["warnings"].append(f"Very new package (only {age_days} days old)")
          risk_score += 3
@@ -221,6 +344,35 @@ def main() -> int:
  
      print("\n" + "=" * 80)
  
+    # Automated checks summary
+    all_trusted = all(r.get("automated_checks", {}).get("trusted_source", False) for r in results)
+    all_no_typosquat = all(
+        r.get("automated_checks", {}).get("typosquatting", False) for r in results
+    )
+    all_license_ok = all(
+        r.get("automated_checks", {}).get("license_compatible", False) for r in results
+    )
+
+    print("\n🤖 Automated Security Checks:")
+    trusted_msg = (
+        "All packages have source repositories"
+        if all_trusted
+        else "Some packages missing source info"
+    )
+    print(f"  {'✅' if all_trusted else '❌'} Trusted Sources: {trusted_msg}")
+
+    typosquat_msg = (
+        "No suspicious package names detected"
+        if all_no_typosquat
+        else "Possible typosquatting detected!"
+    )
+    print(f"  {'✅' if all_no_typosquat else '❌'} Typosquatting: {typosquat_msg}")
+
+    license_msg = (
+        "All licenses are compatible" if all_license_ok else "Some license issues detected"
+    )
+    print(f"  {'✅' if all_license_ok else '⚠️ '} License Compatibility: {license_msg}")
+
      # Summary
      high_risk = sum(1 for r in results if r["risk_level"] == "high")
      medium_risk = sum(1 for r in results if r["risk_level"] == "medium")
diff --git a/scripts/parse_manifest_deps.py b/scripts/parse_manifest_deps.py

new file mode 100755 (executable)

index 0000000..9addfa6
--- /dev/null
+++ b/scripts/parse_manifest_deps.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+"""Parse manifest.json files to extract dependency changes.
+
+This script compares old and new versions of manifest.json files
+to identify changes in the requirements field.
+"""
+
+# ruff: noqa: T201
+import json
+import sys
+
+
+def parse_requirements(manifest_content: str) -> list[str]:
+    """Extract requirements from manifest JSON content.
+
+    :param manifest_content: JSON string content of manifest file.
+    """
+    try:
+        data = json.loads(manifest_content)
+        return data.get("requirements", [])
+    except (json.JSONDecodeError, KeyError):
+        return []
+
+
+def main() -> int:
+    """Parse manifest dependency changes."""
+    if len(sys.argv) != 3:
+        print("Usage: parse_manifest_deps.py <old_manifest> <new_manifest>")
+        return 1
+
+    old_file = sys.argv[1]
+    new_file = sys.argv[2]
+
+    try:
+        with open(old_file) as f:
+            old_reqs = parse_requirements(f.read())
+    except FileNotFoundError:
+        old_reqs = []
+
+    try:
+        with open(new_file) as f:
+            new_reqs = parse_requirements(f.read())
+    except FileNotFoundError:
+        print("Error: New manifest file not found")
+        return 1
+
+    # Find added, removed, and unchanged requirements
+    old_set = set(old_reqs)
+    new_set = set(new_reqs)
+
+    added = new_set - old_set
+    removed = old_set - new_set
+    unchanged = old_set & new_set
+
+    if not added and not removed:
+        print("No dependency changes")
+        return 0
+
+    # Output in markdown format
+    if added:
+        print("**Added:**")
+        for req in sorted(added):
+            print(f"- ✅ `{req}`")
+        print()
+
+    if removed:
+        print("**Removed:**")
+        for req in sorted(removed):
+            print(f"- ❌ `{req}`")
+        print()
+
+    if unchanged and (added or removed):
+        print("<details>")
+        print("<summary>Unchanged dependencies</summary>")
+        print()
+        for req in sorted(unchanged):
+            print(f"- `{req}`")
+        print()
+        print("</details>")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
author	Marcel van der Veldt <m.vanderveldt@outlook.com>
	Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
committer	Marcel van der Veldt <m.vanderveldt@outlook.com>
	Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
.github/workflows/dependency-security.yml		patch \| blob \| history
scripts/check_package_safety.py		patch \| blob \| history
scripts/parse_manifest_deps.py	[new file with mode: 0755]	patch \| blob