Improve dependency check workflow
author: Marcel van der Veldt <m.vanderveldt@outlook.com>
Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
committer: Marcel van der Veldt <m.vanderveldt@outlook.com>
Fri, 6 Feb 2026 12:11:33 +0000 (13:11 +0100)
.github/workflows/dependency-security.yml
scripts/check_package_safety.py
scripts/parse_manifest_deps.py [new file with mode: 0755]

index f602170d7d1527bfa2cd5b779f0eb44869f92a36..71bd1d347ffeefb2783a6b8ebdf86f26658eea73 100644 (file)
@@ -27,6 +27,22 @@ jobs:
           ref: ${{ github.event.pull_request.head.sha }}
           fetch-depth: 0 # Need full history for diff
 
+      - name: Detect automated dependency PRs
+        id: pr_type
+        # Pass PR metadata through the environment instead of interpolating it
+        # into the script text: toJson() output contains double quotes and label
+        # names may contain spaces or shell metacharacters, which would otherwise
+        # break (or inject commands into) the generated shell code.
+        env:
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
+        run: |
+          # Check if PR is from dependabot, renovate, or has auto-merge label.
+          # The label is matched together with its JSON quotes so that only the
+          # exact label name "auto-merge" counts (not e.g. "no-auto-merge").
+          if [[ "$PR_AUTHOR" == "dependabot[bot]" ]] || \
+             [[ "$PR_AUTHOR" == "renovate[bot]" ]] || \
+             grep -qF '"auto-merge"' <<< "$PR_LABELS"; then
+            echo "is_automated=true" >> "$GITHUB_OUTPUT"
+            echo "✅ Detected automated dependency update PR - will auto-approve security checks"
+          else
+            echo "is_automated=false" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Set up Python
         uses: actions/setup-python@v6.2.0
         with:
@@ -113,27 +129,37 @@ jobs:
           CHANGED_MANIFESTS=$(git diff --name-only origin/$BASE_BRANCH...HEAD | grep "manifest.json" || true)
 
           if [ -n "$CHANGED_MANIFESTS" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
             echo "## šŸ“‹ Provider Manifest Changes" > manifest_report.md
             echo "" >> manifest_report.md
-            echo "The following provider manifests were modified:" >> manifest_report.md
-            echo "" >> manifest_report.md
+
+            HAS_REQ_CHANGES=false
 
             for manifest in $CHANGED_MANIFESTS; do
-              echo "### \`$manifest\`" >> manifest_report.md
-              echo "" >> manifest_report.md
-
-              # Check if there are requirements changes in the manifest
-              if git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -i "requirements" > /dev/null 2>&1; then
-                echo "Requirements section modified:" >> manifest_report.md
-                echo '```diff' >> manifest_report.md
-                git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -A 10 -B 2 "requirements" >> manifest_report.md || true
-                echo '```' >> manifest_report.md
-              else
-                echo "No requirements changes detected" >> manifest_report.md
+              # Check if requirements actually changed
+              OLD_REQS=$(git show origin/$BASE_BRANCH:$manifest 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "")
+              NEW_REQS=$(cat $manifest | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "")
+
+              if [ "$OLD_REQS" != "$NEW_REQS" ]; then
+                HAS_REQ_CHANGES=true
+                echo "### \`$manifest\`" >> manifest_report.md
+                echo "" >> manifest_report.md
+
+                # Save old and new versions for comparison
+                git show origin/$BASE_BRANCH:$manifest > /tmp/old_manifest.json 2>/dev/null || echo '{"requirements":[]}' > /tmp/old_manifest.json
+                cp $manifest /tmp/new_manifest.json
+
+                # Use Python script to parse dependency changes
+                python3 scripts/parse_manifest_deps.py /tmp/old_manifest.json /tmp/new_manifest.json >> manifest_report.md
+                echo "" >> manifest_report.md
               fi
-              echo "" >> manifest_report.md
             done
+
+            if [ "$HAS_REQ_CHANGES" = "true" ]; then
+              echo "has_changes=true" >> $GITHUB_OUTPUT
+            else
+              echo "has_changes=false" >> $GITHUB_OUTPUT
+              echo "Manifest files changed but no dependency changes detected" > manifest_report.md
+            fi
           else
             echo "has_changes=false" >> $GITHUB_OUTPUT
             echo "No provider manifest changes detected" > manifest_report.md
@@ -158,6 +184,25 @@ jobs:
             cat safety_output.txt >> safety_report.md
             echo "" >> safety_report.md
 
+            # Parse automated check results
+            if grep -q "āœ….*Trusted Sources.*All packages" safety_output.txt; then
+              echo "trusted_sources=pass" >> $GITHUB_OUTPUT
+            else
+              echo "trusted_sources=fail" >> $GITHUB_OUTPUT
+            fi
+
+            if grep -q "āœ….*Typosquatting.*No suspicious" safety_output.txt; then
+              echo "typosquatting=pass" >> $GITHUB_OUTPUT
+            else
+              echo "typosquatting=fail" >> $GITHUB_OUTPUT
+            fi
+
+            if grep -q "āœ….*License.*All licenses" safety_output.txt; then
+              echo "license=pass" >> $GITHUB_OUTPUT
+            else
+              echo "license=fail" >> $GITHUB_OUTPUT
+            fi
+
             if [ $SAFETY_EXIT -eq 2 ]; then
               echo "status=high_risk" >> $GITHUB_OUTPUT
               echo "" >> safety_report.md
@@ -174,6 +219,9 @@ jobs:
           else
             echo "No new dependencies to check" >> safety_report.md
             echo "status=pass" >> $GITHUB_OUTPUT
+            echo "trusted_sources=pass" >> $GITHUB_OUTPUT
+            echo "typosquatting=pass" >> $GITHUB_OUTPUT
+            echo "license=pass" >> $GITHUB_OUTPUT
           fi
 
           cat safety_report.md
@@ -214,19 +262,70 @@ jobs:
 
           echo "---" >> security_report.md
           echo "" >> security_report.md
-          echo "## šŸ“‹ Review Checklist" >> security_report.md
+          echo "## šŸ“‹ Security Checks" >> security_report.md
           echo "" >> security_report.md
 
           if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then
-            echo "Before merging this PR, please ensure:" >> security_report.md
+            echo "### Automated Security Checks" >> security_report.md
+            echo "" >> security_report.md
+
+            # Vulnerability scan check
+            if [ "${{ steps.pip_audit.outputs.status }}" == "fail" ]; then
+              echo "- āŒ **Vulnerability Scan**: Failed - Known vulnerabilities detected" >> security_report.md
+            else
+              echo "- āœ… **Vulnerability Scan**: Passed - No known vulnerabilities" >> security_report.md
+            fi
+
+            # Trusted sources check
+            if [ "${{ steps.safety_check.outputs.trusted_sources }}" == "fail" ]; then
+              echo "- āŒ **Trusted Sources**: Some packages missing source repository" >> security_report.md
+            else
+              echo "- āœ… **Trusted Sources**: All packages have verified source repositories" >> security_report.md
+            fi
+
+            # Typosquatting check
+            if [ "${{ steps.safety_check.outputs.typosquatting }}" == "fail" ]; then
+              echo "- āŒ **Typosquatting Check**: Suspicious package names detected!" >> security_report.md
+            else
+              echo "- āœ… **Typosquatting Check**: No suspicious package names detected" >> security_report.md
+            fi
+
+            # License compatibility check
+            if [ "${{ steps.safety_check.outputs.license }}" == "fail" ]; then
+              echo "- āš ļø  **License Compatibility**: Some licenses may not be compatible" >> security_report.md
+            else
+              echo "- āœ… **License Compatibility**: All licenses are OSI-approved and compatible" >> security_report.md
+            fi
+
+            # Supply chain risk check
+            if [ "${{ steps.safety_check.outputs.status }}" == "high_risk" ]; then
+              echo "- āŒ **Supply Chain Risk**: High risk packages detected" >> security_report.md
+            elif [ "${{ steps.safety_check.outputs.status }}" == "medium_risk" ]; then
+              echo "- āš ļø  **Supply Chain Risk**: Medium risk - review recommended" >> security_report.md
+            else
+              echo "- āœ… **Supply Chain Risk**: Passed - packages appear mature and maintained" >> security_report.md
+            fi
+
             echo "" >> security_report.md
-            echo "- [ ] All new dependencies are from trusted sources" >> security_report.md
-            echo "- [ ] Package names are spelled correctly (check for typosquatting)" >> security_report.md
-            echo "- [ ] Dependencies have active maintenance and community" >> security_report.md
-            echo "- [ ] No known vulnerabilities are present" >> security_report.md
-            echo "- [ ] Licenses are compatible with the project" >> security_report.md
+
+            # Check if automated PR
+            if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then
+              echo "> šŸ¤– **Automated dependency update** - This PR is from a trusted source (dependabot/renovate) and will be auto-approved if all checks pass." >> security_report.md
+              echo "" >> security_report.md
+            fi
+
+            echo "### Manual Review" >> security_report.md
+            echo "" >> security_report.md
+            echo "**Maintainer approval required:**" >> security_report.md
             echo "" >> security_report.md
-            echo "Once reviewed, a maintainer should add the **\`dependencies-reviewed\`** label to this PR." >> security_report.md
+            echo "- [ ] **I have reviewed the changes above and approve these dependency updates**" >> security_report.md
+            echo "" >> security_report.md
+
+            if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then
+              echo "_Automated PRs with all checks passing will be auto-approved._" >> security_report.md
+            else
+              echo "_After review, add the **\`dependencies-reviewed\`** label to approve this PR._" >> security_report.md
+            fi
           else
             echo "āœ… No dependency changes detected in this PR." >> security_report.md
           fi
@@ -284,6 +383,7 @@ jobs:
           script: |
             const labels = context.payload.pull_request.labels.map(l => l.name);
             const hasReviewLabel = labels.includes('dependencies-reviewed');
+            const isAutomated = '${{ steps.pr_type.outputs.is_automated }}' === 'true';
             const isHighRisk = '${{ steps.safety_check.outputs.status }}' === 'high_risk';
             const hasFailed = '${{ steps.pip_audit.outputs.status }}' === 'fail';
 
@@ -291,6 +391,17 @@ jobs:
               core.setFailed('šŸ”“ HIGH RISK dependencies detected! This PR requires thorough security review before merging.');
             } else if (hasFailed) {
               core.setFailed('šŸ”“ Known vulnerabilities detected! Please address the security issues above.');
+            } else if (isAutomated) {
+              // Auto-approve automated PRs if security checks passed
+              core.info('āœ… Automated dependency update with passing security checks - auto-approved');
+
+              // Optionally add the label automatically
+              await github.rest.issues.addLabels({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                labels: ['dependencies-reviewed']
+              });
             } else if (!hasReviewLabel) {
               core.setFailed('āš ļø  Dependency changes detected. A maintainer must add the "dependencies-reviewed" label after security review.');
             } else {
index 6fb88fe574be54e3ed58cf83ea4383c1b375a9eb..0ff64873b79cf29536e085183bc3a180ec70f20a 100755 (executable)
@@ -13,6 +13,106 @@ import urllib.request
 from datetime import datetime
 from typing import Any
 
+# OSI-approved and common compatible licenses.
+# check_license_compatibility() compares each entry case-insensitively against
+# the license string it is given; a hit marks the package license as compatible.
+COMPATIBLE_LICENSES = {
+    "MIT",
+    "Apache-2.0",
+    "Apache Software License",
+    "BSD",
+    "BSD-3-Clause",
+    "BSD-2-Clause",
+    "ISC",
+    "Python Software Foundation License",
+    "PSF",
+    "LGPL",
+    "MPL-2.0",
+    "Unlicense",
+    "CC0",
+}
+
+# Common packages to check for typosquatting (popular Python packages).
+# check_typosquatting() compares a candidate name against every entry after
+# lower-casing both names and removing "-" and "_".
+POPULAR_PACKAGES = {
+    "requests",
+    "urllib3",
+    "setuptools",
+    "certifi",
+    "pip",
+    "numpy",
+    "pandas",
+    "boto3",
+    "botocore",
+    "awscli",
+    "django",
+    "flask",
+    "sqlalchemy",
+    "pytest",
+    "pydantic",
+    "aiohttp",
+    "fastapi",
+}
+
+
+def check_typosquatting(package_name: str) -> str | None:
+    """Check if package name might be typosquatting a popular package.
+
+    The name is normalized (lower-cased, ``-`` and ``_`` removed) and compared
+    against every entry of ``POPULAR_PACKAGES``. Two techniques are detected:
+
+    * a single replaced character (same length, exactly one differing position)
+    * look-alike digit substitutions (``0`` for ``o``, ``1`` for ``l`` or ``i``)
+
+    Inserted, dropped or swapped characters are not detected.
+
+    :param package_name: The package name to check.
+    :return: A warning naming the imitated package, or ``None`` when nothing
+        suspicious was found.
+    """
+    package_lower = package_name.lower().replace("-", "").replace("_", "")
+
+    # Look-alike digit -> letter replacements; loop-invariant, so built once.
+    substitutions = (
+        ("0", "o"),
+        ("1", "l"),
+        ("1", "i"),
+    )
+
+    # Iterate in sorted order so the reported package is deterministic
+    # (set iteration order is not).
+    normalized_popular = {
+        popular.lower().replace("-", "").replace("_", ""): popular
+        for popular in sorted(POPULAR_PACKAGES)
+    }
+
+    # A name that *is* a popular package must never be reported as imitating
+    # a different popular package.
+    if package_lower in normalized_popular:
+        return None
+
+    for popular_normalized, popular in normalized_popular.items():
+        # Same length and exactly one differing position: one replaced character
+        if len(package_lower) == len(popular_normalized):
+            differences = sum(
+                c1 != c2 for c1, c2 in zip(package_lower, popular_normalized, strict=True)
+            )
+            if differences == 1:
+                return f"Suspicious: Very similar to popular package '{popular}'"
+
+        # Undo each look-alike substitution and see whether a popular name appears
+        for old, new in substitutions:
+            if old in package_lower and package_lower.replace(old, new) == popular_normalized:
+                return f"Suspicious: Character substitution of popular package '{popular}'"
+
+    return None
+
+
+def check_license_compatibility(license_str: str) -> tuple[bool, str]:
+    """Check if license is compatible with the project.
+
+    Entries of ``COMPATIBLE_LICENSES`` are matched case-insensitively as whole
+    tokens, so ``MIT`` does not match inside ``LIMITED`` and ``Unlicense`` does
+    not match ``UNLICENSED``. A directly attached version suffix such as
+    ``LGPLv3`` is still accepted.
+
+    :param license_str: The license string from PyPI.
+    :return: ``(compatible, status)`` where ``status`` is a human-readable
+        explanation that includes the original license string when one was given.
+    """
+    import re  # Local import: only this check needs it
+
+    if not license_str or license_str == "Unknown":
+        return False, "No license information"
+
+    license_upper = license_str.upper()
+
+    def mentions(name: str) -> bool:
+        """Return True if ``name`` occurs in the license string as its own token."""
+        # Left side: not glued to a preceding letter/digit.
+        # Right side: either a version marker ("V" + digit) or no further letter.
+        pattern = rf"(?<![A-Z0-9]){re.escape(name.upper())}(?:V(?=\d)|(?![A-Z]))"
+        return re.search(pattern, license_upper) is not None
+
+    # Check against compatible licenses
+    if any(mentions(compatible) for compatible in COMPATIBLE_LICENSES):
+        return True, f"Compatible ({license_str})"
+
+    # Check for problematic licenses. Deliberately a loose substring test: it is
+    # safer to over-report copyleft licenses than to miss one.
+    problematic = ("GPL", "AGPL", "SSPL")
+    if "LGPL" not in license_upper and any(problem in license_upper for problem in problematic):
+        return False, f"Incompatible copyleft license ({license_str})"
+
+    # Unknown license
+    return False, f"Unknown/unverified license ({license_str})"
+
 
 def parse_requirement(line: str) -> str | None:
     """Extract package name from a requirement line.
@@ -94,6 +194,10 @@ def check_package(package_name: str) -> dict[str, Any]:
     homepage = info.get("home_page") or project_urls.get("Homepage")
     source = project_urls.get("Source") or project_urls.get("Repository")
 
+    # Run automated security checks
+    typosquat_check = check_typosquatting(package_name)
+    license_compatible, license_status = check_license_compatibility(info.get("license", "Unknown"))
+
     checks = {
         "name": package_name,
         "version": info.get("version", "unknown"),
@@ -107,11 +211,30 @@ def check_package(package_name: str) -> dict[str, Any]:
         "warnings": [],
         "info_items": [],
         "risk_level": "low",
+        "automated_checks": {
+            "trusted_source": bool(source),
+            "typosquatting": typosquat_check is None,
+            "license_compatible": license_compatible,
+        },
+        "check_details": {
+            "typosquatting": typosquat_check or "āœ“ No typosquatting detected",
+            "license": license_status,
+        },
     }
 
     # Check for suspicious indicators
     risk_score = 0
 
+    # Typosquatting check
+    if typosquat_check:
+        checks["warnings"].append(typosquat_check)
+        risk_score += 5  # High risk
+
+    # License check
+    if not license_compatible:
+        checks["warnings"].append(f"License issue: {license_status}")
+        risk_score += 2
+
     if age_days < 30:
         checks["warnings"].append(f"Very new package (only {age_days} days old)")
         risk_score += 3
@@ -221,6 +344,35 @@ def main() -> int:
 
     print("\n" + "=" * 80)
 
+    # Automated checks summary
+    all_trusted = all(r.get("automated_checks", {}).get("trusted_source", False) for r in results)
+    all_no_typosquat = all(
+        r.get("automated_checks", {}).get("typosquatting", False) for r in results
+    )
+    all_license_ok = all(
+        r.get("automated_checks", {}).get("license_compatible", False) for r in results
+    )
+
+    print("\nšŸ¤– Automated Security Checks:")
+    trusted_msg = (
+        "All packages have source repositories"
+        if all_trusted
+        else "Some packages missing source info"
+    )
+    print(f"  {'āœ…' if all_trusted else 'āŒ'} Trusted Sources: {trusted_msg}")
+
+    typosquat_msg = (
+        "No suspicious package names detected"
+        if all_no_typosquat
+        else "Possible typosquatting detected!"
+    )
+    print(f"  {'āœ…' if all_no_typosquat else 'āŒ'} Typosquatting: {typosquat_msg}")
+
+    license_msg = (
+        "All licenses are compatible" if all_license_ok else "Some license issues detected"
+    )
+    print(f"  {'āœ…' if all_license_ok else 'āš ļø '} License Compatibility: {license_msg}")
+
     # Summary
     high_risk = sum(1 for r in results if r["risk_level"] == "high")
     medium_risk = sum(1 for r in results if r["risk_level"] == "medium")
diff --git a/scripts/parse_manifest_deps.py b/scripts/parse_manifest_deps.py
new file mode 100755 (executable)
index 0000000..9addfa6
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+"""Parse manifest.json files to extract dependency changes.
+
+This script compares old and new versions of manifest.json files
+to identify changes in the requirements field.
+"""
+
+# ruff: noqa: T201
+import json
+import sys
+
+
+def parse_requirements(manifest_content: str) -> list[str]:
+    """Extract requirements from manifest JSON content.
+
+    :param manifest_content: JSON string content of manifest file.
+    :return: The string entries of the top-level ``requirements`` array. An empty
+        list is returned when the content is not valid JSON, is not a JSON
+        object, or has no ``requirements`` array.
+    """
+    try:
+        data = json.loads(manifest_content)
+    except json.JSONDecodeError:
+        return []
+
+    # Valid JSON is not necessarily an object (e.g. ``[]`` or ``null``).
+    if not isinstance(data, dict):
+        return []
+
+    requirements = data.get("requirements")
+    # ``"requirements": null`` (or any other non-list value) means "none";
+    # never hand back ``None`` because callers build a set from the result.
+    if not isinstance(requirements, list):
+        return []
+
+    # Keep only string entries so the result is hashable and sortable.
+    return [req for req in requirements if isinstance(req, str)]
+
+
+def main() -> int:
+    """Parse manifest dependency changes.
+
+    Reads two command-line arguments, the path of the old manifest and the path
+    of the new manifest, and prints a markdown summary of added, removed and
+    unchanged requirements to stdout.
+
+    :return: Exit code: 0 on success, 1 on wrong usage or when the new manifest
+        file does not exist.
+    """
+    if len(sys.argv) != 3:
+        # Diagnostics go to stderr so they never end up in a report built
+        # from this script's stdout.
+        print("Usage: parse_manifest_deps.py <old_manifest> <new_manifest>", file=sys.stderr)
+        return 1
+
+    old_file = sys.argv[1]
+    new_file = sys.argv[2]
+
+    try:
+        with open(old_file, encoding="utf-8") as f:
+            old_reqs = parse_requirements(f.read())
+    except FileNotFoundError:
+        # No old manifest: treat every requirement of the new one as added.
+        old_reqs = []
+
+    try:
+        with open(new_file, encoding="utf-8") as f:
+            new_reqs = parse_requirements(f.read())
+    except FileNotFoundError:
+        print("Error: New manifest file not found", file=sys.stderr)
+        return 1
+
+    # Find added, removed, and unchanged requirements
+    old_set = set(old_reqs)
+    new_set = set(new_reqs)
+
+    added = new_set - old_set
+    removed = old_set - new_set
+    unchanged = old_set & new_set
+
+    if not added and not removed:
+        print("No dependency changes")
+        return 0
+
+    # Output in markdown format
+    if added:
+        print("**Added:**")
+        for req in sorted(added):
+            print(f"- ✅ `{req}`")
+        print()
+
+    if removed:
+        print("**Removed:**")
+        for req in sorted(removed):
+            print(f"- ❌ `{req}`")
+        print()
+
+    # Something changed (see the early return above), so list the rest collapsed.
+    if unchanged:
+        print("<details>")
+        print("<summary>Unchanged dependencies</summary>")
+        print()
+        for req in sorted(unchanged):
+            print(f"- `{req}`")
+        print()
+        print("</details>")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())