From: Marcel van der Veldt Date: Fri, 6 Feb 2026 12:11:33 +0000 (+0100) Subject: Improve dependency check workflow X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=fdb1101a85cc63b8ff6405aa69d534b6220dcfcd;p=music-assistant-server.git Improve dependency check workflow --- diff --git a/.github/workflows/dependency-security.yml b/.github/workflows/dependency-security.yml index f602170d..71bd1d34 100644 --- a/.github/workflows/dependency-security.yml +++ b/.github/workflows/dependency-security.yml @@ -27,6 +27,22 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 # Need full history for diff + - name: Detect automated dependency PRs + id: pr_type + run: | + PR_AUTHOR="${{ github.event.pull_request.user.login }}" + PR_LABELS="${{ toJson(github.event.pull_request.labels.*.name) }}" + + # Check if PR is from dependabot, renovate, or has auto-merge label + if [[ "$PR_AUTHOR" == "dependabot[bot]" ]] || \ + [[ "$PR_AUTHOR" == "renovate[bot]" ]] || \ + echo "$PR_LABELS" | grep -q "auto-merge"; then + echo "is_automated=true" >> $GITHUB_OUTPUT + echo "✅ Detected automated dependency update PR - will auto-approve security checks" + else + echo "is_automated=false" >> $GITHUB_OUTPUT + fi + - name: Set up Python uses: actions/setup-python@v6.2.0 with: @@ -113,27 +129,37 @@ jobs: CHANGED_MANIFESTS=$(git diff --name-only origin/$BASE_BRANCH...HEAD | grep "manifest.json" || true) if [ -n "$CHANGED_MANIFESTS" ]; then - echo "has_changes=true" >> $GITHUB_OUTPUT echo "## 📋 Provider Manifest Changes" > manifest_report.md echo "" >> manifest_report.md - echo "The following provider manifests were modified:" >> manifest_report.md - echo "" >> manifest_report.md + + HAS_REQ_CHANGES=false for manifest in $CHANGED_MANIFESTS; do - echo "### \`$manifest\`" >> manifest_report.md - echo "" >> manifest_report.md - - # Check if there are requirements changes in the manifest - if git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -i "requirements" > /dev/null 2>&1; then - echo "Requirements section modified:" >> manifest_report.md - echo '```diff' >> manifest_report.md - git diff origin/$BASE_BRANCH...HEAD -- "$manifest" | grep -A 10 -B 2 "requirements" >> manifest_report.md || true - echo '```' >> manifest_report.md - else - echo "No requirements changes detected" >> manifest_report.md + # Check if requirements actually changed + OLD_REQS=$(git show origin/$BASE_BRANCH:$manifest 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "") + NEW_REQS=$(cat $manifest | python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('requirements', [])))" 2>/dev/null || echo "") + + if [ "$OLD_REQS" != "$NEW_REQS" ]; then + HAS_REQ_CHANGES=true + echo "### \`$manifest\`" >> manifest_report.md + echo "" >> manifest_report.md + + # Save old and new versions for comparison + git show origin/$BASE_BRANCH:$manifest > /tmp/old_manifest.json 2>/dev/null || echo '{"requirements":[]}' > /tmp/old_manifest.json + cp $manifest /tmp/new_manifest.json + + # Use Python script to parse dependency changes + python3 scripts/parse_manifest_deps.py /tmp/old_manifest.json /tmp/new_manifest.json >> manifest_report.md + echo "" >> manifest_report.md fi - echo "" >> manifest_report.md done + + if [ "$HAS_REQ_CHANGES" = "true" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo "has_changes=false" >> $GITHUB_OUTPUT + echo "Manifest files changed but no dependency changes detected" > manifest_report.md + fi else echo "has_changes=false" >> $GITHUB_OUTPUT echo "No provider manifest changes detected" > manifest_report.md @@ -158,6 +184,25 @@ jobs: cat safety_output.txt >> safety_report.md echo "" >> safety_report.md + # Parse automated check results + if grep -q "✅.*Trusted Sources.*All packages" safety_output.txt; then + echo "trusted_sources=pass" >> $GITHUB_OUTPUT + else + echo "trusted_sources=fail" >> $GITHUB_OUTPUT + fi + + if grep -q "✅.*Typosquatting.*No suspicious" safety_output.txt; then + echo "typosquatting=pass" >> $GITHUB_OUTPUT + else + echo "typosquatting=fail" >> $GITHUB_OUTPUT + fi + + if grep -q "✅.*License.*All licenses" safety_output.txt; then + echo "license=pass" >> $GITHUB_OUTPUT + else + echo "license=fail" >> $GITHUB_OUTPUT + fi + if [ $SAFETY_EXIT -eq 2 ]; then echo "status=high_risk" >> $GITHUB_OUTPUT echo "" >> safety_report.md @@ -174,6 +219,9 @@ jobs: else echo "No new dependencies to check" >> safety_report.md echo "status=pass" >> $GITHUB_OUTPUT + echo "trusted_sources=pass" >> $GITHUB_OUTPUT + echo "typosquatting=pass" >> $GITHUB_OUTPUT + echo "license=pass" >> $GITHUB_OUTPUT fi cat safety_report.md @@ -214,19 +262,70 @@ jobs: echo "---" >> security_report.md echo "" >> security_report.md - echo "## 📋 Review Checklist" >> security_report.md + echo "## 📋 Security Checks" >> security_report.md echo "" >> security_report.md if [ "${{ steps.deps_check.outputs.has_changes }}" == "true" ] || [ "${{ steps.manifest_check.outputs.has_changes }}" == "true" ]; then - echo "Before merging this PR, please ensure:" >> security_report.md + echo "### Automated Security Checks" >> security_report.md + echo "" >> security_report.md + + # Vulnerability scan check + if [ "${{ steps.pip_audit.outputs.status }}" == "fail" ]; then + echo "- ❌ **Vulnerability Scan**: Failed - Known vulnerabilities detected" >> security_report.md + else + echo "- ✅ **Vulnerability Scan**: Passed - No known vulnerabilities" >> security_report.md + fi + + # Trusted sources check + if [ "${{ steps.safety_check.outputs.trusted_sources }}" == "fail" ]; then + echo "- ❌ **Trusted Sources**: Some packages missing source repository" >> security_report.md + else + echo "- ✅ **Trusted Sources**: All packages have verified source repositories" >> security_report.md + fi + + # Typosquatting check + if [ "${{ steps.safety_check.outputs.typosquatting }}" == "fail" ]; then + echo "- ❌ **Typosquatting Check**: Suspicious package names detected!" >> security_report.md + else + echo "- ✅ **Typosquatting Check**: No suspicious package names detected" >> security_report.md + fi + + # License compatibility check + if [ "${{ steps.safety_check.outputs.license }}" == "fail" ]; then + echo "- ⚠️ **License Compatibility**: Some licenses may not be compatible" >> security_report.md + else + echo "- ✅ **License Compatibility**: All licenses are OSI-approved and compatible" >> security_report.md + fi + + # Supply chain risk check + if [ "${{ steps.safety_check.outputs.status }}" == "high_risk" ]; then + echo "- ❌ **Supply Chain Risk**: High risk packages detected" >> security_report.md + elif [ "${{ steps.safety_check.outputs.status }}" == "medium_risk" ]; then + echo "- ⚠️ **Supply Chain Risk**: Medium risk - review recommended" >> security_report.md + else + echo "- ✅ **Supply Chain Risk**: Passed - packages appear mature and maintained" >> security_report.md + fi + echo "" >> security_report.md - echo "- [ ] All new dependencies are from trusted sources" >> security_report.md - echo "- [ ] Package names are spelled correctly (check for typosquatting)" >> security_report.md - echo "- [ ] Dependencies have active maintenance and community" >> security_report.md - echo "- [ ] No known vulnerabilities are present" >> security_report.md - echo "- [ ] Licenses are compatible with the project" >> security_report.md + + # Check if automated PR + if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then + echo "> 🤖 **Automated dependency update** - This PR is from a trusted source (dependabot/renovate) and will be auto-approved if all checks pass." >> security_report.md + echo "" >> security_report.md + fi + + echo "### Manual Review" >> security_report.md + echo "" >> security_report.md + echo "**Maintainer approval required:**" >> security_report.md echo "" >> security_report.md - echo "Once reviewed, a maintainer should add the **\`dependencies-reviewed\`** label to this PR." >> security_report.md + echo "- [ ] **I have reviewed the changes above and approve these dependency updates**" >> security_report.md + echo "" >> security_report.md + + if [ "${{ steps.pr_type.outputs.is_automated }}" == "true" ]; then + echo "_Automated PRs with all checks passing will be auto-approved._" >> security_report.md + else + echo "_After review, add the **\`dependencies-reviewed\`** label to approve this PR._" >> security_report.md + fi else echo "✅ No dependency changes detected in this PR." >> security_report.md fi @@ -284,6 +383,7 @@ jobs: script: | const labels = context.payload.pull_request.labels.map(l => l.name); const hasReviewLabel = labels.includes('dependencies-reviewed'); + const isAutomated = '${{ steps.pr_type.outputs.is_automated }}' === 'true'; const isHighRisk = '${{ steps.safety_check.outputs.status }}' === 'high_risk'; const hasFailed = '${{ steps.pip_audit.outputs.status }}' === 'fail'; @@ -291,6 +391,17 @@ jobs: core.setFailed('🔴 HIGH RISK dependencies detected! This PR requires thorough security review before merging.'); } else if (hasFailed) { core.setFailed('🔴 Known vulnerabilities detected! Please address the security issues above.'); + } else if (isAutomated) { + // Auto-approve automated PRs if security checks passed + core.info('✅ Automated dependency update with passing security checks - auto-approved'); + + // Optionally add the label automatically + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + labels: ['dependencies-reviewed'] + }); } else if (!hasReviewLabel) { core.setFailed('⚠️ Dependency changes detected. A maintainer must add the "dependencies-reviewed" label after security review.'); } else { diff --git a/scripts/check_package_safety.py b/scripts/check_package_safety.py index 6fb88fe5..0ff64873 100755 --- a/scripts/check_package_safety.py +++ b/scripts/check_package_safety.py @@ -13,6 +13,106 @@ import urllib.request from datetime import datetime from typing import Any +# OSI-approved and common compatible licenses +COMPATIBLE_LICENSES = { + "MIT", + "Apache-2.0", + "Apache Software License", + "BSD", + "BSD-3-Clause", + "BSD-2-Clause", + "ISC", + "Python Software Foundation License", + "PSF", + "LGPL", + "MPL-2.0", + "Unlicense", + "CC0", +} + +# Common packages to check for typosquatting (popular Python packages) +POPULAR_PACKAGES = { + "requests", + "urllib3", + "setuptools", + "certifi", + "pip", + "numpy", + "pandas", + "boto3", + "botocore", + "awscli", + "django", + "flask", + "sqlalchemy", + "pytest", + "pydantic", + "aiohttp", + "fastapi", +} + + +def check_typosquatting(package_name: str) -> str | None: + """Check if package name might be typosquatting a popular package. + + :param package_name: The package name to check. + """ + package_lower = package_name.lower().replace("-", "").replace("_", "") + + for popular in POPULAR_PACKAGES: + popular_normalized = popular.lower().replace("-", "").replace("_", "") + + # Check for common typosquatting techniques + if package_lower == popular_normalized: + continue # Exact match is fine + + # Check edit distance (1-2 character changes) + if len(package_lower) == len(popular_normalized): + differences = sum( + c1 != c2 for c1, c2 in zip(package_lower, popular_normalized, strict=True) + ) + if differences == 1: + return f"Suspicious: Very similar to popular package '{popular}'" + + # Check for common substitutions + substitutions = [ + ("0", "o"), + ("1", "l"), + ("1", "i"), + ] + for old, new in substitutions: + if old in package_lower: + test_name = package_lower.replace(old, new) + if test_name == popular_normalized: + return f"Suspicious: Character substitution of popular package '{popular}'" + + return None + + +def check_license_compatibility(license_str: str) -> tuple[bool, str]: + """Check if license is compatible with the project. + + :param license_str: The license string from PyPI. + """ + if not license_str or license_str == "Unknown": + return False, "No license information" + + license_upper = license_str.upper() + + # Check against compatible licenses + for compatible in COMPATIBLE_LICENSES: + if compatible.upper() in license_upper: + return True, f"Compatible ({license_str})" + + # Check for problematic licenses + problematic = ["GPL", "AGPL", "SSPL"] + for problem in problematic: + if problem in license_upper and "LGPL" not in license_upper: + return False, f"Incompatible copyleft license ({license_str})" + + # Unknown license + return False, f"Unknown/unverified license ({license_str})" + def parse_requirement(line: str) -> str | None: """Extract package name from a requirement line. @@ -94,6 +194,10 @@ def check_package(package_name: str) -> dict[str, Any]: homepage = info.get("home_page") or project_urls.get("Homepage") source = project_urls.get("Source") or project_urls.get("Repository") + # Run automated security checks + typosquat_check = check_typosquatting(package_name) + license_compatible, license_status = check_license_compatibility(info.get("license", "Unknown")) + checks = { "name": package_name, "version": info.get("version", "unknown"), @@ -107,11 +211,30 @@ def check_package(package_name: str) -> dict[str, Any]: "warnings": [], "info_items": [], "risk_level": "low", + "automated_checks": { + "trusted_source": bool(source), + "typosquatting": typosquat_check is None, + "license_compatible": license_compatible, + }, + "check_details": { + "typosquatting": typosquat_check or "✓ No typosquatting detected", + "license": license_status, + }, } # Check for suspicious indicators risk_score = 0 + # Typosquatting check + if typosquat_check: + checks["warnings"].append(typosquat_check) + risk_score += 5 # High risk + + # License check + if not license_compatible: + checks["warnings"].append(f"License issue: {license_status}") + risk_score += 2 + if age_days < 30: checks["warnings"].append(f"Very new package (only {age_days} days old)") risk_score += 3 @@ -221,6 +344,35 @@ def main() -> int: print("\n" + "=" * 80) + # Automated checks summary + all_trusted = all(r.get("automated_checks", {}).get("trusted_source", False) for r in results) + all_no_typosquat = all( + r.get("automated_checks", {}).get("typosquatting", False) for r in results + ) + all_license_ok = all( + r.get("automated_checks", {}).get("license_compatible", False) for r in results + ) + + print("\n🤖 Automated Security Checks:") + trusted_msg = ( + "All packages have source repositories" + if all_trusted + else "Some packages missing source info" + ) + print(f" {'✅' if all_trusted else '❌'} Trusted Sources: {trusted_msg}") + + typosquat_msg = ( + "No suspicious package names detected" + if all_no_typosquat + else "Possible typosquatting detected!" + ) + print(f" {'✅' if all_no_typosquat else '❌'} Typosquatting: {typosquat_msg}") + + license_msg = ( + "All licenses are compatible" if all_license_ok else "Some license issues detected" + ) + print(f" {'✅' if all_license_ok else '⚠️ '} License Compatibility: {license_msg}") + # Summary high_risk = sum(1 for r in results if r["risk_level"] == "high") medium_risk = sum(1 for r in results if r["risk_level"] == "medium") diff --git a/scripts/parse_manifest_deps.py b/scripts/parse_manifest_deps.py new file mode 100755 index 00000000..9addfa63 --- /dev/null +++ b/scripts/parse_manifest_deps.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +"""Parse manifest.json files to extract dependency changes. + +This script compares old and new versions of manifest.json files +to identify changes in the requirements field. +""" + +# ruff: noqa: T201 +import json +import sys + + +def parse_requirements(manifest_content: str) -> list[str]: + """Extract requirements from manifest JSON content. + + :param manifest_content: JSON string content of manifest file. + """ + try: + data = json.loads(manifest_content) + return data.get("requirements", []) + except (json.JSONDecodeError, KeyError): + return [] + + +def main() -> int: + """Parse manifest dependency changes.""" + if len(sys.argv) != 3: + print("Usage: parse_manifest_deps.py ") + return 1 + + old_file = sys.argv[1] + new_file = sys.argv[2] + + try: + with open(old_file) as f: + old_reqs = parse_requirements(f.read()) + except FileNotFoundError: + old_reqs = [] + + try: + with open(new_file) as f: + new_reqs = parse_requirements(f.read()) + except FileNotFoundError: + print("Error: New manifest file not found") + return 1 + + # Find added, removed, and unchanged requirements + old_set = set(old_reqs) + new_set = set(new_reqs) + + added = new_set - old_set + removed = old_set - new_set + unchanged = old_set & new_set + + if not added and not removed: + print("No dependency changes") + return 0 + + # Output in markdown format + if added: + print("**Added:**") + for req in sorted(added): + print(f"- ✅ `{req}`") + print() + + if removed: + print("**Removed:**") + for req in sorted(removed): + print(f"- ❌ `{req}`") + print() + + if unchanged and (added or removed): + print("
") + print("Unchanged dependencies") + print() + for req in sorted(unchanged): + print(f"- `{req}`") + print() + print("
") + + return 0 + + +if __name__ == "__main__": + sys.exit(main())