#!/usr/bin/env python3
"""Validate BIOS file contributions in Pull Requests.
Usage:
python scripts/validate_pr.py [files...]
python scripts/validate_pr.py --changed # Auto-detect changed files via git
Multi-layer validation:
1. Hash verified against known databases (System.dat, batocera-systems)
2. File size matches expected value
3. File referenced in 1 platform config
4. Duplicate detection against database.json
5. Security checks (no executables, reasonable sizes)
Outputs a structured report suitable for PR comments.
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
sys.path.insert(0, os.path.dirname(__file__))
from common import compute_hashes, list_registered_platforms, load_database
try:
import yaml
except ImportError:
yaml = None
# Default locations, relative to the repository root.
DEFAULT_DB = "database.json"
DEFAULT_PLATFORMS_DIR = "platforms"

# Extensions that are never legitimate BIOS payloads; contributions carrying
# these are rejected outright as a security precaution.
BLOCKED_EXTENSIONS = {
    # Windows executables / installers / scripts
    ".exe", ".bat", ".cmd", ".ps1", ".vbs", ".msi", ".dll",
    # POSIX shared objects and shell scripts
    ".sh", ".so", ".dylib",
    # Interpreted-language sources
    ".js", ".py", ".rb", ".pl",
}

# Hard ceiling (100 MiB) for files stored directly in the repository;
# anything larger must use external storage per the platform config.
MAX_FILE_SIZE = 100 * 1024 * 1024
class ValidationResult:
    """Accumulates per-file validation results and renders them as a report."""

    def __init__(self, filepath: str):
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        self.checks = []  # list of (status, message); status is PASS/FAIL/WARN/INFO
        self.sha1 = ""
        self.md5 = ""
        self.crc32 = ""
        self.size = 0

    def add_check(self, passed: bool, message: str):
        """Record a hard check; any FAIL entry fails the whole result."""
        self.checks.append(("PASS" if passed else "FAIL", message))

    def add_warning(self, message: str):
        """Record a non-fatal warning (does not affect `passed`)."""
        self.checks.append(("WARN", message))

    def add_info(self, message: str):
        """Record an informational note (does not affect `passed`)."""
        self.checks.append(("INFO", message))

    @property
    def passed(self) -> bool:
        """True when no recorded check has status FAIL."""
        return all(s != "FAIL" for s, _ in self.checks)

    def to_markdown(self) -> str:
        """Render this result as a markdown section suitable for PR comments."""
        # BUG FIX: both branches previously assigned the empty string (the
        # status glyphs were lost); restore the pass/fail emoji used by the
        # per-check lines below so the header shows the overall outcome.
        status = "✅" if self.passed else "❌"
        lines = [f"### {status} `{self.filename}`"]
        lines.append("")
        lines.append(f"- **Path**: `{self.filepath}`")
        lines.append(f"- **Size**: {self.size:,} bytes")
        lines.append(f"- **SHA1**: `{self.sha1}`")
        lines.append(f"- **MD5**: `{self.md5}`")
        lines.append(f"- **CRC32**: `{self.crc32}`")
        lines.append("")
        for status_str, message in self.checks:
            if status_str == "PASS":
                lines.append(f"- ✅ {message}")
            elif status_str == "FAIL":
                lines.append(f"- ❌ {message}")
            elif status_str == "WARN":
                lines.append(f"- ⚠️ {message}")
            else:
                # INFO entries carry no marker.
                lines.append(f"- {message}")
        return "\n".join(lines)
def load_platform_hashes(platforms_dir: str) -> dict:
    """Load all known hashes from platform configs.

    Returns a dict with three sets: "sha1", "md5", and "names", collected
    from the `systems.*.files` entries of every registered platform YAML.
    Returns empty sets when the directory is missing or PyYAML is absent.
    """
    known = {"sha1": set(), "md5": set(), "names": set()}
    if not os.path.isdir(platforms_dir) or yaml is None:
        return known
    for name in list_registered_platforms(platforms_dir, include_archived=True):
        config_path = Path(platforms_dir) / f"{name}.yml"
        with open(config_path, encoding="utf-8") as fh:
            try:
                config = yaml.safe_load(fh) or {}
            except yaml.YAMLError:
                # Skip malformed configs rather than abort the whole scan.
                continue
        # Only the system values matter here; the system IDs are unused.
        for system in config.get("systems", {}).values():
            for file_entry in system.get("files", []):
                if "sha1" in file_entry:
                    known["sha1"].add(file_entry["sha1"])
                if "md5" in file_entry:
                    known["md5"].add(file_entry["md5"])
                if "name" in file_entry:
                    known["names"].add(file_entry["name"])
    return known
def validate_file(
    filepath: str,
    db: dict | None,
    platform_hashes: dict,
) -> ValidationResult:
    """Run all validation checks on a file.

    Checks run in a fixed order so report output stays stable: existence,
    hashing, blocked extension, size, duplicate lookup, known-hash lookup,
    placement, and finally a variant hint.
    """
    result = ValidationResult(filepath)
    if not os.path.exists(filepath):
        result.add_check(False, f"File not found: {filepath}")
        return result

    result.size = os.path.getsize(filepath)
    hashes = compute_hashes(filepath)
    result.sha1 = hashes["sha1"]
    result.md5 = hashes["md5"]
    result.crc32 = hashes["crc32"]

    _check_extension(result, filepath)
    _check_size(result)
    _check_duplicate(result, db)
    sha1_known, md5_known, name_known = _check_known_hashes(result, platform_hashes)
    _check_placement(result, filepath)

    if name_known and not sha1_known and not md5_known:
        result.add_info(
            "This may be a valid variant. If accepted, it will be placed in "
            f"`.variants/{result.filename}.{result.sha1[:8]}`"
        )
    return result


def _check_extension(result: ValidationResult, filepath: str) -> None:
    """Security check: reject executable/script file extensions."""
    ext = os.path.splitext(filepath)[1].lower()
    if ext in BLOCKED_EXTENSIONS:
        result.add_check(False, f"Blocked file extension: {ext}")


def _check_size(result: ValidationResult) -> None:
    """Reject empty files and enforce the embedded-storage size ceiling."""
    if result.size > MAX_FILE_SIZE:
        result.add_check(
            False,
            f"File too large for embedded storage ({result.size:,} > {MAX_FILE_SIZE:,} bytes). Use storage: external in platform config.",
        )
    elif result.size == 0:
        result.add_check(False, "File is empty (0 bytes)")
    else:
        result.add_check(True, f"File size OK ({result.size:,} bytes)")


def _check_duplicate(result: ValidationResult, db: dict | None) -> None:
    """Warn when an identical file (by SHA1) already exists in the database."""
    if not db:
        # No database available: record nothing, matching prior behavior.
        return
    if result.sha1 in db.get("files", {}):
        existing = db["files"][result.sha1]
        result.add_warning(
            f"Duplicate: identical file already exists at `{existing['path']}`"
        )
    else:
        result.add_check(True, "Not a duplicate in database")


def _check_known_hashes(
    result: ValidationResult, platform_hashes: dict
) -> tuple[bool, bool, bool]:
    """Match the file against platform configs.

    Returns (sha1_known, md5_known, name_known) so the caller can emit the
    variant hint after the placement checks.
    """
    sha1_known = result.sha1 in platform_hashes.get("sha1", set())
    md5_known = result.md5 in platform_hashes.get("md5", set())
    name_known = result.filename in platform_hashes.get("names", set())
    if sha1_known:
        result.add_check(True, "SHA1 matches known platform requirement")
    elif md5_known:
        result.add_check(True, "MD5 matches known platform requirement")
    elif name_known:
        result.add_warning(
            "Filename matches a known requirement but hash differs - may be a variant"
        )
    else:
        result.add_warning(
            "File not referenced in any platform config - needs manual review"
        )
    return sha1_known, md5_known, name_known


def _check_placement(result: ValidationResult, filepath: str) -> None:
    """Verify the file sits under bios/Manufacturer/Console/ and is not a symlink."""
    normalized = os.path.normpath(filepath)
    if os.path.islink(filepath):
        result.add_check(False, "Symlinks are not allowed")
    elif normalized.startswith("bios" + os.sep):
        parts = normalized.split(os.sep)
        # bios / Manufacturer / Console / file => at least 4 path components.
        if len(parts) >= 4:
            result.add_check(True, f"Correct placement: bios/{parts[1]}/{parts[2]}/")
        else:
            result.add_warning("File should be in bios/Manufacturer/Console/ structure")
    else:
        result.add_warning("File is not under bios/ directory")
def get_changed_files() -> list[str]:
    """Get list of changed BIOS files in the current PR/branch using git.

    Tries a three-dot diff against common base branches first, then falls
    back to staged changes. Returns [] when the git binary is unavailable.
    """
    try:
        for base in ("main", "master", "v2"):
            try:
                result = subprocess.run(
                    ["git", "diff", "--name-only", f"origin/{base}...HEAD"],
                    capture_output=True,
                    text=True,
                    check=True,
                )
            except subprocess.CalledProcessError:
                # Base branch not present in this clone; try the next one.
                continue
            files = [
                f
                for f in result.stdout.strip().split("\n")
                if f.startswith("bios/")
            ]
            if files:
                return files
    except OSError:
        # BUG FIX: git binary missing entirely. Previously this fell through
        # to the fallback `subprocess.run`, which raised the same uncaught
        # FileNotFoundError; bail out cleanly instead.
        return []
    try:
        result = subprocess.run(
            ["git", "diff", "--cached", "--name-only"],
            capture_output=True,
            text=True,
        )
    except OSError:
        return []
    return [f for f in result.stdout.strip().split("\n") if f.startswith("bios/") and f]
def main():
    """CLI entry point: parse arguments, validate files, emit a report.

    Exits with status 1 when any validated file fails a check.
    """
    parser = argparse.ArgumentParser(description="Validate BIOS file contributions")
    parser.add_argument("files", nargs="*", help="Files to validate")
    parser.add_argument(
        "--changed", action="store_true", help="Auto-detect changed BIOS files"
    )
    parser.add_argument("--db", default=DEFAULT_DB, help="Path to database.json")
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument(
        "--markdown", action="store_true", help="Output as markdown (for PR comments)"
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    files = args.files
    if args.changed:
        files = get_changed_files()
        if not files:
            print("No changed BIOS files detected")
            return
    if not files:
        parser.error("No files specified. Use --changed or provide file paths.")

    # The database and platform configs are optional inputs: degrade
    # gracefully when missing or corrupt rather than aborting validation.
    try:
        db = load_database(args.db)
    except FileNotFoundError:
        db = None
    except json.JSONDecodeError as e:
        print(f"WARNING: corrupt database.json: {e}", file=sys.stderr)
        db = None
    platform_hashes = load_platform_hashes(args.platforms_dir)

    results = [validate_file(f, db, platform_hashes) for f in files]
    all_passed = all(r.passed for r in results)

    if args.json:
        output = [
            {
                "file": r.filepath,
                "passed": r.passed,
                "sha1": r.sha1,
                "md5": r.md5,
                "size": r.size,
                "checks": [{"status": s, "message": m} for s, m in r.checks],
            }
            for r in results
        ]
        print(json.dumps(output, indent=2))
    elif args.markdown:
        lines = ["## BIOS Validation Report", ""]
        status = "✅ All checks passed" if all_passed else "❌ Some checks failed"
        lines.append(f"**Status**: {status}")
        lines.append("")
        for r in results:
            lines.append(r.to_markdown())
            lines.append("")
        print("\n".join(lines))
    else:
        # BUG FIX: the PASS and FAIL markers were both empty strings (glyphs
        # lost), making console output indistinguishable; use distinct ASCII
        # markers that survive any terminal encoding.
        markers = {"PASS": "+", "FAIL": "x", "WARN": "!"}
        for r in results:
            status = "PASS" if r.passed else "FAIL"
            print(f"\n[{status}] {r.filepath}")
            print(f"  SHA1: {r.sha1}")
            print(f"  MD5: {r.md5}")
            print(f"  Size: {r.size:,}")
            for s, m in r.checks:
                print(f"  [{markers.get(s, 'i')}] {m}")
    if not all_passed:
        sys.exit(1)


if __name__ == "__main__":
    main()