mirror of
https://github.com/Abdess/retrobios.git
synced 2026-06-29 13:52:48 +00:00
v2: automated BIOS platform with full pipeline
Reorganized 6 branches into bios/Manufacturer/Console/. Scrapers for RetroArch, Batocera, Recalbox, and libretro core-info. Platform-aware verification replicating native logic per platform. Pack generation with dedup, alias resolution, variant support. CI/CD: weekly auto-scrape, auto-release, PR validation. Large files (>50MB) stored as GitHub Release assets, auto-fetched at build time.
This commit is contained in:
parent
5f96368f6d
commit
13c561888d
7038 changed files with 3243612 additions and 29617 deletions
418
scripts/auto_fetch.py
Normal file
418
scripts/auto_fetch.py
Normal file
|
|
@ -0,0 +1,418 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Auto-fetch missing BIOS files from multiple sources.
|
||||
|
||||
Pipeline:
|
||||
1. Cross-reference database.json (already exists under different name/path?)
|
||||
2. Scan old branches (git show origin/branch:path)
|
||||
3. Search public BIOS repos on GitHub
|
||||
4. Search archive.org collections
|
||||
5. Create GitHub Issue for community help
|
||||
|
||||
Usage:
|
||||
python scripts/auto_fetch.py --platform retroarch [--dry-run]
|
||||
python scripts/auto_fetch.py --all [--dry-run]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from common import load_platform_config
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
DEFAULT_DB = "database.json"
|
||||
DEFAULT_PLATFORMS_DIR = "platforms"
|
||||
DEFAULT_BIOS_DIR = "bios"
|
||||
|
||||
LEGACY_BRANCHES = ["libretro", "RetroArch", "RetroPie", "Recalbox", "batocera", "Other"]
|
||||
|
||||
PUBLIC_REPOS = [
|
||||
# archtaurus/RetroPieBIOS - most complete verified collection
|
||||
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/{name}",
|
||||
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/pcsx2/bios/{name}",
|
||||
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/ep128emu/roms/{name}",
|
||||
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/fuse/{name}",
|
||||
# prefetchnta/retroarch-bios - alternative verified collection
|
||||
"https://raw.githubusercontent.com/prefetchnta/retroarch-bios/main/system/{name}",
|
||||
"https://raw.githubusercontent.com/prefetchnta/retroarch-bios/main/system/pcsx2/bios/{name}",
|
||||
# BatoceraPLUS - Batocera-specific
|
||||
"https://raw.githubusercontent.com/BatoceraPLUS/Batocera.PLUS-bios/main/{name}",
|
||||
]
|
||||
|
||||
ARCHIVE_ORG_COLLECTIONS = [
|
||||
"RetroarchSystemFiles",
|
||||
"retroarch_bios",
|
||||
"retroarch-ultimate-bios-pack_20250824",
|
||||
"system_20240621",
|
||||
"full-pack-bios-batocera-39",
|
||||
]
|
||||
|
||||
|
||||
def load_database(db_path: str) -> dict:
    """Read the JSON database stored at ``db_path`` and return the parsed object."""
    with open(db_path) as handle:
        raw = handle.read()
    return json.loads(raw)
|
||||
|
||||
|
||||
def find_missing(config: dict, db: dict) -> list[dict]:
    """Return the embedded BIOS entries of ``config`` that the database lacks.

    An entry counts as present when its SHA1 is a key of ``db["files"]`` or,
    failing that, when its MD5 is a key of ``db["indexes"]["by_md5"]``.
    Entries whose ``storage`` is anything other than ``"embedded"`` are ignored.
    """

    def _is_known(sha1, md5) -> bool:
        # Lookups stay lazy so a database without indexes is only touched
        # when the SHA1 check did not already succeed.
        if sha1 and sha1 in db.get("files", {}):
            return True
        return bool(md5 and md5 in db.get("indexes", {}).get("by_md5", {}))

    gaps = []
    for sys_id, system in config.get("systems", {}).items():
        for spec in system.get("files", []):
            if spec.get("storage", "embedded") != "embedded":
                continue

            sha1, md5 = spec.get("sha1"), spec.get("md5")
            if _is_known(sha1, md5):
                continue

            name = spec.get("name", "")
            gaps.append({
                "name": name,
                "system": sys_id,
                "sha1": sha1,
                "md5": md5,
                "size": spec.get("size"),
                "destination": spec.get("destination", name),
            })
    return gaps
|
||||
|
||||
|
||||
def verify_content(data: bytes, expected: dict) -> bool:
    """Check ``data`` against the hashes recorded in ``expected``.

    SHA1 is authoritative when present; MD5 is only consulted when no SHA1
    is given. The comparison ignores letter case and surrounding whitespace,
    because ``hexdigest()`` is always lowercase while hashes copied from
    platform configs may be uppercase.

    Returns False when ``expected`` carries neither hash.
    """
    for field, hasher in (("sha1", hashlib.sha1), ("md5", hashlib.md5)):
        wanted = expected.get(field)
        if wanted:
            return hasher(data).hexdigest() == str(wanted).strip().lower()
    return False
|
||||
|
||||
|
||||
def step1_crossref_db(entry: dict, db: dict) -> str | None:
    """Return the stored path of ``entry`` if the database already holds it.

    The SHA1 is looked up directly in ``db["files"]``; otherwise the MD5 is
    translated to a SHA1 through ``db["indexes"]["by_md5"]``. Returns None
    when neither hash resolves to a database record.
    """
    sha1 = entry.get("sha1")
    if sha1 and sha1 in db.get("files", {}):
        return db["files"][sha1]["path"]

    md5 = entry.get("md5")
    if not md5:
        return None

    resolved = db.get("indexes", {}).get("by_md5", {}).get(md5)
    if resolved and resolved in db["files"]:
        return db["files"][resolved]["path"]
    return None
|
||||
|
||||
|
||||
def step2_scan_branches(entry: dict) -> bytes | None:
    """Look through the legacy ``origin/*`` branches for a blob matching ``entry``.

    Every tracked path whose text ends with the entry name is read with
    ``git show`` and accepted only if ``verify_content`` confirms its hash.
    Branches that do not exist locally are skipped.
    """
    wanted = entry["name"]

    for branch in LEGACY_BRANCHES:
        ref = f"origin/{branch}"
        try:
            subprocess.run(
                ["git", "rev-parse", "--verify", ref],
                capture_output=True, check=True,
            )
        except subprocess.CalledProcessError:
            continue

        listing = subprocess.run(
            ["git", "ls-tree", "-r", "--name-only", ref],
            capture_output=True, text=True,
        )
        # A suffix test alone covers the exact-name and "/name" cases as well.
        candidates = [
            tracked for tracked in listing.stdout.strip().split("\n")
            if tracked.endswith(wanted)
        ]

        for candidate in candidates:
            try:
                blob = subprocess.run(
                    ["git", "show", f"{ref}:{candidate}"],
                    capture_output=True, check=True,
                )
            except subprocess.CalledProcessError:
                continue
            if verify_content(blob.stdout, entry):
                return blob.stdout

    return None
|
||||
|
||||
|
||||
def step3_search_public_repos(entry: dict) -> bytes | None:
    """Try to download ``entry`` from the public GitHub BIOS mirrors.

    For every URL template in ``PUBLIC_REPOS`` the plain file name is tried
    first and, when the destination contains a sub-directory, the destination
    path second. A failed or mismatching download of the plain name no longer
    prevents the destination path from being tried for the same template.

    Returns the verified bytes, or None when no mirror serves a matching file.
    """
    import http.client
    from urllib.parse import quote

    name = entry["name"]
    destination = entry.get("destination") or name

    relative_paths = [name]
    if "/" in destination and destination != name:
        relative_paths.append(destination)

    def _download_verified(url: str) -> bytes | None:
        request = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"})
        try:
            with urllib.request.urlopen(request, timeout=30) as resp:
                data = resp.read()
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError are OSError subclasses; read timeouts and
            # truncated responses are not URLError, so catch them here too.
            return None
        return data if verify_content(data, entry) else None

    for url_template in PUBLIC_REPOS:
        for relative in relative_paths:
            # Percent-encode so names containing spaces form a valid URL
            # ("/" is kept as a path separator by quote()).
            found = _download_verified(url_template.format(name=quote(relative)))
            if found is not None:
                return found

    return None
|
||||
|
||||
|
||||
def step4_search_archive_org(entry: dict) -> bytes | None:
    """Try to download ``entry`` from archive.org.

    First probes the known collections in ``ARCHIVE_ORG_COLLECTIONS`` at a few
    conventional paths, then (when a SHA1 is known) asks the advanced-search
    endpoint for an item and probes that item for the file name.

    Returns the verified bytes, or None when nothing matching was found.
    Network failures of any kind are treated as "not found here".
    """
    import http.client
    from urllib.parse import quote

    name = entry["name"]
    headers = {"User-Agent": "retrobios-fetch/1.0"}

    def _read(url: str) -> bytes | None:
        request = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(request, timeout=30) as resp:
                return resp.read()
        except (OSError, http.client.HTTPException):
            # Covers URLError/HTTPError plus read timeouts and truncated bodies.
            return None

    def _download_verified(url: str) -> bytes | None:
        data = _read(url)
        if data is not None and verify_content(data, entry):
            return data
        return None

    for collection_id in ARCHIVE_ORG_COLLECTIONS:
        for path in (name, f"system/{name}", f"bios/{name}"):
            # quote() keeps "/" but encodes spaces and other unsafe characters.
            found = _download_verified(
                f"https://archive.org/download/{collection_id}/{quote(path)}"
            )
            if found is not None:
                return found

    sha1 = entry.get("sha1")
    if not sha1:
        return None

    search_url = (
        f"https://archive.org/advancedsearch.php?"
        f"q=sha1:{sha1}&output=json&rows=1"
    )
    raw = _read(search_url)
    if raw is None:
        return None

    try:
        result = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return None

    docs = result.get("response", {}).get("docs", []) if isinstance(result, dict) else []
    identifier = docs[0].get("identifier") if docs else None
    if not identifier:
        return None

    return _download_verified(
        f"https://archive.org/download/{quote(str(identifier))}/{quote(name)}"
    )
|
||||
|
||||
|
||||
def place_file(data: bytes, entry: dict, bios_dir: str, db: dict) -> str:
    """Write a fetched BIOS file below ``bios_dir`` and return the path used.

    The target is the first ``Manufacturer/Console`` directory (in sorted
    order, so the choice is reproducible) whose lowercased path contains one
    of the dash-separated parts of the system id that is longer than two
    characters. The parts are lowercased as well, so mixed-case system ids
    match. When nothing matches, or ``bios_dir`` does not exist yet, the file
    goes to ``bios_dir/Unknown/<system>/<name>``.

    ``db`` is accepted for interface compatibility and is not used.
    """
    name = entry["name"]
    system = entry["system"]
    dest_dir = Path(bios_dir)

    tokens = [part.lower() for part in system.split("-") if len(part) > 2]

    def _write(dest: Path) -> str:
        dest.parent.mkdir(parents=True, exist_ok=True)
        with open(dest, "wb") as f:
            f.write(data)
        return str(dest)

    if tokens and dest_dir.is_dir():
        for manufacturer_dir in sorted(dest_dir.iterdir()):
            if not manufacturer_dir.is_dir():
                continue
            for console_dir in sorted(manufacturer_dir.iterdir()):
                if not console_dir.is_dir():
                    continue
                dir_path = f"{manufacturer_dir.name}/{console_dir.name}".lower()
                if any(token in dir_path for token in tokens):
                    return _write(console_dir / name)

    return _write(dest_dir / "Unknown" / system / name)
|
||||
|
||||
|
||||
def fetch_missing(
    missing: list[dict],
    db: dict,
    bios_dir: str,
    dry_run: bool = False,
) -> dict:
    """Walk each missing entry through the lookup pipeline.

    Order of attempts: database cross-reference, legacy branches, public
    repositories, archive.org. With ``dry_run`` only the database step runs.
    Returns ``{"stats": {...}, "still_missing": [...]}``.
    """
    stats = {"found": 0, "not_found": 0, "errors": 0}
    still_missing = []

    def _give_up(item: dict, note: str) -> None:
        print(note)
        still_missing.append(item)
        stats["not_found"] += 1

    for entry in missing:
        print(f"\n Searching: {entry['name']} ({entry['system']})")

        existing = step1_crossref_db(entry, db)
        if existing:
            print(f" [1] Found in database at: {existing}")
            stats["found"] += 1
            continue

        if dry_run:
            _give_up(entry, " [DRY RUN] Would search branches, repos, archive.org")
            continue

        sources = (
            (step2_scan_branches, " [2] Found in branch, saved to: "),
            (step3_search_public_repos, " [3] Found in public repo, saved to: "),
            (step4_search_archive_org, " [4] Found on archive.org, saved to: "),
        )
        for finder, banner in sources:
            payload = finder(entry)
            if payload:
                saved_at = place_file(payload, entry, bios_dir, db)
                print(f"{banner}{saved_at}")
                stats["found"] += 1
                break
        else:
            # No source produced a verified file.
            _give_up(entry, " [5] Not found - needs community contribution")

    return {"stats": stats, "still_missing": still_missing}
|
||||
|
||||
|
||||
def generate_issue_body(missing: list[dict], platform: str) -> str:
    """Build the Markdown body of a GitHub issue listing missing BIOS files.

    Each entry becomes one table row showing the first 12 characters of its
    SHA1 and MD5. A hash that is absent or stored as ``None`` (which is how
    ``find_missing`` records an unknown hash) is rendered as ``N/A`` instead
    of raising ``TypeError`` on the slice.
    """

    def _short(digest) -> str:
        return f"`{str(digest)[:12]}...`" if digest else "N/A"

    lines = [
        f"## Missing BIOS Files for {platform}",
        "",
        "The following BIOS files are required but not available in the repository.",
        "If you have any of these files, please submit a Pull Request!",
        "",
        "| File | System | SHA1 | MD5 |",
        "|------|--------|------|-----|",
    ]

    for entry in missing:
        lines.append(
            f"| `{entry['name']}` | {entry['system']} "
            f"| {_short(entry.get('sha1'))} | {_short(entry.get('md5'))} |"
        )

    lines.extend([
        "",
        "### How to Contribute",
        "",
        "1. Fork this repository",
        "2. Add the BIOS file to `bios/Manufacturer/Console/`",
        "3. Create a Pull Request - checksums are verified automatically",
    ])

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: find and fetch missing BIOS files per platform."""
    parser = argparse.ArgumentParser(description="Auto-fetch missing BIOS files")
    parser.add_argument("--platform", "-p", help="Platform to check")
    parser.add_argument("--all", action="store_true", help="Check all platforms")
    parser.add_argument("--dry-run", action="store_true", help="Don't download, just report")
    parser.add_argument("--db", default=DEFAULT_DB)
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
    parser.add_argument("--create-issues", action="store_true", help="Output GitHub Issue bodies")
    args = parser.parse_args()

    if not os.path.exists(args.db):
        print(f"Error: {args.db} not found. Run generate_db.py first.", file=sys.stderr)
        sys.exit(1)

    db = load_database(args.db)

    if args.all:
        # Files starting with "_" (e.g. shared definitions) are not platforms.
        platforms = [
            cfg.stem
            for cfg in Path(args.platforms_dir).glob("*.yml")
            if not cfg.name.startswith("_")
        ]
    elif args.platform:
        platforms = [args.platform]
    else:
        parser.error("Specify --platform or --all")
        return

    rule = "=" * 60
    all_still_missing = {}

    for platform in sorted(platforms):
        print("\n" + rule)
        print(f"Platform: {platform}")
        print(rule)

        try:
            config = load_platform_config(platform, args.platforms_dir)
        except FileNotFoundError:
            print(" Config not found, skipping")
            continue

        missing = find_missing(config, db)
        if not missing:
            print(" All BIOS files present!")
            continue

        print(f" {len(missing)} missing files")
        result = fetch_missing(missing, db, args.bios_dir, args.dry_run)

        leftovers = result["still_missing"]
        if leftovers:
            all_still_missing[platform] = leftovers

        stats = result["stats"]
        print(f"\n Results: {stats['found']} found, {stats['not_found']} not found")

    if not (args.create_issues and all_still_missing):
        return

    print("\n" + rule)
    print("GitHub Issue Bodies")
    print(rule)
    for platform, missing in all_still_missing.items():
        print(f"\n--- Issue for {platform} ---\n")
        print(generate_issue_body(missing, platform))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
102
scripts/common.py
Normal file
102
scripts/common.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""Shared utilities for retrobios scripts.
|
||||
|
||||
Single source of truth for platform config loading, hash computation,
|
||||
and file resolution - eliminates DRY violations across scripts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import zipfile
|
||||
import zlib
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
|
||||
def compute_hashes(filepath: str | Path) -> dict[str, str]:
    """Return the SHA1, MD5, SHA256 and CRC32 of a file as lowercase hex strings.

    The file is read once in 64 KiB blocks and every digest is fed from the
    same block. CRC32 is zero-padded to eight hex digits.
    """
    digests = {
        "sha1": hashlib.sha1(),
        "md5": hashlib.md5(),
        "sha256": hashlib.sha256(),
    }
    checksum = 0

    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            for digest in digests.values():
                digest.update(block)
            checksum = zlib.crc32(block, checksum)

    report = {algo: digest.hexdigest() for algo, digest in digests.items()}
    report["crc32"] = format(checksum & 0xFFFFFFFF, "08x")
    return report
|
||||
|
||||
|
||||
def md5sum(filepath: str | Path) -> str:
    """Return the MD5 hex digest of a file, read in 64 KiB blocks."""
    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def load_platform_config(platform_name: str, platforms_dir: str = "platforms") -> dict:
    """Load a platform config with inheritance and shared group resolution.

    The ``inherits`` chain is merged first, then the groups named in each
    system's ``includes`` are appended to that system's ``files`` exactly
    once. Expanding them only at the top level avoids the duplicate file
    entries that resulted from expanding them in the parent and again in
    every inheriting child.

    Raises:
        ImportError: PyYAML is not installed.
        FileNotFoundError: the platform (or one of its parents) has no YAML file.
        ValueError: the ``inherits`` chain is circular.
    """
    if yaml is None:
        raise ImportError("PyYAML required: pip install pyyaml")

    config = _load_inherited_config(platform_name, platforms_dir)

    # Resolve shared group includes
    shared_path = os.path.join(platforms_dir, "_shared.yml")
    if os.path.exists(shared_path):
        with open(shared_path) as f:
            shared = yaml.safe_load(f) or {}
        shared_groups = shared.get("shared_groups", {})
        for system in config.get("systems", {}).values():
            for group_name in system.get("includes", []):
                if group_name in shared_groups:
                    system.setdefault("files", []).extend(shared_groups[group_name])

    return config


def _load_inherited_config(platform_name: str, platforms_dir: str, lineage: tuple = ()) -> dict:
    """Load one platform YAML merged over its ``inherits`` chain, without shared groups."""
    if platform_name in lineage:
        chain = " -> ".join(lineage + (platform_name,))
        raise ValueError(f"Circular platform inheritance: {chain}")

    config_file = os.path.join(platforms_dir, f"{platform_name}.yml")
    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Platform config not found: {config_file}")

    with open(config_file) as f:
        config = yaml.safe_load(f) or {}

    if "inherits" not in config:
        return config

    parent = _load_inherited_config(
        config["inherits"], platforms_dir, lineage + (platform_name,)
    )
    merged = {**parent}
    # Child keys win over the parent's; the two control keys are consumed here.
    merged.update({k: v for k, v in config.items() if k not in ("inherits", "overrides")})

    if "overrides" in config and "systems" in config["overrides"]:
        merged.setdefault("systems", {})
        for sys_id, override in config["overrides"]["systems"].items():
            if sys_id in merged["systems"]:
                merged["systems"][sys_id] = {**merged["systems"][sys_id], **override}
            else:
                merged["systems"][sys_id] = override

    return merged
|
||||
|
||||
|
||||
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
    """Extract ``zip_path`` into ``dest_dir``, refusing members that escape it.

    Members are checked and extracted one at a time, so members preceding an
    offending one have already been written when ``ValueError`` is raised.
    """
    root = os.path.realpath(dest_dir)
    inside_prefix = root + os.sep

    with zipfile.ZipFile(zip_path, "r") as archive:
        for info in archive.infolist():
            target = os.path.realpath(os.path.join(root, info.filename))
            escapes = target != root and not target.startswith(inside_prefix)
            if escapes:
                raise ValueError(f"Zip slip detected: {info.filename}")
            archive.extract(info, root)
|
||||
155
scripts/dedup.py
Normal file
155
scripts/dedup.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Deduplicate bios/ directory - keep one canonical file per unique SHA1.
|
||||
|
||||
Usage:
|
||||
python scripts/dedup.py [--dry-run] [--bios-dir bios]
|
||||
|
||||
For each group of files with the same SHA1:
|
||||
- Keeps the file with the shortest, most canonical path
|
||||
- Removes duplicates
|
||||
- Records all alternate names in database.json aliases
|
||||
|
||||
After dedup, generate_pack.py resolves files by hash and writes them
|
||||
with the correct destination name - no duplicates needed on disk.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import compute_hashes
|
||||
|
||||
DEFAULT_BIOS_DIR = "bios"
|
||||
|
||||
# Directories where deduplication must NOT be applied.
|
||||
# RPG Maker RTP files are referenced by exact name in game scripts -
|
||||
# removing a "duplicate" breaks games that reference that specific filename.
|
||||
# ScummVM themes/extra also have name-dependent loading.
|
||||
NODEDUP_DIRS = {
|
||||
"RPG Maker",
|
||||
"ScummVM",
|
||||
}
|
||||
|
||||
|
||||
def path_priority(path: str) -> tuple:
    """Sort key for choosing the canonical copy; the smallest key is kept.

    Ordering, most significant first:
    - paths without ".variants" before paths containing it
    - fewer "/"-separated components
    - shorter file name
    - the path string itself as the final tie-breaker
    """
    basename = os.path.basename(path)
    return (".variants" in path, len(path.split("/")), len(basename), path)
|
||||
|
||||
|
||||
def _in_nodedup_dir(path: str) -> bool:
    """Return True when ``path`` contains any of the ``NODEDUP_DIRS`` names as a substring."""
    for marker in NODEDUP_DIRS:
        if marker in path:
            return True
    return False
|
||||
|
||||
|
||||
def scan_duplicates(bios_dir: str) -> dict[str, list[str]]:
    """Walk ``bios_dir`` and group file paths by SHA1.

    Files for which ``_in_nodedup_dir`` is true are left out entirely.
    Every SHA1 is returned, including those with a single path.
    """
    groups = defaultdict(list)

    for folder, _subdirs, filenames in os.walk(bios_dir):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            if not _in_nodedup_dir(full_path):
                groups[compute_hashes(full_path)["sha1"]].append(full_path)

    return groups
|
||||
|
||||
|
||||
def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
    """Remove duplicate files, keeping one canonical copy per SHA1.

    Within each group of identical files the path with the lowest
    ``path_priority`` is kept; the others are deleted (or only reported when
    ``dry_run`` is set). Afterwards empty directories are pruned bottom-up.

    Returns dict of {sha1: {"canonical": path, "removed": [paths], "aliases": [names]}}
    """
    sha1_groups = scan_duplicates(bios_dir)
    results = {}
    total_removed = 0
    total_saved = 0
    action = "Would remove" if dry_run else "Removed"

    for sha1, paths in sorted(sha1_groups.items()):
        if len(paths) <= 1:
            continue

        paths.sort(key=path_priority)
        canonical, duplicates = paths[0], paths[1:]

        canonical_name = os.path.basename(canonical)
        alias_names = sorted({os.path.basename(p) for p in paths} - {canonical_name})

        # All copies are byte-identical, so the canonical size is the size of each.
        size = os.path.getsize(canonical)

        removed = []
        for dup in duplicates:
            if dry_run:
                print(f" WOULD REMOVE: {dup}")
            else:
                os.remove(dup)
            removed.append(dup)

        total_removed += len(removed)
        total_saved += size * len(removed)
        results[sha1] = {
            "canonical": canonical,
            "removed": removed,
            "aliases": alias_names,
        }

        if alias_names:
            print(f" {canonical_name} (keep: {canonical})")
            print(f" {action} {len(duplicates)} copies, aliases: {alias_names}")

    if not dry_run:
        # os.walk builds each directory's sub-directory list before descending,
        # so with topdown=False that list is stale once children were removed.
        # Ask the filesystem instead, so chains of emptied directories are pruned.
        for root, _dirs, _files in os.walk(bios_dir, topdown=False):
            try:
                if not os.listdir(root):
                    os.rmdir(root)
            except OSError:
                # Best-effort cleanup: a directory that cannot be listed or
                # removed is simply left in place.
                pass

    print(f"\n{'Would remove' if dry_run else 'Removed'}: {total_removed} files")
    print(f"Space {'to save' if dry_run else 'saved'}: {total_saved / 1024 / 1024:.1f} MB")

    return results
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for deduplicating the BIOS directory."""
    parser = argparse.ArgumentParser(description="Deduplicate BIOS files")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
    args = parser.parse_args()

    bios_dir = args.bios_dir
    if not os.path.isdir(bios_dir):
        print(f"Error: {bios_dir} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Scanning {bios_dir}/ for duplicates...")
    if args.dry_run:
        print("(DRY RUN)\n")

    deduplicate(bios_dir, args.dry_run)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
259
scripts/download.py
Normal file
259
scripts/download.py
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Download BIOS packs from GitHub Releases.
|
||||
|
||||
Cross-platform tool (Linux/macOS/Windows) using only Python stdlib.
|
||||
|
||||
Usage:
|
||||
python scripts/download.py --list # List platforms
|
||||
python scripts/download.py retroarch ~/path/ # Download pack
|
||||
python scripts/download.py --verify retroarch ~/path # Verify local files
|
||||
python scripts/download.py --info retroarch # Show coverage info
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import safe_extract_zip
|
||||
|
||||
GITHUB_API = "https://api.github.com"
|
||||
REPO = "Abdess/retroarch_system"
|
||||
|
||||
|
||||
def get_latest_release() -> dict:
    """Return the latest-release JSON of ``REPO`` from the GitHub API.

    A 404 response prints a notice and exits with status 1; any other HTTP
    error is re-raised to the caller.
    """
    request = urllib.request.Request(
        f"{GITHUB_API}/repos/{REPO}/releases/latest",
        headers={
            "User-Agent": "retrobios-downloader/1.0",
            "Accept": "application/vnd.github.v3+json",
        },
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return json.loads(response.read())
    except urllib.error.HTTPError as exc:
        if exc.code != 404:
            raise
        print("No releases found. The repository may not have any releases yet.")
        sys.exit(1)
|
||||
|
||||
|
||||
def list_platforms(release: dict) -> list[str]:
    """Return the sorted platform names derived from ``*_BIOS_Pack.zip`` assets.

    The pack suffix is dropped and underscores become spaces.
    """
    suffix = "_BIOS_Pack.zip"
    asset_names = (asset["name"] for asset in release.get("assets", []))
    return sorted(
        asset_name.replace(suffix, "").replace("_", " ")
        for asset_name in asset_names
        if asset_name.endswith(suffix)
    )
|
||||
|
||||
|
||||
def find_asset(release: dict, platform: str) -> dict | None:
    """Return the first pack asset whose normalized name contains ``platform``.

    Normalization lowercases and maps spaces and dashes to underscores on
    both sides. Only assets ending in ``_bios_pack.zip`` are considered.
    """

    def _norm(text: str) -> str:
        return text.lower().replace(" ", "_").replace("-", "_")

    wanted = _norm(platform)
    for asset in release.get("assets", []):
        candidate = _norm(asset["name"])
        if candidate.endswith("_bios_pack.zip") and wanted in candidate:
            return asset
    return None
|
||||
|
||||
|
||||
def download_file(url: str, dest: str, expected_size: int = 0):
    """Stream ``url`` into the file ``dest``, drawing a progress bar on stdout.

    The total comes from the ``Content-Length`` header, falling back to
    ``expected_size``; no bar is drawn when the total is not positive.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "retrobios-downloader/1.0"})

    with urllib.request.urlopen(request, timeout=300) as response:
        total = int(response.headers.get("Content-Length", expected_size))
        received = 0

        with open(dest, "wb") as out:
            for block in iter(lambda: response.read(65536), b""):
                out.write(block)
                received += len(block)

                if total <= 0:
                    continue
                pct = received * 100 // total
                filled = pct // 2
                bar = "=" * filled + " " * (50 - filled)
                print(f"\r [{bar}] {pct}% ({received:,}/{total:,})", end="", flush=True)

    # Terminate the carriage-return progress line.
    print()
|
||||
|
||||
|
||||
def extract_pack(zip_path: str, dest_dir: str):
    """Report how many members the pack holds, then extract it via ``safe_extract_zip``."""
    with zipfile.ZipFile(zip_path, "r") as archive:
        member_count = len(archive.namelist())
    print(f" Extracting {member_count} files to {dest_dir}/")
    safe_extract_zip(zip_path, dest_dir)
|
||||
|
||||
|
||||
def verify_files(platform: str, dest_dir: str, release: dict):
    """Verify local files against database.json from release.

    Every record of the release's ``database.json`` is looked up by file name
    anywhere below ``dest_dir``. A record is verified when ANY same-named
    local file has the expected SHA1, mismatched when same-named files exist
    but none has it, and missing otherwise. Counts are printed at the end.

    ``platform`` is accepted for interface compatibility; the check covers
    the whole database.
    """
    import glob
    import tempfile

    db_asset = next(
        (asset for asset in release.get("assets", []) if asset["name"] == "database.json"),
        None,
    )
    if not db_asset:
        print("No database.json found in release assets. Cannot verify.")
        return

    tmp = tempfile.NamedTemporaryFile(suffix=".json", delete=False)
    tmp.close()
    try:
        download_file(db_asset["browser_download_url"], tmp.name, db_asset.get("size", 0))
        with open(tmp.name) as f:
            db = json.load(f)
    finally:
        os.unlink(tmp.name)

    def _sha1_of(path) -> str:
        h = hashlib.sha1()
        with open(path, "rb") as f:
            while True:
                chunk = f.read(65536)
                if not chunk:
                    break
                h.update(chunk)
        return h.hexdigest()

    dest = Path(dest_dir)
    verified = 0
    missing = 0
    mismatched = 0

    for sha1, entry in db.get("files", {}).items():
        name = entry["name"]
        matched = False
        wrong_digest = None

        # Escape the name: characters such as "[" or "*" are glob syntax and
        # would otherwise keep the literal file from being found.
        for local_file in dest.rglob(glob.escape(name)):
            if not local_file.is_file():
                continue
            digest = _sha1_of(local_file)
            if digest == sha1:
                matched = True
                break
            # Remember the first bad digest but keep looking: another
            # directory may hold the right variant under the same name.
            if wrong_digest is None:
                wrong_digest = digest

        if matched:
            verified += 1
        elif wrong_digest is not None:
            mismatched += 1
            print(f" MISMATCH: {name} (expected {sha1[:12]}..., got {wrong_digest[:12]}...)")
        else:
            missing += 1

    total = verified + missing + mismatched
    print(f"\n Verified: {verified}/{total}")
    if missing:
        print(f" Missing: {missing}")
    if mismatched:
        print(f" Mismatched: {mismatched}")
|
||||
|
||||
|
||||
def show_info(platform: str, release: dict):
    """Print name, size, download count and update time of a platform's pack asset."""
    asset = find_asset(release, platform)
    if asset is None or not asset:
        print(f"Platform '{platform}' not found in release")
        return

    print(f" Platform: {platform}")
    print(f" File: {asset['name']}")
    size_bytes = asset["size"]
    size_mb = size_bytes / (1024*1024)
    print(f" Size: {size_bytes:,} bytes ({size_mb:.1f} MB)")
    downloads = asset.get("download_count", "N/A")
    print(f" Downloads: {downloads}")
    updated = asset.get("updated_at", "N/A")
    print(f" Updated: {updated}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: list, inspect, verify or download a BIOS pack.

    The downloaded archive is stored in the system temp directory and is
    removed even when the download or the extraction fails. ``~`` in the
    destination is expanded for both the download and the ``--verify`` path.
    """
    import tempfile

    parser = argparse.ArgumentParser(
        description="Download BIOS packs from GitHub Releases",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s --list List available platforms
%(prog)s retroarch ~/RetroArch/system Download RetroArch pack
%(prog)s --verify retroarch ~/path Verify local files
%(prog)s --info retroarch Show pack info
""",
    )
    parser.add_argument("platform", nargs="?", help="Platform name")
    parser.add_argument("dest", nargs="?", help="Destination directory")
    parser.add_argument("--list", action="store_true", help="List available platforms")
    parser.add_argument("--verify", action="store_true", help="Verify existing files")
    parser.add_argument("--info", action="store_true", help="Show platform info")
    args = parser.parse_args()

    if args.list:
        try:
            release = get_latest_release()
            platforms = list_platforms(release)
            if platforms:
                print("Available platforms:")
                for p in platforms:
                    print(f" - {p}")
            else:
                print("No platform packs found in latest release")
        except Exception as e:
            print(f"Error: {e}")
        return

    if not args.platform:
        parser.error("Platform name required (use --list to see options)")

    try:
        release = get_latest_release()
    except Exception as e:
        print(f"Error fetching release info: {e}")
        sys.exit(1)

    if args.info:
        show_info(args.platform, release)
        return

    if args.verify:
        if not args.dest:
            parser.error("Destination directory required for --verify")
        verify_files(args.platform, os.path.expanduser(args.dest), release)
        return

    if not args.dest:
        parser.error("Destination directory required")

    asset = find_asset(release, args.platform)
    if not asset:
        print(f"Platform '{args.platform}' not found in release.")
        print("Available:", ", ".join(list_platforms(release)))
        sys.exit(1)

    zip_path = os.path.join(tempfile.gettempdir(), asset["name"])
    try:
        print(f"Downloading {asset['name']} ({asset['size']:,} bytes)...")
        download_file(asset["browser_download_url"], zip_path, asset["size"])

        dest = os.path.expanduser(args.dest)
        os.makedirs(dest, exist_ok=True)

        print(f"Extracting to {dest}/...")
        extract_pack(zip_path, dest)
    finally:
        # Do not leave a (possibly partial) archive behind on failure.
        if os.path.exists(zip_path):
            os.unlink(zip_path)

    print("Done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
90
scripts/download.sh
Executable file
90
scripts/download.sh
Executable file
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env bash
|
||||
# Download BIOS pack from GitHub Releases (Linux/macOS one-liner compatible)
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/download.sh retroarch ~/RetroArch/system/
|
||||
# bash scripts/download.sh --list
|
||||
#
|
||||
# Requires: curl, unzip, jq (optional, for --list)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
REPO="Abdess/retroarch_system"
|
||||
API="https://api.github.com/repos/${REPO}/releases/latest"
|
||||
|
||||
usage() {
    # Print the command synopsis and a few examples, then abort with status 1.
    # An unquoted here-document expands $0 but leaves "~" untouched, matching
    # what the equivalent double-quoted echo lines would print.
    cat <<EOF
Usage: $0 <platform> <destination>
 $0 --list

Download BIOS packs from GitHub Releases.

Examples:
 $0 retroarch ~/RetroArch/system/
 $0 batocera /userdata/bios/
 $0 --list
EOF
    exit 1
}
|
||||
|
||||
list_platforms() {
    # Print the platform names that have a *_BIOS_Pack.zip asset in the latest
    # release, one per line, with underscores shown as spaces.
    #
    # Uses jq when available; otherwise falls back to POSIX grep/sed so the
    # script also works with BSD grep (macOS), which has no -P option.
    echo "Fetching available platforms..."

    local release_json names
    # -f makes curl fail on HTTP errors instead of handing an error page to
    # the parsers below.
    if ! release_json=$(curl -fsSL "$API"); then
        echo "Error: could not fetch release info from ${API}" >&2
        return 1
    fi

    if command -v jq &>/dev/null; then
        names=$(printf '%s\n' "$release_json" | jq -r '.assets[]?.name')
    else
        # "|| true": a release without matching assets must not abort the
        # script under "set -euo pipefail".
        names=$(printf '%s\n' "$release_json" \
            | grep -oE '"name":[[:space:]]*"[^"]*_BIOS_Pack\.zip"' \
            | sed -E 's/^"name":[[:space:]]*"//; s/"$//' || true)
    fi

    # sed -n ... p both filters for pack assets and strips the suffix, and it
    # exits 0 even when nothing matches.
    printf '%s\n' "$names" | sed -n 's/_BIOS_Pack\.zip$//p' | tr '_' ' '
}
|
||||
|
||||
download_pack() {
    # Download the BIOS pack for platform $1 from the latest release and
    # extract it into directory $2 (created if missing).
    local platform="$1"
    local dest="$2"
    local normalized
    normalized=$(echo "$platform" | tr ' ' '_' | tr '[:upper:]' '[:lower:]')

    echo "Fetching release info..."
    local release_json
    # -f: treat HTTP errors (rate limit, 404) as failures instead of parsing
    # the error body as release metadata.
    if ! release_json=$(curl -fsSL "$API"); then
        echo "Error: could not fetch release info from ${API}" >&2
        exit 1
    fi

    # Find matching asset URL.
    # -i: the platform was lowercased above while asset names keep the
    #     platform's display casing, so the match must ignore case.
    # -E instead of -P: BSD grep (macOS) has no Perl-regex mode.
    # "|| true": without it a failed match aborts the script right here under
    #     "set -euo pipefail" and the helpful error below is never printed.
    local download_url
    download_url=$(printf '%s\n' "$release_json" \
        | grep -oiE "\"browser_download_url\":[[:space:]]*\"[^\"]*${normalized}[^\"]*_BIOS_Pack\.zip\"" \
        | head -1 \
        | grep -oE 'https://[^"]+' || true)

    if [[ -z "$download_url" ]]; then
        echo "Error: Platform '$platform' not found in latest release."
        echo "Available platforms:"
        list_platforms
        exit 1
    fi

    local filename
    filename=$(basename "$download_url")

    local tmpfile
    tmpfile=$(mktemp "/tmp/${filename}.XXXXXX")

    echo "Downloading ${filename}..."
    if ! curl -fL --progress-bar -o "$tmpfile" "$download_url"; then
        rm -f "$tmpfile"
        echo "Error: download failed: ${download_url}" >&2
        exit 1
    fi

    echo "Extracting to ${dest}/..."
    mkdir -p "$dest"
    if ! unzip -o -q "$tmpfile" -d "$dest"; then
        # Do not leave the (possibly large) archive behind in /tmp.
        rm -f "$tmpfile"
        echo "Error: could not extract ${filename} into ${dest}/" >&2
        exit 1
    fi

    rm -f "$tmpfile"
    echo "Done! BIOS files extracted to ${dest}/"
}
|
||||
|
||||
# Main
|
||||
# Dispatch on the first argument: list, help (also when no argument is given),
# or "<platform> <destination>".
if [[ "${1:-}" == "--list" || "${1:-}" == "-l" ]]; then
    list_platforms
elif [[ -z "${1:-}" || "$1" == "--help" || "$1" == "-h" ]]; then
    usage
else
    if [[ -z "${2:-}" ]]; then
        echo "Error: Destination directory required."
        usage
    fi
    download_pack "$1" "$2"
fi
|
||||
343
scripts/generate_db.py
Normal file
343
scripts/generate_db.py
Normal file
|
|
@ -0,0 +1,343 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Scan bios/ directory and generate multi-indexed database.json.
|
||||
|
||||
Usage:
|
||||
python scripts/generate_db.py [--force] [--bios-dir DIR] [--output FILE]
|
||||
|
||||
Supports incremental mode via .cache/db_cache.json (mtime-based).
|
||||
Use --force to rehash all files.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import zlib
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
CACHE_DIR = ".cache"
|
||||
CACHE_FILE = os.path.join(CACHE_DIR, "db_cache.json")
|
||||
DEFAULT_BIOS_DIR = "bios"
|
||||
DEFAULT_OUTPUT = "database.json"
|
||||
|
||||
SKIP_PATTERNS = {".git", ".github", "__pycache__", ".cache", ".DS_Store", "desktop.ini"}
|
||||
|
||||
|
||||
def compute_hashes(filepath: Path) -> dict:
    """Hash a file in a single streaming pass.

    The file is read in 64 KiB blocks and every block is fed to all four
    algorithms, so the file is only read once.

    Returns a dict with lowercase hex strings under the keys ``sha1``,
    ``md5``, ``sha256`` and ``crc32`` (the CRC is zero-padded to 8 digits).
    """
    digests = {algo: hashlib.new(algo) for algo in ("sha1", "md5", "sha256")}
    checksum = 0

    with open(filepath, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for block in iter(lambda: stream.read(65536), b""):
            for digest in digests.values():
                digest.update(block)
            checksum = zlib.crc32(block, checksum)

    result = {algo: digest.hexdigest() for algo, digest in digests.items()}
    result["crc32"] = format(checksum & 0xFFFFFFFF, "08x")
    return result
|
||||
|
||||
|
||||
def should_skip(path: Path) -> bool:
    """Tell whether ``path`` must be excluded from the scan.

    A path is excluded when any of its components is listed in
    ``SKIP_PATTERNS`` or is a dot-entry; ``.variants`` is the one dot-entry
    that is allowed.
    """
    return any(
        part in SKIP_PATTERNS or (part.startswith(".") and part != ".variants")
        for part in path.parts
    )
|
||||
|
||||
|
||||
def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> tuple[dict, dict, dict]:
    """Scan the BIOS directory and hash every file, reusing cached hashes.

    Args:
        bios_dir: Directory to walk recursively.
        cache: Previous cache, keyed by path relative to ``bios_dir.parent``.
            An entry is reused only when its ``mtime`` and ``size`` equal the
            file's current values and all four hashes are present; anything
            else is rehashed.
        force: When true, ignore ``cache`` and rehash every file.

    Returns:
        ``(files, aliases, new_cache)`` where ``files`` maps SHA1 to the first
        file found with that content, ``aliases`` maps SHA1 to a list of
        ``{"name", "path"}`` dicts for later files with the same content, and
        ``new_cache`` is the cache to persist for the next run.
    """
    files: dict = {}
    aliases: dict = {}
    new_cache: dict = {}
    hash_keys = ("sha1", "md5", "sha256", "crc32")

    # sorted() makes "first file wins" deterministic across runs.
    for filepath in sorted(bios_dir.rglob("*")):
        if not filepath.is_file():
            continue
        if should_skip(filepath.relative_to(bios_dir)):
            continue

        rel_path = str(filepath.relative_to(bios_dir.parent))
        stat = filepath.stat()
        mtime, size = stat.st_mtime, stat.st_size

        hashes = None
        cached = None if force else cache.get(rel_path)
        if (
            isinstance(cached, dict)
            and cached.get("mtime") == mtime
            and cached.get("size") == size
            # A cache written by an older/other schema may lack a hash;
            # rehash instead of raising KeyError.
            and all(key in cached for key in hash_keys)
        ):
            hashes = {key: cached[key] for key in hash_keys}
        if hashes is None:
            hashes = compute_hashes(filepath)

        sha1 = hashes["sha1"]
        if sha1 in files:
            # Same content already registered: record this one as an alias.
            aliases.setdefault(sha1, []).append({"name": filepath.name, "path": rel_path})
        else:
            files[sha1] = {
                "path": rel_path,
                "name": filepath.name,
                "size": size,
                **hashes,
            }
        new_cache[rel_path] = {**hashes, "mtime": mtime, "size": size}

    return files, aliases, new_cache
|
||||
|
||||
|
||||
def build_indexes(files: dict, aliases: dict) -> dict:
    """Derive the secondary lookup tables from the SHA1-keyed file map.

    Returns a dict with three indexes:
      * ``by_md5``:   MD5 -> SHA1
      * ``by_name``:  filename -> list of SHA1s (alias names included)
      * ``by_crc32``: CRC32 -> SHA1
    """
    by_md5, by_name, by_crc32 = {}, {}, {}

    for digest, record in files.items():
        by_md5[record["md5"]] = digest
        by_name.setdefault(record["name"], []).append(digest)
        by_crc32[record["crc32"]] = digest

    # Aliases are other filenames for content that is already registered;
    # make them resolvable by name without listing a SHA1 twice.
    for digest, alternates in aliases.items():
        for alternate in alternates:
            owners = by_name.setdefault(alternate["name"], [])
            if digest not in owners:
                owners.append(digest)

    return {"by_md5": by_md5, "by_name": by_name, "by_crc32": by_crc32}
|
||||
|
||||
|
||||
def load_cache(cache_path: str) -> dict:
    """Return the decoded JSON cache, or ``{}`` when it is missing or not valid JSON."""
    try:
        with open(cache_path) as handle:
            loaded = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        loaded = {}
    return loaded
|
||||
|
||||
|
||||
def save_cache(cache_path: str, cache: dict):
    """Write ``cache`` as JSON to ``cache_path``, creating parent directories.

    A bare filename (no directory component) is written to the current
    directory; ``os.makedirs("")`` would raise, so directory creation is
    skipped in that case.
    """
    parent = os.path.dirname(cache_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(cache_path, "w") as f:
        json.dump(cache, f)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: scan the BIOS directory and write the JSON database.

    Exits with status 1 when the BIOS directory does not exist; otherwise
    writes the database and the hash cache and returns 0.
    """
    cli = argparse.ArgumentParser(description="Generate multi-indexed BIOS database")
    cli.add_argument("--force", action="store_true", help="Force rehash all files")
    cli.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR, help="BIOS directory path")
    cli.add_argument("--output", "-o", default=DEFAULT_OUTPUT, help="Output JSON file")
    args = cli.parse_args()

    bios_dir = Path(args.bios_dir)
    if not bios_dir.is_dir():
        print(f"Error: BIOS directory '{bios_dir}' not found", file=sys.stderr)
        sys.exit(1)

    cache = load_cache(CACHE_FILE) if not args.force else {}

    print(f"Scanning {bios_dir}/ ...")
    files, aliases, new_cache = scan_bios_dir(bios_dir, cache, args.force)

    if not files:
        print("Warning: No BIOS files found", file=sys.stderr)

    # Merge the alternate names gathered from configs/known groups into the
    # aliases found on disk.
    for sha1, extra_names in _collect_all_aliases(files).items():
        for alias_entry in extra_names:
            aliases.setdefault(sha1, []).append(alias_entry)

    indexes = build_indexes(files, aliases)
    total_size = sum(record["size"] for record in files.values())
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    database = {
        "generated_at": generated_at,
        "total_files": len(files),
        "total_size": total_size,
        "files": files,
        "indexes": indexes,
    }

    with open(args.output, "w") as out:
        json.dump(database, out, indent=2)

    save_cache(CACHE_FILE, new_cache)

    alias_count = sum(len(names) for names in aliases.values())
    name_count = len(indexes["by_name"])
    print(f"Generated {args.output}: {len(files)} files, {total_size:,} bytes total")
    print(f" Name index: {name_count} names ({alias_count} aliases)")
    return 0
|
||||
|
||||
|
||||
def _collect_all_aliases(files: dict) -> dict:
|
||||
"""Collect alternate filenames from platform YAMLs, core-info, and known aliases.
|
||||
|
||||
Registers alternate names so generate_pack can resolve files stored under different names.
|
||||
"""
|
||||
md5_to_sha1 = {}
|
||||
name_to_sha1 = {}
|
||||
for sha1, entry in files.items():
|
||||
md5_to_sha1[entry["md5"]] = sha1
|
||||
name_to_sha1[entry["name"]] = sha1
|
||||
|
||||
aliases = {}
|
||||
|
||||
def _add_alias(name: str, matched_sha1: str):
|
||||
if not name or name in name_to_sha1:
|
||||
return
|
||||
if matched_sha1 not in aliases:
|
||||
aliases[matched_sha1] = []
|
||||
existing = {a["name"] for a in aliases[matched_sha1]}
|
||||
if name not in existing:
|
||||
aliases[matched_sha1].append({"name": name, "path": ""})
|
||||
|
||||
platforms_dir = Path("platforms")
|
||||
if platforms_dir.is_dir():
|
||||
try:
|
||||
import yaml
|
||||
for config_file in platforms_dir.glob("*.yml"):
|
||||
if config_file.name.startswith("_"):
|
||||
continue
|
||||
try:
|
||||
with open(config_file) as f:
|
||||
config = yaml.safe_load(f) or {}
|
||||
except (yaml.YAMLError, OSError) as e:
|
||||
print(f"Warning: {config_file.name}: {e}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
for sys_id, system in config.get("systems", {}).items():
|
||||
for file_entry in system.get("files", []):
|
||||
name = file_entry.get("name", "")
|
||||
sha1 = file_entry.get("sha1", "")
|
||||
md5 = file_entry.get("md5", "")
|
||||
|
||||
matched = None
|
||||
if sha1 and sha1 in files:
|
||||
matched = sha1
|
||||
elif md5 and md5 in md5_to_sha1:
|
||||
matched = md5_to_sha1[md5]
|
||||
|
||||
if matched:
|
||||
_add_alias(name, matched)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import sys as _sys
|
||||
_sys.path.insert(0, "scripts")
|
||||
from scraper.coreinfo_scraper import Scraper as CoreInfoScraper
|
||||
ci_reqs = CoreInfoScraper().fetch_requirements()
|
||||
for r in ci_reqs:
|
||||
basename = r.name
|
||||
# Try to match by MD5 or by known canonical names
|
||||
matched = None
|
||||
if r.md5 and r.md5 in md5_to_sha1:
|
||||
matched = md5_to_sha1[r.md5]
|
||||
if matched:
|
||||
_add_alias(basename, matched)
|
||||
except (ImportError, ConnectionError) as e:
|
||||
pass
|
||||
|
||||
# Identical content named differently across platforms/cores
|
||||
KNOWN_ALIAS_GROUPS = [
|
||||
# ColecoVision - all these are the same 8KB BIOS
|
||||
["colecovision.rom", "coleco.rom", "BIOS.col", "bioscv.rom"],
|
||||
# Game Boy - DMG boot ROM
|
||||
["gb_bios.bin", "dmg_boot.bin", "dmg_rom.bin", "dmg0_rom.bin"],
|
||||
# Game Boy Color - CGB boot ROM
|
||||
["gbc_bios.bin", "cgb_boot.bin", "cgb0_boot.bin", "cgb_agb_boot.bin"],
|
||||
# Super Game Boy
|
||||
["sgb_bios.bin", "sgb_boot.bin", "sgb.boot.rom"],
|
||||
["sgb2_bios.bin", "sgb2_boot.bin", "sgb2.boot.rom"],
|
||||
["sgb1.program.rom", "SGB1.sfc/program.rom"],
|
||||
["sgb2.program.rom", "SGB2.sfc/program.rom"],
|
||||
# Nintendo DS
|
||||
["bios7.bin", "nds7.bin"],
|
||||
["bios9.bin", "nds9.bin"],
|
||||
["dsi_sd_card.bin", "nds_sd_card.bin"],
|
||||
# MSX
|
||||
["MSX.ROM", "MSX.rom", "Machines/Shared Roms/MSX.rom"],
|
||||
# NEC PC-98
|
||||
["N88KNJ1.ROM", "n88knj1.rom", "quasi88/n88knj1.rom"],
|
||||
# Enterprise
|
||||
["zt19uk.rom", "zt19hfnt.rom", "ep128emu/roms/zt19hfnt.rom"],
|
||||
# ZX Spectrum
|
||||
["48.rom", "zx48.rom"],
|
||||
# SquirrelJME - all JARs are the same
|
||||
["squirreljme.sqc", "squirreljme.jar", "squirreljme-fast.jar",
|
||||
"squirreljme-slow.jar", "squirreljme-slow-test.jar",
|
||||
"squirreljme-0.3.0.jar", "squirreljme-0.3.0-fast.jar",
|
||||
"squirreljme-0.3.0-slow.jar", "squirreljme-0.3.0-slow-test.jar"],
|
||||
# Arcade - FBNeo spectrum
|
||||
["spectrum.zip", "fbneo/spectrum.zip", "spec48k.zip"],
|
||||
]
|
||||
|
||||
for group in KNOWN_ALIAS_GROUPS:
|
||||
matched_sha1 = None
|
||||
for name in group:
|
||||
if name in name_to_sha1:
|
||||
matched_sha1 = name_to_sha1[name]
|
||||
break
|
||||
if not matched_sha1:
|
||||
continue
|
||||
for name in group:
|
||||
_add_alias(name, matched_sha1)
|
||||
|
||||
return aliases
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main() or 0)
|
||||
423
scripts/generate_pack.py
Normal file
423
scripts/generate_pack.py
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Generate platform-specific BIOS ZIP packs.
|
||||
|
||||
Usage:
|
||||
python scripts/generate_pack.py --platform retroarch [--output-dir dist/]
|
||||
python scripts/generate_pack.py --all [--output-dir dist/]
|
||||
|
||||
Reads platform YAML config + database.json -> creates ZIP with correct
|
||||
file layout for each platform. Handles inheritance, shared groups, variants,
|
||||
and 3-tier storage (embedded/external/user_provided).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import load_platform_config
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
DEFAULT_PLATFORMS_DIR = "platforms"
|
||||
DEFAULT_DB_FILE = "database.json"
|
||||
DEFAULT_OUTPUT_DIR = "dist"
|
||||
DEFAULT_BIOS_DIR = "bios"
|
||||
LARGE_FILES_RELEASE = "large-files"
|
||||
LARGE_FILES_REPO = "Abdess/retroarch_system"
|
||||
|
||||
|
||||
def load_database(db_path: str) -> dict:
    """Read the JSON database at ``db_path`` and return the decoded object."""
    with open(db_path) as handle:
        database = json.load(handle)
    return database
|
||||
|
||||
|
||||
def fetch_large_file(name: str, dest_dir: str = ".cache/large") -> str | None:
|
||||
"""Download a large file from the 'large-files' GitHub release if not cached."""
|
||||
cached = os.path.join(dest_dir, name)
|
||||
if os.path.exists(cached):
|
||||
return cached
|
||||
|
||||
url = f"https://github.com/{LARGE_FILES_REPO}/releases/download/{LARGE_FILES_RELEASE}/{name}"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=300) as resp:
|
||||
data = resp.read()
|
||||
os.makedirs(dest_dir, exist_ok=True)
|
||||
with open(cached, "wb") as f:
|
||||
f.write(data)
|
||||
return cached
|
||||
except (urllib.error.URLError, urllib.error.HTTPError):
|
||||
return None
|
||||
|
||||
|
||||
def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
||||
zip_contents: dict | None = None) -> tuple[str | None, str]:
|
||||
"""Resolve a BIOS file to its local path using database.json.
|
||||
|
||||
Returns (local_path, status) where status is one of:
|
||||
exact, zip_exact, hash_mismatch, external, user_provided, not_found.
|
||||
"""
|
||||
storage = file_entry.get("storage", "embedded")
|
||||
if storage == "user_provided":
|
||||
return None, "user_provided"
|
||||
if storage == "external":
|
||||
return None, "external"
|
||||
|
||||
sha1 = file_entry.get("sha1")
|
||||
md5 = file_entry.get("md5")
|
||||
name = file_entry.get("name", "")
|
||||
zipped_file = file_entry.get("zipped_file")
|
||||
|
||||
if sha1 and sha1 in db.get("files", {}):
|
||||
local_path = db["files"][sha1]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "exact"
|
||||
|
||||
if md5:
|
||||
sha1_from_md5 = db.get("indexes", {}).get("by_md5", {}).get(md5)
|
||||
if sha1_from_md5 and sha1_from_md5 in db["files"]:
|
||||
local_path = db["files"][sha1_from_md5]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "exact"
|
||||
|
||||
# Truncated MD5 match (batocera-systems bug: 29 chars instead of 32)
|
||||
if len(md5) < 32:
|
||||
for db_md5, db_sha1 in db.get("indexes", {}).get("by_md5", {}).items():
|
||||
if db_md5.startswith(md5) and db_sha1 in db["files"]:
|
||||
local_path = db["files"][db_sha1]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "exact"
|
||||
|
||||
if zipped_file and md5 and zip_contents:
|
||||
if md5 in zip_contents:
|
||||
zip_sha1 = zip_contents[md5]
|
||||
if zip_sha1 in db["files"]:
|
||||
local_path = db["files"][zip_sha1]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "zip_exact"
|
||||
|
||||
# No MD5 specified = any file with that name is acceptable
|
||||
if not md5:
|
||||
name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
|
||||
for match_sha1 in name_matches:
|
||||
if match_sha1 in db["files"]:
|
||||
local_path = db["files"][match_sha1]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "exact"
|
||||
|
||||
name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
|
||||
for match_sha1 in name_matches:
|
||||
if match_sha1 in db["files"]:
|
||||
local_path = db["files"][match_sha1]["path"]
|
||||
if os.path.exists(local_path):
|
||||
return local_path, "hash_mismatch"
|
||||
|
||||
# Last resort: try downloading from large-files release
|
||||
cached = fetch_large_file(name)
|
||||
if cached:
|
||||
return cached, "release_asset"
|
||||
|
||||
return None, "not_found"
|
||||
|
||||
|
||||
def build_zip_contents_index(db: dict) -> dict:
    """Map the MD5 of every member of every known ZIP to that ZIP's SHA1.

    Only database entries whose path ends in ``.zip`` and exists on disk are
    opened; archives that cannot be read are skipped.
    """
    index = {}
    for archive_sha1, record in db.get("files", {}).items():
        archive_path = record["path"]
        if not (archive_path.endswith(".zip") and os.path.exists(archive_path)):
            continue
        try:
            with zipfile.ZipFile(archive_path, "r") as archive:
                for member in archive.infolist():
                    if member.is_dir():
                        continue
                    payload = archive.read(member.filename)
                    index[hashlib.md5(payload).hexdigest()] = archive_sha1
        except (zipfile.BadZipFile, OSError):
            continue
    return index
|
||||
|
||||
|
||||
def download_external(file_entry: dict, dest_path: str) -> bool:
    """Download an external BIOS file, verify hash, save to dest_path.

    Args:
        file_entry: Config entry with ``source_url`` and optionally
            ``sha256``, ``sha1`` or ``md5``. Only the strongest hash present
            is checked; with no hash the download is accepted as-is.
        dest_path: Where to write the verified file.

    Returns:
        True when the file was downloaded, passed verification and was
        written; False when there is no URL, the download failed, or the
        hash did not match (nothing is written in those cases).
    """
    url = file_entry.get("source_url")
    if not url:
        return False

    try:
        req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack-gen/1.0"})
        with urllib.request.urlopen(req, timeout=120) as resp:
            data = resp.read()
    except (urllib.error.URLError, TimeoutError, ConnectionError) as e:
        # Timeouts and resets while reading the body are not URLError.
        print(f" WARNING: Failed to download {url}: {e}")
        return False

    # Verify hash: strongest algorithm first, first one present decides.
    for label, algo in (("SHA256", "sha256"), ("SHA1", "sha1"), ("MD5", "md5")):
        expected = file_entry.get(algo)
        if not expected:
            continue
        actual = hashlib.new(algo, data).hexdigest()
        # Hex digests are case-insensitive; configs may carry uppercase.
        if actual != str(expected).strip().lower():
            print(f" WARNING: {label} mismatch for {file_entry.get('name', url)}")
            return False
        break

    # A bare filename has no directory part; os.makedirs("") would raise.
    parent = os.path.dirname(dest_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(dest_path, "wb") as f:
        f.write(data)
    return True
|
||||
|
||||
|
||||
def generate_pack(
    platform_name: str,
    platforms_dir: str,
    db_path: str,
    bios_dir: str,
    output_dir: str,
) -> str | None:
    """Build the BIOS ZIP pack for one platform and print a summary.

    Every file entry of every system in the platform config is placed at its
    destination (prefixed by ``base_destination`` when set); a destination
    that was already handled is skipped. ``user_provided`` entries become an
    ``INSTRUCTIONS_<name>.txt`` note, ``external`` entries are downloaded to a
    temporary file first, and everything else is resolved through the
    database.

    Returns the path to the generated ZIP.
    """
    config = load_platform_config(platform_name, platforms_dir)
    db = load_database(db_path)

    zip_contents = build_zip_contents_index(db)

    verification_mode = config.get("verification_mode", "existence")
    platform_display = config.get("platform", platform_name)
    base_dest = config.get("base_destination", "")

    zip_name = f"{platform_display.replace(' ', '_')}_BIOS_Pack.zip"
    zip_path = os.path.join(output_dir, zip_name)
    os.makedirs(output_dir, exist_ok=True)

    total_files = 0
    missing_files = []
    untested_files = []
    user_provided = []
    seen_destinations = set()

    def _in_pack(relative: str) -> str:
        # Prefix a pack-relative path with the platform's base destination.
        return f"{base_dest}/{relative}" if base_dest else relative

    def _store(source_path: str, archive_name: str, entry: dict, archive) -> None:
        # Add a local file to the pack; ZIPs flagged "extract" are unpacked
        # under the destination instead of being stored as-is.
        if entry.get("extract", False) and source_path.endswith(".zip"):
            _extract_zip_to_archive(source_path, archive_name, archive)
        else:
            archive.write(source_path, archive_name)

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for _sys_id, system in sorted(config.get("systems", {}).items()):
            for file_entry in system.get("files", []):
                entry_name = file_entry["name"]
                full_dest = _in_pack(file_entry.get("destination", entry_name))

                # One file per destination: first entry wins.
                if full_dest in seen_destinations:
                    continue
                seen_destinations.add(full_dest)

                if file_entry.get("storage", "embedded") == "user_provided":
                    instructions = file_entry.get("instructions", "Please provide this file manually.")
                    zf.writestr(
                        _in_pack(f"INSTRUCTIONS_{entry_name}.txt"),
                        f"File needed: {entry_name}\n\n{instructions}\n",
                    )
                    user_provided.append(entry_name)
                    total_files += 1
                    continue

                local_path, status = resolve_file(file_entry, db, bios_dir, zip_contents)

                if status == "external":
                    # Keep the original extension so the ".zip" check in
                    # _store still works on the temporary file.
                    suffix = os.path.splitext(entry_name)[1] or ""
                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                        tmp_path = tmp.name
                    try:
                        if download_external(file_entry, tmp_path):
                            _store(tmp_path, full_dest, file_entry, zf)
                            total_files += 1
                        else:
                            missing_files.append(entry_name)
                    finally:
                        if os.path.exists(tmp_path):
                            os.unlink(tmp_path)
                    continue

                if status == "not_found":
                    missing_files.append(entry_name)
                    continue

                # A name-only match is still packed; it is merely reported as
                # untested on platforms that verify hashes.
                if status == "hash_mismatch" and verification_mode != "existence":
                    untested_files.append(entry_name)

                _store(local_path, full_dest, file_entry, zf)
                total_files += 1

    for label, names in (("Missing", missing_files), ("Untested", untested_files)):
        if names:
            print(f" {label} ({len(names)}): {', '.join(names[:10])}")
            if len(names) > 10:
                print(f" ... and {len(names) - 10} more")

    if user_provided:
        print(f" User-provided ({len(user_provided)}): {', '.join(user_provided)}")

    if verification_mode == "existence":
        # RetroArch-family: only existence matters
        print(f" Generated {zip_path}: {total_files} files ({total_files} present, {len(missing_files)} missing) [verification: existence]")
    else:
        # Batocera-family: hash verification matters
        verified = total_files - len(untested_files)
        print(f" Generated {zip_path}: {total_files} files ({verified} verified, {len(untested_files)} untested, {len(missing_files)} missing) [verification: {verification_mode}]")
    return zip_path
|
||||
|
||||
|
||||
def _extract_zip_to_archive(source_zip: str, dest_prefix: str, target_zf: zipfile.ZipFile):
|
||||
"""Extract contents of a source ZIP into target ZIP under dest_prefix."""
|
||||
with zipfile.ZipFile(source_zip, "r") as src:
|
||||
for info in src.infolist():
|
||||
if info.is_dir():
|
||||
continue
|
||||
data = src.read(info.filename)
|
||||
target_path = f"{dest_prefix}/{info.filename}" if dest_prefix else info.filename
|
||||
target_zf.writestr(target_path, data)
|
||||
|
||||
|
||||
def list_platforms(platforms_dir: str) -> list[str]:
    """Return the sorted stems of the ``*.yml`` files in ``platforms_dir``.

    Files whose name starts with an underscore are left out.
    """
    return [
        config.stem
        for config in sorted(Path(platforms_dir).glob("*.yml"))
        if not config.name.startswith("_")
    ]
|
||||
|
||||
|
||||
def main():
    """CLI entry point: list platforms or generate one pack per platform group.

    Platforms that would produce identical packs are generated once and the
    ZIP is renamed to carry all their names. A failure on one group is
    printed and does not stop the remaining groups.
    """
    parser = argparse.ArgumentParser(description="Generate platform BIOS ZIP packs")
    parser.add_argument("--platform", "-p", help="Platform name (e.g., retroarch)")
    parser.add_argument("--all", action="store_true", help="Generate packs for all active platforms")
    parser.add_argument("--include-archived", action="store_true", help="Include archived platforms")
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--db", default=DEFAULT_DB_FILE, help="Path to database.json")
    parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
    parser.add_argument("--output-dir", "-o", default=DEFAULT_OUTPUT_DIR)
    parser.add_argument("--list", action="store_true", help="List available platforms")
    args = parser.parse_args()

    if args.list:
        for available in list_platforms(args.platforms_dir):
            print(available)
        return

    if args.all:
        sys.path.insert(0, os.path.dirname(__file__))
        from list_platforms import list_platforms as _list_active
        platforms = _list_active(include_archived=args.include_archived)
    elif args.platform:
        platforms = [args.platform]
    else:
        parser.error("Specify --platform or --all")
        return

    def _display_names(members):
        # Human-readable platform names, falling back to the config stem.
        return [load_platform_config(p, args.platforms_dir).get("platform", p) for p in members]

    for group_platforms, representative in _group_identical_platforms(platforms, args.platforms_dir):
        shared = len(group_platforms) > 1
        if shared:
            combined_name = " + ".join(_display_names(group_platforms))
            print(f"\nGenerating shared pack for {combined_name}...")
        else:
            print(f"\nGenerating pack for {representative}...")

        try:
            zip_path = generate_pack(representative, args.platforms_dir, args.db, args.bios_dir, args.output_dir)
            if zip_path and shared:
                # Rename ZIP to include all platform names
                names = _display_names(group_platforms)
                combined_filename = "_".join(n.replace(" ", "") for n in names) + "_BIOS_Pack.zip"
                new_path = os.path.join(os.path.dirname(zip_path), combined_filename)
                if new_path != zip_path:
                    os.rename(zip_path, new_path)
                    print(f" Renamed -> {os.path.basename(new_path)}")
        except Exception as e:
            # Covers FileNotFoundError as well; both are reported the same way.
            print(f" ERROR: {e}")
|
||||
|
||||
|
||||
def _group_identical_platforms(platforms: list[str], platforms_dir: str) -> list[tuple[list[str], str]]:
|
||||
"""Group platforms that would produce identical ZIP packs.
|
||||
|
||||
Returns [(group_of_platform_names, representative_platform), ...].
|
||||
Platforms with the same resolved systems+files+base_destination are grouped.
|
||||
"""
|
||||
import hashlib as _hl
|
||||
|
||||
fingerprints = {}
|
||||
representatives = {}
|
||||
|
||||
for platform in platforms:
|
||||
try:
|
||||
config = load_platform_config(platform, platforms_dir)
|
||||
except FileNotFoundError:
|
||||
fingerprints.setdefault(platform, []).append(platform)
|
||||
representatives.setdefault(platform, platform)
|
||||
continue
|
||||
|
||||
base_dest = config.get("base_destination", "")
|
||||
entries = []
|
||||
for sys_id, system in sorted(config.get("systems", {}).items()):
|
||||
for fe in system.get("files", []):
|
||||
dest = fe.get("destination", fe.get("name", ""))
|
||||
full_dest = f"{base_dest}/{dest}" if base_dest else dest
|
||||
entries.append(full_dest)
|
||||
|
||||
fingerprint = _hl.sha1("|".join(sorted(entries)).encode()).hexdigest()
|
||||
fingerprints.setdefault(fingerprint, []).append(platform)
|
||||
representatives.setdefault(fingerprint, platform)
|
||||
|
||||
return [(group, representatives[fp]) for fp, group in fingerprints.items()]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
446
scripts/generate_readme.py
Normal file
446
scripts/generate_readme.py
Normal file
|
|
@ -0,0 +1,446 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Generate README.md and CONTRIBUTING.md from database.json and platform configs.
|
||||
|
||||
Usage:
|
||||
python scripts/generate_readme.py [--db database.json] [--platforms-dir platforms/]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import load_platform_config
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def load_database(db_path: str) -> dict:
    """Open ``db_path`` and return its parsed JSON content."""
    with open(db_path) as source:
        parsed = json.load(source)
    return parsed
|
||||
|
||||
|
||||
def load_platform_configs(platforms_dir: str) -> dict:
    """Load every platform config in *platforms_dir*, keyed by file stem.

    Files whose name starts with an underscore are skipped. A config that
    fails to load is reported on stderr and left out of the result; a
    config that loads as a falsy value is left out silently.
    """
    loaded = {}
    for yml_path in sorted(Path(platforms_dir).glob("*.yml")):
        if yml_path.name.startswith("_"):
            continue
        platform = yml_path.stem
        try:
            resolved = load_platform_config(platform, platforms_dir)
        except Exception as exc:
            # Best effort: one broken config must not abort the whole run.
            print(f"Warning: {yml_path.name}: {exc}", file=sys.stderr)
            continue
        if resolved:
            loaded[platform] = resolved
    return loaded
|
||||
|
||||
|
||||
def compute_coverage(config: dict, db: dict, **kwargs) -> dict:
    """Compute BIOS coverage by delegating to verify.py's platform-aware logic.

    Args:
        config: Platform config passed through to ``verify_platform``.
        db: Database dict passed through to ``verify_platform``.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        A dict with the keys ``total``, ``verified``, ``untested``,
        ``present`` (verified + untested), ``missing`` (names of the detail
        entries whose status is ``"missing"``), ``percentage`` (present as a
        share of total, 0 when total is 0) and ``verification_mode``.
    """
    # Make the sibling verify.py importable, but only add the directory
    # once: this function is called once per platform and an unconditional
    # insert would grow sys.path with duplicates.
    scripts_dir = os.path.dirname(__file__)
    if scripts_dir not in sys.path:
        sys.path.insert(0, scripts_dir)
    from verify import verify_platform

    result = verify_platform(config, db)

    total = result["total"]
    verified = result["ok"]
    untested = result["untested"]
    present = verified + untested
    pct = (present / total * 100) if total > 0 else 0

    return {
        "total": total,
        "verified": verified,
        "untested": untested,
        "present": present,
        "missing": [d["name"] for d in result["details"] if d["status"] == "missing"],
        "percentage": pct,
        "verification_mode": result["verification_mode"],
    }
|
||||
|
||||
|
||||
def status_badge(pct: float, platform: str = "") -> str:
    """Generate a shields.io coverage badge as a Markdown image.

    Args:
        pct: Coverage percentage; it is rounded to a whole number in the
            badge message.
        platform: Platform display name used as the badge label and the
            image alt text. Falls back to ``"coverage"`` when empty.

    Returns:
        ``![alt](https://img.shields.io/badge/<label>-<pct>%25-<color>)``
        where the color is ``brightgreen`` from 90 %, ``yellow`` from 70 %
        and ``red`` below that.
    """
    if pct >= 90:
        color = "brightgreen"
    elif pct >= 70:
        color = "yellow"
    else:
        color = "red"

    alt_text = platform or "coverage"
    # Static-badge paths use "-" as the field separator and "_" as a space,
    # so literal dashes/underscores are doubled; spaces are percent-encoded.
    label = alt_text.replace("-", "--").replace("_", "__").replace(" ", "%20")
    # "%25" is the URL-encoded percent sign shown after the number.
    return f"![{alt_text}](https://img.shields.io/badge/{label}-{pct:.0f}%25-{color})"
|
||||
|
||||
|
||||
def status_emoji(pct: float) -> str:
    """Return a traffic-light emoji for a coverage percentage.

    Green from 90, yellow from 70, red for anything lower.
    """
    # Checked from the highest threshold down; the first match wins.
    for floor, symbol in ((90, "🟢"), (70, "🟡")):
        if pct >= floor:
            return symbol
    return "🔴"
|
||||
|
||||
|
||||
def _rel_link(path: str) -> str:
|
||||
"""Build a relative link to a file in the repo."""
|
||||
encoded = path.replace(" ", "%20").replace("(", "%28").replace(")", "%29")
|
||||
return encoded
|
||||
|
||||
|
||||
def _group_files_by_system(files_db: dict) -> dict:
    """Group database entries by the 2nd/3rd component of their ``path``."""
    systems = {}
    for entry in files_db.values():
        parts = entry.get("path", "").split("/")
        if len(parts) >= 3:
            system = f"{parts[1]}/{parts[2]}"
        elif len(parts) >= 2:
            system = parts[1]
        else:
            system = "Other"
        systems.setdefault(system, []).append(entry)
    return systems


def _readme_intro(total_files, total_size_mb: float, generated_at) -> list:
    """Return the title, summary and "Quick Start" lines of the README."""
    return [
        "# Retrogaming BIOS & Firmware Collection",
        "",
        (
            "Complete, verified collection of BIOS, firmware, and system files "
            "for retrogaming emulators - RetroArch, Batocera, Recalbox, Lakka, "
            "RetroPie, and more. Every file checked against official checksums "
            "from [libretro System.dat](https://github.com/libretro/libretro-database), "
            "[batocera-systems](https://github.com/batocera-linux/batocera.linux), "
            "and [Recalbox es_bios.xml](https://gitlab.com/recalbox/recalbox)."
        ),
        "",
        (
            f"> **{total_files}** files | **{total_size_mb:.1f} MB** | "
            f"Last updated: {generated_at}"
        ),
        ">",
        (
            "> PlayStation, PS2, Nintendo DS, Game Boy, GBA, Dreamcast, Saturn, "
            "Neo Geo, Mega CD, PC Engine, MSX, Amiga, Atari ST, ZX Spectrum, "
            "Arcade (MAME/FBNeo), and 50+ systems."
        ),
        "",
        "## Quick Start",
        "",
        "### Download a complete pack",
        "",
        "Go to [Releases](../../releases) and download the ZIP for your platform.",
        "",
        "### Using the download tool",
        "",
        "```bash",
        "# List available platforms",
        "python scripts/download.py --list",
        "",
        "# Download BIOS pack for RetroArch",
        "python scripts/download.py retroarch ~/RetroArch/system/",
        "",
        "# Verify existing BIOS files",
        "python scripts/download.py --verify retroarch ~/RetroArch/system/",
        "```",
        "",
        "### Generate a pack locally (any platform)",
        "",
        (
            "Some platforms are archived and not included in automated releases. "
            "You can generate any pack locally - including archived ones:"
        ),
        "",
        "```bash",
        "git clone https://github.com/Abdess/retroarch_system.git",
        "cd retroarch_system",
        "pip install pyyaml",
        "",
        "# Generate for a specific platform",
        "python scripts/generate_pack.py --platform retropie --output-dir ~/Downloads/",
        "",
        "# Generate for ALL platforms (including archived)",
        "python scripts/generate_pack.py --all --include-archived --output-dir ~/Downloads/",
        "```",
        "",
    ]


def _read_platform_registry() -> dict:
    """Return the ``platforms`` mapping of ../platforms/_registry.yml, or {}."""
    registry_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", "platforms", "_registry.yml"
    )
    if not os.path.exists(registry_path):
        return {}
    with open(registry_path, encoding="utf-8") as f:
        return (yaml.safe_load(f) or {}).get("platforms") or {}


def _coverage_detail(coverage: dict, mode: str) -> str:
    """Summarise verified/untested/missing counts for one platform row."""
    missing = len(coverage["missing"])
    if mode == "existence":
        # In existence mode "verified" only means the file is present.
        detail = f"{coverage['verified']} present"
        if missing:
            detail += f", {missing} missing"
        return detail

    parts = []
    if coverage["verified"]:
        parts.append(f"{coverage['verified']} verified")
    if coverage["untested"]:
        parts.append(f"{coverage['untested']} untested")
    if missing:
        parts.append(f"{missing} missing")
    return ", ".join(parts) if parts else "0 files"


def _coverage_section(configs: dict, db: dict, registry: dict) -> list:
    """Return the "Platform Coverage" table, one row per platform config."""
    lines = [
        "## Platform Coverage",
        "",
        "| Platform | Coverage | Status | Verification | Details |",
        "|----------|----------|--------|--------------|---------|",
    ]
    for name, config in sorted(configs.items()):
        platform_display = config.get("platform", name)
        archived = registry.get(name, {}).get("status", "active") == "archived"
        coverage = compute_coverage(config, db)
        mode = coverage["verification_mode"]
        detail = _coverage_detail(coverage, mode)

        if archived:
            # Archived platforms get a neutral badge instead of a percentage.
            label = platform_display.replace(" ", "%20")
            badge = (
                f"![{platform_display}]"
                f"(https://img.shields.io/badge/{label}-archived-lightgrey)"
            )
            emoji = "📦"
            detail += " *(archived - generate manually)*"
        else:
            badge = status_badge(coverage["percentage"], platform_display)
            emoji = status_emoji(coverage["percentage"])

        lines.append(
            f"| {platform_display} | "
            f"{coverage['present']}/{coverage['total']} ({coverage['percentage']:.1f}%) | "
            f"{badge} {emoji} | "
            f"{mode} | "
            f"{detail} |"
        )
    lines.append("")
    return lines


def _human_size(num_bytes: int) -> str:
    """Format a byte count as MB, KB or B with one decimal where scaled."""
    if num_bytes > 1024 * 1024:
        return f"{num_bytes / (1024*1024):.1f} MB"
    if num_bytes > 1024:
        return f"{num_bytes / 1024:.1f} KB"
    return f"{num_bytes} B"


def _systems_section(bios_systems: dict) -> list:
    """Return the "Systems" summary table (file count and total size)."""
    lines = ["## Systems", "", "| System | Files | Size |", "|--------|-------|------|"]
    for system_name, files in sorted(bios_systems.items()):
        total_size = sum(f.get("size", 0) for f in files)
        lines.append(f"| {system_name} | {len(files)} | {_human_size(total_size)} |")
    lines.append("")
    return lines


def _data_packs_section(data_packs: dict) -> list:
    """Return the "Data Packs" table, linking each pack to its directory."""
    lines = [
        "## Data Packs",
        "",
        (
            "These are large asset packs required by specific cores. "
            "They are included in the repository but not listed individually."
        ),
        "",
        "| Pack | Files | Size |",
        "|------|-------|------|",
    ]
    for pack_name, files in sorted(data_packs.items()):
        total_size = sum(f.get("size", 0) for f in files)
        # Packs are reported in MB or KB only (no plain-byte form).
        if total_size > 1024 * 1024:
            size_str = f"{total_size / (1024*1024):.1f} MB"
        else:
            size_str = f"{total_size / 1024:.1f} KB"
        # Link to the first three path components of the pack's first file.
        first_path = files[0].get("path", "") if files else ""
        parts = first_path.split("/")
        pack_path = "/".join(parts[:3]) if len(parts) >= 3 else first_path
        lines.append(f"| [{pack_name}]({_rel_link(pack_path)}) | {len(files)} | {size_str} |")
    lines.append("")
    return lines


def _platform_specific_names(configs: dict, files_db: dict,
                             by_name_idx: dict, by_md5_idx: dict) -> dict:
    """Map SHA1 -> [(platform, destination name)] where a platform renames a file."""
    platform_names = {}
    for cfg_name, cfg in configs.items():
        plat_display = cfg.get("platform", cfg_name)
        for system in cfg.get("systems", {}).values():
            for fe in system.get("files", []):
                fe_name = fe.get("name", "")
                fe_dest = fe.get("destination", fe_name)
                fe_sha1 = fe.get("sha1")
                # Only the first entry of a comma-separated md5 field is used.
                fe_md5 = fe.get("md5", "").split(",")[0].strip() if fe.get("md5") else ""

                matched_sha1 = None
                if fe_sha1 and fe_sha1 in files_db:
                    matched_sha1 = fe_sha1
                elif fe_md5:
                    matched_sha1 = by_md5_idx.get(fe_md5.lower()) or by_md5_idx.get(fe_md5)
                # NOTE(review): the name fallback is applied whenever neither
                # hash matched; confirm against the upstream file, whose
                # nesting is not visible here.
                if not matched_sha1 and by_name_idx.get(fe_name):
                    matched_sha1 = by_name_idx[fe_name][0]
                if not matched_sha1:
                    continue

                renames = platform_names.setdefault(matched_sha1, [])
                dest_name = fe_dest.rsplit("/", 1)[-1]
                if dest_name != files_db.get(matched_sha1, {}).get("name", ""):
                    pair = (plat_display, dest_name)
                    if pair not in renames:
                        renames.append(pair)
    return platform_names


def _variants_by_base_name(files_db: dict) -> dict:
    """Group entries stored under a ``.variants/`` path by original filename."""
    variants = {}
    for entry in files_db.values():
        if ".variants/" not in entry.get("path", ""):
            continue
        vname = entry["name"]
        # Strip a trailing ".<8 lowercase hex chars>" suffix, if present.
        stem, dot, suffix = vname.rpartition(".")
        if dot and len(suffix) == 8 and all(c in "0123456789abcdef" for c in suffix):
            base_name = stem
        else:
            base_name = vname
        variants.setdefault(base_name, []).append(entry)
    return variants


def _aliases_by_sha1(by_name_idx: dict) -> dict:
    """Invert the by-name index once: SHA1 -> set of names that point to it."""
    aliases = {}
    for alias_name, alias_sha1s in by_name_idx.items():
        for alias_sha1 in alias_sha1s:
            aliases.setdefault(alias_sha1, set()).add(alias_name)
    return aliases


def _file_listing_section(bios_systems: dict, aliases: dict,
                          platform_names: dict, variants_map: dict) -> list:
    """Return the per-system "BIOS File Listing" with hashes and aliases."""
    lines = ["## BIOS File Listing", ""]
    for system_name, files in sorted(bios_systems.items()):
        lines.append(f"### {system_name}")
        lines.append("")
        for entry in sorted(files, key=lambda x: x["name"]):
            name = entry["name"]
            sha1 = entry.get("sha1", "")
            lines.append(
                f"- **[{name}]({_rel_link(entry.get('path', ''))})** "
                f"({entry.get('size', 0):,} bytes)"
            )
            # Sub-bullets are indented two spaces so they nest under the
            # file's own list item.
            lines.append(f"  - SHA1: `{sha1 or 'N/A'}`")
            lines.append(f"  - MD5: `{entry.get('md5', 'N/A')}`")
            lines.append(f"  - CRC32: `{entry.get('crc32', 'N/A')}`")

            if sha1:
                alt_names = sorted(a for a in aliases.get(sha1, ()) if a != name)
                if alt_names:
                    lines.append(f"  - Also known as: {', '.join(f'`{a}`' for a in alt_names)}")
                if platform_names.get(sha1):
                    plat_refs = [f"{plat}: `{dest}`" for plat, dest in platform_names[sha1]]
                    lines.append(f"  - Platform names: {', '.join(plat_refs)}")

            if name in variants_map:
                vlist = variants_map[name]
                lines.append(f"  - **Variants** ({len(vlist)} alternate versions):")
                for v in sorted(vlist, key=lambda x: x["name"]):
                    lines.append(
                        f"    - [{v['name']}]({_rel_link(v['path'])}) ({v['size']:,} bytes) "
                        f"- SHA1: `{v['sha1']}`, MD5: `{v['md5']}`"
                    )
        lines.append("")
    return lines


def generate_readme(db: dict, configs: dict) -> str:
    """Generate README.md content.

    Args:
        db: Database dict. Reads ``generated_at``, ``total_files``,
            ``total_size``, ``files`` (SHA1 -> entry) and the
            ``indexes.by_name`` / ``indexes.by_md5`` lookups.
        configs: Platform configs keyed by platform id. When empty, the
            "Platform Coverage" section is omitted.

    Returns:
        The README as a single newline-joined Markdown string.
    """
    generated_at = db.get("generated_at", "unknown")
    total_files = db.get("total_files", 0)
    total_size_mb = db.get("total_size", 0) / (1024 * 1024)
    files_db = db.get("files", {})
    indexes = db.get("indexes", {})
    by_name_idx = indexes.get("by_name", {})
    by_md5_idx = indexes.get("by_md5", {})

    lines = _readme_intro(total_files, total_size_mb, generated_at)

    if configs:
        lines.extend(_coverage_section(configs, db, _read_platform_registry()))

    # Systems whose name contains one of these markers are asset packs and
    # are summarised separately instead of being listed file by file.
    data_pack_markers = ("RPG Maker", "ScummVM")
    bios_systems = {}
    data_packs = {}
    for system_name, files in _group_files_by_system(files_db).items():
        if any(marker in system_name for marker in data_pack_markers):
            data_packs[system_name] = files
        else:
            bios_systems[system_name] = files

    lines.extend(_systems_section(bios_systems))
    if data_packs:
        lines.extend(_data_packs_section(data_packs))

    lines.extend(
        _file_listing_section(
            bios_systems,
            _aliases_by_sha1(by_name_idx),
            _platform_specific_names(configs, files_db, by_name_idx, by_md5_idx),
            _variants_by_base_name(files_db),
        )
    )

    lines.extend([
        "---",
        "",
        "## Contributing",
        "",
        "See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on submitting BIOS files.",
        "",
        "## License",
        "",
        "This repository provides BIOS files for personal backup and archival purposes.",
        "",
        f"*Auto-generated on {generated_at}*",
        "",
    ])

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_contributing() -> str:
    """Generate CONTRIBUTING.md content.

    Returns:
        The static contribution guide as a Markdown string ending in a
        newline.
    """
    # The directory tree inside the fenced block relies on its indentation
    # to show nesting; keep the box-drawing columns aligned when editing.
    return """# Contributing BIOS Files

Thank you for helping expand the BIOS collection!

## How to Contribute

1. **Fork** this repository
2. **Add** your BIOS file to the correct directory under `bios/Manufacturer/Console/`
3. **Create a Pull Request**

## File Placement

Place files in the correct manufacturer/console directory:
```
bios/
├── Sony/
│   └── PlayStation/
│       └── scph5501.bin
├── Nintendo/
│   └── Game Boy Advance/
│       └── gba_bios.bin
└── Sega/
    └── Dreamcast/
        └── dc_boot.bin
```

## Verification

All submitted BIOS files are automatically verified against known checksums:

1. **Hash verification** - SHA1/MD5 checked against known databases
2. **Size verification** - File size matches expected value
3. **Platform reference** - File must be referenced in at least one platform config
4. **Duplicate detection** - Existing files are flagged to avoid duplication

## What We Accept

- **Verified BIOS dumps** with matching checksums from known databases
- **System firmware** required by emulators
- **New variants** of existing BIOS files (different regions, versions)

## What We Don't Accept

- Game ROMs or ISOs
- Modified/patched BIOS files
- Files without verifiable checksums
- Executable files (.exe, .bat, .sh)

## Questions?

Open an [Issue](../../issues) if you're unsure about a file.
"""
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: write README.md and CONTRIBUTING.md.

    Exits with status 1 when the database file given by ``--db`` does not
    exist. Platform configs are only loaded when ``--platforms-dir`` is an
    existing directory; otherwise the README is generated without them.
    """
    parser = argparse.ArgumentParser(description="Generate README.md and CONTRIBUTING.md")
    parser.add_argument("--db", default="database.json", help="Path to database.json")
    parser.add_argument("--platforms-dir", default="platforms", help="Platforms config directory")
    parser.add_argument("--output-dir", default=".", help="Output directory for README/CONTRIBUTING")
    args = parser.parse_args()

    if not os.path.exists(args.db):
        print(f"Error: {args.db} not found. Run generate_db.py first.", file=sys.stderr)
        sys.exit(1)

    db = load_database(args.db)
    configs = load_platform_configs(args.platforms_dir) if os.path.isdir(args.platforms_dir) else {}

    # Both documents contain non-ASCII characters (emoji, box-drawing), so
    # they are written as UTF-8 explicitly instead of with the locale's
    # default encoding, which can raise UnicodeEncodeError.
    readme = generate_readme(db, configs)
    readme_path = os.path.join(args.output_dir, "README.md")
    with open(readme_path, "w", encoding="utf-8") as f:
        f.write(readme)
    print(f"Generated {readme_path}")

    contributing = generate_contributing()
    contributing_path = os.path.join(args.output_dir, "CONTRIBUTING.md")
    with open(contributing_path, "w", encoding="utf-8") as f:
        f.write(contributing)
    print(f"Generated {contributing_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
78
scripts/list_platforms.py
Normal file
78
scripts/list_platforms.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env python3
|
||||
"""List available platforms for CI matrix strategy.
|
||||
|
||||
Respects the `status` field in _registry.yml:
|
||||
- active: included in CI releases and automated scraping
|
||||
- archived: excluded from CI, user can generate manually
|
||||
|
||||
Usage:
|
||||
python scripts/list_platforms.py # Active platforms (for CI)
|
||||
python scripts/list_platforms.py --all # All platforms including archived
|
||||
python scripts/list_platforms.py >> "$GITHUB_OUTPUT"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
PLATFORMS_DIR = "platforms"
|
||||
|
||||
|
||||
def _load_registry(platforms_dir: str = PLATFORMS_DIR) -> dict:
    """Load _registry.yml if available.

    Args:
        platforms_dir: Directory expected to contain ``_registry.yml``.

    Returns:
        The parsed registry, or an empty dict when PyYAML is not installed,
        the file does not exist, or the file is empty.
    """
    registry_path = Path(platforms_dir) / "_registry.yml"
    # ``yaml`` is None when the optional PyYAML import failed.
    if not yaml or not registry_path.exists():
        return {}
    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    with open(registry_path, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
|
||||
|
||||
|
||||
def list_platforms(include_archived: bool = False) -> list[str]:
    """Return the sorted platform ids found as ``*.yml`` files in PLATFORMS_DIR.

    Files starting with an underscore are ignored. A platform whose
    registry entry has ``status: archived`` is left out unless
    *include_archived* is true; platforms without a registry entry count
    as active. Returns an empty list when the directory does not exist.
    """
    root = Path(PLATFORMS_DIR)
    if not root.is_dir():
        return []

    known = _load_registry(str(root)).get("platforms", {})

    selected = []
    for yml_path in sorted(root.glob("*.yml")):
        if yml_path.name.startswith("_"):
            continue
        platform = yml_path.stem
        is_archived = known.get(platform, {}).get("status", "active") == "archived"
        if include_archived or not is_archived:
            selected.append(platform)
    return selected
|
||||
|
||||
|
||||
def main():
    """Emit the platform list as JSON for use as a CI matrix.

    Passing ``--all`` on the command line includes archived platforms.
    When the ``GITHUB_OUTPUT`` environment variable is set, a
    ``platforms=<json>`` line is appended to that file; otherwise the JSON
    is printed to stdout. Exits with status 1 when no platform is found.
    """
    names = list_platforms(include_archived="--all" in sys.argv)
    if not names:
        print("No platform configs found", file=sys.stderr)
        sys.exit(1)

    payload = json.dumps(names)
    output_file = os.environ.get("GITHUB_OUTPUT")
    if not output_file:
        print(payload)
        return

    with open(output_file, "a") as out:
        out.write(f"platforms={payload}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
550
scripts/migrate.py
Normal file
550
scripts/migrate.py
Normal file
|
|
@ -0,0 +1,550 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Migrate current flat structure AND other branches to bios/Manufacturer/Console/ hierarchy.
|
||||
|
||||
Usage:
|
||||
python scripts/migrate.py [--dry-run] [--source DIR] [--target DIR] [--include-branches]
|
||||
|
||||
Reads existing directories like "Sony - PlayStation" and moves files to
|
||||
"bios/Sony/PlayStation/". With --include-branches, also extracts unique BIOS files
|
||||
from all remote branches (RetroArch, RetroPie, Recalbox, batocera, Other).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import compute_hashes
|
||||
|
||||
SYSTEM_MAP = {
|
||||
"3DO Company, The - 3DO": ("3DO Company", "3DO"),
|
||||
"Arcade": ("Arcade", "Arcade"),
|
||||
"Atari - 400-800": ("Atari", "400-800"),
|
||||
"Atari - 5200": ("Atari", "5200"),
|
||||
"Atari - 7800": ("Atari", "7800"),
|
||||
"Atari - Lynx": ("Atari", "Lynx"),
|
||||
"Atari - ST": ("Atari", "ST"),
|
||||
"Coleco - ColecoVision": ("Coleco", "ColecoVision"),
|
||||
"Commodore - Amiga": ("Commodore", "Amiga"),
|
||||
"Fairchild Channel F": ("Fairchild", "Channel F"),
|
||||
"Id Software - Doom": ("Id Software", "Doom"),
|
||||
"J2ME": ("Java", "J2ME"),
|
||||
"MacII": ("Apple", "Macintosh II"),
|
||||
"Magnavox - Odyssey2": ("Magnavox", "Odyssey2"),
|
||||
"Mattel - Intellivision": ("Mattel", "Intellivision"),
|
||||
"Microsoft - MSX": ("Microsoft", "MSX"),
|
||||
"NEC - PC Engine - TurboGrafx 16 - SuperGrafx": ("NEC", "PC Engine"),
|
||||
"NEC - PC-98": ("NEC", "PC-98"),
|
||||
"NEC - PC-FX": ("NEC", "PC-FX"),
|
||||
"Nintendo - Famicom Disk System": ("Nintendo", "Famicom Disk System"),
|
||||
"Nintendo - Game Boy Advance": ("Nintendo", "Game Boy Advance"),
|
||||
"Nintendo - GameCube": ("Nintendo", "GameCube"),
|
||||
"Nintendo - Gameboy": ("Nintendo", "Game Boy"),
|
||||
"Nintendo - Gameboy Color": ("Nintendo", "Game Boy Color"),
|
||||
"Nintendo - Nintendo 64DD": ("Nintendo", "Nintendo 64DD"),
|
||||
"Nintendo - Nintendo DS": ("Nintendo", "Nintendo DS"),
|
||||
"Nintendo - Nintendo Entertainment System": ("Nintendo", "NES"),
|
||||
"Nintendo - Pokemon Mini": ("Nintendo", "Pokemon Mini"),
|
||||
"Nintendo - Satellaview": ("Nintendo", "Satellaview"),
|
||||
"Nintendo - SuFami Turbo": ("Nintendo", "SuFami Turbo"),
|
||||
"Nintendo - Super Game Boy": ("Nintendo", "Super Game Boy"),
|
||||
"Nintendo - Super Nintendo Entertainment System": ("Nintendo", "SNES"),
|
||||
"Phillips - Videopac+": ("Philips", "Videopac+"),
|
||||
"SNK - NeoGeo CD": ("SNK", "Neo Geo CD"),
|
||||
"ScummVM": ("ScummVM", "ScummVM"),
|
||||
"Sega - Dreamcast": ("Sega", "Dreamcast"),
|
||||
"Sega - Game Gear": ("Sega", "Game Gear"),
|
||||
"Sega - Master System - Mark III": ("Sega", "Master System"),
|
||||
"Sega - Mega CD - Sega CD": ("Sega", "Mega CD"),
|
||||
"Sega - Mega Drive - Genesis": ("Sega", "Mega Drive"),
|
||||
"Sega - Saturn": ("Sega", "Saturn"),
|
||||
"Sharp - X1": ("Sharp", "X1"),
|
||||
"Sharp - X68000": ("Sharp", "X68000"),
|
||||
"Sinclair - ZX Spectrum": ("Sinclair", "ZX Spectrum"),
|
||||
"Sony - PlayStation": ("Sony", "PlayStation"),
|
||||
"Sony - PlayStation Portable": ("Sony", "PlayStation Portable"),
|
||||
"Wolfenstein 3D": ("Id Software", "Wolfenstein 3D"),
|
||||
}
|
||||
|
||||
BIOS_FILE_MAP = {
|
||||
"panafz": ("3DO Company", "3DO"),
|
||||
"goldstar.bin": ("3DO Company", "3DO"),
|
||||
"sanyotry.bin": ("3DO Company", "3DO"),
|
||||
"3do_arcade_saot.bin": ("3DO Company", "3DO"),
|
||||
"3dobios.zip": ("3DO Company", "3DO"),
|
||||
|
||||
"cpc464.rom": ("Amstrad", "CPC"),
|
||||
"cpc664.rom": ("Amstrad", "CPC"),
|
||||
"cpc6128.rom": ("Amstrad", "CPC"),
|
||||
|
||||
"neogeo.zip": ("SNK", "Neo Geo"),
|
||||
"pgm.zip": ("Arcade", "Arcade"),
|
||||
"skns.zip": ("Arcade", "Arcade"),
|
||||
"bubsys.zip": ("Arcade", "Arcade"),
|
||||
"cchip.zip": ("Arcade", "Arcade"),
|
||||
"decocass.zip": ("Arcade", "Arcade"),
|
||||
"isgsm.zip": ("Arcade", "Arcade"),
|
||||
"midssio.zip": ("Arcade", "Arcade"),
|
||||
"nmk004.zip": ("Arcade", "Arcade"),
|
||||
"ym2608.zip": ("Arcade", "Arcade"),
|
||||
"qsound.zip": ("Arcade", "Arcade"),
|
||||
|
||||
"ATARIBAS.ROM": ("Atari", "400-800"),
|
||||
"ATARIOSA.ROM": ("Atari", "400-800"),
|
||||
"ATARIOSB.ROM": ("Atari", "400-800"),
|
||||
"ATARIXL.ROM": ("Atari", "400-800"),
|
||||
"BB01R4_OS.ROM": ("Atari", "400-800"),
|
||||
"XEGAME.ROM": ("Atari", "400-800"),
|
||||
"5200.rom": ("Atari", "5200"),
|
||||
"7800 BIOS (U).rom": ("Atari", "7800"),
|
||||
"7800 BIOS (E).rom": ("Atari", "7800"),
|
||||
"lynxboot.img": ("Atari", "Lynx"),
|
||||
"tos.img": ("Atari", "ST"),
|
||||
|
||||
"colecovision.rom": ("Coleco", "ColecoVision"),
|
||||
"coleco.rom": ("Coleco", "ColecoVision"),
|
||||
|
||||
"kick33180.A500": ("Commodore", "Amiga"),
|
||||
"kick34005.A500": ("Commodore", "Amiga"),
|
||||
"kick34005.CDTV": ("Commodore", "Amiga"),
|
||||
"kick37175.A500": ("Commodore", "Amiga"),
|
||||
"kick37350.A600": ("Commodore", "Amiga"),
|
||||
"kick39106.A1200": ("Commodore", "Amiga"),
|
||||
"kick39106.A4000": ("Commodore", "Amiga"),
|
||||
"kick40060.CD32": ("Commodore", "Amiga"),
|
||||
"kick40060.CD32.ext": ("Commodore", "Amiga"),
|
||||
"kick40063.A600": ("Commodore", "Amiga"),
|
||||
"kick40068.A1200": ("Commodore", "Amiga"),
|
||||
"kick40068.A4000": ("Commodore", "Amiga"),
|
||||
|
||||
"sl31253.bin": ("Fairchild", "Channel F"),
|
||||
"sl31254.bin": ("Fairchild", "Channel F"),
|
||||
"sl90025.bin": ("Fairchild", "Channel F"),
|
||||
|
||||
"prboom.wad": ("Id Software", "Doom"),
|
||||
"ecwolf.pk3": ("Id Software", "Wolfenstein 3D"),
|
||||
|
||||
"MacII.ROM": ("Apple", "Macintosh II"),
|
||||
"MacIIx.ROM": ("Apple", "Macintosh II"),
|
||||
"vMac.ROM": ("Apple", "Macintosh II"),
|
||||
|
||||
"o2rom.bin": ("Magnavox", "Odyssey2"),
|
||||
"g7400.bin": ("Philips", "Videopac+"),
|
||||
"jopac.bin": ("Philips", "Videopac+"),
|
||||
|
||||
"exec.bin": ("Mattel", "Intellivision"),
|
||||
"grom.bin": ("Mattel", "Intellivision"),
|
||||
"ECS.bin": ("Mattel", "Intellivision"),
|
||||
"IVOICE.BIN": ("Mattel", "Intellivision"),
|
||||
|
||||
"MSX.ROM": ("Microsoft", "MSX"),
|
||||
"MSX2.ROM": ("Microsoft", "MSX"),
|
||||
"MSX2EXT.ROM": ("Microsoft", "MSX"),
|
||||
"MSX2P.ROM": ("Microsoft", "MSX"),
|
||||
"MSX2PEXT.ROM": ("Microsoft", "MSX"),
|
||||
|
||||
"syscard1.pce": ("NEC", "PC Engine"),
|
||||
"syscard2.pce": ("NEC", "PC Engine"),
|
||||
"syscard2u.pce": ("NEC", "PC Engine"),
|
||||
"syscard3.pce": ("NEC", "PC Engine"),
|
||||
"syscard3u.pce": ("NEC", "PC Engine"),
|
||||
"gexpress.pce": ("NEC", "PC Engine"),
|
||||
"pcfx.rom": ("NEC", "PC-FX"),
|
||||
|
||||
"disksys.rom": ("Nintendo", "Famicom Disk System"),
|
||||
"gba_bios.bin": ("Nintendo", "Game Boy Advance"),
|
||||
"gb_bios.bin": ("Nintendo", "Game Boy"),
|
||||
"dmg_boot.bin": ("Nintendo", "Game Boy"),
|
||||
"gbc_bios.bin": ("Nintendo", "Game Boy Color"),
|
||||
"BS-X.bin": ("Nintendo", "Satellaview"),
|
||||
"sgb_bios.bin": ("Nintendo", "Super Game Boy"),
|
||||
"sgb_boot.bin": ("Nintendo", "Super Game Boy"),
|
||||
"sgb2_boot.bin": ("Nintendo", "Super Game Boy"),
|
||||
"SGB1.sfc": ("Nintendo", "Super Game Boy"),
|
||||
"SGB2.sfc": ("Nintendo", "Super Game Boy"),
|
||||
"bios7.bin": ("Nintendo", "Nintendo DS"),
|
||||
"bios9.bin": ("Nintendo", "Nintendo DS"),
|
||||
"firmware.bin": ("Nintendo", "Nintendo DS"),
|
||||
"biosnds7.bin": ("Nintendo", "Nintendo DS"),
|
||||
"biosnds9.bin": ("Nintendo", "Nintendo DS"),
|
||||
"dsfirmware.bin": ("Nintendo", "Nintendo DS"),
|
||||
"biosdsi7.bin": ("Nintendo", "Nintendo DS"),
|
||||
"biosdsi9.bin": ("Nintendo", "Nintendo DS"),
|
||||
"dsifirmware.bin": ("Nintendo", "Nintendo DS"),
|
||||
"bios.min": ("Nintendo", "Pokemon Mini"),
|
||||
"64DD_IPL.bin": ("Nintendo", "Nintendo 64DD"),
|
||||
|
||||
"dc_boot.bin": ("Sega", "Dreamcast"),
|
||||
"dc_flash.bin": ("Sega", "Dreamcast"),
|
||||
"bios.gg": ("Sega", "Game Gear"),
|
||||
"bios_E.sms": ("Sega", "Master System"),
|
||||
"bios_J.sms": ("Sega", "Master System"),
|
||||
"bios_U.sms": ("Sega", "Master System"),
|
||||
"bios_CD_E.bin": ("Sega", "Mega CD"),
|
||||
"bios_CD_J.bin": ("Sega", "Mega CD"),
|
||||
"bios_CD_U.bin": ("Sega", "Mega CD"),
|
||||
"bios_MD.bin": ("Sega", "Mega Drive"),
|
||||
"mpr-17933.bin": ("Sega", "Saturn"),
|
||||
"mpr-18811-mx.ic1": ("Sega", "Saturn"),
|
||||
"mpr-19367-mx.ic1": ("Sega", "Saturn"),
|
||||
"saturn_bios.bin": ("Sega", "Saturn"),
|
||||
"sega_101.bin": ("Sega", "Saturn"),
|
||||
"stvbios.zip": ("Sega", "Saturn"),
|
||||
|
||||
"scph1001.bin": ("Sony", "PlayStation"),
|
||||
"SCPH1001.BIN": ("Sony", "PlayStation"),
|
||||
"scph5500.bin": ("Sony", "PlayStation"),
|
||||
"scph5501.bin": ("Sony", "PlayStation"),
|
||||
"scph5502.bin": ("Sony", "PlayStation"),
|
||||
"scph7001.bin": ("Sony", "PlayStation"),
|
||||
"scph101.bin": ("Sony", "PlayStation"),
|
||||
"ps1_rom.bin": ("Sony", "PlayStation"),
|
||||
"psxonpsp660.bin": ("Sony", "PlayStation"),
|
||||
"PSXONPSP660.BIN": ("Sony", "PlayStation Portable"),
|
||||
|
||||
"scummvm.zip": ("ScummVM", "ScummVM"),
|
||||
"MT32_CONTROL.ROM": ("ScummVM", "ScummVM"),
|
||||
"MT32_PCM.ROM": ("ScummVM", "ScummVM"),
|
||||
}
|
||||
|
||||
PATH_PREFIX_MAP = {
|
||||
"neocd/": ("SNK", "Neo Geo CD"),
|
||||
"dc/": ("Sega", "Dreamcast"),
|
||||
"np2kai/": ("NEC", "PC-98"),
|
||||
"quasi88/": ("NEC", "PC-98"),
|
||||
"keropi/": ("Sharp", "X68000"),
|
||||
"xmil/": ("Sharp", "X1"),
|
||||
"fuse/": ("Sinclair", "ZX Spectrum"),
|
||||
"vice/": ("Commodore", "C128"),
|
||||
"bk/": ("Elektronika", "BK"),
|
||||
"dragon/": ("Dragon", "Dragon"),
|
||||
"oricutron/": ("Oric", "Oric"),
|
||||
"trs80coco/": ("Tandy", "CoCo"),
|
||||
"ti994a/": ("Texas Instruments", "TI-99"),
|
||||
"gamecube/": ("Nintendo", "GameCube"),
|
||||
"Mupen64plus/": ("Nintendo", "Nintendo 64DD"),
|
||||
"ps2/": ("Sony", "PlayStation 2"),
|
||||
"fmtowns/": ("Fujitsu", "FM Towns"),
|
||||
"mame/": ("Arcade", "MAME"),
|
||||
"fbneo/": ("Arcade", "Arcade"),
|
||||
"saves/3ds/": ("Nintendo", "3DS"),
|
||||
"saves/citra-emu/": ("Nintendo", "3DS"),
|
||||
"saves/dolphin-emu/": ("Nintendo", "Wii"),
|
||||
"saves/xbox/": ("Microsoft", "Xbox"),
|
||||
"cemu/": ("Nintendo", "Wii U"),
|
||||
"wsh57/": ("Other", "Misc"),
|
||||
"Machines/COL - ColecoVision/": ("Coleco", "ColecoVision"),
|
||||
"Machines/Shared Roms/": ("Microsoft", "MSX"),
|
||||
"Sony - PlayStation 2/": ("Sony", "PlayStation 2"),
|
||||
"Sony - PlayStation/": ("Sony", "PlayStation"),
|
||||
}
|
||||
|
||||
TOS_PATTERN_MAP = {
|
||||
"tos": ("Atari", "ST"),
|
||||
}
|
||||
|
||||
SKIP_LARGE_ROM_DIRS = {"roms/"}
|
||||
|
||||
BRANCHES = ["RetroArch", "RetroPie", "Recalbox", "batocera", "Other"]
|
||||
|
||||
SKIP_FILES = {
|
||||
"README.md", ".gitignore", "desktop.ini",
|
||||
"telemetry_id", "citra_log.txt",
|
||||
}
|
||||
SKIP_EXTENSIONS = {".txt", ".log", ".pem", ".nvm", ".ctg", ".exe", ".bat", ".sh"}
|
||||
|
||||
|
||||
def sha1_blob(data: bytes) -> str:
    """Return the hexadecimal SHA1 digest of *data*."""
    digest = hashlib.sha1()
    digest.update(data)
    return digest.hexdigest()
|
||||
|
||||
|
||||
# Directories under roms/ whose contents are treated as BIOS material.
_ROM_BIOS_DIRS = (
    "roms/fba/", "roms/fbneo/", "roms/mame/", "roms/mame-libretro/",
    "roms/neogeo/", "roms/naomi/", "roms/atomiswave/", "roms/macintosh/",
)

# SCPH model numbers that classify_file routes to PlayStation 2.
_PS2_MODEL_NUMBERS = ("70004", "39001", "30004", "10000")


def classify_file(filepath: str) -> tuple | None:
    """Determine (Manufacturer, Console) for a file path from a branch.

    Rules are tried in this order and the first match wins:

    1. Names in SKIP_FILES, extensions in SKIP_EXTENSIONS and anything
       under ``roms/`` outside the known BIOS directories are skipped.
    2. Directory prefix lookup in PATH_PREFIX_MAP (after stripping a
       leading ``bios/``, ``BIOS/`` or known ``roms/<dir>/`` prefix).
    3. Exact filename lookup in BIOS_FILE_MAP.
    4. Case-insensitive "name starts with a BIOS_FILE_MAP key" lookup,
       restricted to keys longer than three characters.
    5. Filename patterns: ``tos*.img``, ``kick*`` containing a dot,
       ``amiga-*``, ``SCPH*``, and ``*.zip`` under ``roms/`` or ``BIOS/``.

    Args:
        filepath: Path of the file relative to the branch root.

    Returns:
        A ``(manufacturer, console)`` tuple, or None if the file should be
        skipped or could not be classified.
    """
    name = os.path.basename(filepath)

    if name in SKIP_FILES:
        return None
    if os.path.splitext(name)[1].lower() in SKIP_EXTENSIONS:
        return None
    if filepath.startswith("roms/") and not filepath.startswith(_ROM_BIOS_DIRS):
        return None

    # Drop the first matching container prefix so that the directory rules
    # below see e.g. "neocd/..." rather than "bios/neocd/...".
    clean = filepath
    for prefix in ("bios/", "BIOS/") + _ROM_BIOS_DIRS:
        if clean.startswith(prefix):
            clean = clean[len(prefix):]
            break

    for prefix, target in PATH_PREFIX_MAP.items():
        if clean.startswith(prefix):
            return target

    if name in BIOS_FILE_MAP:
        return BIOS_FILE_MAP[name]

    # Fuzzy match: keys of three characters or fewer are excluded.
    lowered = name.lower()
    for known_name, target in BIOS_FILE_MAP.items():
        if len(known_name) > 3 and lowered.startswith(known_name.lower()):
            return target

    if name.startswith("tos") and name.endswith(".img"):
        return ("Atari", "ST")

    # Matches kick*.rom as well as names such as kick34005.A500.
    if name.startswith("kick") and "." in name:
        return ("Commodore", "Amiga")

    if name.startswith("amiga-"):
        return ("Commodore", "Amiga")

    if name.upper().startswith("SCPH"):
        if any(model in name for model in _PS2_MODEL_NUMBERS):
            return ("Sony", "PlayStation 2")
        return ("Sony", "PlayStation")

    if name.endswith(".zip") and filepath.startswith(("roms/", "BIOS/")):
        return ("Arcade", "Arcade")

    # Everything else is left unclassified.
    return None
|
||||
|
||||
|
||||
def get_subpath(filepath: str, manufacturer: str, console: str) -> str:
    """Return the path of *filepath* relative to its console directory.

    A leading ``bios/`` or ``BIOS/`` component is removed first.  If what is
    left starts with one of the ``PATH_PREFIX_MAP`` keys, everything after
    that key is returned, so nested layouts keep their sub-directories.
    Otherwise only the basename of *filepath* is returned.

    ``manufacturer`` and ``console`` are accepted but not used here.
    """
    stripped = filepath
    for root in ("bios/", "BIOS/"):
        if stripped.startswith(root):
            stripped = stripped[len(root):]
            break

    # First matching key wins, in the mapping's own iteration order.
    matched = next(
        (key for key in PATH_PREFIX_MAP if stripped.startswith(key)), None
    )
    if matched is not None:
        return stripped[len(matched):]

    return os.path.basename(filepath)
|
||||
|
||||
|
||||
def extract_from_branches(target: Path, dry_run: bool, existing_hashes: set) -> int:
    """Extract BIOS files from all branches into the target structure.

    For every name in ``BRANCHES`` that exists as ``origin/<name>``, each
    tracked file is classified, read with ``git show`` and written below
    ``target/<manufacturer>/<console>/``.  Content whose SHA-1 is already in
    *existing_hashes* is skipped.  When the destination exists with different
    content, the blob is stored as ``.variants/<name>.<sha1[:8]>`` instead.

    Args:
        target: Root directory of the organized tree.
        dry_run: When true, only report what would be written.
        existing_hashes: SHA-1 digests already present; updated in place.

    Returns:
        Number of files extracted (or that would be, in a dry run).
    """
    extracted = 0

    for branch in BRANCHES:
        ref = f"origin/{branch}"

        try:
            subprocess.run(
                ["git", "rev-parse", "--verify", ref],
                capture_output=True, check=True
            )
        except subprocess.CalledProcessError:
            print(f" Branch {branch} not found, skipping")
            continue

        # -z gives NUL-terminated, unquoted paths.  Without it git C-quotes
        # names containing unusual characters, and the quoted form cannot be
        # passed back to ``git show``.
        listing = subprocess.run(
            ["git", "ls-tree", "-r", "--name-only", "-z", ref],
            capture_output=True, text=True
        )
        # Dropping empty fields also makes an empty listing count as 0 files.
        files = [path for path in listing.stdout.split("\0") if path]
        print(f"\n Branch '{branch}': {len(files)} files")

        branch_extracted = 0
        for filepath in files:
            classification = classify_file(filepath)
            if classification is None:
                continue

            manufacturer, console = classification
            subpath = get_subpath(filepath, manufacturer, console)
            dest_dir = target / manufacturer / console
            dest = dest_dir / subpath

            try:
                blob = subprocess.run(
                    ["git", "show", f"{ref}:{filepath}"],
                    capture_output=True, check=True
                )
            except subprocess.CalledProcessError:
                continue
            content = blob.stdout

            file_hash = sha1_blob(content)
            if file_hash in existing_hashes:
                continue

            label = "NEW"
            if dest.exists():
                if compute_hashes(dest)["sha1"] == file_hash:
                    # Same bytes already on disk: just remember the hash.
                    existing_hashes.add(file_hash)
                    continue
                # Same name, different content: keep it as a variant.
                dest = dest_dir / ".variants" / f"{dest.name}.{file_hash[:8]}"
                if dest.exists():
                    continue
                label = "VARIANT"

            if not dry_run:
                dest.parent.mkdir(parents=True, exist_ok=True)
                with open(dest, "wb") as f:
                    f.write(content)
            print(f" {label}: {filepath} -> {dest.relative_to(target)}")

            existing_hashes.add(file_hash)
            branch_extracted += 1

        print(f" -> {branch_extracted} new files from {branch}")
        extracted += branch_extracted

    return extracted
|
||||
|
||||
|
||||
def migrate_local(source: Path, target: Path, dry_run: bool) -> tuple:
    """Copy files from the flat legacy layout into Manufacturer/Console folders.

    Each ``SYSTEM_MAP`` key is looked up as a directory under *source*; its
    regular files are copied to ``target/<manufacturer>/<console>/`` unless a
    file of that name is already there.

    Returns:
        ``(moved, skipped, errors, existing_hashes)`` where *errors* is a list
        of ``(path, message)`` pairs and *existing_hashes* holds the SHA-1 of
        every file that was copied (or would be, in a dry run).
    """
    copied = 0
    already_present = 0
    failures = []
    seen_hashes = set()

    for legacy_name in sorted(SYSTEM_MAP):
        manufacturer, console = SYSTEM_MAP[legacy_name]

        legacy_dir = source / legacy_name
        if not legacy_dir.is_dir():
            continue

        candidates = [entry for entry in legacy_dir.iterdir() if entry.is_file()]
        if not candidates:
            continue

        console_dir = target / manufacturer / console
        print(f" {legacy_name}/ -> bios/{manufacturer}/{console}/")

        if not dry_run:
            console_dir.mkdir(parents=True, exist_ok=True)

        for entry in candidates:
            dest = console_dir / entry.name
            if dest.exists():
                print(f" SKIP (exists): {entry.name}")
                already_present += 1
                continue

            if dry_run:
                print(f" COPY: {entry.name}")
            else:
                try:
                    shutil.copy2(str(entry), str(dest))
                except OSError as exc:
                    failures.append((entry, str(exc)))
                    print(f" ERROR: {entry.name}: {exc}")
                    continue

            # The hash is taken from the source file, so it is recorded in
            # dry-run mode as well.
            seen_hashes.add(compute_hashes(entry)["sha1"])
            copied += 1

    return copied, already_present, failures, seen_hashes
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the BIOS migration.

    Phase 1 copies local files via ``migrate_local``; with
    ``--include-branches`` phase 2 adds files from remote branches.  Unmapped
    top-level directories are reported, and the process exits with status 1
    if the source directory is missing or any copy failed.
    """
    cli = argparse.ArgumentParser(
        description="Migrate BIOS files to Manufacturer/Console structure"
    )
    cli.add_argument("--dry-run", action="store_true",
                     help="Show what would be done without moving files")
    cli.add_argument("--source", default=".",
                     help="Source directory (repo root)")
    cli.add_argument("--target", default="bios",
                     help="Target directory for organized BIOS files")
    cli.add_argument("--include-branches", action="store_true",
                     help="Also extract BIOS files from all remote branches")
    opts = cli.parse_args()

    source = Path(opts.source)
    target = Path(opts.target)

    if not source.is_dir():
        print(f"Error: Source directory '{source}' not found", file=sys.stderr)
        sys.exit(1)

    print(f"Migrating from {source}/ to {target}/Manufacturer/Console/")
    # Either the dry-run banner plus a blank line, or just the blank line.
    print("(DRY RUN - no files will be moved)\n" if opts.dry_run else "")

    print("=== Phase 1: Local files (libretro branch) ===")
    moved, skipped, errors, existing_hashes = migrate_local(source, target, opts.dry_run)
    action = "Would copy" if opts.dry_run else "Copied"
    print(f"\n{action} {moved} files, skipped {skipped}")

    if opts.include_branches:
        print("\n=== Phase 2: Extracting from other branches ===")
        branch_count = extract_from_branches(target, opts.dry_run, existing_hashes)
        print(f"\n{action} {branch_count} additional files from branches")
        moved += branch_count

    if source.is_dir():
        known = set(SYSTEM_MAP.keys()) | {
            "bios", "scripts", "platforms", "schemas", ".github", ".cache",
            ".git", "README.md", ".gitignore",
        }
        for child in sorted(source.iterdir()):
            is_unknown = child.name not in known and not child.name.startswith(".")
            if is_unknown and child.is_dir():
                print(f"\nWARNING: Unmapped directory: {child.name}")

    print(f"\nTotal: {moved} files migrated, {len(existing_hashes)} unique hashes")

    if not errors:
        return

    print(f"Errors: {len(errors)}")
    for failed_path, message in errors:
        print(f" {failed_path}: {message}")
    sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
46
scripts/scraper/__init__.py
Normal file
46
scripts/scraper/__init__.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
"""Scraper plugin discovery module.
|
||||
|
||||
Auto-detects *_scraper.py files and exposes their scrapers.
|
||||
Each scraper module must define:
|
||||
PLATFORM_NAME: str
|
||||
Scraper: class inheriting BaseScraper
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
|
||||
from .base_scraper import BaseScraper
|
||||
|
||||
_scrapers: dict[str, type] = {}
|
||||
|
||||
|
||||
def discover_scrapers() -> dict[str, type]:
    """Import every ``*_scraper`` module in this package and index its scraper.

    A module is registered when it has a truthy ``PLATFORM_NAME`` and a
    ``Scraper`` attribute that subclasses ``BaseScraper``.  The result is kept
    in the module-level ``_scrapers`` dict; once that dict is non-empty it is
    returned without scanning again.
    """
    if not _scrapers:
        here = str(Path(__file__).parent)
        for _finder, module_name, _is_pkg in pkgutil.iter_modules([here]):
            if not module_name.endswith("_scraper"):
                continue

            loaded = importlib.import_module(f".{module_name}", package=__package__)
            platform = getattr(loaded, "PLATFORM_NAME", None)
            candidate = getattr(loaded, "Scraper", None)

            if platform and candidate and issubclass(candidate, BaseScraper):
                _scrapers[platform] = candidate

    return _scrapers
|
||||
|
||||
|
||||
def get_scraper(platform_name: str) -> BaseScraper | None:
    """Return a new scraper instance for *platform_name*, or ``None`` if unknown."""
    scraper_cls = discover_scrapers().get(platform_name)
    if not scraper_cls:
        return None
    return scraper_cls()
|
||||
155
scripts/scraper/base_scraper.py
Normal file
155
scripts/scraper/base_scraper.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""Base scraper interface for platform BIOS requirement sources."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class BiosRequirement:
    """One BIOS file that a platform source declares it needs.

    Only ``name`` and ``system`` are mandatory.  Hash and size fields default
    to ``None``, ``destination`` to an empty string, and ``required`` to
    ``True``.
    """

    # Identity of the requirement.
    name: str
    system: str

    # Integrity data; None when not supplied.
    sha1: str | None = None
    md5: str | None = None
    crc32: str | None = None
    size: int | None = None

    # Placement and importance.
    destination: str = ""
    required: bool = True

    # When set, md5 refers to this ROM inside the ZIP rather than the ZIP itself.
    zipped_file: str | None = None
|
||||
|
||||
|
||||
@dataclass
class ChangeSet:
    """Requirements added, removed and modified between a scrape and a config.

    ``modified`` holds ``(old, new)`` pairs.
    """

    added: list[BiosRequirement] = field(default_factory=list)
    removed: list[BiosRequirement] = field(default_factory=list)
    modified: list[tuple[BiosRequirement, BiosRequirement]] = field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """True when at least one of the three lists is non-empty."""
        return any((self.added, self.removed, self.modified))

    def summary(self) -> str:
        """Return e.g. ``"+2 added, -1 removed"``, or ``"no changes"``."""
        buckets = (
            ("+", self.added, "added"),
            ("-", self.removed, "removed"),
            ("~", self.modified, "modified"),
        )
        parts = [
            f"{sign}{len(items)} {label}"
            for sign, items, label in buckets
            if items
        ]
        if not parts:
            return "no changes"
        return ", ".join(parts)
|
||||
|
||||
|
||||
class BaseScraper(ABC):
    """Common interface for scrapers of platform BIOS requirement sources.

    Subclasses implement ``fetch_requirements`` and ``validate_format``;
    diffing against a config and the connection probe are provided here.
    """

    @abstractmethod
    def fetch_requirements(self) -> list[BiosRequirement]:
        """Return the BIOS requirements currently declared by the source."""
        ...

    def compare_with_config(self, config: dict) -> ChangeSet:
        """Diff freshly scraped requirements against a platform config.

        Entries are matched on ``(system id, file name)``.  A matched entry
        counts as modified when both sides carry a SHA-1 and they differ, or
        failing that, when both carry an MD5 and they differ.
        """
        scraped = self.fetch_requirements()
        changes = ChangeSet()

        existing = {
            (sys_id, entry["name"]): entry
            for sys_id, system in config.get("systems", {}).items()
            for entry in system.get("files", [])
        }
        scraped_map = {(req.system, req.name): req for req in scraped}

        for key, req in scraped_map.items():
            if key not in existing:
                changes.added.append(req)
                continue

            current = existing[key]
            old_sha1 = current.get("sha1")
            old_md5 = current.get("md5")

            if req.sha1 and old_sha1 and req.sha1 != old_sha1:
                previous = BiosRequirement(
                    name=current["name"],
                    system=key[0],
                    sha1=old_sha1,
                    md5=old_md5,
                )
            elif req.md5 and old_md5 and req.md5 != old_md5:
                # Only the MD5 is carried over on this path.
                previous = BiosRequirement(
                    name=current["name"],
                    system=key[0],
                    md5=old_md5,
                )
            else:
                continue

            changes.modified.append((previous, req))

        for key, entry in existing.items():
            if key in scraped_map:
                continue
            changes.removed.append(BiosRequirement(
                name=entry["name"],
                system=key[0],
                sha1=entry.get("sha1"),
                md5=entry.get("md5"),
            ))

        return changes

    def test_connection(self) -> bool:
        """Return True if ``fetch_requirements`` completes without raising."""
        try:
            self.fetch_requirements()
        except Exception:
            return False
        return True

    @abstractmethod
    def validate_format(self, raw_data: str) -> bool:
        """Return False when *raw_data* no longer has the expected format."""
        ...
|
||||
|
||||
|
||||
def fetch_github_latest_version(repo: str) -> str | None:
    """Fetch the latest release version tag from a GitHub repo.

    Args:
        repo: Repository in ``owner/name`` form.

    Returns:
        The release's ``tag_name`` (``""`` when the field is absent), or
        ``None`` when the request fails or the body is not valid JSON.
    """
    url = f"https://api.github.com/repos/{repo}/releases/latest"
    req = urllib.request.Request(url, headers={
        "User-Agent": "retrobios-scraper/1.0",
        "Accept": "application/vnd.github.v3+json",
    })
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
    except (OSError, ValueError):
        # URLError and HTTPError are OSError subclasses.  A timeout while
        # reading the body is raised as a plain OSError subclass rather than
        # a URLError, so catching only URLError let it escape this
        # best-effort lookup.  JSON and decoding errors are ValueErrors.
        return None
    return data.get("tag_name", "")
|
||||
|
||||
|
||||
def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None:
    """Fetch the most recent matching tag from a GitHub repo.

    Args:
        repo: Repository in ``owner/name`` form.
        prefix: When non-empty, prefer the first listed tag whose name starts
            with this string.

    Returns:
        The first listed tag name that matches *prefix*.  If none of the
        listed tags matches, the first listed tag is returned instead.
        ``None`` when the list is empty, the request fails, or the body is
        not valid JSON.
    """
    url = f"https://api.github.com/repos/{repo}/tags?per_page=50"
    req = urllib.request.Request(url, headers={
        "User-Agent": "retrobios-scraper/1.0",
        "Accept": "application/vnd.github.v3+json",
    })
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            tags = json.loads(resp.read())
    except (OSError, ValueError):
        # OSError covers URLError/HTTPError and also timeouts raised while
        # reading the body, which are not wrapped in URLError.  ValueError
        # covers JSON and decoding errors.
        return None

    for tag in tags:
        name = tag["name"]
        if not prefix or name.startswith(prefix):
            return name

    # Nothing matched the prefix: fall back to the first listed tag.
    return tags[0]["name"] if tags else None
|
||||
315
scripts/scraper/batocera_scraper.py
Normal file
315
scripts/scraper/batocera_scraper.py
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Scraper for Batocera batocera-systems.
|
||||
|
||||
Source: https://github.com/batocera-linux/batocera.linux/.../batocera-systems
|
||||
Format: Python dict with systems -> biosFiles
|
||||
Hash: MD5 primary
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag
|
||||
|
||||
PLATFORM_NAME = "batocera"
|
||||
|
||||
SOURCE_URL = (
|
||||
"https://raw.githubusercontent.com/batocera-linux/batocera.linux/"
|
||||
"master/package/batocera/core/batocera-scripts/scripts/batocera-systems"
|
||||
)
|
||||
|
||||
# Batocera system key -> slug used in the generated platform config.
# Keys missing from this table are passed through unchanged by the scraper.
SYSTEM_SLUG_MAP = dict([
    ("atari800", "atari-400-800"),
    ("atari5200", "atari-5200"),
    ("atarist", "atari-st"),
    ("lynx", "atari-lynx"),
    ("3do", "3do"),
    ("amiga", "commodore-amiga"),
    ("amiga600", "commodore-amiga"),
    ("amiga1200", "commodore-amiga"),
    ("amigacd32", "commodore-amiga"),
    ("amigacdtv", "commodore-amiga"),
    ("c128", "commodore-c128"),
    ("colecovision", "coleco-colecovision"),
    ("dreamcast", "sega-dreamcast"),
    ("naomi", "sega-dreamcast-arcade"),
    ("naomi2", "sega-dreamcast-arcade"),
    ("atomiswave", "sega-dreamcast-arcade"),
    ("fds", "nintendo-fds"),
    ("gamecube", "nintendo-gamecube"),
    ("gb", "nintendo-gb"),
    ("gba", "nintendo-gba"),
    ("gbc", "nintendo-gbc"),
    ("nds", "nintendo-ds"),
    ("n64dd", "nintendo-64dd"),
    ("satellaview", "nintendo-satellaview"),
    ("sgb", "nintendo-sgb"),
    ("snes", "nintendo-snes"),
    ("channelf", "fairchild-channel-f"),
    ("intellivision", "mattel-intellivision"),
    ("msx", "microsoft-msx"),
    ("msx1", "microsoft-msx"),
    ("msx2", "microsoft-msx"),
    ("msxturbor", "microsoft-msx"),
    ("neogeo", "snk-neogeo"),
    ("neogeocd", "snk-neogeo-cd"),
    ("odyssey2", "magnavox-odyssey2"),
    ("pcengine", "nec-pc-engine"),
    ("pcenginecd", "nec-pc-engine"),
    ("supergrafx", "nec-pc-engine"),
    ("pc88", "nec-pc-88"),
    ("pc98", "nec-pc-98"),
    ("pcfx", "nec-pc-fx"),
    ("psx", "sony-playstation"),
    ("ps2", "sony-playstation-2"),
    ("psp", "sony-psp"),
    ("saturn", "sega-saturn"),
    ("segacd", "sega-mega-cd"),
    ("mastersystem", "sega-master-system"),
    ("megadrive", "sega-mega-drive"),
    ("gamegear", "sega-game-gear"),
    ("x1", "sharp-x1"),
    ("x68000", "sharp-x68000"),
    ("zxspectrum", "sinclair-zx-spectrum"),
    ("scummvm", "scummvm"),
    ("doom", "doom"),
    ("macintosh", "apple-macintosh-ii"),
    ("dos", "dos"),
    ("videopac", "philips-videopac"),
    ("pokemini", "nintendo-pokemon-mini"),
])
|
||||
|
||||
|
||||
class Scraper(BaseScraper):
    """Scraper for the ``systems`` dict in Batocera's batocera-systems script."""

    def __init__(self, url: str = SOURCE_URL):
        """Remember the source URL; nothing is downloaded until first use."""
        self.url = url
        self._raw_data: str | None = None

    def _fetch_raw(self) -> str:
        """Download the script text, caching it after the first success.

        Raises:
            ConnectionError: If the download fails.
        """
        if self._raw_data is not None:
            return self._raw_data

        req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                self._raw_data = resp.read().decode("utf-8")
        except OSError as e:
            # OSError includes URLError and also timeouts raised while
            # reading the body, which are not wrapped in URLError.
            raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e
        return self._raw_data

    @staticmethod
    def _slice_dict_literal(raw: str, start: int) -> str:
        """Return the brace-balanced literal starting at ``raw[start]``.

        ``#`` comments are removed, with their newline kept.  Braces and
        ``#`` inside quoted strings are ignored, and a backslash escapes the
        next character, so values such as ``"a#b}"`` cannot end the literal
        early or lose text.  A string is considered closed at end of line.
        If the braces never balance, everything up to the end of *raw* is
        returned and the caller's parse step reports the error.
        """
        kept = []
        depth = 0
        quote = None
        end = len(raw)
        i = start
        while i < end:
            ch = raw[i]
            if quote is not None:
                kept.append(ch)
                if ch == "\\" and i + 1 < end:
                    # Copy the escaped character verbatim.
                    i += 1
                    kept.append(raw[i])
                elif ch == quote or ch == "\n":
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
                kept.append(ch)
            elif ch == "#":
                # Skip the comment but let the loop re-visit the newline.
                while i < end and raw[i] != "\n":
                    i += 1
                continue
            else:
                kept.append(ch)
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        break
            i += 1
        return "".join(kept)

    def _extract_systems_dict(self, raw: str) -> dict:
        """Extract and parse the 'systems' dict from the Python source via ast.literal_eval.

        Raises:
            ValueError: If ``systems = {`` is not found or the literal cannot
                be parsed.
        """
        match = re.search(r'^systems\s*=\s*\{', raw, re.MULTILINE)
        if not match:
            raise ValueError("Could not find 'systems = {' in batocera-systems")

        # The pattern ends with the opening brace, so it is the last matched char.
        clean_dict_str = self._slice_dict_literal(raw, match.end() - 1)
        clean_dict_str = clean_dict_str.replace("OrderedDict(", "dict(")

        try:
            return ast.literal_eval(clean_dict_str)
        except (SyntaxError, ValueError) as e:
            raise ValueError(f"Failed to parse systems dict: {e}") from e

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Parse batocera-systems and return BIOS requirements.

        Raises:
            ConnectionError: If the source cannot be downloaded.
            ValueError: If the source fails validation or parsing.
        """
        raw = self._fetch_raw()

        if not self.validate_format(raw):
            raise ValueError("batocera-systems format validation failed")

        systems = self._extract_systems_dict(raw)
        requirements = []

        for sys_key, sys_data in systems.items():
            system_slug = SYSTEM_SLUG_MAP.get(sys_key, sys_key)

            for bios in sys_data.get("biosFiles", []):
                file_path = bios.get("file", "")
                md5 = bios.get("md5", "")
                zipped_file = bios.get("zippedFile", "")

                # Destinations are stored relative to the BIOS directory.
                if file_path.startswith("bios/"):
                    file_path = file_path[5:]

                requirements.append(BiosRequirement(
                    name=file_path.rsplit("/", 1)[-1],
                    system=system_slug,
                    md5=md5 or None,
                    destination=file_path,
                    required=True,
                    zipped_file=zipped_file or None,
                ))

        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Validate batocera-systems format.

        Requires a line starting with ``systems = {`` plus the ``biosFiles``,
        ``"md5"`` and ``"file"`` markers.
        """
        has_systems = "systems" in raw_data and "biosFiles" in raw_data
        has_dict = re.search(r'^systems\s*=\s*\{', raw_data, re.MULTILINE) is not None
        has_md5 = '"md5"' in raw_data
        has_file = '"file"' in raw_data
        return has_systems and has_dict and has_md5 and has_file

    @staticmethod
    def _latest_batocera_version() -> str:
        """Return the highest numeric ``batocera-<N>`` tag, or ``""``.

        The maximum is taken numerically instead of trusting the order in
        which the tags API lists them.  This is best effort: on any failure
        the version is reported as an empty string.
        """
        import json  # local import: this module does not import json at top level

        url = "https://api.github.com/repos/batocera-linux/batocera.linux/tags?per_page=50"
        req = urllib.request.Request(url, headers={
            "User-Agent": "retrobios-scraper/1.0",
            "Accept": "application/vnd.github.v3+json",
        })
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                tags = json.loads(resp.read())
            versions = []
            for tag in tags:
                name = tag["name"]
                if name.startswith("batocera-"):
                    number = name.replace("batocera-", "")
                    if number.isdigit():
                        versions.append(int(number))
        except Exception:
            # Deliberately broad: a missing version must not abort generation.
            return ""
        return str(max(versions)) if versions else ""

    def generate_platform_yaml(self) -> dict:
        """Generate a platform YAML config dict from scraped data."""
        systems = {}
        for req in self.fetch_requirements():
            entry = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.md5:
                entry["md5"] = req.md5
            if req.zipped_file:
                entry["zipped_file"] = req.zipped_file

            systems.setdefault(req.system, {"files": []})["files"].append(entry)

        return {
            "platform": "Batocera",
            "version": self._latest_batocera_version(),
            "homepage": "https://batocera.org",
            "source": SOURCE_URL,
            "base_destination": "bios",
            "hash_type": "md5",
            "verification_mode": "md5",
            "systems": systems,
        }
|
||||
|
||||
|
||||
def main():
    """CLI entry point for testing.

    ``--dry-run`` lists the scraped files per system, ``--json`` prints the
    generated config as JSON, and ``--output`` writes it as YAML.  With no
    option only a one-line summary is printed.  Exits with status 1 when
    scraping fails or PyYAML is missing for ``--output``.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Scrape batocera-systems")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--output", "-o")
    args = parser.parse_args()

    scraper = Scraper()

    try:
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Grouped once and shared by the dry-run listing and the summary line.
    by_system = {}
    for req in reqs:
        by_system.setdefault(req.system, []).append(req)

    if args.dry_run:
        for system, files in sorted(by_system.items()):
            print(f"\n{system} ({len(files)} files):")
            for f in files:
                hash_info = f.md5[:12] if f.md5 else "no-hash"
                print(f" {f.name} ({hash_info}...)")

        print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
        return

    if args.json:
        config = scraper.generate_platform_yaml()
        print(json.dumps(config, indent=2))
        return

    if args.output:
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required", file=sys.stderr)
            sys.exit(1)

        config = scraper.generate_platform_yaml()
        # allow_unicode=True emits non-ASCII characters as-is, so the file
        # must not depend on the locale's default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        print(f"Written to {args.output}")
        return

    print(f"Scraped {len(reqs)} BIOS files across {len(by_system)} systems")
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
334
scripts/scraper/coreinfo_scraper.py
Normal file
334
scripts/scraper/coreinfo_scraper.py
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Scraper for libretro-core-info firmware declarations.
|
||||
|
||||
Source: https://github.com/libretro/libretro-core-info
|
||||
Format: .info files with firmware0_path, firmware0_desc, firmware0_opt patterns
|
||||
Hash: From notes field (MD5) or cross-referenced with System.dat
|
||||
|
||||
Complements libretro_scraper (System.dat) with:
|
||||
- Exact firmware paths per core
|
||||
- Required vs optional status
|
||||
- Firmware for cores not covered by System.dat
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
|
||||
try:
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
except ImportError:
|
||||
# Allow running directly: python scripts/scraper/coreinfo_scraper.py
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from scraper.base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
|
||||
PLATFORM_NAME = "libretro_coreinfo"
|
||||
|
||||
GITHUB_API = "https://api.github.com/repos/libretro/libretro-core-info"
|
||||
RAW_BASE = "https://raw.githubusercontent.com/libretro/libretro-core-info/master"
|
||||
|
||||
# libretro core name (the .info file name without "_libretro.info") -> system slug.
# Cores missing from this table fall back to their own name as the system.
CORE_SYSTEM_MAP = dict([
    ("pcsx_rearmed", "sony-playstation"),
    ("mednafen_psx", "sony-playstation"),
    ("mednafen_psx_hw", "sony-playstation"),
    ("swanstation", "sony-playstation"),
    ("duckstation", "sony-playstation"),
    ("pcsx1", "sony-playstation"),
    ("lrps2", "sony-playstation-2"),
    ("play", "sony-playstation-2"),
    ("ppsspp", "sony-psp"),
    ("fbneo", "arcade"),
    ("mame", "arcade"),
    ("mame2003", "arcade"),
    ("mame2003_plus", "arcade"),
    ("dolphin", "nintendo-gamecube"),
    ("melonds", "nintendo-ds"),
    ("melonds_ds", "nintendo-ds"),
    ("desmume", "nintendo-ds"),
    ("mgba", "nintendo-gba"),
    ("vba_next", "nintendo-gba"),
    ("gpsp", "nintendo-gba"),
    ("gambatte", "nintendo-gb"),
    ("sameboy", "nintendo-gb"),
    ("gearboy", "nintendo-gb"),
    ("bsnes", "nintendo-snes"),
    ("snes9x", "nintendo-snes"),
    ("higan_sfc", "nintendo-snes"),
    ("mesen-s", "nintendo-snes"),
    ("nestopia", "nintendo-nes"),
    ("fceumm", "nintendo-nes"),
    ("mesen", "nintendo-nes"),
    ("mupen64plus_next", "nintendo-64"),
    ("parallel_n64", "nintendo-64"),
    ("flycast", "sega-dreamcast"),
    ("reicast", "sega-dreamcast"),
    ("kronos", "sega-saturn"),
    ("mednafen_saturn", "sega-saturn"),
    ("yabause", "sega-saturn"),
    ("genesis_plus_gx", "sega-mega-drive"),
    ("picodrive", "sega-mega-drive"),
    ("mednafen_pce", "nec-pc-engine"),
    ("mednafen_pce_fast", "nec-pc-engine"),
    ("mednafen_pcfx", "nec-pc-fx"),
    ("mednafen_ngp", "snk-neogeo-pocket"),
    ("mednafen_lynx", "atari-lynx"),
    ("handy", "atari-lynx"),
    ("hatari", "atari-st"),
    ("puae", "commodore-amiga"),
    ("fuse", "sinclair-zx-spectrum"),
    ("dosbox_pure", "dos"),
    ("dosbox_svn", "dos"),
    ("scummvm", "scummvm"),
    ("opera", "3do"),
    ("4do", "3do"),
    ("ep128emu", "enterprise-64-128"),
    ("freej2me", "j2me"),
    ("squirreljme", "j2me"),
    ("numero", "ti-83"),
    ("neocd", "snk-neogeo-cd"),
    ("vice_x64", "commodore-c64"),
    ("vice_x128", "commodore-c128"),
    ("cap32", "amstrad-cpc"),
    ("o2em", "magnavox-odyssey2"),
    ("vecx", "vectrex"),
    ("virtualjaguar", "atari-jaguar"),
    ("prosystem", "atari-7800"),
    ("stella", "atari-2600"),
    ("a5200", "atari-5200"),
    ("bluemsx", "microsoft-msx"),
    ("fmsx", "microsoft-msx"),
    ("px68k", "sharp-x68000"),
    ("x1", "sharp-x1"),
    ("quasi88", "nec-pc-88"),
    ("np2kai", "nec-pc-98"),
    ("theodore", "thomson"),
    ("81", "sinclair-zx81"),
    ("crocods", "amstrad-cpc"),
    ("dinothawr", "dinothawr"),
])
|
||||
|
||||
|
||||
def _parse_info_file(content: str) -> dict:
|
||||
"""Parse a .info file into a dictionary."""
|
||||
result = {}
|
||||
for line in content.split("\n"):
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
match = re.match(r'^(\w+)\s*=\s*"?(.*?)"?\s*$', line)
|
||||
if match:
|
||||
key, value = match.group(1), match.group(2)
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
|
||||
_SKIP_EXTENSIONS = {".dll", ".so", ".dylib", ".exe", ".bat", ".sh"}
|
||||
_DIRECTORY_MARKERS = {"folder", "directory", "dir"}
|
||||
|
||||
|
||||
def _is_directory_ref(path: str, desc: str) -> bool:
    """Tell whether a firmware entry points at a directory instead of a file.

    True when the last path component has no dot, or when the lower-cased
    description contains any of the ``_DIRECTORY_MARKERS`` substrings.
    """
    leaf = path.rsplit("/", 1)[-1]
    if "." not in leaf:
        return True

    lowered = desc.lower()
    for marker in _DIRECTORY_MARKERS:
        if marker in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def _is_native_lib(path: str) -> bool:
    """Tell whether *path* ends in one of the ``_SKIP_EXTENSIONS`` suffixes.

    The suffix is everything after the last dot, compared case-insensitively;
    a path without a dot never matches.
    """
    _head, dot, tail = path.rpartition(".")
    suffix = "." + tail if dot else ""
    return suffix.lower() in _SKIP_EXTENSIONS
|
||||
|
||||
|
||||
def _extract_firmware(info: dict) -> list[dict]:
    """Collect the ``firmwareN_*`` entries of a parsed .info file.

    ``firmware_count`` gives the number of slots to read; a non-numeric count
    yields an empty list.  Slots with no path, directory references and
    native libraries are left out.  Each result has ``path``, ``desc`` and a
    boolean ``optional`` (true only when ``firmwareN_opt`` is "true",
    case-insensitively).
    """
    try:
        declared = int(info.get("firmware_count", "0"))
    except ValueError:
        return []

    entries = []
    for index in range(declared):
        slot = f"firmware{index}"
        path = info.get(f"{slot}_path", "")
        desc = info.get(f"{slot}_desc", "")
        opt = info.get(f"{slot}_opt", "false")

        if not path or _is_directory_ref(path, desc) or _is_native_lib(path):
            continue

        entries.append({
            "path": path,
            "desc": desc,
            "optional": opt.lower() == "true",
        })

    return entries
|
||||
|
||||
|
||||
def _extract_md5_from_notes(info: dict) -> dict[str, str]:
|
||||
"""Extract MD5 hashes from the notes field."""
|
||||
notes = info.get("notes", "")
|
||||
md5_map = {}
|
||||
|
||||
for match in re.finditer(r'\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})', notes):
|
||||
filename = match.group(1).strip()
|
||||
md5 = match.group(2)
|
||||
md5_map[filename] = md5
|
||||
|
||||
return md5_map
|
||||
|
||||
|
||||
class Scraper(BaseScraper):
    """Scraper for libretro-core-info firmware declarations.

    Lists every ``*_libretro.info`` file through the GitHub tree API,
    downloads each one and converts the firmware entries returned by
    ``_extract_firmware`` into ``BiosRequirement`` objects.
    """

    # Basenames that are too generic to identify a file on their own; such
    # files keep their full path as name to avoid
    # SGB1.sfc/program.rom vs SGB2.sfc/program.rom collisions.
    _GENERIC_NAMES = frozenset(
        {"program.rom", "data.rom", "boot.rom", "bios.bin", "firmware.bin"}
    )

    def __init__(self):
        # Not read by any method of this class as visible here.
        self._info_files: dict[str, dict] | None = None

    def _fetch_info_list(self) -> list[str]:
        """Fetch the list of all .info files from the GitHub API.

        Returns:
            Repository paths ending in ``_libretro.info``.

        Raises:
            ConnectionError: The request failed or the reply was not JSON.
        """
        # Use the tree API to get all files at once
        url = f"{GITHUB_API}/git/trees/master?recursive=1"
        try:
            req = urllib.request.Request(url, headers={
                "User-Agent": "retrobios-scraper/1.0",
                "Accept": "application/vnd.github.v3+json",
            })
            with urllib.request.urlopen(req, timeout=30) as resp:
                data = json.loads(resp.read())

            return [
                item["path"] for item in data.get("tree", [])
                if item["path"].endswith("_libretro.info")
            ]
        except (urllib.error.URLError, json.JSONDecodeError) as e:
            raise ConnectionError(f"Failed to list core-info files: {e}") from e

    def _fetch_info_file(self, filename: str) -> dict:
        """Fetch and parse a single .info file.

        Best effort: a file that cannot be downloaded or decoded as UTF-8
        yields an empty dict so one bad file does not abort the whole scrape.
        """
        url = f"{RAW_BASE}/{filename}"
        try:
            req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
            with urllib.request.urlopen(req, timeout=15) as resp:
                content = resp.read().decode("utf-8")
            return _parse_info_file(content)
        except (urllib.error.URLError, UnicodeDecodeError):
            # HTTPError is a subclass of URLError, so it is covered as well.
            return {}

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Fetch firmware requirements from all core .info files.

        Each firmware path is reported once; the first core declaring it wins.

        Raises:
            ConnectionError: The list of .info files could not be retrieved.
        """
        requirements = []
        seen = set()

        for filename in self._fetch_info_list():
            info = self._fetch_info_file(filename)
            firmware_list = _extract_firmware(info)
            if not firmware_list:
                continue

            core_name = filename.replace("_libretro.info", "")
            # Cores without an explicit mapping use their own name as system.
            system = CORE_SYSTEM_MAP.get(core_name, core_name)
            md5_map = _extract_md5_from_notes(info)

            for fw in firmware_list:
                path = fw["path"]
                if path in seen:
                    continue
                seen.add(path)

                basename = path.rsplit("/", 1)[-1]
                name = path if basename.lower() in self._GENERIC_NAMES else basename

                requirements.append(BiosRequirement(
                    name=name,
                    system=system,
                    md5=md5_map.get(basename),
                    destination=path,
                    required=not fw["optional"],
                ))

        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Validate .info file format."""
        return "firmware_count" in raw_data or "display_name" in raw_data

    def fetch_metadata(self) -> dict:
        """Fetch version info from GitHub."""
        version = fetch_github_latest_version("libretro/libretro-core-info")
        return {"version": version or ""}
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the core-info scraper.

    Without ``--compare-db`` a per-system summary of required and optional
    firmware is printed. With ``--compare-db PATH`` the scraped requirements
    are looked up in the database's name and MD5 indexes and the ones found
    in neither are listed. Exits with status 1 when scraping fails.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Scrape libretro-core-info firmware requirements")
    cli.add_argument("--dry-run", action="store_true")
    cli.add_argument("--compare-db", help="Compare against database.json")
    args = cli.parse_args()

    scraper = Scraper()
    try:
        reqs = scraper.fetch_requirements()
    except ConnectionError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.compare_db:
        import json as _json

        with open(args.compare_db) as handle:
            db = _json.load(handle)

        found = 0
        missing = []
        for req in reqs:
            # Name match first; the MD5 index is consulted only as a fallback.
            known = req.name in db["indexes"]["by_name"] or (
                req.md5 and req.md5 in db["indexes"]["by_md5"]
            )
            if known:
                found += 1
            else:
                missing.append(req)

        print(f"Core-info: {len(reqs)} unique firmware paths")
        print(f"Found in DB: {found}")
        print(f"Missing: {len(missing)}")
        if missing:
            print("\nMissing files:")
            for req in sorted(missing, key=lambda item: item.system):
                if req.required:
                    opt = "(REQUIRED)"
                else:
                    opt = "(optional)"
                print(f" {req.system}: {req.destination} {opt}")
        return

    from collections import defaultdict

    by_system = defaultdict(list)
    for req in reqs:
        by_system[req.system].append(req)

    print(f"Total: {len(reqs)} unique firmware paths across {len(by_system)} systems")
    for sys_name, files in sorted(by_system.items()):
        req_count = len([f for f in files if f.required])
        opt_count = len(files) - req_count
        print(f" {sys_name}: {req_count} required, {opt_count} optional")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
167
scripts/scraper/dat_parser.py
Normal file
167
scripts/scraper/dat_parser.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Parser for clrmamepro DAT format.
|
||||
|
||||
Parses files like libretro's System.dat which uses the format:
|
||||
game (
|
||||
name "System"
|
||||
comment "Platform Name"
|
||||
rom ( name filename size 12345 crc ABCD1234 md5 ... sha1 ... )
|
||||
)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class DatRom:
    """One ``rom ( ... )`` record read from a clrmamepro DAT file."""

    name: str    # value of the "name" field
    size: int    # value of the "size" field
    crc32: str   # value of the "crc" field
    md5: str     # value of the "md5" field
    sha1: str    # value of the "sha1" field
    system: str  # From the preceding comment line
|
||||
|
||||
|
||||
@dataclass
class DatMetadata:
    """Fields of the ``clrmamepro ( ... )`` header block of a DAT file.

    Every field defaults to an empty string.
    """

    name: str = ""
    version: str = ""
    description: str = ""
    author: str = ""
    homepage: str = ""
    url: str = ""
|
||||
|
||||
|
||||
def parse_dat(content: str) -> list[DatRom]:
    """Parse clrmamepro DAT text into a list of ``DatRom`` records.

    The text is scanned line by line. A ``comment`` line sets the system
    assigned to the ``rom`` lines that follow it, except for the two generic
    comments that describe the DAT itself. Lines are stripped first, so
    mixed tab/space indentation is irrelevant. Quoted names with spaces,
    names containing a path and unquoted names are all accepted.

    Args:
        content: Full text of the DAT file.

    Returns:
        The records in file order; ``rom`` lines that cannot be parsed are
        dropped.
    """
    # Comments describing the DAT as a whole rather than a platform.
    generic_comments = ("System", "System, firmware, and BIOS files used by libretro cores.")

    records = []
    system = ""

    for raw_line in content.split("\n"):
        text = raw_line.strip()

        if text.startswith("comment "):
            label = text[8:].strip().strip('"')
            if label not in generic_comments:
                system = label
            continue

        if not text.startswith(("rom (", "rom(")):
            continue

        record = _parse_rom_line(text, system)
        if record:
            records.append(record)

    return records
|
||||
|
||||
|
||||
def parse_dat_metadata(content: str) -> DatMetadata:
    """Read the ``clrmamepro`` header block into a ``DatMetadata``.

    Scanning starts at the first line beginning with ``clrmamepro`` and stops
    at the first line consisting solely of ``)``. Fields missing from the
    header keep their empty-string default.
    """
    header_keys = ("name", "version", "description", "author", "homepage", "url")
    result = DatMetadata()
    inside = False

    for raw_line in content.split("\n"):
        text = raw_line.strip()
        if text.startswith("clrmamepro"):
            inside = True
        elif not inside:
            continue
        elif text == ")":
            break
        else:
            for key in header_keys:
                prefix = f"{key} "
                if text.startswith(prefix):
                    setattr(result, key, text[len(prefix):].strip().strip('"'))

    return result
|
||||
|
||||
|
||||
def _parse_rom_line(line: str, system: str) -> DatRom | None:
    """Parse a single ``rom ( ... )`` line.

    Args:
        line: The stripped line, starting with ``rom (`` or ``rom(``.
        system: System label to attach to the record.

    Returns:
        A ``DatRom``, or ``None`` when the parentheses are missing or
        misordered, or when no ``name`` field is present. A missing or
        non-numeric ``size`` becomes 0. All three hashes are lower-cased
        (previously only the CRC was).
    """
    start = line.find("(")
    # rfind because filenames may contain parentheses like "(E).rom"
    end = line.rfind(")")
    if start == -1 or end == -1 or end <= start:
        return None

    tokens = _tokenize(line[start + 1:end].strip())
    # Tokens alternate key, value; a trailing unpaired token is ignored.
    fields = dict(zip(tokens[::2], tokens[1::2]))

    name = fields.get("name", "")
    if not name:
        return None

    try:
        size = int(fields.get("size", "0"))
    except ValueError:
        size = 0

    return DatRom(
        name=name,
        size=size,
        crc32=fields.get("crc", "").lower(),
        md5=fields.get("md5", "").lower(),
        sha1=fields.get("sha1", "").lower(),
        system=system,
    )
|
||||
|
||||
|
||||
def _tokenize(content: str) -> list[str]:
|
||||
"""Tokenize DAT content, handling quoted strings."""
|
||||
tokens = []
|
||||
i = 0
|
||||
while i < len(content):
|
||||
while i < len(content) and content[i] in (" ", "\t"):
|
||||
i += 1
|
||||
if i >= len(content):
|
||||
break
|
||||
|
||||
if content[i] == '"':
|
||||
i += 1
|
||||
start = i
|
||||
while i < len(content) and content[i] != '"':
|
||||
i += 1
|
||||
tokens.append(content[start:i])
|
||||
i += 1
|
||||
else:
|
||||
start = i
|
||||
while i < len(content) and content[i] not in (" ", "\t"):
|
||||
i += 1
|
||||
tokens.append(content[start:i])
|
||||
|
||||
return tokens
|
||||
|
||||
|
||||
def validate_dat_format(content: str) -> bool:
    """Check that text looks like a clrmamepro DAT file.

    All of the following must hold:
    - ``clrmamepro`` occurs within the first 500 characters
    - a ``game (`` block is present
    - at least one ``rom (`` or ``rom(`` entry is present
    - at least one quoted ``comment "`` is present
    """
    if "clrmamepro" not in content[:500]:
        return False
    if "game (" not in content:
        return False
    if "rom (" not in content and "rom(" not in content:
        return False
    return 'comment "' in content
|
||||
323
scripts/scraper/libretro_scraper.py
Normal file
323
scripts/scraper/libretro_scraper.py
Normal file
|
|
@ -0,0 +1,323 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Scraper for libretro System.dat (RetroArch, Lakka).
|
||||
|
||||
Source: https://github.com/libretro/libretro-database/blob/master/dat/System.dat
|
||||
Format: clrmamepro DAT
|
||||
Hash: SHA1 primary
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
|
||||
|
||||
PLATFORM_NAME = "libretro"
|
||||
|
||||
SOURCE_URL = (
|
||||
"https://raw.githubusercontent.com/libretro/libretro-database/"
|
||||
"master/dat/System.dat"
|
||||
)
|
||||
|
||||
SYSTEM_SLUG_MAP = {
|
||||
"3DO Company, The - 3DO": "3do",
|
||||
"Amstrad - CPC": "amstrad-cpc",
|
||||
"Arcade": "arcade",
|
||||
"Atari - 400-800": "atari-400-800",
|
||||
"Atari - 5200": "atari-5200",
|
||||
"Atari - 7800": "atari-7800",
|
||||
"Atari - Lynx": "atari-lynx",
|
||||
"Atari - ST": "atari-st",
|
||||
"Coleco - ColecoVision": "coleco-colecovision",
|
||||
"Commodore - Amiga": "commodore-amiga",
|
||||
"Commodore - C128": "commodore-c128",
|
||||
"Dinothawr": "dinothawr",
|
||||
"DOS": "dos",
|
||||
"EPOCH/YENO Super Cassette Vision": "epoch-scv",
|
||||
"Elektronika - BK-0010/BK-0011(M)": "elektronika-bk",
|
||||
"Enterprise - 64/128": "enterprise-64-128",
|
||||
"Fairchild Channel F": "fairchild-channel-f",
|
||||
"Id Software - Doom": "doom",
|
||||
"J2ME": "j2me",
|
||||
"MacII": "apple-macintosh-ii",
|
||||
"Magnavox - Odyssey2": "magnavox-odyssey2",
|
||||
"Mattel - Intellivision": "mattel-intellivision",
|
||||
"Microsoft - MSX": "microsoft-msx",
|
||||
"NEC - PC Engine - TurboGrafx 16 - SuperGrafx": "nec-pc-engine",
|
||||
"NEC - PC-98": "nec-pc-98",
|
||||
"NEC - PC-FX": "nec-pc-fx",
|
||||
"Nintendo - Famicom Disk System": "nintendo-fds",
|
||||
"Nintendo - Game Boy Advance": "nintendo-gba",
|
||||
"Nintendo - GameCube": "nintendo-gamecube",
|
||||
"Nintendo - Gameboy": "nintendo-gb",
|
||||
"Nintendo - Gameboy Color": "nintendo-gbc",
|
||||
"Nintendo - Nintendo 64DD": "nintendo-64dd",
|
||||
"Nintendo - Nintendo DS": "nintendo-ds",
|
||||
"Nintendo - Nintendo Entertainment System": "nintendo-nes",
|
||||
"Nintendo - Pokemon Mini": "nintendo-pokemon-mini",
|
||||
"Nintendo - Satellaview": "nintendo-satellaview",
|
||||
"Nintendo - SuFami Turbo": "nintendo-sufami-turbo",
|
||||
"Nintendo - Super Game Boy": "nintendo-sgb",
|
||||
"Nintendo - Super Nintendo Entertainment System": "nintendo-snes",
|
||||
"Phillips - Videopac+": "philips-videopac",
|
||||
"SNK - NeoGeo CD": "snk-neogeo-cd",
|
||||
"ScummVM": "scummvm",
|
||||
"Sega - Dreamcast": "sega-dreamcast",
|
||||
"Sega - Dreamcast-based Arcade": "sega-dreamcast-arcade",
|
||||
"Sega - Game Gear": "sega-game-gear",
|
||||
"Sega - Master System - Mark III": "sega-master-system",
|
||||
"Sega - Mega CD - Sega CD": "sega-mega-cd",
|
||||
"Sega - Mega Drive - Genesis": "sega-mega-drive",
|
||||
"Sega - Saturn": "sega-saturn",
|
||||
"Sharp - X1": "sharp-x1",
|
||||
"Sharp - X68000": "sharp-x68000",
|
||||
"Sinclair - ZX Spectrum": "sinclair-zx-spectrum",
|
||||
"Sony - PlayStation": "sony-playstation",
|
||||
"Sony - PlayStation 2": "sony-playstation-2",
|
||||
"Sony - PlayStation Portable": "sony-psp",
|
||||
"Texas Instruments TI-83": "ti-83",
|
||||
"Videoton - TV Computer": "videoton-tvc",
|
||||
"Wolfenstein 3D": "wolfenstein-3d",
|
||||
}
|
||||
|
||||
|
||||
class Scraper(BaseScraper):
    """Scraper for libretro System.dat."""

    def __init__(self, url: str = SOURCE_URL):
        self.url = url
        # Downloaded DAT text, cached after the first successful fetch.
        self._raw_data: str | None = None

    def _fetch_raw(self) -> str:
        """Fetch raw DAT content from source URL.

        The text is downloaded once and served from the cache afterwards.

        Raises:
            ConnectionError: The download failed.
        """
        if self._raw_data is not None:
            return self._raw_data

        try:
            req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
            with urllib.request.urlopen(req, timeout=30) as resp:
                self._raw_data = resp.read().decode("utf-8")
            return self._raw_data
        except urllib.error.URLError as e:
            raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Parse System.dat and return BIOS requirements.

        Every entry is reported as required. Systems missing from
        ``SYSTEM_SLUG_MAP`` get a slug derived from the lower-cased comment
        with spaces replaced by dashes.

        Raises:
            ConnectionError: The download failed.
            ValueError: The content does not look like a clrmamepro DAT.
        """
        raw = self._fetch_raw()

        if not self.validate_format(raw):
            raise ValueError("System.dat format validation failed")

        requirements = []
        for rom in parse_dat(raw):
            system_slug = SYSTEM_SLUG_MAP.get(rom.system, rom.system.lower().replace(" ", "-"))

            requirements.append(BiosRequirement(
                # The name is the last path component; the destination keeps the full path.
                name=rom.name.rsplit("/", 1)[-1],
                system=system_slug,
                sha1=rom.sha1 or None,
                md5=rom.md5 or None,
                crc32=rom.crc32 or None,
                size=rom.size or None,
                destination=rom.name,
                required=True,
            ))

        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Validate System.dat format."""
        return validate_dat_format(raw_data)

    def fetch_metadata(self) -> dict:
        """Fetch version info from System.dat header and GitHub API."""
        raw = self._fetch_raw()
        meta = parse_dat_metadata(raw)

        retroarch_version = fetch_github_latest_version("libretro/RetroArch")
        db_version = fetch_github_latest_version("libretro/libretro-database")

        return {
            "dat_version": meta.version,
            "retroarch_version": retroarch_version,
            "db_version": db_version,
        }

    def _fetch_core_metadata(self) -> dict[str, dict]:
        """Fetch per-core metadata from libretro-core-info .info files.

        Best effort: any failure is reported on stderr and whatever was
        collected so far is returned. A single unreadable or malformed .info
        file is skipped instead of ending the whole collection.

        Returns:
            Mapping of system slug to ``core``, ``manufacturer``,
            ``display_name`` and ``docs``. The first core that declares
            firmware for a slug wins.
        """
        import json

        metadata = {}
        try:
            from .coreinfo_scraper import CORE_SYSTEM_MAP

            url = "https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
            req = urllib.request.Request(url, headers={
                "User-Agent": "retrobios-scraper/1.0",
                "Accept": "application/vnd.github.v3+json",
            })
            with urllib.request.urlopen(req, timeout=30) as resp:
                tree = json.loads(resp.read())

            info_files = [
                item["path"] for item in tree.get("tree", [])
                if item["path"].endswith("_libretro.info")
            ]

            for filename in info_files:
                core_name = filename.replace("_libretro.info", "")

                # Map core to our system slug. Cores that are unmapped, or
                # whose system is already described, cannot contribute
                # anything, so their .info file is not downloaded at all.
                system_slug = CORE_SYSTEM_MAP.get(core_name)
                if not system_slug or system_slug in metadata:
                    continue

                try:
                    info_url = f"https://raw.githubusercontent.com/libretro/libretro-core-info/master/{filename}"
                    req = urllib.request.Request(info_url, headers={"User-Agent": "retrobios-scraper/1.0"})
                    with urllib.request.urlopen(req, timeout=10) as resp:
                        content = resp.read().decode("utf-8")

                    info = {}
                    for line in content.split("\n"):
                        line = line.strip()
                        if " = " in line:
                            key, _, value = line.partition(" = ")
                            info[key.strip()] = value.strip().strip('"')

                    fw_count = int(info.get("firmware_count", "0"))
                except (OSError, ValueError):
                    # OSError covers URLError/HTTPError and timeouts;
                    # ValueError covers bad UTF-8 and a non-numeric count.
                    continue

                if fw_count == 0:
                    continue

                metadata[system_slug] = {
                    "core": core_name,
                    "manufacturer": info.get("manufacturer", ""),
                    "display_name": info.get("display_name", "") or info.get("systemname", ""),
                    "docs": f"https://docs.libretro.com/library/{core_name}/",
                }
        except Exception as exc:
            # Metadata is optional enrichment: report the problem but never
            # let it break platform generation.
            print(f"Warning: core-info metadata incomplete: {exc}", file=sys.stderr)

        return metadata

    def generate_platform_yaml(self) -> dict:
        """Generate a platform YAML config dict, merging System.dat with core-info metadata.

        Raises:
            ConnectionError: System.dat could not be downloaded.
            ValueError: System.dat failed format validation.
        """
        requirements = self.fetch_requirements()
        metadata = self.fetch_metadata()
        core_meta = self._fetch_core_metadata()

        systems = {}
        for req in requirements:
            if req.system not in systems:
                system_entry = {"files": []}
                cm = core_meta.get(req.system, {})
                # Only non-empty metadata values are copied into the entry.
                for key in ("core", "manufacturer", "docs"):
                    if cm.get(key):
                        system_entry[key] = cm[key]
                systems[req.system] = system_entry

            entry = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.sha1:
                entry["sha1"] = req.sha1
            if req.md5:
                entry["md5"] = req.md5
            if req.crc32:
                entry["crc32"] = req.crc32
            if req.size:
                entry["size"] = req.size

            systems[req.system]["files"].append(entry)

        return {
            "platform": "RetroArch",
            "version": metadata["retroarch_version"] or "",
            "dat_version": metadata["dat_version"] or "",
            "homepage": "https://www.retroarch.com",
            "source": "https://github.com/libretro/libretro-database/blob/master/dat/System.dat",
            "base_destination": "system",
            "hash_type": "sha1",
            "verification_mode": "existence",
            "systems": systems,
        }
|
||||
|
||||
|
||||
def main():
    """CLI entry point for testing.

    Modes, in order of precedence: ``--dry-run`` lists the scraped files per
    system, ``--json`` prints the platform config as JSON, ``--output FILE``
    writes it as UTF-8 YAML, and with no option a one-line summary is
    printed. Exits with status 1 when scraping fails or PyYAML is missing.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Scrape libretro System.dat")
    parser.add_argument("--dry-run", action="store_true", help="Just show what would be scraped")
    parser.add_argument("--output", "-o", help="Output YAML file")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    scraper = Scraper()

    try:
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Grouped once; used by both the dry-run listing and the summary.
    by_system = {}
    for req in reqs:
        by_system.setdefault(req.system, []).append(req)

    if args.dry_run:
        for system, files in sorted(by_system.items()):
            print(f"\n{system} ({len(files)} files):")
            for f in files:
                hash_info = f.sha1[:12] if f.sha1 else f.md5[:12] if f.md5 else "no-hash"
                print(f" {f.name} ({f.size or '?'} bytes, {hash_info}...)")

        print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
        return

    if args.json:
        config = scraper.generate_platform_yaml()
        print(json.dumps(config, indent=2))
        return

    if args.output:
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
            sys.exit(1)

        config = scraper.generate_platform_yaml()
        # allow_unicode=True emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 rather than with the platform default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        print(f"Written to {args.output}")
        return

    # The requirements fetched above are reused; no second fetch is needed.
    print(f"Scraped {len(reqs)} BIOS files across {len(by_system)} systems")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
285
scripts/scraper/recalbox_scraper.py
Normal file
285
scripts/scraper/recalbox_scraper.py
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Scraper for Recalbox BIOS requirements.
|
||||
|
||||
Source: https://gitlab.com/recalbox/recalbox/-/raw/master/board/recalbox/fsoverlay/recalbox/share_init/system/.emulationstation/es_bios.xml
|
||||
Format: XML (es_bios.xml)
|
||||
Hash: MD5 (multiple valid hashes per entry, comma-separated)
|
||||
|
||||
Recalbox verification logic:
|
||||
- Checks MD5 of file on disk against list of valid hashes
|
||||
- Multiple MD5s accepted per BIOS (different ROM revisions)
|
||||
- Alternate file paths (pipe-separated)
|
||||
- hashMatchMandatory flag: if false, wrong hash = warning (YELLOW) not error (RED)
|
||||
- ZIP files get composite MD5 calculation
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag
|
||||
|
||||
PLATFORM_NAME = "recalbox"
|
||||
|
||||
SOURCE_URL = (
|
||||
"https://gitlab.com/recalbox/recalbox/-/raw/master/"
|
||||
"board/recalbox/fsoverlay/recalbox/share_init/system/"
|
||||
".emulationstation/es_bios.xml"
|
||||
)
|
||||
|
||||
SYSTEM_SLUG_MAP = {
|
||||
"3do": "3do",
|
||||
"amiga600": "commodore-amiga",
|
||||
"amiga1200": "commodore-amiga",
|
||||
"amigacd32": "commodore-amiga",
|
||||
"amigacdtv": "commodore-amiga",
|
||||
"amstradcpc": "amstrad-cpc",
|
||||
"atari800": "atari-400-800",
|
||||
"atari5200": "atari-5200",
|
||||
"atari7800": "atari-7800",
|
||||
"atarilynx": "atari-lynx",
|
||||
"atarist": "atari-st",
|
||||
"c64": "commodore-c64",
|
||||
"channelf": "fairchild-channel-f",
|
||||
"colecovision": "coleco-colecovision",
|
||||
"dreamcast": "sega-dreamcast",
|
||||
"fds": "nintendo-fds",
|
||||
"gamecube": "nintendo-gamecube",
|
||||
"gamegear": "sega-game-gear",
|
||||
"gb": "nintendo-gb",
|
||||
"gba": "nintendo-gba",
|
||||
"gbc": "nintendo-gbc",
|
||||
"intellivision": "mattel-intellivision",
|
||||
"jaguar": "atari-jaguar",
|
||||
"mastersystem": "sega-master-system",
|
||||
"megadrive": "sega-mega-drive",
|
||||
"msx": "microsoft-msx",
|
||||
"msx1": "microsoft-msx",
|
||||
"msx2": "microsoft-msx",
|
||||
"n64": "nintendo-64",
|
||||
"naomi": "sega-dreamcast-arcade",
|
||||
"naomigd": "sega-dreamcast-arcade",
|
||||
"atomiswave": "sega-dreamcast-arcade",
|
||||
"nds": "nintendo-ds",
|
||||
"neogeo": "snk-neogeo",
|
||||
"neogeocd": "snk-neogeo-cd",
|
||||
"o2em": "magnavox-odyssey2",
|
||||
"pcengine": "nec-pc-engine",
|
||||
"pcenginecd": "nec-pc-engine",
|
||||
"pcfx": "nec-pc-fx",
|
||||
"ps2": "sony-playstation-2",
|
||||
"psx": "sony-playstation",
|
||||
"saturn": "sega-saturn",
|
||||
"scummvm": "scummvm",
|
||||
"segacd": "sega-mega-cd",
|
||||
"snes": "nintendo-snes",
|
||||
"supergrafx": "nec-pc-engine",
|
||||
"x68000": "sharp-x68000",
|
||||
"zxspectrum": "sinclair-zx-spectrum",
|
||||
}
|
||||
|
||||
|
||||
class Scraper(BaseScraper):
    """Scraper for Recalbox es_bios.xml."""

    def __init__(self, url: str = SOURCE_URL):
        self.url = url
        # Downloaded XML text, cached after the first successful fetch.
        self._raw_data: str | None = None

    def _fetch_raw(self) -> str:
        """Download es_bios.xml once and serve it from the cache afterwards.

        Raises:
            ConnectionError: The download failed.
        """
        if self._raw_data is not None:
            return self._raw_data

        try:
            req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
            with urllib.request.urlopen(req, timeout=30) as resp:
                self._raw_data = resp.read().decode("utf-8")
            return self._raw_data
        except urllib.error.URLError as e:
            raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e

    @staticmethod
    def _iter_bios_entries(root):
        """Yield one dict per ``<bios>`` element that has at least one path.

        Shared by ``fetch_requirements`` and ``fetch_full_requirements`` so
        both read the XML attributes in exactly the same way.
        """
        for system_elem in root.findall(".//system"):
            platform = system_elem.get("platform", "")
            system_name = system_elem.get("name", platform)
            # Platforms without a mapping keep their Recalbox identifier.
            system_slug = SYSTEM_SLUG_MAP.get(platform, platform)

            for bios_elem in system_elem.findall("bios"):
                # Alternate file paths are pipe-separated.
                paths = [p.strip() for p in bios_elem.get("path", "").split("|") if p.strip()]
                if not paths:
                    continue

                # Several MD5s may be valid for one BIOS (comma-separated).
                md5_list = [m.strip() for m in bios_elem.get("md5", "").split(",") if m.strip()]

                yield {
                    "name": paths[0].rsplit("/", 1)[-1],
                    "system": system_slug,
                    "system_name": system_name,
                    "paths": paths,
                    "md5_list": md5_list,
                    "core": bios_elem.get("core", ""),
                    # Both flags default to true unless explicitly "false".
                    "mandatory": bios_elem.get("mandatory", "true") != "false",
                    "hash_match_mandatory": bios_elem.get("hashMatchMandatory", "true") != "false",
                    "note": bios_elem.get("note", ""),
                }

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Parse es_bios.xml and return BIOS requirements.

        Entries are de-duplicated on their primary (first) path; the first
        occurrence wins. All accepted MD5s are joined with commas.

        Raises:
            ConnectionError: The download failed.
            ValueError: The content does not look like es_bios.xml.
        """
        raw = self._fetch_raw()

        if not self.validate_format(raw):
            raise ValueError("es_bios.xml format validation failed")

        requirements = []
        seen = set()

        for entry in self._iter_bios_entries(ET.fromstring(raw)):
            primary_path = entry["paths"][0]
            if primary_path in seen:
                continue
            seen.add(primary_path)

            md5_list = entry["md5_list"]
            requirements.append(BiosRequirement(
                name=entry["name"],
                system=entry["system"],
                md5=",".join(md5_list) if md5_list else None,
                destination=primary_path,
                required=entry["mandatory"],
            ))

        return requirements

    def fetch_full_requirements(self) -> list[dict]:
        """Parse es_bios.xml preserving all Recalbox-specific fields.

        Unlike ``fetch_requirements`` nothing is de-duplicated and the
        format is not validated first.
        """
        root = ET.fromstring(self._fetch_raw())
        return list(self._iter_bios_entries(root))

    def validate_format(self, raw_data: str) -> bool:
        """Validate es_bios.xml format."""
        return "<biosList" in raw_data and "<system" in raw_data and "<bios" in raw_data

    def generate_platform_yaml(self) -> dict:
        """Generate a platform YAML config dict from scraped data."""
        requirements = self.fetch_requirements()

        systems = {}
        for req in requirements:
            entry = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.md5:
                entry["md5"] = req.md5

            systems.setdefault(req.system, {"files": []})["files"].append(entry)

        version = fetch_github_latest_tag("recalbox/recalbox", prefix="") or ""
        # Recalbox uses GitLab - GitHub API may not resolve
        if not version:
            version = "10.0"

        return {
            "platform": "Recalbox",
            "version": version,
            "homepage": "https://www.recalbox.com",
            "source": SOURCE_URL,
            "base_destination": "bios",
            "hash_type": "md5",
            "verification_mode": "md5",
            "systems": systems,
        }
|
||||
|
||||
|
||||
def main():
    """CLI entry point.

    Modes, in order of precedence: ``--full`` prints the first five raw
    Recalbox entries as JSON, ``--dry-run`` lists up to five files per
    system, ``--json`` prints the platform config as JSON, ``--output FILE``
    writes it as UTF-8 YAML (previously this option was accepted but
    ignored), and with no option a one-line summary is printed. Exits with
    status 1 on download, validation or XML parsing errors, or when PyYAML
    is missing.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Scrape Recalbox es_bios.xml")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--full", action="store_true", help="Show full Recalbox-specific fields")
    parser.add_argument("--output", "-o")
    args = parser.parse_args()

    scraper = Scraper()

    try:
        if args.full:
            reqs = scraper.fetch_full_requirements()
            print(json.dumps(reqs[:5], indent=2))
            print(f"\nTotal: {len(reqs)} BIOS entries")
            return
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError, ET.ParseError) as e:
        # ET.ParseError is not a ValueError, so malformed XML would
        # otherwise escape as an unhandled traceback.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Grouped once; used by both the dry-run listing and the summary.
    by_system = {}
    for r in reqs:
        by_system.setdefault(r.system, []).append(r)

    if args.dry_run:
        for sys_name, files in sorted(by_system.items()):
            print(f"\n{sys_name} ({len(files)} files):")
            for f in files[:5]:
                print(f" {f.name} (md5={f.md5[:12] if f.md5 else 'N/A'}...)")
            if len(files) > 5:
                print(f" ... +{len(files)-5} more")
        print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
        return

    if args.json:
        config = scraper.generate_platform_yaml()
        print(json.dumps(config, indent=2))
        return

    if args.output:
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
            sys.exit(1)

        config = scraper.generate_platform_yaml()
        with open(args.output, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        print(f"Written to {args.output}")
        return

    # The requirements fetched above are reused; no second fetch is needed.
    print(f"Scraped {len(reqs)} BIOS files across {len(by_system)} systems")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
291
scripts/validate_pr.py
Normal file
291
scripts/validate_pr.py
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Validate BIOS file contributions in Pull Requests.
|
||||
|
||||
Usage:
|
||||
python scripts/validate_pr.py [files...]
|
||||
python scripts/validate_pr.py --changed # Auto-detect changed files via git
|
||||
|
||||
Multi-layer validation:
|
||||
1. Hash verified against known databases (System.dat, batocera-systems)
|
||||
2. File size matches expected value
|
||||
3. File referenced in ≥1 platform config
|
||||
4. Duplicate detection against database.json
|
||||
5. Security checks (no executables, reasonable sizes)
|
||||
|
||||
Outputs a structured report suitable for PR comments.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import zlib
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import compute_hashes
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
DEFAULT_DB = "database.json"
|
||||
DEFAULT_PLATFORMS_DIR = "platforms"
|
||||
|
||||
BLOCKED_EXTENSIONS = {
|
||||
".exe", ".bat", ".cmd", ".sh", ".ps1", ".vbs", ".js",
|
||||
".msi", ".dll", ".so", ".dylib", ".py", ".rb", ".pl",
|
||||
}
|
||||
|
||||
MAX_FILE_SIZE = 100 * 1024 * 1024
|
||||
|
||||
|
||||
class ValidationResult:
    """Collected outcome of validating a single contributed file.

    Carries the file's path, base name, size and hashes, plus an ordered
    list of ``(status, message)`` entries where status is ``"PASS"``,
    ``"FAIL"``, ``"WARN"`` or ``"INFO"``.
    """

    def __init__(self, filepath: str):
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        # Ordered (status, message) pairs appended by the add_* methods.
        self.checks = []
        # Hashes and size start blank/zero and are filled in by the caller.
        self.sha1 = ""
        self.md5 = ""
        self.crc32 = ""
        self.size = 0

    def add_check(self, passed: bool, message: str):
        """Record a hard check as PASS (``passed`` true) or FAIL."""
        status = "PASS" if passed else "FAIL"
        self.checks.append((status, message))

    def add_warning(self, message: str):
        """Record a WARN entry; warnings never fail the result."""
        self.checks.append(("WARN", message))

    def add_info(self, message: str):
        """Record a purely informational INFO entry."""
        self.checks.append(("INFO", message))

    @property
    def passed(self) -> bool:
        """True unless at least one recorded entry is a FAIL."""
        return not any(status == "FAIL" for status, _ in self.checks)

    def to_markdown(self) -> str:
        """Render the result as a markdown section (heading, facts, checks)."""
        icons = {"PASS": "✅", "FAIL": "❌", "WARN": "⚠️"}
        headline = "✅" if self.passed else "❌"
        report = [
            f"### {headline} `{self.filename}`",
            "",
            f"- **Path**: `{self.filepath}`",
            f"- **Size**: {self.size:,} bytes",
            f"- **SHA1**: `{self.sha1}`",
            f"- **MD5**: `{self.md5}`",
            f"- **CRC32**: `{self.crc32}`",
            "",
        ]
        for status, message in self.checks:
            # Anything that is not PASS/FAIL/WARN is shown as informational.
            icon = icons.get(status, "ℹ️")
            report.append(f"- {icon} {message}")
        return "\n".join(report)
|
||||
|
||||
|
||||
def load_database(db_path: str) -> dict | None:
    """Load the JSON database at *db_path*.

    Returns the parsed JSON document, or ``None`` when the file does not
    exist. Malformed JSON still raises ``json.JSONDecodeError``.
    """
    if not os.path.exists(db_path):
        return None
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(db_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # The file vanished between the existence check and the open.
        return None
|
||||
|
||||
|
||||
def load_platform_hashes(platforms_dir: str) -> dict:
    """Collect every hash and file name referenced by the platform configs.

    Reads each ``*.yml`` file in *platforms_dir* (skipping names that start
    with ``_``) and gathers the ``sha1``, ``md5`` and ``name`` values of all
    ``systems.<id>.files`` entries.

    Hash values are normalised before being stored: comma-separated lists
    are split and each digest is stripped and lower-cased, so that a
    membership test with a lowercase hex digest succeeds even when a config
    lists several MD5s or writes them in uppercase.

    Returns:
        ``{"sha1": set, "md5": set, "names": set}``; all three sets are empty
        when the directory is missing or PyYAML is unavailable.
    """
    known = {"sha1": set(), "md5": set(), "names": set()}

    if not os.path.isdir(platforms_dir) or yaml is None:
        return known

    for config_path in Path(platforms_dir).glob("*.yml"):
        if config_path.name.startswith("_"):
            continue
        with open(config_path, encoding="utf-8") as fh:
            try:
                config = yaml.safe_load(fh) or {}
            except yaml.YAMLError:
                # Best effort: an unparsable config contributes nothing.
                continue

        # `systems:` / `files:` may be present but null in YAML.
        for system in (config.get("systems") or {}).values():
            for file_entry in (system or {}).get("files") or []:
                for kind in ("sha1", "md5"):
                    raw = file_entry.get(kind)
                    if raw is None:
                        continue
                    for digest in str(raw).split(","):
                        digest = digest.strip().lower()
                        if digest:
                            known[kind].add(digest)
                if "name" in file_entry:
                    known["names"].add(file_entry["name"])

    return known
|
||||
|
||||
|
||||
def validate_file(
    filepath: str,
    db: dict | None,
    platform_hashes: dict,
) -> ValidationResult:
    """Run all validation checks on a file.

    Args:
        filepath: Path of the contributed file.
        db: Parsed database (its ``"files"`` mapping is keyed by SHA1), or
            ``None`` to skip the duplicate check.
        platform_hashes: Mapping with ``"sha1"``, ``"md5"`` and ``"names"``
            sets of known platform requirements.

    Returns:
        A ``ValidationResult`` holding the size, hashes and every recorded
        check. A missing file yields a single FAIL entry and nothing else.
    """
    result = ValidationResult(filepath)

    if not os.path.exists(filepath):
        result.add_check(False, f"File not found: {filepath}")
        return result

    result.size = os.path.getsize(filepath)
    hashes = compute_hashes(filepath)
    result.sha1 = hashes["sha1"]
    result.md5 = hashes["md5"]
    result.crc32 = hashes["crc32"]

    # Security: reject script/executable extensions outright.
    ext = os.path.splitext(filepath)[1].lower()
    if ext in BLOCKED_EXTENSIONS:
        result.add_check(False, f"Blocked file extension: {ext}")

    if result.size > MAX_FILE_SIZE:
        result.add_check(
            False,
            f"File too large for embedded storage ({result.size:,} > {MAX_FILE_SIZE:,} bytes). "
            "Use storage: external in platform config.",
        )
    elif result.size == 0:
        result.add_check(False, "File is empty (0 bytes)")
    else:
        result.add_check(True, f"File size OK ({result.size:,} bytes)")

    if db:
        files_db = db.get("files", {})
        if result.sha1 in files_db:
            existing = files_db[result.sha1]
            result.add_warning(f"Duplicate: identical file already exists at `{existing['path']}`")
        else:
            result.add_check(True, "Not a duplicate in database")

    sha1_known = result.sha1 in platform_hashes.get("sha1", set())
    md5_known = result.md5 in platform_hashes.get("md5", set())
    name_known = result.filename in platform_hashes.get("names", set())
    # Known file name whose content matches no known hash.
    possible_variant = name_known and not sha1_known and not md5_known

    if sha1_known:
        result.add_check(True, "SHA1 matches known platform requirement")
    elif md5_known:
        result.add_check(True, "MD5 matches known platform requirement")
    elif name_known:
        result.add_warning("Filename matches a known requirement but hash differs - may be a variant")
    else:
        result.add_warning("File not referenced in any platform config - needs manual review")

    # Normalise before the placement check so "./bios/..." and paths written
    # with the OS separator are recognised as being under bios/.
    rel_path = os.path.normpath(filepath).replace(os.sep, "/")
    if rel_path.startswith("bios/"):
        parts = rel_path.split("/")
        if len(parts) >= 4:
            result.add_check(True, f"Correct placement: bios/{parts[1]}/{parts[2]}/")
        else:
            result.add_warning("File should be in bios/Manufacturer/Console/ structure")
    else:
        result.add_warning("File is not under bios/ directory")

    if possible_variant:
        result.add_info(
            "This may be a valid variant. If accepted, it will be placed in "
            f"`.variants/{result.filename}.{result.sha1[:8]}`"
        )

    return result
|
||||
|
||||
|
||||
def get_changed_files() -> list[str]:
    """Get the changed files under ``bios/`` for the current PR/branch via git.

    Diffs ``HEAD`` against ``origin/main``, ``origin/master`` and
    ``origin/v2`` in turn and returns the first non-empty list of ``bios/``
    paths. If none of those yields anything, falls back to the staged
    changes (``git diff --cached``).

    Returns:
        Paths starting with ``bios/``; an empty list when nothing changed or
        when git cannot be executed at all.
    """

    def bios_paths(stdout: str) -> list[str]:
        return [line for line in stdout.strip().split("\n") if line.startswith("bios/")]

    for base in ("main", "master", "v2"):
        try:
            diff = subprocess.run(
                ["git", "diff", "--name-only", f"origin/{base}...HEAD"],
                capture_output=True, text=True, check=True,
            )
        except subprocess.CalledProcessError:
            # This base ref does not exist (or the diff failed); try the next.
            continue
        except OSError:
            # git itself could not be started; other bases will fail the same way.
            break
        files = bios_paths(diff.stdout)
        if files:
            return files

    try:
        staged = subprocess.run(
            ["git", "diff", "--cached", "--name-only"],
            capture_output=True, text=True,
        )
    except OSError:
        # Previously this fallback raised when git was missing, although the
        # same error had just been deliberately swallowed above.
        return []
    return bios_paths(staged.stdout)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate files and print a report.

    Files come from the positional arguments or, with ``--changed``, from
    git. The report is printed as JSON (``--json``), markdown
    (``--markdown``) or plain text. Exits with status 1 when any file has a
    FAIL check.
    """
    parser = argparse.ArgumentParser(description="Validate BIOS file contributions")
    parser.add_argument("files", nargs="*", help="Files to validate")
    parser.add_argument("--changed", action="store_true", help="Auto-detect changed BIOS files")
    parser.add_argument("--db", default=DEFAULT_DB, help="Path to database.json")
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--markdown", action="store_true", help="Output as markdown (for PR comments)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    files = args.files
    if args.changed:
        files = get_changed_files()
        if not files:
            print("No changed BIOS files detected")
            return

    if not files:
        parser.error("No files specified. Use --changed or provide file paths.")

    db = load_database(args.db)
    platform_hashes = load_platform_hashes(args.platforms_dir)

    results = [validate_file(path, db, platform_hashes) for path in files]
    all_passed = all(r.passed for r in results)

    if args.json:
        output = [
            {
                "file": r.filepath,
                "passed": r.passed,
                "sha1": r.sha1,
                "md5": r.md5,
                # Included so the JSON report carries the same hashes as the
                # markdown report.
                "crc32": r.crc32,
                "size": r.size,
                "checks": [{"status": s, "message": m} for s, m in r.checks],
            }
            for r in results
        ]
        print(json.dumps(output, indent=2))
    elif args.markdown:
        lines = ["## BIOS Validation Report", ""]
        status = "✅ All checks passed" if all_passed else "❌ Some checks failed"
        lines.append(f"**Status**: {status}")
        lines.append("")

        for r in results:
            lines.append(r.to_markdown())
            lines.append("")

        print("\n".join(lines))
    else:
        markers = {"PASS": "✓", "FAIL": "✗", "WARN": "!"}
        for r in results:
            status = "PASS" if r.passed else "FAIL"
            print(f"\n[{status}] {r.filepath}")
            print(f" SHA1: {r.sha1}")
            print(f" MD5: {r.md5}")
            print(f" Size: {r.size:,}")
            for s, m in r.checks:
                # Any other status (INFO) is marked with "i".
                marker = markers.get(s, "i")
                print(f" [{marker}] {m}")

    if not all_passed:
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
264
scripts/verify.py
Normal file
264
scripts/verify.py
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Platform-aware BIOS verification engine.
|
||||
|
||||
Replicates the exact verification logic of each platform:
|
||||
- RetroArch/Lakka/RetroPie: file existence only (path_is_valid)
|
||||
- Batocera: MD5 hash verification + zippedFile content check (checkBios/checkInsideZip)
|
||||
|
||||
Usage:
|
||||
python scripts/verify.py --platform batocera
|
||||
python scripts/verify.py --all
|
||||
python scripts/verify.py --platform retroarch --json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import load_platform_config, md5sum
|
||||
|
||||
DEFAULT_DB = "database.json"
|
||||
DEFAULT_PLATFORMS_DIR = "platforms"
|
||||
|
||||
|
||||
class Status:
    """String constants for the outcome of verifying one BIOS entry."""

    # Verified: hash matches (or the file exists, on existence-only platforms).
    OK = "ok"
    # File present but its hash does not match (Batocera's term).
    UNTESTED = "untested"
    # File not found at all.
    MISSING = "missing"
|
||||
|
||||
|
||||
def check_inside_zip(container: str, file_name: str, expected_md5: str) -> str:
    """Check a ROM inside a ZIP - replicates Batocera's checkInsideZip().

    Args:
        container: Path of the ZIP archive.
        file_name: Member to look for; matched case-insensitively.
        expected_md5: Expected MD5 of the member. An empty value means
            "presence is enough". Compared case-insensitively, and a value
            shorter than 32 characters is treated as a truncated hash and
            matched as a prefix, the same way plain files are compared.

    Returns:
        ``Status.OK`` when the member is present and matches,
        ``Status.UNTESTED`` when it is present but its MD5 differs,
        ``"not_in_zip"`` when no member has that name, and ``"error"`` when
        the archive cannot be opened or read.
    """
    wanted = file_name.casefold()
    expected = (expected_md5 or "").strip().lower()

    try:
        with zipfile.ZipFile(container) as archive:
            # casefold() for case-insensitive ZIP lookup, matching Batocera.
            member = next(
                (entry for entry in archive.namelist() if entry.casefold() == wanted),
                None,
            )
            if member is None:
                return "not_in_zip"
            if not expected:
                return Status.OK

            digest = hashlib.md5()
            with archive.open(member) as entry:
                for block in iter(lambda: entry.read(65536), b""):
                    digest.update(block)
    except Exception:
        # Deliberately broad: any unreadable archive is reported, not raised.
        return "error"

    actual = digest.hexdigest()
    if actual == expected or (len(expected) < 32 and actual.startswith(expected)):
        return Status.OK
    return Status.UNTESTED
|
||||
|
||||
|
||||
def _md5_candidates(raw) -> list:
    """Split a possibly comma-separated MD5 value into lookup candidates.

    Each listed hash is yielded as written and then lower-cased, without
    duplicates, so an uppercase hash can still hit a lowercase index.
    """
    candidates = []
    for part in str(raw or "").split(","):
        part = part.strip()
        for candidate in (part, part.lower()):
            if candidate and candidate not in candidates:
                candidates.append(candidate)
    return candidates


def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
    """Find the local file path for a BIOS entry using database.json.

    Tries, in order: SHA1, exact MD5, truncated MD5 (prefix match), then the
    name index. The ``md5`` value may be a comma-separated list and may be
    uppercase; every listed hash is tried.

    Args:
        file_entry: Platform file entry; ``sha1``, ``md5`` and ``name`` are
            all optional.
        db: Parsed database with a ``"files"`` mapping (SHA1 -> record with a
            ``"path"``) and ``"indexes"`` (``by_md5``: MD5 -> SHA1,
            ``by_name``: name -> iterable of SHA1s).

    Returns:
        The first candidate path that exists on disk, or ``None``.
    """
    files_db = db.get("files", {})
    indexes = db.get("indexes", {})
    by_md5 = indexes.get("by_md5", {})
    by_name = indexes.get("by_name", {})

    def existing_path(sha1_key):
        """Return the recorded path for *sha1_key* if it exists on disk."""
        if sha1_key not in files_db:
            return None
        path = files_db[sha1_key]["path"]
        return path if os.path.exists(path) else None

    sha1 = file_entry.get("sha1")
    if sha1:
        found = existing_path(sha1)
        if found:
            return found

    md5_candidates = _md5_candidates(file_entry.get("md5"))

    for candidate in md5_candidates:
        if candidate in by_md5:
            found = existing_path(by_md5[candidate])
            if found:
                return found

    # Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
    for candidate in md5_candidates:
        if len(candidate) >= 32:
            continue
        prefix = candidate.lower()
        for db_md5, db_sha1 in by_md5.items():
            if db_md5.lower().startswith(prefix):
                found = existing_path(db_sha1)
                if found:
                    return found

    name = file_entry.get("name", "")
    if name in by_name:
        for match_sha1 in by_name[name]:
            found = existing_path(match_sha1)
            if found:
                return found

    return None
|
||||
|
||||
|
||||
def verify_entry_existence(file_entry: dict, local_path: str | None) -> dict:
    """Existence-only verification (RetroArch): a resolved path means OK.

    Returns a dict with ``name`` and ``status``; ``path`` is included only
    when a local path was resolved.
    """
    report = {"name": file_entry.get("name", "")}
    if not local_path:
        report["status"] = Status.MISSING
        return report
    report["status"] = Status.OK
    report["path"] = local_path
    return report
|
||||
|
||||
|
||||
def verify_entry_md5(file_entry: dict, local_path: str | None) -> dict:
    """MD5 verification - supports single MD5 (Batocera) and multi-MD5 (Recalbox).

    Args:
        file_entry: Platform file entry. ``md5`` may be a single hash or a
            comma-separated list; ``zipped_file`` names a member to check
            inside the archive at *local_path* instead of hashing the file.
        local_path: Resolved local file, or ``None`` when it was not found.

    Returns:
        A dict with ``name`` and ``status`` (``Status.OK``,
        ``Status.UNTESTED`` or ``Status.MISSING``) plus, depending on the
        outcome, ``path``, ``reason``, ``expected_md5`` and ``actual_md5``.
    """
    name = file_entry.get("name", "")
    expected_md5 = file_entry.get("md5", "")
    zipped_file = file_entry.get("zipped_file")

    # Recalbox uses comma-separated MD5 lists; a single hash is a list of one.
    md5_list = [m.strip() for m in (expected_md5 or "").split(",") if m.strip()]

    if not local_path:
        return {"name": name, "status": Status.MISSING, "expected_md5": expected_md5}

    if zipped_file:
        outcome = None
        # With no expected hash, an empty candidate asks for presence only.
        for md5_candidate in md5_list or [""]:
            outcome = check_inside_zip(local_path, zipped_file, md5_candidate)
            if outcome == Status.OK:
                return {"name": name, "status": Status.OK, "path": local_path}

        # Report what actually went wrong instead of always blaming the hash.
        if outcome == "not_in_zip":
            reason = f"{zipped_file} not found inside ZIP"
        elif outcome == "error":
            reason = f"{zipped_file} could not be read from ZIP"
        else:
            reason = f"{zipped_file} MD5 mismatch inside ZIP"
        return {
            "name": name, "status": Status.UNTESTED, "path": local_path,
            "reason": reason,
        }

    if not md5_list:
        # Nothing to compare against: presence is sufficient.
        return {"name": name, "status": Status.OK, "path": local_path}

    actual_md5 = md5sum(local_path)

    # Case-insensitive - Recalbox uses uppercase MD5s
    actual_lower = actual_md5.lower()
    for expected in md5_list:
        expected_lower = expected.lower()
        if actual_lower == expected_lower:
            return {"name": name, "status": Status.OK, "path": local_path}
        # Hashes shorter than 32 characters are treated as truncated prefixes.
        if len(expected) < 32 and actual_lower.startswith(expected_lower):
            return {"name": name, "status": Status.OK, "path": local_path}

    return {
        "name": name, "status": Status.UNTESTED, "path": local_path,
        "expected_md5": md5_list[0], "actual_md5": actual_md5,
    }
|
||||
|
||||
|
||||
def verify_platform(config: dict, db: dict) -> dict:
    """Verify every BIOS file of one platform according to its verification_mode.

    ``"existence"`` mode (the default) only checks that a file resolves
    locally; any other mode uses MD5 verification.

    Returns:
        A summary dict with the keys ``platform``, ``verification_mode``,
        ``total``, ``ok``, ``untested``, ``missing`` and ``details`` (one
        result dict per file entry, each tagged with its ``system`` id).
    """
    mode = config.get("verification_mode", "existence")

    if mode == "existence":
        check = verify_entry_existence
    else:
        check = verify_entry_md5

    details = []
    tally = {Status.OK: 0, Status.UNTESTED: 0, Status.MISSING: 0}
    for sys_id, system in config.get("systems", {}).items():
        for entry in system.get("files", []):
            outcome = check(entry, resolve_to_local_path(entry, db))
            outcome["system"] = sys_id
            details.append(outcome)
            if outcome["status"] in tally:
                tally[outcome["status"]] += 1

    return {
        "platform": config.get("platform", "unknown"),
        "verification_mode": mode,
        "total": len(details),
        "ok": tally[Status.OK],
        "untested": tally[Status.UNTESTED],
        "missing": tally[Status.MISSING],
        "details": details,
    }
|
||||
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: verify one platform or all of them.

    Prints a per-platform summary followed by UNTESTED and MISSING entries,
    or, with ``--json``, a single JSON document whose details list only the
    entries that are not OK.
    """
    parser = argparse.ArgumentParser(description="Verify BIOS coverage per platform")
    parser.add_argument("--platform", "-p", help="Platform name")
    parser.add_argument("--all", action="store_true", help="Verify all platforms")
    parser.add_argument("--db", default=DEFAULT_DB)
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()

    # The database is loaded before the platform selection is validated.
    with open(args.db) as f:
        db = json.load(f)

    platforms = []
    if args.all:
        for config_file in Path(args.platforms_dir).glob("*.yml"):
            if not config_file.name.startswith("_"):
                platforms.append(config_file.stem)
    elif args.platform:
        platforms.append(args.platform)
    else:
        # parser.error() prints the message and exits the process.
        parser.error("Specify --platform or --all")

    def print_report(report):
        """Print the human-readable summary and problem entries for one platform."""
        mode = report["verification_mode"]
        if mode == "existence":
            summary = (
                f"{report['platform']}: {report['ok']}/{report['total']} present, "
                f"{report['missing']} missing [verification: {mode}]"
            )
        else:
            summary = (
                f"{report['platform']}: {report['ok']}/{report['total']} verified, "
                f"{report['untested']} untested, {report['missing']} missing [verification: {mode}]"
            )
        print(summary)

        for d in report["details"]:
            if d["status"] != Status.UNTESTED:
                continue
            reason = d.get("reason", "")
            if not reason and "expected_md5" in d:
                reason = f"expected={d['expected_md5'][:16]}... got={d['actual_md5'][:16]}..."
            print(f" UNTESTED: {d['system']}/{d['name']} - {reason}")

        for d in report["details"]:
            if d["status"] == Status.MISSING:
                print(f" MISSING: {d['system']}/{d['name']}")

    all_results = {}
    for platform in sorted(platforms):
        config = load_platform_config(platform, args.platforms_dir)
        report = verify_platform(config, db)
        all_results[platform] = report
        if not args.json:
            print_report(report)

    if args.json:
        # Keep the JSON focused on problems: drop entries that verified OK.
        for report in all_results.values():
            report["details"] = [d for d in report["details"] if d["status"] != Status.OK]
        print(json.dumps(all_results, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue