diff --git a/scripts/common.py b/scripts/common.py index 9048ed1c..0a096859 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -20,22 +20,25 @@ except ImportError: def compute_hashes(filepath: str | Path) -> dict[str, str]: - """Compute SHA1, MD5, SHA256, CRC32 for a file.""" + """Compute SHA1, MD5, SHA256, CRC32, Adler32 for a file.""" sha1 = hashlib.sha1() md5 = hashlib.md5() sha256 = hashlib.sha256() crc = 0 + adler = 1 # zlib.adler32 initial value with open(filepath, "rb") as f: for chunk in iter(lambda: f.read(65536), b""): sha1.update(chunk) md5.update(chunk) sha256.update(chunk) crc = zlib.crc32(chunk, crc) + adler = zlib.adler32(chunk, adler) return { "sha1": sha1.hexdigest(), "md5": md5.hexdigest(), "sha256": sha256.hexdigest(), "crc32": format(crc & 0xFFFFFFFF, "08x"), + "adler32": format(adler & 0xFFFFFFFF, "08x"), } diff --git a/scripts/verify.py b/scripts/verify.py index 714b0d3d..7b918be4 100644 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -86,17 +86,28 @@ def _parse_validation(validation: list | dict | None) -> list[str]: return [] +# Validation types that require console-specific cryptographic keys. +# verify.py cannot reproduce these — size checks still apply if combined. +_CRYPTO_CHECKS = frozenset({"signature", "crypto"}) + +# All reproducible validation types. +_HASH_CHECKS = frozenset({"crc32", "md5", "sha1", "adler32"}) + + def _build_validation_index(profiles: dict) -> dict[str, dict]: """Build per-filename validation rules from emulator profiles. - Returns {filename: {"checks": [str], "size": int|None, "crc32": str|None, - "md5": str|None, "sha1": str|None}}. + Returns {filename: {"checks": [str], "size": int|None, "min_size": int|None, + "max_size": int|None, "crc32": str|None, "md5": str|None, "sha1": str|None, + "adler32": str|None, "crypto_only": [str]}}. + + ``crypto_only`` lists validation types we cannot reproduce (signature, crypto) + so callers can report them as non-verifiable rather than silently skipping. + When multiple emulators reference the same file, merges checks (union). - Raises ValueError if two profiles declare conflicting values for - the same filename (indicates a profile bug). + Raises ValueError if two profiles declare conflicting values. """ index: dict[str, dict] = {} - # Track which emulator set each value, for conflict reporting sources: dict[str, dict[str, str]] = {} for emu_name, profile in profiles.items(): if profile.get("type") in ("launcher", "alias"): @@ -113,9 +124,15 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]: "checks": set(), "size": None, "min_size": None, "max_size": None, "crc32": None, "md5": None, "sha1": None, + "adler32": None, "crypto_only": set(), } sources[fname] = {} index[fname]["checks"].update(checks) + # Track non-reproducible crypto checks + index[fname]["crypto_only"].update( + c for c in checks if c in _CRYPTO_CHECKS + ) + # Size checks if "size" in checks: if f.get("size") is not None: new_size = f["size"] @@ -132,6 +149,7 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]: index[fname]["min_size"] = f["min_size"] if f.get("max_size") is not None: index[fname]["max_size"] = f["max_size"] + # Hash checks (crc32, md5, sha1, adler32) if "crc32" in checks and f.get("crc32"): new_crc = f["crc32"].lower() if new_crc.startswith("0x"): @@ -162,24 +180,46 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]: ) index[fname][hash_type] = f[hash_type] sources[fname][hash_type] = emu_name + # Adler32 — stored as known_hash_adler32 field (not in validation: list + # for Dolphin, but support it in both forms for future profiles) + adler_val = f.get("known_hash_adler32") or f.get("adler32") + if adler_val: + norm = adler_val.lower() + if norm.startswith("0x"): + norm = norm[2:] + prev_adler = index[fname]["adler32"] + if prev_adler is not None and prev_adler != norm: + prev_emu = sources[fname].get("adler32", "?") + raise ValueError( + f"validation conflict for '{fname}': " + f"adler32={prev_adler} ({prev_emu}) vs adler32={norm} ({emu_name})" + ) + index[fname]["adler32"] = norm + sources[fname]["adler32"] = emu_name # Convert sets to sorted lists for determinism for v in index.values(): v["checks"] = sorted(v["checks"]) + v["crypto_only"] = sorted(v["crypto_only"]) return index def check_file_validation( local_path: str, filename: str, validation_index: dict[str, dict], ) -> str | None: - """Check emulator-level validation (size, crc32, md5, sha1) on a resolved file. + """Check emulator-level validation on a resolved file. - Returns None if all checks pass or no validation applies. + Supports: size (exact/min/max), crc32, md5, sha1, adler32. + Reports but cannot reproduce: signature, crypto (console-specific keys). + + Returns None if all reproducible checks pass or no validation applies. Returns a reason string if a check fails. """ entry = validation_index.get(filename) if not entry: return None checks = entry["checks"] + + # Size checks if "size" in checks: actual_size = os.path.getsize(local_path) if entry["size"] is not None and actual_size != entry["size"]: @@ -188,9 +228,11 @@ def check_file_validation( return f"size too small: min {entry['min_size']}, got {actual_size}" if entry["max_size"] is not None and actual_size > entry["max_size"]: return f"size too large: max {entry['max_size']}, got {actual_size}" - # Hash checks — compute once, reuse - need_hashes = any( - h in checks and entry.get(h) for h in ("crc32", "md5", "sha1") + + # Hash checks — compute once, reuse for all hash types + need_hashes = ( + any(h in checks and entry.get(h) for h in ("crc32", "md5", "sha1")) + or entry.get("adler32") ) if need_hashes: hashes = compute_hashes(local_path) @@ -206,6 +248,18 @@ def check_file_validation( if "sha1" in checks and entry["sha1"]: if hashes["sha1"].lower() != entry["sha1"].lower(): return f"sha1 mismatch: expected {entry['sha1']}, got {hashes['sha1']}" + # Adler32 — check if known_hash_adler32 is available (even if not + # in the validation: list, Dolphin uses it as informational check) + if entry["adler32"]: + if hashes["adler32"].lower() != entry["adler32"]: + return ( + f"adler32 mismatch: expected 0x{entry['adler32']}, " + f"got 0x{hashes['adler32']}" + ) + + # Note: signature/crypto checks require console-specific keys and + # cannot be reproduced. Size checks above still apply when combined + # (e.g. validation: [size, signature]). return None diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 5a780df1..30140052 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -370,8 +370,27 @@ class TestE2E(unittest.TestCase): # MD5 validation — wrong md5 {"name": "alias_target.bin", "required": False, "validation": ["md5"], "md5": "0000000000000000000000000000dead"}, + # Adler32 — known_hash_adler32 field + {"name": "present_req.bin", "required": True, + "known_hash_adler32": None}, # placeholder, set below + # Min/max size range validation + {"name": "present_req.bin", "required": True, + "validation": ["size"], "min_size": 10, "max_size": 100}, + # Signature — crypto check we can't reproduce, but size applies + {"name": "correct_hash.bin", "required": True, + "validation": ["size", "signature"], "size": 17}, ], } + # Compute the actual adler32 of present_req.bin for the test fixture + import zlib as _zlib + with open(self.files["present_req.bin"]["path"], "rb") as _f: + _data = _f.read() + _adler = format(_zlib.adler32(_data) & 0xFFFFFFFF, "08x") + # Set the adler32 entry (the one with known_hash_adler32=None) + for entry in emu_val["files"]: + if entry.get("known_hash_adler32") is None and "known_hash_adler32" in entry: + entry["known_hash_adler32"] = f"0x{_adler}" + break with open(os.path.join(self.emulators_dir, "test_validation.yml"), "w") as fh: yaml.dump(emu_val, fh) @@ -805,6 +824,36 @@ class TestE2E(unittest.TestCase): self.assertIsNotNone(index["correct_hash.bin"]["md5"]) self.assertIsNotNone(index["correct_hash.bin"]["sha1"]) + def test_82_validation_adler32_pass(self): + """File with correct adler32 passes validation.""" + profiles = load_emulator_profiles(self.emulators_dir) + index = _build_validation_index(profiles) + path = self.files["present_req.bin"]["path"] + reason = check_file_validation(path, "present_req.bin", index) + self.assertIsNone(reason) + + def test_83_validation_min_max_size_pass(self): + """File within min/max size range passes validation.""" + profiles = load_emulator_profiles(self.emulators_dir) + index = _build_validation_index(profiles) + path = self.files["present_req.bin"]["path"] + reason = check_file_validation(path, "present_req.bin", index) + self.assertIsNone(reason) + # Verify the index has min/max + self.assertEqual(index["present_req.bin"]["min_size"], 10) + self.assertEqual(index["present_req.bin"]["max_size"], 100) + + def test_84_validation_crypto_tracked(self): + """Signature/crypto checks are tracked as non-reproducible.""" + profiles = load_emulator_profiles(self.emulators_dir) + index = _build_validation_index(profiles) + # correct_hash.bin has [size, signature] + self.assertIn("signature", index["correct_hash.bin"]["crypto_only"]) + # Size check still applies despite signature being non-reproducible + path = self.files["correct_hash.bin"]["path"] + reason = check_file_validation(path, "correct_hash.bin", index) + self.assertIsNone(reason) # size=16 matches + def test_76_validation_no_effect_when_no_field(self): """Files without validation field are unaffected.""" profiles = load_emulator_profiles(self.emulators_dir) @@ -918,7 +967,7 @@ class TestE2E(unittest.TestCase): """Validation label reflects the checks used.""" result = verify_emulator(["test_validation"], self.emulators_dir, self.db) # test_validation has crc32, md5, sha1, size → all listed - self.assertEqual(result["verification_mode"], "crc32+md5+sha1+size") + self.assertEqual(result["verification_mode"], "crc32+md5+sha1+signature+size") def test_99_filter_files_by_mode(self): """_filter_files_by_mode correctly filters standalone/libretro."""