From 9ce4724fc466deda72e47c6cb9e54377410057b6 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Thu, 2 Apr 2026 18:46:44 +0200 Subject: [PATCH] fix: sha1-based large file restore, fix broken data dir urls Replace grep-based restore with SHA1 matching via database.json. The old grep heuristic failed for assets with renamed basenames (dsi_nand_batocera42.bin) or special characters (MAME dots vs spaces), and only restored to the first .gitignore match when multiple paths shared a basename. Fix 3 broken data directory sources: - opentyrian: buildbot URL 404, use release asset - syobonaction: invalid git_subtree URL, use GitHub archive - stonesoup: same fix, adds 532 game data files --- .github/workflows/build.yml | 36 ++++++++++++++++++++++--------- .github/workflows/deploy-site.yml | 36 ++++++++++++++++++++++--------- platforms/_data_dirs.yml | 20 +++++++++-------- 3 files changed, 63 insertions(+), 29 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 87561c19..f168a458 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,16 +58,32 @@ jobs: run: | mkdir -p .cache/large gh release download large-files -D .cache/large/ 2>/dev/null || true - for f in .cache/large/*; do - [ -f "$f" ] || continue - name=$(basename "$f") - target=$(grep "$name" .gitignore | head -1) - if [ -n "$target" ] && [ ! -f "$target" ]; then - mkdir -p "$(dirname "$target")" - cp "$f" "$target" - echo "Restored: $target" - fi - done + python3 -c " + import hashlib, json, os, shutil + db = json.load(open('database.json')) + with open('.gitignore') as f: + ignored = {l.strip() for l in f if l.strip().startswith('bios/')} + cache = '.cache/large' + if not os.path.isdir(cache): + exit(0) + idx = {} + for fn in os.listdir(cache): + fp = os.path.join(cache, fn) + if os.path.isfile(fp): + h = hashlib.sha1(open(fp, 'rb').read()).hexdigest() + idx[h] = fp + restored = 0 + for sha1, entry in db['files'].items(): + path = entry['path'] + if path in ignored and not os.path.exists(path): + src = idx.get(sha1) + if src: + os.makedirs(os.path.dirname(path), exist_ok=True) + shutil.copy2(src, path) + print(f'Restored: {path}') + restored += 1 + print(f'Total: {restored} files restored') + " env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index a673cf28..2ffc8378 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -40,16 +40,32 @@ jobs: run: | mkdir -p .cache/large gh release download large-files -D .cache/large/ 2>/dev/null || true - for f in .cache/large/*; do - [ -f "$f" ] || continue - name=$(basename "$f") - target=$(grep "$name" .gitignore | head -1) - if [ -n "$target" ] && [ ! -f "$target" ]; then - mkdir -p "$(dirname "$target")" - cp "$f" "$target" - echo "Restored: $target" - fi - done + python3 -c " + import hashlib, json, os, shutil + db = json.load(open('database.json')) + with open('.gitignore') as f: + ignored = {l.strip() for l in f if l.strip().startswith('bios/')} + cache = '.cache/large' + if not os.path.isdir(cache): + exit(0) + idx = {} + for fn in os.listdir(cache): + fp = os.path.join(cache, fn) + if os.path.isfile(fp): + h = hashlib.sha1(open(fp, 'rb').read()).hexdigest() + idx[h] = fp + restored = 0 + for sha1, entry in db['files'].items(): + path = entry['path'] + if path in ignored and not os.path.exists(path): + src = idx.get(sha1) + if src: + os.makedirs(os.path.dirname(path), exist_ok=True) + shutil.copy2(src, path) + print(f'Restored: {path}') + restored += 1 + print(f'Total: {restored} files restored') + " env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/platforms/_data_dirs.yml b/platforms/_data_dirs.yml index 139747f8..1c16d305 100644 --- a/platforms/_data_dirs.yml +++ b/platforms/_data_dirs.yml @@ -150,22 +150,23 @@ data_directories: description: "SDLPAL Chinese Paladin game data (.mkf archives)" # ref: OpenTyrian2000 — system/opentyrian/tyrian/ - # Tyrian 2.1 freeware data (also on buildbot as OpenTyrian.zip) + # Tyrian 2.1 freeware data (buildbot URLs removed, sourced from release asset) opentyrian: - source_url: "https://buildbot.libretro.com/assets/system/OpenTyrian%20%28Game%20Data%29.zip" + source_url: "https://github.com/Abdess/retrobios/releases/download/large-files/opentyrian-data.zip" source_type: zip for_platforms: [retroarch, lakka, retropie] local_cache: data/opentyrian description: "OpenTyrian Tyrian 2.1 freeware game data" # ref: syobonaction — system/syobonaction/ - # Freeware game data from OpenSyobonAction + # Freeware game data from OpenSyobonAction (BGM, res, SE directories) syobonaction: - source_url: "https://github.com/akemin-dayo/OpenSyobonAction" - source_type: git_subtree - source_path: "res" + source_url: "https://github.com/akemin-dayo/OpenSyobonAction/archive/refs/heads/{version}.tar.gz" + source_type: tarball + source_path: "OpenSyobonAction-master" version: master local_cache: data/syobonaction + exclude: [DxLib.cpp, DxLib.h, icon.ico, joyconfig.h, loadg.cpp, main.cpp, main.h, Makefile, README_ja.md, README.md] description: "Syobon Action (Cat Mario) game data (sprites, BGM, SE)" # ========================================================================= @@ -190,9 +191,10 @@ data_directories: # Not on buildbot — sourced from libretro repo # 532 files (tiles, fonts, databases, lua scripts, level descriptions) stonesoup: - source_url: "https://github.com/libretro/crawl-ref" - source_type: git_subtree - source_path: "crawl-ref/source/dat" + source_url: "https://github.com/libretro/crawl-ref/archive/refs/heads/{version}.tar.gz" + source_type: tarball + source_path: "crawl-ref-master/crawl-ref/source/dat" + version: master local_cache: data/stonesoup description: "DCSS game data (tiles, fonts, databases, lua, level descriptions)"