mirror of
https://github.com/Abdess/retrobios.git
synced 2026-06-27 05:02:48 +00:00
refactor: extract _fetch_raw to BaseScraper (DRY)
An identical _fetch_raw() implementation (URL fetch + cache + error handling) was duplicated in 4 scrapers. It now lives in BaseScraper, whose __init__ takes a url param. Each scraper passes its url to super().__init__() and inherits _fetch_raw(). This eliminates ~48 lines of duplicated code. The DRY audit is now clean: resolve logic in common.py, scraper CLI in base_scraper, _fetch_raw in BaseScraper. The remaining duplications are justified (different list_platforms semantics, context-specific hash computation).
This commit is contained in:
parent
2466fc4a97
commit
3de4bf8190
5 changed files with 22 additions and 53 deletions
|
|
@ -48,6 +48,24 @@ class ChangeSet:
|
|||
class BaseScraper(ABC):
|
||||
"""Abstract base class for platform BIOS requirement scrapers."""
|
||||
|
||||
def __init__(self, url: str = ""):
    """Store the source URL and reset the fetched-content slot.

    Args:
        url: Address of the source to scrape; empty by default.
    """
    # None means nothing has been fetched yet.
    self._raw_data: str | None = None
    self.url = url
|
||||
|
||||
def _fetch_raw(self) -> str:
    """Return the raw text at ``self.url``, fetching it at most once.

    The decoded body is stored on ``self._raw_data`` after the first
    successful fetch and returned directly on every later call.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        ValueError: If ``self.url`` is empty.
        ConnectionError: If opening the URL or reading the body fails
            with ``urllib.error.URLError`` or a timeout.
    """
    if self._raw_data is not None:
        return self._raw_data
    if not self.url:
        raise ValueError("No source URL configured")

    req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            payload = resp.read()
    except (urllib.error.URLError, TimeoutError) as e:
        # A timeout while reading the body is raised as TimeoutError rather
        # than URLError; wrap both so callers see one failure type.
        raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e

    # Decode outside the try block: a bad encoding is not a connection error.
    self._raw_data = payload.decode("utf-8")
    return self._raw_data
|
||||
|
||||
@abstractmethod
|
||||
def fetch_requirements(self) -> list[BiosRequirement]:
|
||||
"""Fetch current BIOS requirements from the platform source."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue