# Reconstructed from the patch for commit
# 7fb9136bf70951a3da3acfedc3d5cff12e7dc12c ("throw together a very rough
# draft", Melody Horn, Mon, 29 Mar 2021 15:55:43 -0600), which created
# repos/base.py.
"""Download, cache and parse a repository index file.

A :class:`Repository` fetches ``index_url`` with a conditional HTTP request,
keeps the raw download and the parsed result under ``data/<family>/<repo>/``,
and only re-parses when the server sends a new copy.
"""

from dataclasses import dataclass
import datetime
import gzip
import json
import os
from pathlib import Path
import re
from typing import Callable, Mapping

import requests

# strftime/strptime pattern for the date format used in the
# If-Modified-Since and Last-Modified headers.
HTTP_DATE = '%a, %d %b %Y %H:%M:%S GMT'
# Raw string: '\W' in a normal string literal is an invalid escape sequence.
SLUGIFY = re.compile(r'\W+')

# Size of the pieces the response body is streamed to disk in.
_CHUNK_SIZE = 64 * 1024


def slug(text: str) -> str:
    """Return *text* lower-cased with each run of non-word characters
    collapsed to a single ``-`` and leading/trailing ``-`` removed."""
    return SLUGIFY.sub('-', text.lower()).strip('-')


@dataclass()
class Repository:
    """One remote index plus the callable that knows how to parse it."""

    # Used (slugified) as the first cache directory level and in log output.
    family: str
    # Used (slugified) as the second cache directory level and in log output.
    repo: str
    # URL fetched by get_versions().
    index_url: str
    # Called with the path of the downloaded file; its result is returned by
    # get_versions() and stored as JSON, so it must be JSON-serialisable.
    parse: Callable[[Path], Mapping[str, str]]

    def _full_name(self) -> str:
        """Return the human-readable name printed when re-downloading."""
        return f'{self.family} {self.repo}'

    def _cache_dir(self) -> Path:
        """Return the directory holding this repository's cache files."""
        return Path('data') / slug(self.family) / slug(self.repo)

    def _cache_file(self, name: str) -> Path:
        """Return the path of the cache file called *name*."""
        return self._cache_dir() / name

    def _store_download(self, response: requests.Response, target: Path) -> None:
        """Stream *response* to *target* and stamp it with Last-Modified.

        The body is written to a sibling ``.part`` file and renamed into
        place afterwards, so an interrupted transfer never leaves a truncated
        file whose fresh mtime would make the next conditional request
        succeed with a 304.
        """
        partial = target.with_name(target.name + '.part')
        with partial.open('wb') as f:
            for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                f.write(chunk)

        last_modified = response.headers.get('Last-Modified', '')
        if last_modified:
            try:
                modified_at = datetime.datetime.strptime(last_modified, HTTP_DATE)
            except ValueError:
                # Header is not in the expected format: treat it as absent
                # and keep the download time as the file's mtime.
                modified_at = None
            if modified_at is not None:
                # strptime() yields a naive datetime, which timestamp() would
                # interpret as local time. The header is in GMT, and the
                # mtime is later formatted as UTC, so mark it as UTC here.
                modified_at = modified_at.replace(tzinfo=datetime.timezone.utc)
                os.utime(partial, (
                    datetime.datetime.now().timestamp(),
                    modified_at.timestamp(),
                ))
        os.replace(partial, target)

    def _store_parsed(self, parsed_data: Mapping[str, str], target: Path) -> None:
        """Write *parsed_data* to *target* as gzip-compressed JSON.

        Written via a ``.part`` file and renamed so a failed dump cannot
        leave a corrupt cache behind.
        """
        partial = target.with_name(target.name + '.part')
        with gzip.open(partial, 'wt') as f:
            json.dump(parsed_data, f)
        os.replace(partial, target)

    def get_versions(self) -> Mapping[str, str]:
        """Return the parsed index, refreshing the cache when it is stale.

        Sends a GET for ``index_url`` with ``If-Modified-Since`` set to the
        mtime of the cached download (the Unix epoch when there is none).
        On ``304 Not Modified`` the cached parse result is returned; if that
        file is missing, the cached download is parsed again. On any other
        successful status the body is downloaded, parsed with ``self.parse``
        and both results are cached.

        Raises:
            requests.HTTPError: the server answered with an error status.
        """
        self._cache_dir().mkdir(parents=True, exist_ok=True)
        downloaded_file = self._cache_file('downloaded')
        parsed_file = self._cache_file('parsed.json.gz')

        try:
            cached_mtime = downloaded_file.stat().st_mtime
        except FileNotFoundError:
            cached_mtime = 0
        since = datetime.datetime.fromtimestamp(cached_mtime, datetime.timezone.utc)

        # The context manager releases the streamed connection on every path.
        with requests.get(self.index_url, headers={
            'If-Modified-Since': since.strftime(HTTP_DATE),
        }, stream=True) as response:
            if response.status_code != requests.codes.not_modified:
                response.raise_for_status()
                print('Re-downloading', self._full_name())
                self._store_download(response, downloaded_file)
            elif parsed_file.exists():
                with gzip.open(parsed_file, 'rt') as f:
                    return json.load(f)
            # Otherwise: 304 but no parsed cache (e.g. an earlier parse
            # failed) -- fall through and rebuild it from the cached download.

        parsed_data = self.parse(downloaded_file)
        self._store_parsed(parsed_data, parsed_file)
        return parsed_data