From 40e39981c511823df24da419799a84d7f8b88280 Mon Sep 17 00:00:00 2001
From: Melody Horn
Date: Sun, 11 Apr 2021 22:21:29 -0600
Subject: store info in an actual database

---
 repos/__init__.py | 12 ++++++++----
 repos/base.py     | 20 ++++++++++----------
 repos/db.py       | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt  |  1 +
 update.py         |  5 +++++
 5 files changed, 88 insertions(+), 14 deletions(-)
 create mode 100644 repos/db.py
 create mode 100644 update.py

diff --git a/repos/__init__.py b/repos/__init__.py
index 416a0ac..75f25c3 100644
--- a/repos/__init__.py
+++ b/repos/__init__.py
@@ -16,19 +16,22 @@ def repos_from(module):
 all_repos: List[Repository] = [
     *repos_from(alpine_linux),
     *repos_from(arch_linux),
-    # *repos_from(crates_io),
+    *repos_from(crates_io),
     *repos_from(debian),
     *repos_from(homebrew),
     *repos_from(ubuntu),
 ]
 
+def update_versions():
+    for repo in all_repos:
+        repo.update()
+
 def get_versions(package: str, args: Mapping[str, str]) -> Mapping[str, Version]:
     special_cases = dict()
     for repo, name in args.items():
         special_cases[repo] = name
     result = dict()
     for repo in all_repos:
-        repo_versions = repo.get_versions()
         if slug(repo.full_name()) in special_cases:
             package_here = special_cases[slug(repo.full_name())]
         elif slug(repo.family) in special_cases:
@@ -37,6 +40,7 @@ def get_versions(package: str, args: Mapping[str, str]) -> Mapping[str, Version]
             package_here = special_cases[slug(repo.repo)]
         else:
             package_here = package
-        if package_here in repo_versions:
-            result[repo.full_name()] = repo_versions[package_here]
+        repo_version = repo.get_version(package_here)
+        if repo_version is not None:
+            result[repo.full_name()] = repo_version
     return result
diff --git a/repos/base.py b/repos/base.py
index c20853a..1b4c42b 100644
--- a/repos/base.py
+++ b/repos/base.py
@@ -1,6 +1,5 @@
 from dataclasses import dataclass, asdict as dataclass_asdict
 from functools import total_ordering
-import gzip
 import json
 from pathlib import Path
 import re
@@ -9,6 +8,8 @@ from typing import Any, Callable, Mapping, Optional
 import requests
 import semver
 
+from . import db
+
 __all__ = [
     'Repository',
     'slug',
@@ -79,7 +80,7 @@ class Repository:
     def _cache_file(self, name: str) -> Path:
         return self._cache_dir() / name
 
-    def get_versions(self) -> Mapping[str, Version]:
+    def update(self):
         self._cache_dir().mkdir(parents=True, exist_ok=True)
 
         headers = dict()
@@ -109,12 +110,11 @@ class Repository:
                 set_etag = response.headers['ETag']
                 etag_file.write_text(set_etag)
 
-        parsed_file = self._cache_file('parsed.json.gz')
-        if response.status_code != requests.codes.not_modified or not parsed_file.exists():
             parsed_data = self.parse(downloaded_file)
-            with gzip.open(parsed_file, 'wt') as f:
-                json.dump(parsed_data, f, cls=JSONEncoder)
-            return parsed_data
-        else:
-            with gzip.open(parsed_file, 'rt') as f:
-                return json.load(f, cls=JSONDecoder)
+            db.write(self.full_name(), parsed_data)
+
+    def get_version(self, package_name: str) -> Optional[Version]:
+        db_result = db.read(self.full_name(), package_name)
+        if db_result is None:
+            return None
+        return Version(**db_result)
diff --git a/repos/db.py b/repos/db.py
new file mode 100644
index 0000000..1cd95d5
--- /dev/null
+++ b/repos/db.py
@@ -0,0 +1,64 @@
+"""Store per-repository package versions in a PostgreSQL table."""
+
+from contextlib import closing
+import os
+from typing import Mapping, TYPE_CHECKING
+
+import psycopg2
+
+if TYPE_CHECKING:
+    from .base import Version
+
+def connect():
+    """Open a new connection to the database named by $DATABASE_URL."""
+    return psycopg2.connect(os.environ['DATABASE_URL'])
+
+def poke_table(force=False):
+    """Create the versions table if it is missing.
+
+    With force=True the existing table is dropped first, discarding its rows.
+    """
+    # Exiting psycopg2's `with conn` block only commits or rolls back; it does
+    # not close the connection, so closing() is needed to avoid leaking one
+    # connection per call.
+    with closing(connect()) as conn:
+        with conn, conn.cursor() as cur:
+            if force:
+                cur.execute('DROP TABLE IF EXISTS versions CASCADE;')
+            cur.execute('CREATE TABLE IF NOT EXISTS versions ('
+                'repo text,'
+                'package_name text,'
+                'clean_version text,'
+                'orig_version text,'
+                'PRIMARY KEY (repo, package_name)'
+            ');')
+
+def write(repo: str, data: Mapping[str, 'Version']):
+    """Insert or update the stored version of every package in data for repo."""
+    # closing() releases the connection; `with conn` only ends the transaction.
+    with closing(connect()) as conn:
+        with conn, conn.cursor() as cur:
+            insert = 'INSERT INTO versions (repo, package_name, clean_version, orig_version)' \
+                'VALUES (%(repo)s, %(package_name)s, %(clean_version)s, %(orig_version)s)' \
+                'ON CONFLICT (repo, package_name) DO UPDATE SET ' \
+                'clean_version = EXCLUDED.clean_version,' \
+                'orig_version = EXCLUDED.orig_version'
+            for package, version in data.items():
+                cur.execute(insert, {'repo': repo, 'package_name': package, 'clean_version': version.clean, 'orig_version': version.original})
+
+def read(repo: str, package: str):
+    """Look up one package in one repo.
+
+    Returns a dict with 'clean' and 'original' keys, or None when no row matches.
+    """
+    # closing() releases the connection; `with conn` only ends the transaction.
+    with closing(connect()) as conn:
+        with conn, conn.cursor() as cur:
+            sql = 'SELECT clean_version, orig_version FROM versions ' \
+                'WHERE repo = %(repo)s AND package_name = %(package_name)s'
+            cur.execute(sql, {'repo': repo, 'package_name': package})
+            result = cur.fetchone()
+            if result is None:
+                return None
+            clean, orig = result
+            return {'clean': clean, 'original': orig}
diff --git a/requirements.txt b/requirements.txt
index b7f0093..8f00072 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 Flask~=1.1.2
 Jinja2~=2.11.3
+psycopg2~=2.8.6
 requests~=2.25.1
 semver~=2.13.0
 gunicorn; sys_platform != 'win32'
diff --git a/update.py b/update.py
new file mode 100644
index 0000000..95e4e03
--- /dev/null
+++ b/update.py
@@ -0,0 +1,5 @@
+import repos
+import repos.db
+
+repos.db.poke_table(force=False)
+repos.update_versions()
-- 
cgit v1.2.3