From 72c0ecc21f571bba38c889c57be0ba53aa72c015 Mon Sep 17 00:00:00 2001
From: Melody Horn
Date: Mon, 29 Mar 2021 18:55:08 -0600
Subject: implement crates.io

---
 repos/__init__.py  |  3 ++-
 repos/crates_io.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 repos/crates_io.py

diff --git a/repos/__init__.py b/repos/__init__.py
index ab6aa76..7023071 100644
--- a/repos/__init__.py
+++ b/repos/__init__.py
@@ -1,6 +1,6 @@
 from typing import Mapping, List
 
-from . import alpine_linux, arch_linux
+from . import alpine_linux, arch_linux, crates_io
 from .base import Repository, Version
 
 __all__ = [
@@ -16,6 +16,7 @@ def repos_from(module):
 all_repos: List[Repository] = [
     *repos_from(alpine_linux),
     *repos_from(arch_linux),
+    *repos_from(crates_io),
 ]
 
 def get_versions(package: str) -> Mapping[str, Version]:
diff --git a/repos/crates_io.py b/repos/crates_io.py
new file mode 100644
index 0000000..95078e3
--- /dev/null
+++ b/repos/crates_io.py
@@ -0,0 +1,64 @@
+import csv
+from io import TextIOWrapper
+from pathlib import Path
+import tarfile
+from typing import Mapping
+
+from .base import Repository, Version
+
+__all__ = [
+    'crates',
+]
+
+# Raise the csv module's per-field size cap; presumably some dump fields
+# exceed the default limit (TODO confirm which column needs this).
+csv.field_size_limit(69696969)
+
+def _csv_rows(dump: tarfile.TarFile, member: tarfile.TarInfo) -> csv.DictReader:
+    """Read one CSV archive member as dicts keyed by its header row."""
+    # newline='' leaves line-ending handling to the csv module, as its
+    # documentation asks for file objects.
+    return csv.DictReader(TextIOWrapper(dump.extractfile(member), 'UTF-8', newline=''))
+
+def parse_cached(cached: Path) -> Mapping[str, Version]:
+    """Map each crate name to its greatest non-yanked version.
+
+    `cached` is a local tarball of the crates.io database dump. Names come
+    from `crates.csv` and versions from `versions.csv`, joined on crate id;
+    "greatest" is decided by `Version`'s own `<` comparison.
+    """
+    crate_name = dict()
+    crate_version = dict()
+    # The context manager closes the archive even if parsing raises.
+    with tarfile.open(cached) as dump:
+        for archive_member in dump.getmembers():
+            # Match the bare file name, not a path suffix, so that a table
+            # such as `default_versions.csv` is not read as `versions.csv`.
+            table = archive_member.name.rsplit('/', 1)[-1]
+            if table == 'crates.csv':
+                for crate in _csv_rows(dump, archive_member):
+                    crate_name[crate['id']] = crate['name']
+            elif table == 'versions.csv':
+                for row in _csv_rows(dump, archive_member):
+                    if row['yanked'] == 't':
+                        continue
+                    crate_id = row['crate_id']
+                    num = row['num']
+                    this_version = Version(num, num)
+                    if crate_id in crate_version:
+                        if this_version < crate_version[crate_id]:
+                            continue
+                    crate_version[crate_id] = this_version
+    # Keep only crates that have both a name and a usable version.
+    return {
+        name: crate_version[crate_id]
+        for crate_id, name in crate_name.items()
+        if crate_id in crate_version
+    }
+
+crates = Repository(
+    family=None,
+    repo='crates.io',
+    index_url='https://static.crates.io/db-dump.tar.gz',
+    parse=parse_cached,
+)
-- 
cgit v1.2.3