# Recovered from commit 72c0ecc21f571bba38c889c57be0ba53aa72c015
# ("implement crates.io", Melody Horn, Mon, 29 Mar 2021 18:55:08 -0600),
# which created repos/crates_io.py.
"""Repository definition for crates.io, backed by its database dump tarball."""

import csv
from io import TextIOWrapper
from pathlib import Path
import tarfile
from typing import Mapping

from .base import Repository, Version

__all__ = [
    'crates',
]

# Raise the csv module's per-field size limit well above its default;
# presumably some fields in the dump are larger than the default allows.
csv.field_size_limit(69696969)


def _csv_rows(dump, member):
    """Yield each row of CSV archive *member* as a dict keyed by column name."""
    raw = dump.extractfile(member)
    if raw is None:
        # extractfile() returns None for members that are not regular files
        # (or links to them); there is nothing to read in that case.
        return
    # newline='' lets the csv module handle newlines embedded in quoted
    # fields itself, as its documentation requires.
    with TextIOWrapper(raw, encoding='UTF-8', newline='') as text:
        yield from csv.DictReader(text)


def parse_cached(cached: Path) -> Mapping[str, Version]:
    """Parse a cached crates.io database dump into a name -> version mapping.

    Reads ``crates.csv`` (columns ``id`` and ``name``) and ``versions.csv``
    (columns ``crate_id``, ``num`` and ``yanked``) from the tar archive at
    *cached*. Versions whose ``yanked`` column is ``'t'`` are ignored. For
    each crate the greatest remaining version, as ordered by ``Version``
    comparison, is kept.

    Args:
        cached: Path to the downloaded dump archive (any format
            ``tarfile.open`` can detect).

    Returns:
        A dict mapping crate name to its selected ``Version``. Crates that
        have no non-yanked version, and versions whose crate id has no
        matching row in ``crates.csv``, are omitted.
    """
    crate_name = {}
    crate_version = {}
    # The context manager guarantees the archive is closed even if parsing
    # raises part-way through.
    with tarfile.open(cached) as dump:
        for archive_member in dump.getmembers():
            # Match on the exact file name rather than a suffix, so other
            # tables whose names merely end the same way (for example a
            # ``default_versions.csv``) are not mistaken for these two.
            basename = archive_member.name.rsplit('/', 1)[-1]
            if basename == 'crates.csv':
                for crate in _csv_rows(dump, archive_member):
                    crate_name[crate['id']] = crate['name']
            elif basename == 'versions.csv':
                for row in _csv_rows(dump, archive_member):
                    if row['yanked'] == 't':
                        continue
                    crate_id = row['crate_id']
                    candidate = Version(row['num'], row['num'])
                    current = crate_version.get(crate_id)
                    # Keep the existing entry only when the candidate is
                    # strictly older; ties overwrite, as before.
                    if current is not None and candidate < current:
                        continue
                    crate_version[crate_id] = candidate
    # Join the two tables on crate id, dropping ids present in only one.
    return {
        crate_name[crate_id]: version
        for crate_id, version in crate_version.items()
        if crate_id in crate_name
    }


crates = Repository(
    family=None,
    repo='crates.io',
    index_url='https://static.crates.io/db-dump.tar.gz',
    parse=parse_cached,
)