"""Repository definition for crates.io, parsed from its database dump."""

import csv
from io import TextIOWrapper
from pathlib import Path
import tarfile
from typing import Iterator, Mapping

from .base import Repository, Version

__all__ = [
    'crates',
]

# Raise the csv module's per-field size limit (this setting is process-wide).
# Presumably the dump contains fields longer than the default limit allows.
csv.field_size_limit(69696969)


def _iter_csv_rows(
    dump: tarfile.TarFile,
    member: tarfile.TarInfo,
) -> Iterator[Mapping[str, str]]:
    """Yield each row of a CSV archive member as a dict keyed by column name."""
    raw = dump.extractfile(member)
    if raw is None:
        # extractfile() returns None for members that are not regular files
        # (or links to them); there is nothing to parse in that case.
        return
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for fields with embedded newlines.
    with TextIOWrapper(raw, encoding='UTF-8', newline='') as text:
        yield from csv.DictReader(text)


def parse_cached(cached: Path) -> Mapping[str, Version]:
    """Parse a cached crates.io database dump.

    The archive is searched for members whose names end in ``crates.csv``
    (columns ``id`` and ``name`` are read) and ``versions.csv`` (columns
    ``crate_id``, ``num`` and ``yanked`` are read).

    Args:
        cached: Path to the downloaded dump, in any format that
            ``tarfile.open`` can detect.

    Returns:
        A mapping from crate name to the greatest non-yanked ``Version``
        seen for that crate. Crates with no non-yanked version, and versions
        whose crate id has no name, are left out.
    """
    crate_name = {}
    crate_version = {}

    # The context manager closes the archive even when parsing fails.
    with tarfile.open(cached) as dump:
        # Iterate the archive lazily instead of calling getmembers(), so each
        # member's data is read as the archive is walked.
        for member in dump:
            if member.name.endswith('crates.csv'):
                for crate in _iter_csv_rows(dump, member):
                    crate_name[crate['id']] = crate['name']
            elif member.name.endswith('versions.csv'):
                for row in _iter_csv_rows(dump, member):
                    if row['yanked'] == 't':
                        continue
                    crate_id = row['crate_id']
                    num = row['num']
                    this_version = Version(num, num)
                    known = crate_version.get(crate_id)
                    if known is not None and this_version < known:
                        continue
                    crate_version[crate_id] = this_version

    # The two CSV files can appear in either order inside the archive, so the
    # ids are only joined once both tables have been read.
    return {
        crate_name[crate_id]: version
        for crate_id, version in crate_version.items()
        if crate_id in crate_name
    }


crates = Repository(
    family=None,
    repo='crates.io',
    section=None,
    index_url='https://static.crates.io/db-dump.tar.gz',
    parse=parse_cached,
)