aboutsummaryrefslogtreecommitdiff
path: root/repos/crates_io.py
blob: 42680f07cd63b7f299dbef8651c217b7dffb31fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import csv
from io import TextIOWrapper
from pathlib import Path
import tarfile
from typing import Mapping

from .base import Repository, Version

# Names exported by `from <this module> import *`: only the `crates`
# repository object defined at the bottom of the file.
__all__ = [
    'crates',
]

# Set the csv module's maximum field size to 69696969. This is a module-level
# side effect at import time and applies to every csv parser in the process.
# NOTE(review): presumably the crates.io dump contains fields longer than the
# csv default limit -- confirm before lowering or removing this.
csv.field_size_limit(69696969)

def _csv_rows(archive: tarfile.TarFile, member: tarfile.TarInfo):
    """Yield each row of a CSV archive member as a dict keyed by column name.

    Yields nothing when the member carries no file data (``extractfile``
    returns ``None`` for members that are not regular files or links).
    """
    raw = archive.extractfile(member)
    if raw is None:
        return
    # newline='' leaves line splitting to the csv module, which is what the
    # csv documentation asks for so that newlines embedded in quoted fields
    # are interpreted correctly.
    with TextIOWrapper(raw, encoding='UTF-8', newline='') as text:
        yield from csv.DictReader(text)


def parse_cached(cached: Path) -> Mapping[str, Version]:
    """Map each crate name to its greatest non-yanked version.

    Args:
        cached: Path to a tar archive (any compression ``tarfile.open``
            auto-detects) containing a ``crates.csv`` member with ``id`` and
            ``name`` columns and a ``versions.csv`` member with ``crate_id``,
            ``num`` and ``yanked`` columns. Members may live in any
            directory of the archive.

    Returns:
        A dict from crate name to the greatest ``Version`` (compared with
        ``<``) among that crate's rows whose ``yanked`` column is not
        ``'t'``. Crates without such a row, and versions whose crate id has
        no name, are omitted.
    """
    names = {}
    latest = {}
    # The context manager closes the archive even if parsing raises.
    with tarfile.open(cached) as dump:
        for member in dump.getmembers():
            # Compare the base name rather than a raw suffix so that other
            # CSV members whose names merely end with the same text (e.g. a
            # `default_versions.csv`) are not parsed as the wrong table.
            base_name = member.name.rsplit('/', 1)[-1]
            if base_name == 'crates.csv':
                for row in _csv_rows(dump, member):
                    names[row['id']] = row['name']
            elif base_name == 'versions.csv':
                for row in _csv_rows(dump, member):
                    if row['yanked'] == 't':
                        continue
                    crate_id = row['crate_id']
                    num = row['num']
                    candidate = Version(num, num)
                    best = latest.get(crate_id)
                    # Skip only when strictly smaller than what we already
                    # have; an equal version replaces the stored one.
                    if best is not None and candidate < best:
                        continue
                    latest[crate_id] = candidate
    # Keep only crate ids that have both a name and a surviving version.
    return {
        names[crate_id]: version
        for crate_id, version in latest.items()
        if crate_id in names
    }

# Repository definition for crates.io; this is the name listed in __all__.
# `index_url` points at the crates.io database dump tarball and `parse` is
# parse_cached, which reads a tar archive from a local path.
# NOTE(review): presumably Repository downloads `index_url` and hands the
# cached file to `parse`; the meaning of family=None / section=None is
# defined by Repository in .base, which is not visible here -- confirm.
crates = Repository(
    family=None,
    repo='crates.io',
    section=None,
    index_url='https://static.crates.io/db-dump.tar.gz',
    parse=parse_cached,
)