aboutsummaryrefslogtreecommitdiff
path: root/update.py
blob: 7f84193142cf13946dccb026b876300634b7bd13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from collections import defaultdict
import csv
from dataclasses import dataclass
import datetime
from functools import total_ordering
import io
import json
from pathlib import Path
import tarfile
import urllib.request

from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape


@total_ordering
@dataclass()
class Version:
    """A semantic version of the form ``MAJOR.MINOR.PATCH[-PRE_RELEASE][+BUILD]``.

    Build metadata (everything after ``+``) is discarded while parsing, so it
    takes no part in equality or ordering. Ordering follows the SemVer
    precedence rules: the numeric core is compared first, a pre-release sorts
    below the release with the same core, and two pre-releases are compared
    identifier by identifier.
    """

    major: int
    minor: int
    patch: int
    pre_release: str  # '' for a normal release; stored without the leading '-'

    def __init__(self, text):
        """Parse *text* into ``major``, ``minor``, ``patch`` and ``pre_release``.

        Raises:
            ValueError: if the part before the first ``-`` is not exactly
                three dot-separated integers.
        """
        text = text.split('+', 1)[0]
        # Split on the first hyphen only: pre-release identifiers may contain
        # hyphens themselves (e.g. '1.0.0-alpha-1').
        core, _, self.pre_release = text.partition('-')
        self.major, self.minor, self.patch = [int(x) for x in core.split('.')]

    def __str__(self):
        """Return the version as text, without build metadata."""
        suffix = '-' + self.pre_release if self.pre_release else ''
        return '{}.{}.{}{}'.format(self.major, self.minor, self.patch, suffix)

    def _pre_release_key(self):
        """Return a list that sorts in SemVer pre-release precedence order."""
        key = []
        for identifier in self.pre_release.split('.'):
            if identifier.isascii() and identifier.isdigit():
                # Numeric identifiers compare as integers and always rank
                # below alphanumeric ones.
                key.append((0, int(identifier), ''))
            else:
                key.append((1, 0, identifier))
        # List comparison makes a strict prefix sort first, matching the rule
        # that fewer identifiers means lower precedence when all else is equal.
        return key

    def __lt__(self, other: 'Version') -> bool:
        """Return True when *self* has lower precedence than *other*."""
        if not isinstance(other, Version):
            return NotImplemented
        mine = (self.major, self.minor, self.patch)
        theirs = (other.major, other.minor, other.patch)
        if mine != theirs:
            return mine < theirs
        if self.pre_release == other.pre_release:
            return False
        # Same core: a release ('' pre-release) outranks any pre-release.
        if self.pre_release == '':
            return False
        if other.pre_release == '':
            return True
        return self._pre_release_key() < other._pre_release_key()

    @property
    def is_1_0(self):
        """True when the major version is at least 1."""
        return self.major >= 1


@dataclass
class Crate:
    """Per-crate record built up while reading the database dump."""

    # Crate name and download count, both taken from crates.csv.
    name: str
    downloads: int
    # Highest non-yanked version without a pre-release tag; None until one is seen.
    latest_version: Version = None
    # Highest non-yanked version of any kind (pre-release or not); None until one is seen.
    latest_pre_release_version: Version = None


# The dump is cached on disk under a name that carries today's date; it is
# downloaded only when no file with that name exists yet.
today = format(datetime.date.today(), '%Y-%m-%d')

dump_tarball = Path('db-dump-{}.tar.gz'.format(today))
if not dump_tarball.exists():
    with urllib.request.urlopen('https://static.crates.io/db-dump.tar.gz') as f:
        payload = f.read()
    dump_tarball.write_bytes(payload)

# Raise the csv module's per-field size limit above its default
# (presumably some dump fields exceed it -- TODO confirm which).
csv.field_size_limit(69696969)

# Keyed by crate id. The default entry lets version rows be recorded even if
# versions.csv is read before the matching crates.csv row.
crates = defaultdict(lambda: Crate('', -1))
metadata = None
# Open the archive as a context manager so its file handle is closed once
# every member has been read.
with tarfile.open(dump_tarball) as dump:
    for item in dump:
        if item.name.endswith('metadata.json'):
            metadata = json.load(dump.extractfile(item))
        elif item.name.endswith('crates.csv'):
            reader = csv.DictReader(io.TextIOWrapper(dump.extractfile(item), 'UTF-8'))
            for row in reader:
                entry = crates[row['id']]
                entry.name = row['name']
                entry.downloads = int(row['downloads'])
        elif item.name.endswith('versions.csv'):
            reader = csv.DictReader(io.TextIOWrapper(dump.extractfile(item), 'UTF-8'))
            for row in reader:
                # Yanked versions never count as a crate's latest version.
                if row['yanked'] == 't':
                    continue
                entry = crates[row['crate_id']]
                this_version = Version(row['num'])
                # Track the highest version of any kind...
                if entry.latest_pre_release_version is None or entry.latest_pre_release_version < this_version:
                    entry.latest_pre_release_version = this_version
                # ...and, separately, the highest one without a pre-release tag.
                if this_version.pre_release == '':
                    if entry.latest_version is None or entry.latest_version < this_version:
                        entry.latest_version = this_version

# Rank every crate by download count and keep the 360 most downloaded.
most_downloaded_crates = sorted(crates.values(), key=lambda x: x.downloads, reverse=True)

crates = most_downloaded_crates[:360]

# A crate whose non-yanked versions are all pre-releases has no
# latest_version; count it as below 1.0 instead of failing on None.
stable_count = sum(
    1 for crate in crates
    if crate.latest_version is not None and crate.latest_version.is_1_0
)
print('{}/{} crates at or above version 1.0'.format(stable_count, len(crates)))

# Templates are loaded from the current working directory. StrictUndefined is
# Jinja2's undefined type that raises on use of a missing variable.
env = Environment(
    loader=FileSystemLoader('.'),
    autoescape=select_autoescape(['html', 'xml']),
    undefined=StrictUndefined
)
index_template = env.get_template('index.html')
rendered_index = index_template.render(crates=crates, metadata=metadata)

out_file = Path('out', 'index.html')
out_file.parent.mkdir(parents=True, exist_ok=True)
# Pin the output encoding: without it open() uses the platform default, which
# can fail or garble non-ASCII template text on non-UTF-8 systems.
with open(out_file, 'w', encoding='utf-8') as f:
    f.write(rendered_index)