1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
from collections import defaultdict
import csv
from dataclasses import dataclass
import datetime
from functools import total_ordering
import io
import json
from pathlib import Path
import shutil
import tarfile
from typing import Optional
import urllib.request

from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape
@total_ordering
@dataclass()
class Version:
    """A parsed ``MAJOR.MINOR.PATCH[-PRE_RELEASE][+BUILD]`` version string.

    Build metadata (everything after the first ``+``) is discarded when
    parsing.  Equality is the field-wise comparison generated by
    ``@dataclass``; ordering is defined by ``__lt__`` and completed by
    ``@total_ordering``.

    Attributes:
        major: First numeric component.
        minor: Second numeric component.
        patch: Third numeric component.
        pre_release: Text after the first ``-`` of the version (without the
            dash), or ``''`` when the version has no pre-release part.
    """

    major: int
    minor: int
    patch: int
    pre_release: str

    def __init__(self, text):
        """Parse *text* into its components.

        Raises:
            ValueError: If the core part is not exactly three dot-separated
                integers.
        """
        # Drop build metadata first so a '-' inside it is never mistaken for
        # the pre-release separator.
        text = text.split('+')[0]
        # Only the FIRST '-' separates core from pre-release; pre-release
        # identifiers may themselves contain hyphens (e.g. "1.0.0-rc-1").
        core, _, self.pre_release = text.partition('-')
        self.major, self.minor, self.patch = [int(x) for x in core.split('.')]

    def __str__(self):
        """Return ``major.minor.patch`` plus ``-pre_release`` when present."""
        suffix = f'-{self.pre_release}' if self.pre_release else ''
        return f'{self.major}.{self.minor}.{self.patch}{suffix}'

    def _pre_release_key(self):
        """Return a sortable key for the dot-separated pre-release identifiers.

        Purely numeric identifiers compare numerically and always sort below
        alphanumeric ones, which compare as plain strings.  Because the key is
        a list, a version whose identifiers are a strict prefix of another's
        sorts first (``alpha`` < ``alpha.1``).
        """
        key = []
        for identifier in self.pre_release.split('.'):
            # isascii() guards against non-ASCII digits that isdigit()
            # accepts but int() rejects.
            if identifier.isascii() and identifier.isdigit():
                key.append((0, int(identifier), ''))
            else:
                key.append((1, 0, identifier))
        return key

    def __lt__(self, other: 'Version') -> bool:
        """Return True when this version has lower precedence than *other*."""
        if not isinstance(other, Version):
            return NotImplemented
        own_core = (self.major, self.minor, self.patch)
        other_core = (other.major, other.minor, other.patch)
        if own_core != other_core:
            return own_core < other_core
        if not (self.pre_release and other.pre_release):
            # With equal cores, a pre-release sorts below the plain release;
            # two plain releases are equal, hence not less-than.
            return bool(self.pre_release) and not other.pre_release
        return self._pre_release_key() < other._pre_release_key()

    @property
    def is_1_0(self):
        """True when the major component is at least 1."""
        return self.major >= 1
@dataclass()
class Crate:
    """Summary of one crate, filled in incrementally while reading the dump.

    Attributes:
        name: Crate name, taken from the ``name`` column of ``crates.csv``.
        downloads: Download count, taken from the ``downloads`` column of
            ``crates.csv``.
        latest_version: Highest non-yanked version without a pre-release
            part, or ``None`` when no such version has been seen.
        latest_pre_release_version: Highest non-yanked version of any kind
            (pre-release or not), or ``None`` when none has been seen.
    """

    name: str
    downloads: int
    # Both version fields start out unset, so they are Optional.  The forward
    # reference keeps this definition independent of evaluation order.
    latest_version: Optional['Version'] = None
    latest_pre_release_version: Optional['Version'] = None
# The cached dump is named after today's date, so the download happens at
# most once per day per working directory.
today = datetime.date.today().strftime('%Y-%m-%d')
dump_tarball = Path(f'db-dump-{today}.tar.gz')
if not dump_tarball.exists():
    # Stream to a temporary name and rename on success: the dump is never
    # held in memory as a whole, and an interrupted download cannot leave a
    # truncated tarball that later runs would mistake for a complete one.
    partial_download = dump_tarball.with_name(dump_tarball.name + '.part')
    with urllib.request.urlopen('https://static.crates.io/db-dump.tar.gz') as response, \
            open(partial_download, 'wb') as partial_file:
        shutil.copyfileobj(response, partial_file)
    partial_download.replace(dump_tarball)

# NOTE(review): presumably raised because some fields in the dump exceed the
# csv module's default field size limit -- confirm the value is still enough.
csv.field_size_limit(69696969)

# Crates are keyed by their dump id.  A defaultdict is used because the
# member order inside the tarball is not relied upon: versions.csv may be
# read before crates.csv, in which case placeholder entries ('' / -1) are
# created first and completed later.
crates = defaultdict(lambda: Crate('', -1))
metadata = None
with tarfile.open(dump_tarball) as dump:
    for item in dump:
        # Match on the exact base name so that other members which merely end
        # in the same suffix (such as a `default_versions.csv`, if present)
        # are not misparsed as the versions table.
        member_name = item.name.rsplit('/', 1)[-1]
        if member_name == 'metadata.json':
            metadata = json.load(dump.extractfile(item))
        elif member_name == 'crates.csv':
            reader = csv.DictReader(
                io.TextIOWrapper(dump.extractfile(item), encoding='UTF-8', newline=''))
            for row in reader:
                entry = crates[row['id']]
                entry.name = row['name']
                entry.downloads = int(row['downloads'])
        elif member_name == 'versions.csv':
            reader = csv.DictReader(
                io.TextIOWrapper(dump.extractfile(item), encoding='UTF-8', newline=''))
            for row in reader:
                if row['yanked'] == 't':
                    continue
                entry = crates[row['crate_id']]
                this_version = Version(row['num'])
                # Track the highest version overall (pre-releases included) ...
                if (entry.latest_pre_release_version is None
                        or entry.latest_pre_release_version < this_version):
                    entry.latest_pre_release_version = this_version
                # ... and, separately, the highest version without a
                # pre-release part.
                if this_version.pre_release == '' and (
                        entry.latest_version is None
                        or entry.latest_version < this_version):
                    entry.latest_version = this_version

most_downloaded_crates = sorted(crates.values(), key=lambda x: x.downloads, reverse=True)
crates = most_downloaded_crates[:360]

# A crate whose releases are all yanked or pre-release has no latest_version;
# count it as "not 1.0" instead of crashing on None.
crates_at_1_0 = sum(
    1 for crate in crates
    if crate.latest_version is not None and crate.latest_version.is_1_0
)
print('{}/{} crates at or above version 1.0'.format(crates_at_1_0, len(crates)))

env = Environment(
    loader=FileSystemLoader('.'),
    autoescape=select_autoescape(['html', 'xml']),
    # Fail loudly on any template variable that is not defined.
    undefined=StrictUndefined
)
index_template = env.get_template('index.html')
# NOTE(review): index.html is not visible here; if it dereferences
# crate.latest_version it needs the same None guard as above -- confirm.
rendered_index = index_template.render(crates=crates, metadata=metadata)

out_file = Path('out', 'index.html')
out_file.parent.mkdir(parents=True, exist_ok=True)
# Write explicitly as UTF-8 so the output does not depend on the platform's
# default encoding.
out_file.write_text(rendered_index, encoding='utf-8')
|