# Reconstructed from the patch that creates update.py
# (commit f8383e59ceb16a37f3dbea7ec103552168376fa0, "initial website logic").
"""Build the index page from the crates.io database dump.

Running this module as a script downloads today's dump (unless a copy is
already on disk), works out the latest stable and latest overall version of
every crate, prints how many of the most-downloaded crates are at or above
version 1.0, and renders ``index.html`` into ``out/index.html``.
"""

import csv
from dataclasses import dataclass
import datetime
from functools import total_ordering
import io
import json
from pathlib import Path
import shutil
import tarfile
from typing import Optional
import urllib.request

DUMP_URL = 'https://static.crates.io/db-dump.tar.gz'
CSV_FIELD_SIZE_LIMIT = 69696969
TOP_CRATE_COUNT = 360


def _pre_release_key(pre_release):
    """Return a tuple that orders pre-release tags per SemVer 2.0.0 section 11.

    Purely numeric identifiers compare as integers and sort below
    alphanumeric ones; alphanumeric identifiers compare as ASCII strings.
    Because tuples compare element-wise, a tag that is a strict prefix of
    another (``alpha`` vs ``alpha.1``) sorts first.
    """
    key = []
    for ident in pre_release.split('.'):
        # isascii() keeps characters such as superscript digits, which
        # str.isdigit() accepts but int() rejects, out of the numeric branch.
        if ident.isascii() and ident.isdigit():
            key.append((0, int(ident), ''))
        else:
            key.append((1, 0, ident))
    return tuple(key)


@total_ordering
@dataclass()
class Version:
    """A semantic version parsed from text such as ``1.2.3-alpha.1+build``.

    Build metadata (everything after ``+``) is discarded. Equality is the
    dataclass field comparison; ordering is provided by ``__lt__`` together
    with ``functools.total_ordering``.
    """

    major: int
    minor: int
    patch: int
    pre_release: str  # empty string for a normal (non-pre-release) version

    def __init__(self, text):
        """Parse *text*; raises ValueError unless the core is ``X.Y.Z`` integers."""
        text = text.split('+')[0]
        # partition() splits on the first hyphen only, so a pre-release tag
        # that itself contains hyphens (e.g. "rc-1") is kept whole.
        core, _, self.pre_release = text.partition('-')
        self.major, self.minor, self.patch = [int(x) for x in core.split('.')]

    def __str__(self):
        suffix = '-' + self.pre_release if self.pre_release else ''
        return '{}.{}.{}{}'.format(self.major, self.minor, self.patch, suffix)

    def _sort_key(self):
        # The boolean makes a release (no pre-release tag) sort above every
        # pre-release that shares the same major.minor.patch.
        return (self.major, self.minor, self.patch,
                self.pre_release == '', _pre_release_key(self.pre_release))

    def __lt__(self, other: 'Version') -> bool:
        if not isinstance(other, Version):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    @property
    def is_1_0(self):
        """True when the major version is at least 1."""
        return self.major >= 1


@dataclass()
class Crate:
    """Per-crate data collected from the dump."""

    name: str
    downloads: int
    # Highest non-yanked version without a pre-release tag, if any.
    latest_version: Optional[Version] = None
    # Highest non-yanked version overall, pre-releases included.
    latest_pre_release_version: Optional[Version] = None


def download_dump(dump_tarball):
    """Download the database dump to *dump_tarball* (a ``Path``).

    The response is streamed to a sibling ``.part`` file and renamed once
    complete, so an interrupted download is never mistaken for a finished one.
    """
    partial = dump_tarball.with_name(dump_tarball.name + '.part')
    with urllib.request.urlopen(DUMP_URL) as response, open(partial, 'wb') as out:
        shutil.copyfileobj(response, out)
    partial.replace(dump_tarball)


def _csv_rows(dump, item):
    """Return a DictReader over the CSV member *item* of the open tar *dump*."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    text = io.TextIOWrapper(dump.extractfile(item), encoding='UTF-8', newline='')
    return csv.DictReader(text)


def read_dump(dump):
    """Collect crates and metadata from an open ``tarfile.TarFile``.

    Returns ``(crates, metadata)``: *crates* maps the crate id (as it appears
    in the CSV) to a ``Crate`` whose version fields have been filled in from
    the non-yanked rows of ``versions.csv``; *metadata* is the parsed
    ``metadata.json``, or ``None`` if the archive has none.

    Raises RuntimeError if ``versions.csv`` comes before ``crates.csv`` in
    the archive, since versions can only be attached to crates already read.
    """
    crates = {}
    metadata = None
    for item in dump:
        # Compare the exact file name so that another member whose name merely
        # ends with "versions.csv" or "crates.csv" is not parsed by mistake.
        base_name = item.name.rsplit('/', 1)[-1]
        if base_name == 'metadata.json':
            metadata = json.load(dump.extractfile(item))
        elif base_name == 'crates.csv':
            for row in _csv_rows(dump, item):
                crates[row['id']] = Crate(row['name'], int(row['downloads']))
        elif base_name == 'versions.csv':
            if not crates:
                raise RuntimeError("versions read before crates!")
            for row in _csv_rows(dump, item):
                if row['yanked'] == 't':
                    continue
                crate = crates[row['crate_id']]
                this_version = Version(row['num'])
                if (crate.latest_pre_release_version is None
                        or crate.latest_pre_release_version < this_version):
                    crate.latest_pre_release_version = this_version
                if this_version.pre_release == '':
                    if crate.latest_version is None or crate.latest_version < this_version:
                        crate.latest_version = this_version
    return crates, metadata


def render_index(crates, metadata):
    """Render the ``index.html`` template from the current directory."""
    # Imported here so the version logic above can be used without jinja2.
    from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape

    env = Environment(
        loader=FileSystemLoader('.'),
        autoescape=select_autoescape(['html', 'xml']),
        undefined=StrictUndefined
    )
    index_template = env.get_template('index.html')
    return index_template.render(crates=crates, metadata=metadata)


def main():
    """Fetch the dump if needed, print the summary and write out/index.html."""
    today = datetime.date.today().strftime('%Y-%m-%d')

    dump_tarball = Path(f'db-dump-{today}.tar.gz')
    if not dump_tarball.exists():
        download_dump(dump_tarball)

    csv.field_size_limit(CSV_FIELD_SIZE_LIMIT)
    with tarfile.open(dump_tarball) as dump:
        crates_by_id, metadata = read_dump(dump)

    most_downloaded_crates = sorted(crates_by_id.values(), key=lambda x: x.downloads, reverse=True)
    crates = most_downloaded_crates[:TOP_CRATE_COUNT]

    # A crate with no non-yanked stable release has latest_version None and
    # is counted as not being at 1.0.
    at_1_0 = sum(1 for crate in crates
                 if crate.latest_version is not None and crate.latest_version.is_1_0)
    print('{}/{} crates at or above version 1.0'.format(at_1_0, len(crates)))

    rendered_index = render_index(crates, metadata)

    out_file = Path('out', 'index.html')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    out_file.write_text(rendered_index, encoding='utf-8')


if __name__ == '__main__':
    main()