about | summary | refs | log | tree | commit | diff
path: root/update.py
diff options
context:
space:
mode:
Diffstat (limited to 'update.py')
-rw-r--r-- update.py | 128
1 file changed, 128 insertions, 0 deletions
diff --git a/update.py b/update.py
new file mode 100644
index 0000000..64a7768
--- /dev/null
+++ b/update.py
@@ -0,0 +1,128 @@
+import csv
+from dataclasses import dataclass
+import datetime
+from functools import total_ordering
+import io
+import json
+from pathlib import Path
+import tarfile
+import urllib.request
+
+from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape
+
+
@total_ordering
@dataclass()
class Version:
    """A parsed ``MAJOR.MINOR.PATCH[-PRE_RELEASE][+BUILD]`` version string.

    Build metadata (everything after the first ``+``) is discarded while
    parsing, so it takes no part in equality, ordering or ``str()``.
    ``pre_release`` is the empty string for a stable release.

    Equality is the field-wise comparison generated by ``dataclass``;
    ``__lt__`` plus ``total_ordering`` supply the remaining comparisons.
    """
    major: int
    minor: int
    patch: int
    pre_release: str

    def __init__(self, text: str) -> None:
        """Parse *text* into its numeric core and pre-release tag.

        Raises:
            ValueError: if the core is not exactly three dot-separated
                integers.
        """
        # Build metadata never influences precedence, so drop it up front.
        text = text.split('+')[0]
        # Only the FIRST hyphen separates the core from the pre-release tag;
        # the tag itself may contain hyphens (e.g. "1.0.0-alpha-1").
        core, _, self.pre_release = text.partition('-')
        self.major, self.minor, self.patch = [int(x) for x in core.split('.')]

    def __str__(self) -> str:
        """Return ``major.minor.patch`` with ``-pre_release`` appended if set."""
        suffix = '-' + self.pre_release if self.pre_release else ''
        return f'{self.major}.{self.minor}.{self.patch}{suffix}'

    def _pre_release_key(self) -> list:
        """Return a list of sortable keys, one per dot-separated identifier.

        Purely numeric identifiers compare by integer value and sort below
        alphanumeric ones, which compare as text.
        """
        return [
            (0, int(part), '') if part.isascii() and part.isdigit() else (1, 0, part)
            for part in self.pre_release.split('.')
        ]

    def __lt__(self, other: 'Version') -> bool:
        """Order by numeric core first, then by pre-release precedence."""
        if not isinstance(other, Version):
            return NotImplemented
        mine = (self.major, self.minor, self.patch)
        theirs = (other.major, other.minor, other.patch)
        if mine != theirs:
            return mine < theirs
        if not self.pre_release or not other.pre_release:
            # With equal cores, a pre-release precedes the stable release;
            # two stable releases are equal, hence not "less than".
            return bool(self.pre_release) and not other.pre_release
        # List comparison makes a strict prefix ("alpha") sort before the
        # longer tag ("alpha.1") and looks at every identifier, not just
        # the first two.
        return self._pre_release_key() < other._pre_release_key()

    @property
    def is_1_0(self) -> bool:
        """True when the major version is at least 1."""
        return self.major >= 1
+
+
@dataclass
class Crate:
    """Per-crate record: name, download count and newest versions seen."""
    name: str
    downloads: int
    # Newest version without a pre-release tag; None until one is recorded.
    latest_version: Version = None
    # Newest version of any kind (pre-releases included); None until recorded.
    latest_pre_release_version: Version = None
+
+
# The cached dump is named after today's date, so an existing file for the
# current day is reused instead of being downloaded again.
today = datetime.date.today().strftime('%Y-%m-%d')

dump_tarball = Path(f'db-dump-{today}.tar.gz')
if not dump_tarball.exists():
    with urllib.request.urlopen('https://static.crates.io/db-dump.tar.gz') as f:
        dump_tarball.write_bytes(f.read())

# Raise the csv module's per-field size limit before parsing the dump tables
# (presumably some fields exceed the default limit; confirm before lowering).
csv.field_size_limit(69696969)
crates = dict()
metadata = None
# The context manager guarantees the archive is closed even if parsing fails.
with tarfile.open(dump_tarball) as dump:
    for item in dump:
        if item.name.endswith('metadata.json'):
            metadata = json.load(dump.extractfile(item))
        elif item.name.endswith('crates.csv'):
            reader = csv.DictReader(io.TextIOWrapper(dump.extractfile(item), 'UTF-8'))
            for row in reader:
                crates[row['id']] = Crate(row['name'], int(row['downloads']))
        elif item.name.endswith('versions.csv'):
            # Versions are attached to crates by id, so the crates table must
            # already have been read. Raise explicitly rather than assert so
            # the check survives `python -O`.
            if not crates:
                raise RuntimeError("versions read before crates!")
            reader = csv.DictReader(io.TextIOWrapper(dump.extractfile(item), 'UTF-8'))
            for row in reader:
                if row['yanked'] == 't':
                    continue
                crate = crates[row['crate_id']]
                this_version = Version(row['num'])
                if crate.latest_pre_release_version is None or crate.latest_pre_release_version < this_version:
                    crate.latest_pre_release_version = this_version
                # Only versions without a pre-release tag count as "latest".
                if this_version.pre_release == '':
                    if crate.latest_version is None or crate.latest_version < this_version:
                        crate.latest_version = this_version

most_downloaded_crates = sorted(crates.values(), key=lambda x: x.downloads, reverse=True)

# Keep only the 360 most-downloaded crates for the report.
crates = most_downloaded_crates[:360]

# A crate whose every non-yanked release is a pre-release (or that has no
# non-yanked release at all) has latest_version None; count it as "not 1.0"
# instead of crashing on the attribute access.
stable_count = sum(
    1 for crate in crates
    if crate.latest_version is not None and crate.latest_version.is_1_0
)
print('{}/{} crates at or above version 1.0'.format(stable_count, len(crates)))

env = Environment(
    loader=FileSystemLoader('.'),
    autoescape=select_autoescape(['html', 'xml']),
    undefined=StrictUndefined
)
index_template = env.get_template('index.html')
rendered_index = index_template.render(crates=crates, metadata=metadata)

out_file = Path('out', 'index.html')
out_file.parent.mkdir(parents=True, exist_ok=True)
# Write with an explicit encoding so the output does not depend on the
# platform's default locale.
with open(out_file, 'w', encoding='utf-8') as f:
    f.write(rendered_index)