"""Build the HTML and Gemini versions of the site from Markdown sources.

Posts are read from ``_posts/*.md`` and standalone pages from ``*.md`` in the
current directory; output is written below ``_site/html`` and ``_site/gmi``.
"""

from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
import re
import shutil
import typing

from jinja2 import Environment, FileSystemLoader, StrictUndefined, Template, select_autoescape
from markupsafe import Markup
from md2gemini import md2gemini
import mistune
import mistune_contrib.highlight

# File names of dated posts: "<YYYY-MM-DD>-<slug>.md".
POST_DATE = re.compile(r'(\d{4}-\d{2}-\d{2})-(.*)\.md')
# Optional front matter: a "---" line, the fields, a closing "---" line, then the body.
POST_FRONT_MATTER = re.compile(r'---\n(.*?)---\n\n?(.*)', re.DOTALL)

# Placeholders; main() swaps in the real layouts before anything is rendered.
html_template = Template('')
gmi_template = Template('')


@dataclass
class Page:
    """One source document together with its rendered HTML and Gemini bodies."""

    title: str
    description: typing.Optional[str]
    excerpt: str
    # Output location relative to the site root, '/'-separated, without extension.
    url: str
    # Publication date parsed from the file name; None for undated pages.
    date: typing.Optional[date]
    # False when the front matter contains an 'unlisted' key.
    listed: bool
    raw_content: str
    html_content: str
    gmi_content: str

    @staticmethod
    def load(path: Path, *, template_context: typing.Optional[dict] = None) -> 'Page':
        """Read *path* and build a Page from it.

        Args:
            path: Markdown source file. A name of the form
                ``YYYY-MM-DD-slug.md`` yields a dated page at
                ``YYYY/MM/DD/slug``; any other name yields an undated page
                located at the file's stem.
            template_context: When given, the Markdown body is first rendered
                as a Jinja2 template with these variables.

        Returns:
            The loaded page, with HTML and Gemini renderings of its body.

        Raises:
            KeyError: If front matter is present but has no ``title`` field.
        """
        date_match = POST_DATE.fullmatch(path.name)
        if date_match is None:
            slug = path.stem
            page_date = None
            dest = Path(slug)
        else:
            post_date, slug = date_match.groups()
            page_date = date.fromisoformat(post_date)
            dest = Path(page_date.strftime('%Y/%m/%d')) / slug

        # Read as UTF-8 explicitly instead of relying on the platform default.
        file_data = path.read_text(encoding='utf-8')
        front_matter_match = POST_FRONT_MATTER.fullmatch(file_data)
        if front_matter_match is None:
            front_matter = {'title': slug}
            content = file_data
        else:
            raw_front_matter, content = front_matter_match.groups()
            front_matter = parse_front_matter(raw_front_matter)

        title = front_matter['title']
        description = front_matter.get('description', None)
        # The excerpt is taken from the body before any template rendering.
        excerpt = get_excerpt(content)

        if 'permalink' in front_matter:
            permalink = front_matter['permalink'].lstrip('/')
            # A directory-style permalink (including the bare root "/", which
            # strips down to an empty string) points at that directory's index.
            if not permalink or permalink.endswith('/'):
                permalink = permalink + 'index'
            dest = Path(permalink)

        listed = 'unlisted' not in front_matter

        if template_context is not None:
            content = Template(content).render(**template_context)

        html_content = markdown(content)
        gmi_content = md2gemini(content, links='copy', md_links=True).replace('\r\n', '\n')

        return Page(
            title=title,
            description=description,
            excerpt=excerpt,
            # Normalise Windows path separators so the URL always uses '/'.
            url=str(dest).replace('\\', '/'),
            date=page_date,
            listed=listed,
            raw_content=content,
            html_content=html_content,
            gmi_content=gmi_content,
        )

    def describe(self) -> str:
        """Return the explicit description, falling back to the excerpt."""
        if self.description is None:
            return self.excerpt
        return self.description


class HighlightRenderer(mistune_contrib.highlight.HighlightMixin, mistune.HTMLRenderer):
    """HTML renderer with code highlighting that rewrites site-local ``.md`` links."""

    options = {'inlinestyles': False, 'linenos': False}

    def link(self, link, text=None, title=None):
        """Render a link, pointing root-relative ``*.md`` targets at ``*.html``."""
        if link.startswith('/') and link.endswith('.md'):
            # Swap only the trailing extension.
            link = link[:-len('.md')] + '.html'
        return super().link(link, text, title)


markdown = mistune.create_markdown(renderer=HighlightRenderer(), plugins=['strikethrough', 'table'])


def render(site_dir: Path, page: Page) -> None:
    """Write the HTML and Gemini renderings of *page* below *site_dir*.

    Output goes to ``<site_dir>/html/<url>.html`` and
    ``<site_dir>/gmi/<url>.gmi``; parent directories are created as needed.
    """
    # NOTE(review): Path.with_suffix replaces an existing suffix, so a URL whose
    # last component contains a dot is cut at that dot. Left unchanged because
    # permalinks may depend on it - confirm before altering.
    html_dest = (site_dir / 'html' / page.url).with_suffix('.html')
    html_dest.parent.mkdir(parents=True, exist_ok=True)
    html_dest.write_text(
        html_template.render(content=Markup(page.html_content), page=page),
        encoding='utf-8',
    )

    gmi_dest = (site_dir / 'gmi' / page.url).with_suffix('.gmi')
    gmi_dest.parent.mkdir(parents=True, exist_ok=True)
    gmi_dest.write_text(
        gmi_template.render(content=page.gmi_content, page=page),
        encoding='utf-8',
    )


def parse_front_matter(front_matter: str) -> dict:
    """Parse ``key: value`` lines into a dict of strings.

    Blank lines are skipped. A value wrapped in double quotes has the quotes
    removed. A line without a ``': '`` separator is stored as a key with an
    empty value, with one trailing colon dropped, instead of raising.
    """
    fields = {}
    for line in front_matter.split('\n'):
        if not line.strip():
            continue
        key, separator, value = line.partition(': ')
        if not separator:
            # Bare "key" / "key:" line: keep it as a flag with no value.
            key = key.rstrip()
            if key.endswith(':'):
                key = key[:-1]
            value = ''
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        fields[key] = value
    return fields


def get_excerpt(body: str) -> str:
    """Return the plain text of the first paragraph of *body*.

    Runs of whitespace are collapsed to single spaces. An empty string is
    returned when the document contains no paragraph.
    """
    def flatten_ast(node) -> str:
        """Concatenate the text carried by *node* and its descendants."""
        if not isinstance(node, dict):
            return str(node)
        node_type = node.get('type')
        if node_type == 'text':
            return node['text']
        children = node.get('children')
        if children is not None:
            return ''.join(flatten_ast(child) for child in children)
        # Leaf nodes other than plain text (inline code, images, hard line
        # breaks) have no 'children'; presumably this matches the mistune 2
        # AST renderer - confirm against the installed version.
        for key in ('text', 'alt'):
            value = node.get(key)
            if isinstance(value, str):
                return value
        if node_type == 'linebreak':
            return ' '
        return ''

    ast = mistune.markdown(body, renderer='ast')
    first_paragraph = next((node for node in ast if node['type'] == 'paragraph'), None)
    if first_paragraph is None:
        return ''
    return re.sub(r'\s+', ' ', flatten_ast(first_paragraph))


def copy_assets(src: Path, dest: Path) -> None:
    """Replace ``dest/assets`` with a fresh copy of ``src/assets``."""
    dest_assets = dest / 'assets'
    if dest_assets.exists():
        # Remove the old copy recursively so nested asset directories
        # (which copytree itself creates) do not break the next build.
        shutil.rmtree(dest_assets)
    shutil.copytree(src / 'assets', dest_assets)


def main() -> None:
    """Build the whole site into ``_site`` from the current directory."""
    global html_template, gmi_template

    print('building...')

    env = Environment(
        loader=FileSystemLoader('_layouts'),
        autoescape=select_autoescape(['html']),
        undefined=StrictUndefined,
    )
    env.filters['absolute_url'] = lambda x: f"https://www.boringcactus.com/{x}"

    html_template = env.get_template('default.html')
    gmi_template = env.get_template('default.gmi')

    source_dir = Path('.')
    site_dir = Path('_site')
    html_site_dir = site_dir / 'html'
    gmi_site_dir = site_dir / 'gmi'

    for output_dir in (html_site_dir, gmi_site_dir):
        output_dir.mkdir(parents=True, exist_ok=True)
    for output_dir in (html_site_dir, gmi_site_dir):
        copy_assets(source_dir, output_dir)

    posts = []
    for post_filename in (source_dir / '_posts').glob('*.md'):
        print('  -', post_filename.name)
        page = Page.load(post_filename)
        render(site_dir, page)
        if page.listed:
            posts.append(page)

    # Newest first.
    posts.sort(key=lambda x: x.date, reverse=True)

    # Top-level pages may reference the post list from their templates.
    for page_filename in source_dir.glob('*.md'):
        print('  -', page_filename.name)
        page = Page.load(page_filename, template_context=dict(posts=posts))
        render(site_dir, page)

    print('  - feed.xml')
    feed = Template((source_dir / 'feed.xml').read_text(encoding='utf-8'), autoescape=True)
    # Take the timestamp once so both feeds agree. Kept as a naive UTC value
    # because the feed template's formatting of it is not visible here.
    now = datetime.utcnow()
    (html_site_dir / 'feed.xml').write_text(
        feed.render(
            root='https://www.boringcactus.com',
            mime='text/html',
            ext='html',
            now=now,
            posts=posts,
            content_attr='html_content',
            content_type='html',
        ),
        encoding='utf-8',
    )
    (gmi_site_dir / 'feed.xml').write_text(
        feed.render(
            root='gemini://boringcactus.com',
            mime='text/gemini',
            ext='gmi',
            now=now,
            posts=posts,
            content_attr='gmi_content',
            content_type='text/gemini',
        ),
        encoding='utf-8',
    )


if __name__ == '__main__':
    main()