aboutsummaryrefslogtreecommitdiff
path: root/build.py
blob: 72f6006f97f9d5d848e2a9d635bfe7c21f40d159 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
import re
import shutil
import typing

from jinja2 import Environment, FileSystemLoader, Markup, StrictUndefined, Template, select_autoescape
from md2gemini import md2gemini
import mistune
import mistune_contrib.highlight

# Filenames of the form YYYY-MM-DD-slug.md mark dated posts.
POST_DATE = re.compile(r'(\d{4}-\d{2}-\d{2})-(.*)\.md')
# Splits a file into its '---'-delimited front matter and the remaining body.
POST_FRONT_MATTER = re.compile(r'---\n(.*?)---\n\n?(.*)', re.DOTALL)

# Layout templates used by render(); placeholders until main() loads the real
# ones from _layouts/.
html_template = Template('')
gmi_template = Template('')


@dataclass
class Page:
    """A single source page (dated post or standalone page) plus its rendered outputs."""
    title: str
    description: typing.Optional[str]  # front-matter 'description', if any
    excerpt: str  # first paragraph of the markdown body, whitespace-collapsed
    url: str  # site-relative output path without extension, '/'-separated
    date: typing.Optional[date]  # parsed from a YYYY-MM-DD- filename prefix, else None
    listed: bool  # False when the front matter contains an 'unlisted' key

    raw_content: str  # markdown source after front matter (and templating, if any)
    html_content: str  # rendered HTML body
    gmi_content: str  # rendered Gemini body

    @staticmethod
    def load(path: Path, *, template_context: typing.Optional[dict] = None) -> 'Page':
        """Parse a markdown file into a Page.

        Filenames matching YYYY-MM-DD-slug.md become dated posts destined for
        YYYY/MM/DD/slug; anything else lands at its stem.  A 'permalink'
        front-matter entry overrides the destination.  When template_context
        is given, the body is first rendered as a Jinja2 template with it.
        """
        date_match = POST_DATE.fullmatch(path.name)
        if date_match is None:
            slug = path.stem
            dest = Path(slug)
            page_date = None
        else:
            post_date, slug = date_match.groups()
            page_date = date.fromisoformat(post_date)
            dest = Path(page_date.strftime('%Y/%m/%d')) / slug

        file_data = path.read_text()
        front_matter_match = POST_FRONT_MATTER.fullmatch(file_data)
        if front_matter_match is not None:
            # Reuse the match instead of running the DOTALL regex over the
            # whole file a second time (the original matched twice).
            raw_front_matter, content = front_matter_match.groups()
            front_matter = parse_front_matter(raw_front_matter)
        else:
            front_matter = {'title': slug}
            content = file_data
        title = front_matter['title']
        description = front_matter.get('description', None)
        excerpt = get_excerpt(content)
        if 'permalink' in front_matter:
            permalink = front_matter['permalink'].lstrip('/')
            if permalink.endswith('/'):
                # Directory-style permalinks render to .../index.{html,gmi}.
                permalink = permalink + 'index'
            dest = Path(permalink)
        listed = 'unlisted' not in front_matter

        if template_context is not None:
            content = Template(content).render(**template_context)

        html_content = markdown(content)
        # md2gemini emits CRLF line endings; normalize to LF.
        gmi_content = md2gemini(content, links='copy', md_links=True).replace('\r\n', '\n')

        # str(dest) uses backslashes on Windows; URLs always use '/'.
        return Page(title, description, excerpt, str(dest).replace('\\', '/'), page_date, listed,
                    content, html_content, gmi_content)

    def describe(self) -> str:
        """Return the explicit front-matter description if set, else the excerpt."""
        if self.description is not None:
            return self.description
        return self.excerpt


class HighlightRenderer(mistune_contrib.highlight.HighlightMixin, mistune.HTMLRenderer):
    """HTML renderer with Pygments highlighting and .md -> .html link rewriting."""
    # Pygments options: emit CSS classes rather than inline styles, no line numbers.
    options = {'inlinestyles': False, 'linenos': False}

    def link(self, link, text=None, title=None):
        # Internal absolute links to markdown sources should point at the
        # rendered HTML output instead.
        if link.startswith('/') and link.endswith('.md'):
            # Escape the dot: the original pattern '.md$' used it as a regex
            # wildcard (only harmless because of the endswith guard above).
            link = re.sub(r'\.md$', '.html', link)
        return super().link(link, text, title)


# Shared markdown-to-HTML converter with highlighting, strikethrough, and tables.
markdown = mistune.create_markdown(renderer=HighlightRenderer(), plugins=['strikethrough', 'table'])


def render(site_dir: Path, page: Page):
    """Write *page* as both HTML and Gemini output under *site_dir*.

    Outputs go to site_dir/html/<url>.html and site_dir/gmi/<url>.gmi,
    creating parent directories as needed.
    """
    html_dest = (site_dir / 'html' / page.url).with_suffix('.html')
    html_dest.parent.mkdir(parents=True, exist_ok=True)
    # Markup() flags the already-rendered HTML as safe so the autoescaping
    # layout template doesn't double-escape it.  Pin utf-8: the write_text
    # default is the locale encoding, which breaks non-ASCII pages on Windows.
    html_dest.write_text(html_template.render(content=Markup(page.html_content), page=page),
                         encoding='utf-8')

    gmi_dest = (site_dir / 'gmi' / page.url).with_suffix('.gmi')
    gmi_dest.parent.mkdir(parents=True, exist_ok=True)
    gmi_dest.write_text(gmi_template.render(content=page.gmi_content, page=page),
                        encoding='utf-8')


def parse_front_matter(front_matter: str) -> dict:
    """Parse simple ``key: value`` front matter into a dict.

    Values wrapped in double quotes are unquoted.  This is a deliberately
    minimal YAML-ish parser: no nesting, no lists, no escapes.  A line with
    no ': ' separator becomes a key mapped to '' — supporting bare flags
    like 'unlisted' (the original indexed past the split and raised
    IndexError on such lines).
    """
    fields = {}
    for line in front_matter.split('\n'):
        if not line.strip():
            continue
        # partition never fails: a separator-less line yields value == ''.
        key, _sep, value = line.partition(': ')
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        fields[key] = value
    return fields


def get_excerpt(body: str):
    """Return the plain text of the first paragraph of *body* (markdown),
    with runs of whitespace collapsed to single spaces."""

    def collect_text(node) -> str:
        # Non-dict leaves are stringified as-is.
        if not isinstance(node, dict):
            return str(node)
        if node['type'] == 'text':
            return node['text']
        return ''.join(collect_text(child) for child in node['children'])

    ast = mistune.markdown(body, renderer='ast')
    # Empty-text fallback covers bodies with no paragraph at all.
    fallback = {'type': 'text', 'text': ''}
    first_paragraph = next((node for node in ast if node['type'] == 'paragraph'), fallback)
    return re.sub(r'\s+', ' ', collect_text(first_paragraph))


def copy_assets(src: Path, dest: Path):
    """Replace dest/assets with a fresh copy of src/assets.

    Any existing dest/assets tree is removed first so assets deleted from
    the source don't linger in the output.
    """
    dest_assets = dest / 'assets'
    if dest_assets.exists():
        # rmtree handles nested directories; the previous manual unlink loop
        # raised IsADirectoryError whenever assets/ contained a subdirectory.
        shutil.rmtree(dest_assets)
    shutil.copytree(src / 'assets', dest_assets)


def main():
    """Build the whole site into _site/: HTML and Gemini trees plus feeds."""
    print('building...')
    env = Environment(
        loader=FileSystemLoader('_layouts'),
        autoescape=select_autoescape(['html']),
        undefined=StrictUndefined,
    )
    env.filters['absolute_url'] = lambda x: f"https://www.boringcactus.com/{x}"
    # render() reads these module-level templates, so they must be assigned
    # before any page is rendered below.
    global html_template
    html_template = env.get_template('default.html')
    global gmi_template
    gmi_template = env.get_template('default.gmi')

    source_dir = Path('.')
    site_dir = Path('_site')
    html_site_dir = site_dir / 'html'
    gmi_site_dir = site_dir / 'gmi'

    html_site_dir.mkdir(parents=True, exist_ok=True)
    gmi_site_dir.mkdir(parents=True, exist_ok=True)

    copy_assets(source_dir, html_site_dir)
    copy_assets(source_dir, gmi_site_dir)

    # Render dated posts first; only listed ones are collected for reuse.
    posts = []
    for post_filename in (source_dir / '_posts').glob('*.md'):
        print('  -', post_filename.name)
        page = Page.load(post_filename)
        render(site_dir, page)
        if page.listed:
            posts.append(page)

    # Newest first.
    posts.sort(key=lambda x: x.date, reverse=True)

    # Top-level pages get the post list as Jinja2 context (e.g. for an index).
    for page_filename in source_dir.glob('*.md'):
        print('  -', page_filename.name)
        page = Page.load(page_filename, template_context=dict(posts=posts))
        render(site_dir, page)

    print('  - feed.xml')

    # feed.xml is itself a Jinja2 template, rendered once per output flavor.
    feed = (source_dir / 'feed.xml').read_text()
    feed = Template(feed, autoescape=True)

    (html_site_dir / 'feed.xml').write_text(feed.render(
        root='https://www.boringcactus.com',
        mime='text/html',
        ext='html',
        now=datetime.utcnow(),
        posts=posts,
        content_attr='html_content',
        content_type='html',
    ))
    (gmi_site_dir / 'feed.xml').write_text(feed.render(
        root='gemini://boringcactus.com',
        mime='text/gemini',
        ext='gmi',
        now=datetime.utcnow(),
        posts=posts,
        content_attr='gmi_content',
        content_type='text/gemini',
    ))


# Run the build when executed as a script (not on import).
if __name__ == '__main__':
    main()