about summary refs log tree commit diff
diff options
context:
space:
mode:
author Melody Horn <melody@boringcactus.com> 2020-10-14 17:44:38 -0600
committer Melody Horn <melody@boringcactus.com> 2020-10-14 17:44:38 -0600
commit 732ef5e9787e618ead3a18f9f6aa2ed1f03f1670 (patch)
tree c4d456cf8a6c426ad4a9347a9c1c498fe35c2017
download reference-compiler-732ef5e9787e618ead3a18f9f6aa2ed1f03f1670.tar.gz
reference-compiler-732ef5e9787e618ead3a18f9f6aa2ed1f03f1670.zip
throw early draft of parser into the world
-rw-r--r-- .build.yml 15
-rw-r--r-- .gitignore 3
-rw-r--r-- README.md 0
-rw-r--r-- crowbar_reference_compiler/__init__.py 2
-rw-r--r-- crowbar_reference_compiler/parser.py 200
-rw-r--r-- crowbar_reference_compiler/scanner.py 76
-rw-r--r-- poetry.lock 157
-rw-r--r-- pyproject.toml 26
-rw-r--r-- tests/__init__.py 0
-rw-r--r-- tests/test_parsing.py 75
10 files changed, 554 insertions, 0 deletions
diff --git a/.build.yml b/.build.yml
new file mode 100644
index 0000000..7080980
--- /dev/null
+++ b/.build.yml
@@ -0,0 +1,15 @@
+# CI build manifest: base image, packages to install, repositories to clone,
+# and the shell tasks to run.
+# NOTE(review): presumably consumed by builds.sr.ht, given the git.sr.ht
+# source URL - confirm.
+image: alpine/latest
+packages:
+ - python3
+sources:
+ - https://git.sr.ht/~boringcactus/crowbar-reference-compiler
+# "setup" installs poetry and the project dependencies; "test" type-checks the
+# package with mypy and then runs the unittest suite.
+tasks:
+ - setup: |
+ pip install poetry
+ poetry config virtualenvs.create false
+ cd crowbar-reference-compiler
+ poetry install
+ - test: |
+ cd crowbar-reference-compiler
+ poetry run mypy -p crowbar_reference_compiler
+ poetry run python -m unittest
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..95b58df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/.venv
+/.idea
+/.mypy_cache
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/crowbar_reference_compiler/__init__.py b/crowbar_reference_compiler/__init__.py
new file mode 100644
index 0000000..7c7fca7
--- /dev/null
+++ b/crowbar_reference_compiler/__init__.py
@@ -0,0 +1,2 @@
+from .parser import parse_header, parse_implementation
+from .scanner import scan
diff --git a/crowbar_reference_compiler/parser.py b/crowbar_reference_compiler/parser.py
new file mode 100644
index 0000000..9bb3179
--- /dev/null
+++ b/crowbar_reference_compiler/parser.py
@@ -0,0 +1,200 @@
+from parsimonious import TokenGrammar, ParseError, IncompleteParseError # type: ignore
+
+# Grammar for Crowbar source, written in parsimonious' rule syntax and built
+# as a TokenGrammar. Two entry points are defined: HeaderFile and
+# ImplementationFile. The last three rules (identifier, constant,
+# string_literal) are spelled exactly like the token type strings the scanner
+# emits for those categories.
+# NOTE(review): alternatives separated by "/" appear to rely on ordered choice
+# (e.g. VariableDefinition is listed before VariableDeclaration, and the
+# recursive list forms before their terminating forms) - confirm against the
+# parsimonious documentation before reordering anything.
+# NOTE(review): ForStatement is `"for" VariableDefinition? ";" ...` and
+# VariableDefinition itself ends in ";", so as written an initializer is
+# followed by two ";" tokens - confirm this matches the language spec.
+grammar = TokenGrammar(
+ r"""
+HeaderFile = HeaderFileElement+
+HeaderFileElement = IncludeStatement /
+ TypeDeclaration /
+ FunctionDeclaration
+
+ImplementationFile = ImplementationFileElement+
+ImplementationFileElement = HeaderFileElement /
+ FunctionDefinition
+
+IncludeStatement = "include" string_literal ";"
+
+TypeDeclaration = StructDeclaration /
+ EnumDeclaration /
+ TypedefDeclaration
+StructDeclaration = "struct" identifier "{" VariableDeclaration+ "}" ";"
+EnumDeclaration = "enum" identifier "{" EnumBody "}" ";"
+EnumBody = (identifier ("=" Expression)? "," EnumBody) /
+ (identifier ("=" Expression)? ","?)
+TypedefDeclaration = "typedef" identifier "=" Type ";"
+
+FunctionDeclaration = FunctionSignature ";"
+FunctionDefinition = FunctionSignature Block
+FunctionSignature = Type identifier "(" SignatureArguments? ")"
+SignatureArguments = (Type identifier "," SignatureArguments) /
+ (Type identifier ","?)
+
+Block = "{" Statement* "}"
+
+Statement = VariableDefinition /
+ VariableDeclaration /
+ IfStatement /
+ SwitchStatement /
+ WhileStatement /
+ DoWhileStatement /
+ ForStatement /
+ FlowControlStatement /
+ AssignmentStatement /
+ ExpressionStatement
+
+VariableDefinition = Type identifier "=" Expression ";"
+VariableDeclaration = Type identifier ";"
+
+IfStatement = ("if" Expression Block "else" Block) /
+ ("if" Expression Block)
+
+SwitchStatement = "switch" Expression "{" SwitchCase+ "}"
+SwitchCase = (CaseSpecifier Block) /
+ ("default" Block)
+CaseSpecifier = ("case" Expression "," CaseSpecifier) /
+ ("case" Expression ","?)
+
+WhileStatement = "while" Expression Block
+DoWhileStatement = "do" Block "while" Expression ";"
+ForStatement = "for" VariableDefinition? ";" Expression ";" AssignmentStatementBody? Block
+
+FlowControlStatement = ("continue" ";") /
+ ("break" ";") /
+ ("return" Expression? ";")
+
+AssignmentStatement = AssignmentStatementBody ";"
+AssignmentStatementBody = (AssignmentTargetExpression "=" Expression) /
+ (AssignmentTargetExpression "+=" Expression) /
+ (AssignmentTargetExpression "-=" Expression) /
+ (AssignmentTargetExpression "*=" Expression) /
+ (AssignmentTargetExpression "/=" Expression) /
+ (AssignmentTargetExpression "%=" Expression) /
+ (AssignmentTargetExpression "&=" Expression) /
+ (AssignmentTargetExpression "^=" Expression) /
+ (AssignmentTargetExpression "|=" Expression) /
+ (AssignmentTargetExpression "++") /
+ (AssignmentTargetExpression "--")
+
+ExpressionStatement = Expression ";"
+
+Type = ("const" BasicType) /
+ (BasicType "*") /
+ (BasicType "[" Expression "]") /
+ (BasicType "function" "(" (BasicType ",")* ")") /
+ BasicType
+BasicType = "void" /
+ IntegerType /
+ ("signed" IntegerType) /
+ ("unsigned" IntegerType) /
+ "float" /
+ "double" /
+ "bool" /
+ ("struct" identifier) /
+ ("enum" identifier) /
+ ("typedef" identifier) /
+ ("(" Type ")")
+IntegerType = "char" /
+ "short" /
+ "int" /
+ "long"
+
+AssignmentTargetExpression = identifier ATEElementSuffix*
+ATEElementSuffix = ("[" Expression "]") /
+ ("." identifier) /
+ ("->" identifier)
+
+AtomicExpression = identifier /
+ constant /
+ string_literal /
+ ("(" Expression ")")
+
+ObjectExpression = (AtomicExpression ObjectSuffix*) /
+ ArrayLiteralExpression /
+ StructLiteralExpression
+ObjectSuffix = ("[" Expression "]") /
+ ("(" CommasExpressionList? ")") /
+ ("." identifier) /
+ ("->" identifier)
+CommasExpressionList = (Expression "," CommasExpressionList) /
+ (Expression ","?)
+ArrayLiteralExpression = "{" CommasExpressionList "}"
+StructLiteralExpression = "{" StructLiteralBody "}"
+StructLiteralBody = (StructLiteralElement "," StructLiteralBody?) /
+ (StructLiteralElement ","?)
+StructLiteralElement = "." identifier "=" Expression
+
+FactorExpression = ("(" Type ")" FactorExpression) /
+ ("&" FactorExpression) /
+ ("*" FactorExpression) /
+ ("+" FactorExpression) /
+ ("-" FactorExpression) /
+ ("~" FactorExpression) /
+ ("!" FactorExpression) /
+ ("sizeof" FactorExpression) /
+ ("sizeof" Type) /
+ ObjectExpression
+
+TermExpression = FactorExpression TermSuffix*
+TermSuffix = ("*" FactorExpression) /
+ ("/" FactorExpression) /
+ ("%" FactorExpression)
+
+ArithmeticExpression = TermExpression ArithmeticSuffix*
+ArithmeticSuffix = ("+" TermExpression) /
+ ("-" TermExpression)
+
+BitwiseOpExpression = (ArithmeticExpression "<<" ArithmeticExpression) /
+ (ArithmeticExpression ">>" ArithmeticExpression) /
+ (ArithmeticExpression "^" ArithmeticExpression) /
+ (ArithmeticExpression ("&" ArithmeticExpression)+) /
+ (ArithmeticExpression ("|" ArithmeticExpression)+) /
+ ArithmeticExpression
+
+ComparisonExpression = (BitwiseOpExpression "==" BitwiseOpExpression) /
+ (BitwiseOpExpression "!=" BitwiseOpExpression) /
+ (BitwiseOpExpression "<=" BitwiseOpExpression) /
+ (BitwiseOpExpression ">=" BitwiseOpExpression) /
+ (BitwiseOpExpression "<" BitwiseOpExpression) /
+ (BitwiseOpExpression ">" BitwiseOpExpression) /
+ BitwiseOpExpression
+
+Expression = (ComparisonExpression ("&&" ComparisonExpression)+) /
+ (ComparisonExpression ("||" ComparisonExpression)+) /
+ ComparisonExpression
+
+identifier = "identifier"
+constant = "constant"
+string_literal = "string_literal"
+""")
+
+
+class LegibleParseError(ParseError):
+    """ParseError whose line() and column() always report a placeholder.
+
+    NOTE(review): presumably needed because the parser is fed a token list
+    rather than text, so the inherited position lookups cannot work - confirm
+    against parsimonious.
+    """
+
+    # Placeholder returned for both coordinates.
+    _UNKNOWN = "🤷"
+
+    def line(self):
+        """Return the placeholder used in place of a line number."""
+        return self._UNKNOWN
+
+    def column(self):
+        """Return the placeholder used in place of a column number."""
+        return self._UNKNOWN
+
+
+class LegibleIncompleteParseError(IncompleteParseError):
+    """IncompleteParseError whose line() and column() report a placeholder.
+
+    NOTE(review): presumably needed because the parser is fed a token list
+    rather than text, so the inherited position lookups cannot work - confirm
+    against parsimonious.
+    """
+
+    # Placeholder returned for both coordinates.
+    _UNKNOWN = "🤷"
+
+    def line(self):
+        """Return the placeholder used in place of a line number."""
+        return self._UNKNOWN
+
+    def column(self):
+        """Return the placeholder used in place of a column number."""
+        return self._UNKNOWN
+
+
+def parse_from_rule(rule, tokens):
+    """Parse ``tokens`` starting from ``rule`` and return the parse result.
+
+    Args:
+        rule: a grammar rule object exposing ``parse``.
+        tokens: the token sequence to parse.
+
+    Raises:
+        LegibleIncompleteParseError: if ``rule.parse`` raises
+            IncompleteParseError.
+        LegibleParseError: if ``rule.parse`` raises any other ParseError.
+
+    Both replacements carry the original error's text, position and
+    expression, and name the original error as their explicit cause.
+    """
+    try:
+        return rule.parse(tokens)
+    # IncompleteParseError is handled first so it is not swallowed by the
+    # more general ParseError handler below.
+    except IncompleteParseError as error:
+        raise LegibleIncompleteParseError(error.text, error.pos, error.expr) from error
+    except ParseError as error:
+        raise LegibleParseError(error.text, error.pos, error.expr) from error
+
+
+def parse_header(tokens):
+    """Parse ``tokens`` using the grammar's ``HeaderFile`` rule."""
+    header_rule = grammar['HeaderFile']
+    return parse_from_rule(header_rule, tokens)
+
+
+def parse_implementation(tokens):
+    """Parse ``tokens`` using the grammar's ``ImplementationFile`` rule."""
+    implementation_rule = grammar['ImplementationFile']
+    return parse_from_rule(implementation_rule, tokens)
diff --git a/crowbar_reference_compiler/scanner.py b/crowbar_reference_compiler/scanner.py
new file mode 100644
index 0000000..fff8c35
--- /dev/null
+++ b/crowbar_reference_compiler/scanner.py
@@ -0,0 +1,76 @@
+from dataclasses import dataclass
+from typing import Optional, overload, List, Union
+
+import regex as re # type: ignore
+
+
+@dataclass
+class Token:
+    """A single scanned token: a type tag plus optional source text."""
+
+    # Token category, or the literal keyword/punctuator text itself.
+    type: str
+    # Matched source text; left as None when only the type is recorded.
+    data: Optional[str] = None
+
+    def __repr__(self) -> str:
+        """Return ``type: 'data'`` when data is set, else the quoted type."""
+        if self.data is None:
+            return repr(self.type)
+        return f"{self.type}: {self.data!r}"
+
+
+class GenerousTokenList(List[Token]):
+    """List of tokens that never raises IndexError on item access.
+
+    An out-of-range index yields an empty-typed ``Token('')`` instead.
+    NOTE(review): presumably so the parser can look past the last token -
+    confirm against how the parser indexes its input.
+    """
+
+    def __getitem__(self, i):
+        try:
+            item = super().__getitem__(i)
+        except IndexError:
+            item = Token('')
+        return item
+
+
+# Reserved words. Alternation picks the first alternative that lets the whole
+# pattern match, not the longest one, so the trailing negative lookahead is
+# what stops a keyword from being split off the front of a longer word:
+# "double" is no longer scanned as "do" + "uble", nor "integer" as "int" + "eger".
+KEYWORD = re.compile(r"(?:bool|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|function|if|include|int|long|return|short|signed|sizeof|struct|switch|typedef|unsigned|void|while)(?![\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}\p{N}])")
+# Identifier: a start character followed by start characters or numbers.
+IDENTIFIER = re.compile(r"[\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}][\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}\p{N}]*")
+# Numeric and character constants. The prefixed and fractional/exponent forms
+# are listed before the plain digit run (and the fraction+exponent form before
+# its shorter variants) so that "0x1F", "0b101", "1.5" and "1.5e3" are each
+# consumed whole instead of stopping after their leading digits.
+CONSTANT = re.compile(r"""(0[bB][01_]+)|(0[xX][0-9a-fA-F_]+)|([0-9_]+(\.[0-9_]+[eE][0-9_]+|\.[0-9_]+|[eE][0-9_]+))|([0-9_]+)|('([^\'\\]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})')""")
+# Double-quoted string with the same escape sequences as character constants.
+STRING_LITERAL = re.compile(r'''"([^\\"]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"''')
+# Multi-character operators come first so they win over their one-character prefixes.
+PUNCTUATOR = re.compile(r"->|\+\+|--|>>|<<|<=|>=|&&|\|\||[=!+\-*/%&|^]=|[\[\](){}.,+\-*/%;!&|^~><=]")
+WHITESPACE = re.compile(r"[\p{Zs}\p{Cc}]+")
+# Line comments no longer require a terminating newline, so a "//" comment on
+# the last line of the input is recognised; the newline itself, when present,
+# is matched by WHITESPACE.
+COMMENT = re.compile(r"(//[^\n]*)|(/\*.*?\*/)", re.DOTALL)
+
+
+def scan(code):
+    """Split source text into a GenerousTokenList of Token objects.
+
+    Comments and whitespace are dropped. Keywords and punctuators become
+    tokens whose type is the matched text; identifiers, constants and string
+    literals become 'identifier', 'constant' and 'string_literal' tokens
+    carrying the matched text as data.
+
+    Args:
+        code: the source text to scan.
+
+    Raises:
+        ValueError: if no pattern matches at the current position; the
+            message quotes up to 20 characters of the offending input.
+    """
+    # Token-producing patterns in priority order. The second element is the
+    # fixed token type, or None when the matched text itself is the type.
+    token_rules = (
+        (KEYWORD, None),
+        (IDENTIFIER, 'identifier'),
+        (CONSTANT, 'constant'),
+        (STRING_LITERAL, 'string_literal'),
+        (PUNCTUATOR, None),
+    )
+    result = []
+    # Scan by advancing an offset rather than re-slicing the input, which
+    # copied the whole remaining text once per token.
+    pos = 0
+    end = len(code)
+
+    while pos < end:
+        skipped = COMMENT.match(code, pos) or WHITESPACE.match(code, pos)
+        if skipped:
+            pos = skipped.end()
+            continue
+        for pattern, token_type in token_rules:
+            match = pattern.match(code, pos)
+            if match:
+                if token_type is None:
+                    result.append(Token(match.group()))
+                else:
+                    result.append(Token(token_type, match.group()))
+                pos = match.end()
+                break
+        else:
+            raise ValueError("unrecognized code in scanner: {}".format(repr(code[pos:pos + 20])))
+
+    return GenerousTokenList(result)
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..ca35235
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,157 @@
+[[package]]
+name = "mypy"
+version = "0.790"
+description = "Optional static typing for Python"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+
+[package.dependencies]
+mypy-extensions = ">=0.4.3,<0.5.0"
+typed-ast = ">=1.4.0,<1.5.0"
+typing-extensions = ">=3.7.4"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+
+[[package]]
+name = "mypy-extensions"
+version = "0.4.3"
+description = "Experimental type system extensions for programs checked with the mypy typechecker."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "parsimonious"
+version = "0.8.1"
+description = "(Soon to be) the fastest pure-Python PEG parser I could muster"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+six = ">=1.9.0"
+
+[[package]]
+name = "regex"
+version = "2020.10.11"
+description = "Alternative regular expression module, to replace re."
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "six"
+version = "1.15.0"
+description = "Python 2 and 3 compatibility utilities"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "typed-ast"
+version = "1.4.1"
+description = "a fork of Python 2 and 3 ast modules with type comment support"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "typing-extensions"
+version = "3.7.4.3"
+description = "Backported and Experimental Type Hints for Python 3.5+"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[metadata]
+lock-version = "1.1"
+python-versions = "^3.7"
+content-hash = "d319360e78308e2272c729ac4a26f170b4836cc7c7fade980e771c58aa685806"
+
+[metadata.files]
+mypy = [
+ {file = "mypy-0.790-cp35-cp35m-macosx_10_6_x86_64.whl", hash = "sha256:bd03b3cf666bff8d710d633d1c56ab7facbdc204d567715cb3b9f85c6e94f669"},
+ {file = "mypy-0.790-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:2170492030f6faa537647d29945786d297e4862765f0b4ac5930ff62e300d802"},
+ {file = "mypy-0.790-cp35-cp35m-win_amd64.whl", hash = "sha256:e86bdace26c5fe9cf8cb735e7cedfe7850ad92b327ac5d797c656717d2ca66de"},
+ {file = "mypy-0.790-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e97e9c13d67fbe524be17e4d8025d51a7dca38f90de2e462243ab8ed8a9178d1"},
+ {file = "mypy-0.790-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0d34d6b122597d48a36d6c59e35341f410d4abfa771d96d04ae2c468dd201abc"},
+ {file = "mypy-0.790-cp36-cp36m-win_amd64.whl", hash = "sha256:72060bf64f290fb629bd4a67c707a66fd88ca26e413a91384b18db3876e57ed7"},
+ {file = "mypy-0.790-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eea260feb1830a627fb526d22fbb426b750d9f5a47b624e8d5e7e004359b219c"},
+ {file = "mypy-0.790-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:c614194e01c85bb2e551c421397e49afb2872c88b5830e3554f0519f9fb1c178"},
+ {file = "mypy-0.790-cp37-cp37m-win_amd64.whl", hash = "sha256:0a0d102247c16ce93c97066443d11e2d36e6cc2a32d8ccc1f705268970479324"},
+ {file = "mypy-0.790-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf4e7bf7f1214826cf7333627cb2547c0db7e3078723227820d0a2490f117a01"},
+ {file = "mypy-0.790-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:af4e9ff1834e565f1baa74ccf7ae2564ae38c8df2a85b057af1dbbc958eb6666"},
+ {file = "mypy-0.790-cp38-cp38-win_amd64.whl", hash = "sha256:da56dedcd7cd502ccd3c5dddc656cb36113dd793ad466e894574125945653cea"},
+ {file = "mypy-0.790-py3-none-any.whl", hash = "sha256:2842d4fbd1b12ab422346376aad03ff5d0805b706102e475e962370f874a5122"},
+ {file = "mypy-0.790.tar.gz", hash = "sha256:2b21ba45ad9ef2e2eb88ce4aeadd0112d0f5026418324176fd494a6824b74975"},
+]
+mypy-extensions = [
+ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
+ {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
+]
+parsimonious = [
+ {file = "parsimonious-0.8.1.tar.gz", hash = "sha256:3add338892d580e0cb3b1a39e4a1b427ff9f687858fdd61097053742391a9f6b"},
+]
+regex = [
+ {file = "regex-2020.10.11-cp27-cp27m-win32.whl", hash = "sha256:4f5c0fe46fb79a7adf766b365cae56cafbf352c27358fda811e4a1dc8216d0db"},
+ {file = "regex-2020.10.11-cp27-cp27m-win_amd64.whl", hash = "sha256:39a5ef30bca911f5a8a3d4476f5713ed4d66e313d9fb6755b32bec8a2e519635"},
+ {file = "regex-2020.10.11-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:7c4fc5a8ec91a2254bb459db27dbd9e16bba1dabff638f425d736888d34aaefa"},
+ {file = "regex-2020.10.11-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d537e270b3e6bfaea4f49eaf267984bfb3628c86670e9ad2a257358d3b8f0955"},
+ {file = "regex-2020.10.11-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:a8240df4957a5b0e641998a5d78b3c4ea762c845d8cb8997bf820626826fde9a"},
+ {file = "regex-2020.10.11-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:4302153abb96859beb2c778cc4662607a34175065fc2f33a21f49eb3fbd1ccd3"},
+ {file = "regex-2020.10.11-cp36-cp36m-win32.whl", hash = "sha256:c077c9d04a040dba001cf62b3aff08fd85be86bccf2c51a770c77377662a2d55"},
+ {file = "regex-2020.10.11-cp36-cp36m-win_amd64.whl", hash = "sha256:46ab6070b0d2cb85700b8863b3f5504c7f75d8af44289e9562195fe02a8dd72d"},
+ {file = "regex-2020.10.11-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:d629d750ebe75a88184db98f759633b0a7772c2e6f4da529f0027b4a402c0e2f"},
+ {file = "regex-2020.10.11-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8e7ef296b84d44425760fe813cabd7afbb48c8dd62023018b338bbd9d7d6f2f0"},
+ {file = "regex-2020.10.11-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:e490f08897cb44e54bddf5c6e27deca9b58c4076849f32aaa7a0b9f1730f2c20"},
+ {file = "regex-2020.10.11-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:850339226aa4fec04916386577674bb9d69abe0048f5d1a99f91b0004bfdcc01"},
+ {file = "regex-2020.10.11-cp37-cp37m-win32.whl", hash = "sha256:60c4f64d9a326fe48e8738c3dbc068e1edc41ff7895a9e3723840deec4bc1c28"},
+ {file = "regex-2020.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:8ba3efdd60bfee1aa784dbcea175eb442d059b576934c9d099e381e5a9f48930"},
+ {file = "regex-2020.10.11-cp38-cp38-manylinux1_i686.whl", hash = "sha256:2308491b3e6c530a3bb38a8a4bb1dc5fd32cbf1e11ca623f2172ba17a81acef1"},
+ {file = "regex-2020.10.11-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:b8806649983a1c78874ec7e04393ef076805740f6319e87a56f91f1767960212"},
+ {file = "regex-2020.10.11-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:a2a31ee8a354fa3036d12804730e1e20d58bc4e250365ead34b9c30bbe9908c3"},
+ {file = "regex-2020.10.11-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d9d53518eeed12190744d366ec4a3f39b99d7daa705abca95f87dd8b442df4ad"},
+ {file = "regex-2020.10.11-cp38-cp38-win32.whl", hash = "sha256:3d5a8d007116021cf65355ada47bf405656c4b3b9a988493d26688275fde1f1c"},
+ {file = "regex-2020.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:f579caecbbca291b0fcc7d473664c8c08635da2f9b1567c22ea32311c86ef68c"},
+ {file = "regex-2020.10.11-cp39-cp39-manylinux1_i686.whl", hash = "sha256:8c8c42aa5d3ac9a49829c4b28a81bebfa0378996f9e0ca5b5ab8a36870c3e5ee"},
+ {file = "regex-2020.10.11-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:c529ba90c1775697a65b46c83d47a2d3de70f24d96da5d41d05a761c73b063af"},
+ {file = "regex-2020.10.11-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:6cf527ec2f3565248408b61dd36e380d799c2a1047eab04e13a2b0c15dd9c767"},
+ {file = "regex-2020.10.11-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:671c51d352cfb146e48baee82b1ee8d6ffe357c292f5e13300cdc5c00867ebfc"},
+ {file = "regex-2020.10.11-cp39-cp39-win32.whl", hash = "sha256:a63907332531a499b8cdfd18953febb5a4c525e9e7ca4ac147423b917244b260"},
+ {file = "regex-2020.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1a16afbfadaadc1397353f9b32e19a65dc1d1804c80ad73a14f435348ca017ad"},
+ {file = "regex-2020.10.11.tar.gz", hash = "sha256:463e770c48da76a8da82b8d4a48a541f314e0df91cbb6d873a341dbe578efafd"},
+]
+six = [
+ {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
+ {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
+]
+typed-ast = [
+ {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3"},
+ {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb"},
+ {file = "typed_ast-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919"},
+ {file = "typed_ast-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01"},
+ {file = "typed_ast-1.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75"},
+ {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652"},
+ {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"},
+ {file = "typed_ast-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1"},
+ {file = "typed_ast-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa"},
+ {file = "typed_ast-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614"},
+ {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41"},
+ {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b"},
+ {file = "typed_ast-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe"},
+ {file = "typed_ast-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355"},
+ {file = "typed_ast-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6"},
+ {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907"},
+ {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d"},
+ {file = "typed_ast-1.4.1-cp38-cp38-win32.whl", hash = "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c"},
+ {file = "typed_ast-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4"},
+ {file = "typed_ast-1.4.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34"},
+ {file = "typed_ast-1.4.1.tar.gz", hash = "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b"},
+]
+typing-extensions = [
+ {file = "typing_extensions-3.7.4.3-py2-none-any.whl", hash = "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"},
+ {file = "typing_extensions-3.7.4.3-py3-none-any.whl", hash = "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918"},
+ {file = "typing_extensions-3.7.4.3.tar.gz", hash = "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ef013a4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,26 @@
+# Poetry project metadata for the Crowbar reference compiler.
+[tool.poetry]
+name = "crowbar-reference-compiler"
+version = "0.0.1"
+description = "the reference compiler for the Crowbar programming language"
+authors = ["Melody Horn <melody@boringcactus.com>"]
+license = "BlueOak-1.0.0"
+readme = "README.md"
+repository = "https://git.sr.ht/~boringcactus/crowbar-reference-compiler"
+classifiers = [
+ "Development Status :: 2 - Pre-Alpha",
+ "Intended Audience :: Developers",
+ "Programming Language :: Other",
+ "Topic :: Software Development :: Compilers",
+]
+
+# Runtime dependencies: parsimonious is imported by parser.py and regex by
+# scanner.py.
+[tool.poetry.dependencies]
+python = "^3.7"
+parsimonious = "^0.8.1"
+regex = "^2020.10.11"
+
+# Development-only tooling; mypy is invoked by the CI test task.
+[tool.poetry.dev-dependencies]
+mypy = "^0.790"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
new file mode 100644
index 0000000..91787a6
--- /dev/null
+++ b/tests/test_parsing.py
@@ -0,0 +1,75 @@
+import unittest
+
+from crowbar_reference_compiler import parse_header, parse_implementation, scan
+
+
+# Smoke tests for the scanner and parser: each case scans a Crowbar snippet
+# and parses it, so an exception raised by scan() or by the parse functions
+# fails the test. The resulting parse trees are only printed; nothing is
+# asserted about their shape.
+class TestParsing(unittest.TestCase):
+ # Smallest header input: a single function declaration.
+ def test_basic(self):
+ print(parse_header(scan("int x();")))
+
+ # Parses one header snippet with parse_header and one implementation
+ # snippet with parse_implementation, both adapted from scdoc.
+ def test_scdoc_str(self):
+ # adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/include/str.h
+ print(parse_header(scan(r"""
+include "stdint.h";
+
+struct str {
+ char *str;
+ typedef size_t len;
+ typedef size_t size;
+};
+
+struct str *str_create();
+void str_free(struct str *str);
+void str_reset(struct str *str);
+int str_append_ch(struct str *str, typedef uint32_t ch);
+""")))
+ # adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/src/string.c
+ print(parse_implementation(scan(r"""
+include "stdlib.h";
+include "stdint.h";
+include "str.h";
+include "unicode.h";
+
+int ensure_capacity(struct str *str, typedef size_t len) {
+ if (len + 1 >= str->size) {
+ char *new = realloc(str->str, str->size * 2);
+ if (!new) {
+ return 0;
+ }
+ str->str = new;
+ str->size *= 2;
+ }
+ return 1;
+}
+
+struct str *str_create() {
+ struct str *str = calloc(1, sizeof(struct str));
+ str->str = malloc(16);
+ str->size = 16;
+ str->len = 0;
+ str->str[0] = '\0';
+ return str;
+}
+
+void str_free(struct str *str) {
+ if (!str) {
+ return;
+ }
+ free(str->str);
+ free(str);
+}
+
+int str_append_ch(struct str *str, typedef uint32_t ch) {
+ int size = utf8_chsize(ch);
+ if (size <= 0) {
+ return -1;
+ }
+ if (!ensure_capacity(str, str->len + size)) {
+ return -1;
+ }
+ utf8_encode(&str->str[str->len], ch);
+ str->len += size;
+ str->str[str->len] = '\0';
+ return size;
+}
+""")))