From 732ef5e9787e618ead3a18f9f6aa2ed1f03f1670 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Wed, 14 Oct 2020 17:44:38 -0600 Subject: throw early draft of parser into the world --- .build.yml | 15 +++ .gitignore | 3 + README.md | 0 crowbar_reference_compiler/__init__.py | 2 + crowbar_reference_compiler/parser.py | 200 +++++++++++++++++++++++++++++++++ crowbar_reference_compiler/scanner.py | 76 +++++++++++++ poetry.lock | 157 ++++++++++++++++++++++++++ pyproject.toml | 26 +++++ tests/__init__.py | 0 tests/test_parsing.py | 75 +++++++++++++ 10 files changed, 554 insertions(+) create mode 100644 .build.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 crowbar_reference_compiler/__init__.py create mode 100644 crowbar_reference_compiler/parser.py create mode 100644 crowbar_reference_compiler/scanner.py create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/test_parsing.py diff --git a/.build.yml b/.build.yml new file mode 100644 index 0000000..7080980 --- /dev/null +++ b/.build.yml @@ -0,0 +1,15 @@ +image: alpine/latest +packages: + - python3 +sources: + - https://git.sr.ht/~boringcactus/crowbar-reference-compiler +tasks: + - setup: | + pip install poetry + poetry config virtualenvs.create false + cd crowbar-reference-compiler + poetry install + - test: | + cd crowbar-reference-compiler + poetry run mypy -p crowbar_reference_compiler + poetry run python -m unittest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..95b58df --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.venv +/.idea +/.mypy_cache diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/crowbar_reference_compiler/__init__.py b/crowbar_reference_compiler/__init__.py new file mode 100644 index 0000000..7c7fca7 --- /dev/null +++ b/crowbar_reference_compiler/__init__.py @@ -0,0 +1,2 @@ +from .parser import parse_header, parse_implementation +from .scanner import scan diff --git a/crowbar_reference_compiler/parser.py b/crowbar_reference_compiler/parser.py new file mode 100644 index 0000000..9bb3179 --- /dev/null +++ b/crowbar_reference_compiler/parser.py @@ -0,0 +1,200 @@ +from parsimonious import TokenGrammar, ParseError, IncompleteParseError # type: ignore + +grammar = TokenGrammar( + r""" +HeaderFile = HeaderFileElement+ +HeaderFileElement = IncludeStatement / + TypeDeclaration / + FunctionDeclaration + +ImplementationFile = ImplementationFileElement+ +ImplementationFileElement = HeaderFileElement / + FunctionDefinition + +IncludeStatement = "include" string_literal ";" + +TypeDeclaration = StructDeclaration / + EnumDeclaration / + TypedefDeclaration +StructDeclaration = "struct" identifier "{" VariableDeclaration+ "}" ";" +EnumDeclaration = "enum" identifier "{" EnumBody "}" ";" +EnumBody = (identifier ("=" Expression)? "," EnumBody) / + (identifier ("=" Expression)? ","?) +TypedefDeclaration = "typedef" identifier "=" Type ";" + +FunctionDeclaration = FunctionSignature ";" +FunctionDefinition = FunctionSignature Block +FunctionSignature = Type identifier "(" SignatureArguments? ")" +SignatureArguments = (Type identifier "," SignatureArguments) / + (Type identifier ","?) + +Block = "{" Statement* "}" + +Statement = VariableDefinition / + VariableDeclaration / + IfStatement / + SwitchStatement / + WhileStatement / + DoWhileStatement / + ForStatement / + FlowControlStatement / + AssignmentStatement / + ExpressionStatement + +VariableDefinition = Type identifier "=" Expression ";" +VariableDeclaration = Type identifier ";" + +IfStatement = ("if" Expression Block "else" Block) / + ("if" Expression Block) + +SwitchStatement = "switch" Expression "{" SwitchCase+ "}" +SwitchCase = (CaseSpecifier Block) / + ("default" Block) +CaseSpecifier = ("case" Expression "," CaseSpecifier) / + ("case" Expression ","?) + +WhileStatement = "while" Expression Block +DoWhileStatement = "do" Block "while" Expression ";" +ForStatement = "for" VariableDefinition? ";" Expression ";" AssignmentStatementBody? Block + +FlowControlStatement = ("continue" ";") / + ("break" ";") / + ("return" Expression? ";") + +AssignmentStatement = AssignmentStatementBody ";" +AssignmentStatementBody = (AssignmentTargetExpression "=" Expression) / + (AssignmentTargetExpression "+=" Expression) / + (AssignmentTargetExpression "-=" Expression) / + (AssignmentTargetExpression "*=" Expression) / + (AssignmentTargetExpression "/=" Expression) / + (AssignmentTargetExpression "%=" Expression) / + (AssignmentTargetExpression "&=" Expression) / + (AssignmentTargetExpression "^=" Expression) / + (AssignmentTargetExpression "|=" Expression) / + (AssignmentTargetExpression "++") / + (AssignmentTargetExpression "--") + +ExpressionStatement = Expression ";" + +Type = ("const" BasicType) / + (BasicType "*") / + (BasicType "[" Expression "]") / + (BasicType "function" "(" (BasicType ",")* ")") / + BasicType +BasicType = "void" / + IntegerType / + ("signed" IntegerType) / + ("unsigned" IntegerType) / + "float" / + "double" / + "bool" / + ("struct" identifier) / + ("enum" identifier) / + ("typedef" identifier) / + ("(" Type ")") +IntegerType = "char" / + "short" / + "int" / + "long" + +AssignmentTargetExpression = identifier ATEElementSuffix* +ATEElementSuffix = ("[" Expression "]") / + ("." identifier) / + ("->" identifier) + +AtomicExpression = identifier / + constant / + string_literal / + ("(" Expression ")") + +ObjectExpression = (AtomicExpression ObjectSuffix*) / + ArrayLiteralExpression / + StructLiteralExpression +ObjectSuffix = ("[" Expression "]") / + ("(" CommasExpressionList? ")") / + ("." identifier) / + ("->" identifier) +CommasExpressionList = (Expression "," CommasExpressionList) / + (Expression ","?) +ArrayLiteralExpression = "{" CommasExpressionList "}" +StructLiteralExpression = "{" StructLiteralBody "}" +StructLiteralBody = (StructLiteralElement "," StructLiteralBody?) / + (StructLiteralElement ","?) +StructLiteralElement = "." identifier "=" Expression + +FactorExpression = ("(" Type ")" FactorExpression) / + ("&" FactorExpression) / + ("*" FactorExpression) / + ("+" FactorExpression) / + ("-" FactorExpression) / + ("~" FactorExpression) / + ("!" FactorExpression) / + ("sizeof" FactorExpression) / + ("sizeof" Type) / + ObjectExpression + +TermExpression = FactorExpression TermSuffix* +TermSuffix = ("*" FactorExpression) / + ("/" FactorExpression) / + ("%" FactorExpression) + +ArithmeticExpression = TermExpression ArithmeticSuffix* +ArithmeticSuffix = ("+" TermExpression) / + ("-" TermExpression) + +BitwiseOpExpression = (ArithmeticExpression "<<" ArithmeticExpression) / + (ArithmeticExpression ">>" ArithmeticExpression) / + (ArithmeticExpression "^" ArithmeticExpression) / + (ArithmeticExpression ("&" ArithmeticExpression)+) / + (ArithmeticExpression ("|" ArithmeticExpression)+) / + ArithmeticExpression + +ComparisonExpression = (BitwiseOpExpression "==" BitwiseOpExpression) / + (BitwiseOpExpression "!=" BitwiseOpExpression) / + (BitwiseOpExpression "<=" BitwiseOpExpression) / + (BitwiseOpExpression ">=" BitwiseOpExpression) / + (BitwiseOpExpression "<" BitwiseOpExpression) / + (BitwiseOpExpression ">" BitwiseOpExpression) / + BitwiseOpExpression + +Expression = (ComparisonExpression ("&&" ComparisonExpression)+) / + (ComparisonExpression ("||" ComparisonExpression)+) / + ComparisonExpression + +identifier = "identifier" +constant = "constant" +string_literal = "string_literal" +""") + + +class LegibleParseError(ParseError): + def line(self): + return "🤷" + + def column(self): + return "🤷" + + +class LegibleIncompleteParseError(IncompleteParseError): + def line(self): + return "🤷" + + def column(self): + return "🤷" + + +def parse_from_rule(rule, tokens): + try: + return rule.parse(tokens) + except IncompleteParseError as error: + raise LegibleIncompleteParseError(error.text, error.pos, error.expr) + except ParseError as error: + raise LegibleParseError(error.text, error.pos, error.expr) + + +def parse_header(tokens): + return parse_from_rule(grammar['HeaderFile'], tokens) + + +def parse_implementation(tokens): + return parse_from_rule(grammar['ImplementationFile'], tokens) diff --git a/crowbar_reference_compiler/scanner.py b/crowbar_reference_compiler/scanner.py new file mode 100644 index 0000000..fff8c35 --- /dev/null +++ b/crowbar_reference_compiler/scanner.py @@ -0,0 +1,76 @@ +from dataclasses import dataclass +from typing import Optional, overload, List, Union + +import regex as re # type: ignore + + +@dataclass +class Token: + type: str + data: Optional[str] = None + + def __repr__(self) -> str: + if self.data is not None: + return "{}: {}".format(self.type, repr(self.data)) + else: + return repr(self.type) + + +class GenerousTokenList(List[Token]): + def __getitem__(self, i): + try: + return super(GenerousTokenList, self).__getitem__(i) + except IndexError: + return Token('') + + +KEYWORD = re.compile("bool|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|function|if|include|int|long|return|short|signed|sizeof|struct|switch|typedef|unsigned|void|while") +IDENTIFIER = re.compile(r"[\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}][\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}\p{N}]*") +CONSTANT = re.compile(r"""([0-9_]+)|(0[bB][01_]+)|(0[xX][0-9a-fA-F_]+)|([0-9_]+(\.[0-9_]+|[eE][0-9_]+|\.[0-9_]+[eE][0-9_]+))|('([^\'\\]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})')""") +STRING_LITERAL = re.compile(r'''"([^\\"]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"''') +PUNCTUATOR = re.compile(r"->|\+\+|--|>>|<<|<=|>=|&&|\|\||[=!+\-*/%&|^]=|[\[\](){}.,+\-*/%;!&|^~><=]") +WHITESPACE = re.compile(r"[\p{Zs}\p{Cc}]+") +COMMENT = re.compile(r"(//[^\n]*\n)|(/\*.*?\*/)", re.DOTALL) + + +def scan(code): + result = [] + remaining = code + + while len(remaining) > 0: + match = COMMENT.match(remaining) + if match: + remaining = remaining[match.end():] + continue + match = WHITESPACE.match(remaining) + if match: + remaining = remaining[match.end():] + continue + match = KEYWORD.match(remaining) + if match: + result.append(Token(match.group())) + remaining = remaining[match.end():] + continue + match = IDENTIFIER.match(remaining) + if match: + result.append(Token('identifier', match.group())) + remaining = remaining[match.end():] + continue + match = CONSTANT.match(remaining) + if match: + result.append(Token('constant', match.group())) + remaining = remaining[match.end():] + continue + match = STRING_LITERAL.match(remaining) + if match: + result.append(Token('string_literal', match.group())) + remaining = remaining[match.end():] + continue + match = PUNCTUATOR.match(remaining) + if match: + result.append(Token(match.group())) + remaining = remaining[match.end():] + continue + raise ValueError("unrecognized code in scanner: {}".format(repr(remaining[:20]))) + + return GenerousTokenList(result) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..ca35235 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,157 @@ +[[package]] +name = "mypy" +version = "0.790" +description = "Optional static typing for Python" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +mypy-extensions = ">=0.4.3,<0.5.0" +typed-ast = ">=1.4.0,<1.5.0" +typing-extensions = ">=3.7.4" + +[package.extras] +dmypy = ["psutil (>=4.0)"] + +[[package]] +name = "mypy-extensions" +version = "0.4.3" +description = "Experimental type system extensions for programs checked with the mypy typechecker." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "parsimonious" +version = "0.8.1" +description = "(Soon to be) the fastest pure-Python PEG parser I could muster" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.9.0" + +[[package]] +name = "regex" +version = "2020.10.11" +description = "Alternative regular expression module, to replace re." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "six" +version = "1.15.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "typed-ast" +version = "1.4.1" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "typing-extensions" +version = "3.7.4.3" +description = "Backported and Experimental Type Hints for Python 3.5+" +category = "dev" +optional = false +python-versions = "*" + +[metadata] +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "d319360e78308e2272c729ac4a26f170b4836cc7c7fade980e771c58aa685806" + +[metadata.files] +mypy = [ + {file = "mypy-0.790-cp35-cp35m-macosx_10_6_x86_64.whl", hash = "sha256:bd03b3cf666bff8d710d633d1c56ab7facbdc204d567715cb3b9f85c6e94f669"}, + {file = "mypy-0.790-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:2170492030f6faa537647d29945786d297e4862765f0b4ac5930ff62e300d802"}, + {file = "mypy-0.790-cp35-cp35m-win_amd64.whl", hash = "sha256:e86bdace26c5fe9cf8cb735e7cedfe7850ad92b327ac5d797c656717d2ca66de"}, + {file = "mypy-0.790-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e97e9c13d67fbe524be17e4d8025d51a7dca38f90de2e462243ab8ed8a9178d1"}, + {file = "mypy-0.790-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0d34d6b122597d48a36d6c59e35341f410d4abfa771d96d04ae2c468dd201abc"}, + {file = "mypy-0.790-cp36-cp36m-win_amd64.whl", hash = "sha256:72060bf64f290fb629bd4a67c707a66fd88ca26e413a91384b18db3876e57ed7"}, + {file = "mypy-0.790-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eea260feb1830a627fb526d22fbb426b750d9f5a47b624e8d5e7e004359b219c"}, + {file = "mypy-0.790-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:c614194e01c85bb2e551c421397e49afb2872c88b5830e3554f0519f9fb1c178"}, + {file = "mypy-0.790-cp37-cp37m-win_amd64.whl", hash = "sha256:0a0d102247c16ce93c97066443d11e2d36e6cc2a32d8ccc1f705268970479324"}, + {file = "mypy-0.790-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf4e7bf7f1214826cf7333627cb2547c0db7e3078723227820d0a2490f117a01"}, + {file = "mypy-0.790-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:af4e9ff1834e565f1baa74ccf7ae2564ae38c8df2a85b057af1dbbc958eb6666"}, + {file = "mypy-0.790-cp38-cp38-win_amd64.whl", hash = "sha256:da56dedcd7cd502ccd3c5dddc656cb36113dd793ad466e894574125945653cea"}, + {file = "mypy-0.790-py3-none-any.whl", hash = "sha256:2842d4fbd1b12ab422346376aad03ff5d0805b706102e475e962370f874a5122"}, + {file = "mypy-0.790.tar.gz", hash = "sha256:2b21ba45ad9ef2e2eb88ce4aeadd0112d0f5026418324176fd494a6824b74975"}, +] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] +parsimonious = [ + {file = "parsimonious-0.8.1.tar.gz", hash = "sha256:3add338892d580e0cb3b1a39e4a1b427ff9f687858fdd61097053742391a9f6b"}, +] +regex = [ + {file = "regex-2020.10.11-cp27-cp27m-win32.whl", hash = "sha256:4f5c0fe46fb79a7adf766b365cae56cafbf352c27358fda811e4a1dc8216d0db"}, + {file = "regex-2020.10.11-cp27-cp27m-win_amd64.whl", hash = "sha256:39a5ef30bca911f5a8a3d4476f5713ed4d66e313d9fb6755b32bec8a2e519635"}, + {file = "regex-2020.10.11-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:7c4fc5a8ec91a2254bb459db27dbd9e16bba1dabff638f425d736888d34aaefa"}, + {file = "regex-2020.10.11-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d537e270b3e6bfaea4f49eaf267984bfb3628c86670e9ad2a257358d3b8f0955"}, + {file = "regex-2020.10.11-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:a8240df4957a5b0e641998a5d78b3c4ea762c845d8cb8997bf820626826fde9a"}, + {file = "regex-2020.10.11-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:4302153abb96859beb2c778cc4662607a34175065fc2f33a21f49eb3fbd1ccd3"}, + {file = "regex-2020.10.11-cp36-cp36m-win32.whl", hash = "sha256:c077c9d04a040dba001cf62b3aff08fd85be86bccf2c51a770c77377662a2d55"}, + {file = "regex-2020.10.11-cp36-cp36m-win_amd64.whl", hash = "sha256:46ab6070b0d2cb85700b8863b3f5504c7f75d8af44289e9562195fe02a8dd72d"}, + {file = "regex-2020.10.11-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:d629d750ebe75a88184db98f759633b0a7772c2e6f4da529f0027b4a402c0e2f"}, + {file = "regex-2020.10.11-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8e7ef296b84d44425760fe813cabd7afbb48c8dd62023018b338bbd9d7d6f2f0"}, + {file = "regex-2020.10.11-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:e490f08897cb44e54bddf5c6e27deca9b58c4076849f32aaa7a0b9f1730f2c20"}, + {file = "regex-2020.10.11-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:850339226aa4fec04916386577674bb9d69abe0048f5d1a99f91b0004bfdcc01"}, + {file = "regex-2020.10.11-cp37-cp37m-win32.whl", hash = "sha256:60c4f64d9a326fe48e8738c3dbc068e1edc41ff7895a9e3723840deec4bc1c28"}, + {file = "regex-2020.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:8ba3efdd60bfee1aa784dbcea175eb442d059b576934c9d099e381e5a9f48930"}, + {file = "regex-2020.10.11-cp38-cp38-manylinux1_i686.whl", hash = "sha256:2308491b3e6c530a3bb38a8a4bb1dc5fd32cbf1e11ca623f2172ba17a81acef1"}, + {file = "regex-2020.10.11-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:b8806649983a1c78874ec7e04393ef076805740f6319e87a56f91f1767960212"}, + {file = "regex-2020.10.11-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:a2a31ee8a354fa3036d12804730e1e20d58bc4e250365ead34b9c30bbe9908c3"}, + {file = "regex-2020.10.11-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d9d53518eeed12190744d366ec4a3f39b99d7daa705abca95f87dd8b442df4ad"}, + {file = "regex-2020.10.11-cp38-cp38-win32.whl", hash = "sha256:3d5a8d007116021cf65355ada47bf405656c4b3b9a988493d26688275fde1f1c"}, + {file = "regex-2020.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:f579caecbbca291b0fcc7d473664c8c08635da2f9b1567c22ea32311c86ef68c"}, + {file = "regex-2020.10.11-cp39-cp39-manylinux1_i686.whl", hash = "sha256:8c8c42aa5d3ac9a49829c4b28a81bebfa0378996f9e0ca5b5ab8a36870c3e5ee"}, + {file = "regex-2020.10.11-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:c529ba90c1775697a65b46c83d47a2d3de70f24d96da5d41d05a761c73b063af"}, + {file = "regex-2020.10.11-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:6cf527ec2f3565248408b61dd36e380d799c2a1047eab04e13a2b0c15dd9c767"}, + {file = "regex-2020.10.11-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:671c51d352cfb146e48baee82b1ee8d6ffe357c292f5e13300cdc5c00867ebfc"}, + {file = "regex-2020.10.11-cp39-cp39-win32.whl", hash = "sha256:a63907332531a499b8cdfd18953febb5a4c525e9e7ca4ac147423b917244b260"}, + {file = "regex-2020.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1a16afbfadaadc1397353f9b32e19a65dc1d1804c80ad73a14f435348ca017ad"}, + {file = "regex-2020.10.11.tar.gz", hash = "sha256:463e770c48da76a8da82b8d4a48a541f314e0df91cbb6d873a341dbe578efafd"}, +] +six = [ + {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, + {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, +] +typed-ast = [ + {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3"}, + {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb"}, + {file = "typed_ast-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919"}, + {file = "typed_ast-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01"}, + {file = "typed_ast-1.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75"}, + {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652"}, + {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"}, + {file = "typed_ast-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1"}, + {file = "typed_ast-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa"}, + {file = "typed_ast-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614"}, + {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41"}, + {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b"}, + {file = "typed_ast-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe"}, + {file = "typed_ast-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355"}, + {file = "typed_ast-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6"}, + {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907"}, + {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d"}, + {file = "typed_ast-1.4.1-cp38-cp38-win32.whl", hash = "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c"}, + {file = "typed_ast-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4"}, + {file = "typed_ast-1.4.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34"}, + {file = "typed_ast-1.4.1.tar.gz", hash = "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b"}, +] +typing-extensions = [ + {file = "typing_extensions-3.7.4.3-py2-none-any.whl", hash = "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"}, + {file = "typing_extensions-3.7.4.3-py3-none-any.whl", hash = "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918"}, + {file = "typing_extensions-3.7.4.3.tar.gz", hash = "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ef013a4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "crowbar-reference-compiler" +version = "0.0.1" +description = "the reference compiler for the Crowbar programming language" +authors = ["Melody Horn "] +license = "BlueOak-1.0.0" +readme = "README.md" +repository = "https://git.sr.ht/~boringcactus/crowbar-reference-compiler" +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "Programming Language :: Other", + "Topic :: Software Development :: Compilers", +] + +[tool.poetry.dependencies] +python = "^3.7" +parsimonious = "^0.8.1" +regex = "^2020.10.11" + +[tool.poetry.dev-dependencies] +mypy = "^0.790" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_parsing.py b/tests/test_parsing.py new file mode 100644 index 0000000..91787a6 --- /dev/null +++ b/tests/test_parsing.py @@ -0,0 +1,75 @@ +import unittest + +from crowbar_reference_compiler import parse_header, parse_implementation, scan + + +class TestParsing(unittest.TestCase): + def test_basic(self): + print(parse_header(scan("int x();"))) + + def test_scdoc_str(self): + # adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/include/str.h + print(parse_header(scan(r""" +include "stdint.h"; + +struct str { + char *str; + typedef size_t len; + typedef size_t size; +}; + +struct str *str_create(); +void str_free(struct str *str); +void str_reset(struct str *str); +int str_append_ch(struct str *str, typedef uint32_t ch); +"""))) + # adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/src/string.c + print(parse_implementation(scan(r""" +include "stdlib.h"; +include "stdint.h"; +include "str.h"; +include "unicode.h"; + +int ensure_capacity(struct str *str, typedef size_t len) { + if (len + 1 >= str->size) { + char *new = realloc(str->str, str->size * 2); + if (!new) { + return 0; + } + str->str = new; + str->size *= 2; + } + return 1; +} + +struct str *str_create() { + struct str *str = calloc(1, sizeof(struct str)); + str->str = malloc(16); + str->size = 16; + str->len = 0; + str->str[0] = '\0'; + return str; +} + +void str_free(struct str *str) { + if (!str) { + return; + } + free(str->str); + free(str); +} + +int str_append_ch(struct str *str, typedef uint32_t ch) { + int size = utf8_chsize(ch); + if (size <= 0) { + return -1; + } + if (!ensure_capacity(str, str->len + size)) { + return -1; + } + utf8_encode(&str->str[str->len], ch); + str->len += size; + str->str[str->len] = '\0'; + return size; +} +"""))) -- cgit v1.2.3