about summary refs log tree commit diff
path: root/crowbar_reference_compiler
diff options
context:
space:
mode:
author Melody Horn <melody@boringcactus.com> 2020-10-14 17:44:38 -0600
committer Melody Horn <melody@boringcactus.com> 2020-10-14 17:44:38 -0600
commit 732ef5e9787e618ead3a18f9f6aa2ed1f03f1670 (patch)
tree c4d456cf8a6c426ad4a9347a9c1c498fe35c2017 /crowbar_reference_compiler
download reference-compiler-732ef5e9787e618ead3a18f9f6aa2ed1f03f1670.tar.gz
reference-compiler-732ef5e9787e618ead3a18f9f6aa2ed1f03f1670.zip
throw early draft of parser into the world
Diffstat (limited to 'crowbar_reference_compiler')
-rw-r--r-- crowbar_reference_compiler/__init__.py 2
-rw-r--r-- crowbar_reference_compiler/parser.py 200
-rw-r--r-- crowbar_reference_compiler/scanner.py 76
3 files changed, 278 insertions, 0 deletions
diff --git a/crowbar_reference_compiler/__init__.py b/crowbar_reference_compiler/__init__.py
new file mode 100644
index 0000000..7c7fca7
--- /dev/null
+++ b/crowbar_reference_compiler/__init__.py
@@ -0,0 +1,2 @@
+from .parser import parse_header, parse_implementation
+from .scanner import scan
diff --git a/crowbar_reference_compiler/parser.py b/crowbar_reference_compiler/parser.py
new file mode 100644
index 0000000..9bb3179
--- /dev/null
+++ b/crowbar_reference_compiler/parser.py
@@ -0,0 +1,200 @@
+from parsimonious import TokenGrammar, ParseError, IncompleteParseError # type: ignore
+
+# PEG grammar for the language handled by this reference compiler, built as a
+# parsimonious TokenGrammar.  Two start rules are used by the functions at the
+# bottom of this module: HeaderFile and ImplementationFile.
+#
+# The three lowercase rules at the end of the grammar (identifier, constant,
+# string_literal) are spelled exactly like the token types the scanner emits
+# for those categories; every other quoted literal is spelled like a keyword
+# or punctuator.  Presumably TokenGrammar compares each literal against a
+# token's `type` rather than against source text -- confirm against the
+# parsimonious documentation.
+#
+# Expression rules are layered from tightest to loosest binding:
+# AtomicExpression -> ObjectExpression -> FactorExpression -> TermExpression
+# -> ArithmeticExpression -> BitwiseOpExpression -> ComparisonExpression
+# -> Expression.  Within each `/` choice the alternatives are tried in the
+# order written, which is why longer forms are listed before their prefixes
+# (e.g. the "if ... else" form before the bare "if" form).
+#
+# NOTE(review): VariableDefinition already ends with ";", and ForStatement
+# follows the optional VariableDefinition with another ";", so a for loop with
+# an initializer appears to need two semicolons there -- confirm intent.
+grammar = TokenGrammar(
+ r"""
+HeaderFile = HeaderFileElement+
+HeaderFileElement = IncludeStatement /
+ TypeDeclaration /
+ FunctionDeclaration
+
+ImplementationFile = ImplementationFileElement+
+ImplementationFileElement = HeaderFileElement /
+ FunctionDefinition
+
+IncludeStatement = "include" string_literal ";"
+
+TypeDeclaration = StructDeclaration /
+ EnumDeclaration /
+ TypedefDeclaration
+StructDeclaration = "struct" identifier "{" VariableDeclaration+ "}" ";"
+EnumDeclaration = "enum" identifier "{" EnumBody "}" ";"
+EnumBody = (identifier ("=" Expression)? "," EnumBody) /
+ (identifier ("=" Expression)? ","?)
+TypedefDeclaration = "typedef" identifier "=" Type ";"
+
+FunctionDeclaration = FunctionSignature ";"
+FunctionDefinition = FunctionSignature Block
+FunctionSignature = Type identifier "(" SignatureArguments? ")"
+SignatureArguments = (Type identifier "," SignatureArguments) /
+ (Type identifier ","?)
+
+Block = "{" Statement* "}"
+
+Statement = VariableDefinition /
+ VariableDeclaration /
+ IfStatement /
+ SwitchStatement /
+ WhileStatement /
+ DoWhileStatement /
+ ForStatement /
+ FlowControlStatement /
+ AssignmentStatement /
+ ExpressionStatement
+
+VariableDefinition = Type identifier "=" Expression ";"
+VariableDeclaration = Type identifier ";"
+
+IfStatement = ("if" Expression Block "else" Block) /
+ ("if" Expression Block)
+
+SwitchStatement = "switch" Expression "{" SwitchCase+ "}"
+SwitchCase = (CaseSpecifier Block) /
+ ("default" Block)
+CaseSpecifier = ("case" Expression "," CaseSpecifier) /
+ ("case" Expression ","?)
+
+WhileStatement = "while" Expression Block
+DoWhileStatement = "do" Block "while" Expression ";"
+ForStatement = "for" VariableDefinition? ";" Expression ";" AssignmentStatementBody? Block
+
+FlowControlStatement = ("continue" ";") /
+ ("break" ";") /
+ ("return" Expression? ";")
+
+AssignmentStatement = AssignmentStatementBody ";"
+AssignmentStatementBody = (AssignmentTargetExpression "=" Expression) /
+ (AssignmentTargetExpression "+=" Expression) /
+ (AssignmentTargetExpression "-=" Expression) /
+ (AssignmentTargetExpression "*=" Expression) /
+ (AssignmentTargetExpression "/=" Expression) /
+ (AssignmentTargetExpression "%=" Expression) /
+ (AssignmentTargetExpression "&=" Expression) /
+ (AssignmentTargetExpression "^=" Expression) /
+ (AssignmentTargetExpression "|=" Expression) /
+ (AssignmentTargetExpression "++") /
+ (AssignmentTargetExpression "--")
+
+ExpressionStatement = Expression ";"
+
+Type = ("const" BasicType) /
+ (BasicType "*") /
+ (BasicType "[" Expression "]") /
+ (BasicType "function" "(" (BasicType ",")* ")") /
+ BasicType
+BasicType = "void" /
+ IntegerType /
+ ("signed" IntegerType) /
+ ("unsigned" IntegerType) /
+ "float" /
+ "double" /
+ "bool" /
+ ("struct" identifier) /
+ ("enum" identifier) /
+ ("typedef" identifier) /
+ ("(" Type ")")
+IntegerType = "char" /
+ "short" /
+ "int" /
+ "long"
+
+AssignmentTargetExpression = identifier ATEElementSuffix*
+ATEElementSuffix = ("[" Expression "]") /
+ ("." identifier) /
+ ("->" identifier)
+
+AtomicExpression = identifier /
+ constant /
+ string_literal /
+ ("(" Expression ")")
+
+ObjectExpression = (AtomicExpression ObjectSuffix*) /
+ ArrayLiteralExpression /
+ StructLiteralExpression
+ObjectSuffix = ("[" Expression "]") /
+ ("(" CommasExpressionList? ")") /
+ ("." identifier) /
+ ("->" identifier)
+CommasExpressionList = (Expression "," CommasExpressionList) /
+ (Expression ","?)
+ArrayLiteralExpression = "{" CommasExpressionList "}"
+StructLiteralExpression = "{" StructLiteralBody "}"
+StructLiteralBody = (StructLiteralElement "," StructLiteralBody?) /
+ (StructLiteralElement ","?)
+StructLiteralElement = "." identifier "=" Expression
+
+FactorExpression = ("(" Type ")" FactorExpression) /
+ ("&" FactorExpression) /
+ ("*" FactorExpression) /
+ ("+" FactorExpression) /
+ ("-" FactorExpression) /
+ ("~" FactorExpression) /
+ ("!" FactorExpression) /
+ ("sizeof" FactorExpression) /
+ ("sizeof" Type) /
+ ObjectExpression
+
+TermExpression = FactorExpression TermSuffix*
+TermSuffix = ("*" FactorExpression) /
+ ("/" FactorExpression) /
+ ("%" FactorExpression)
+
+ArithmeticExpression = TermExpression ArithmeticSuffix*
+ArithmeticSuffix = ("+" TermExpression) /
+ ("-" TermExpression)
+
+BitwiseOpExpression = (ArithmeticExpression "<<" ArithmeticExpression) /
+ (ArithmeticExpression ">>" ArithmeticExpression) /
+ (ArithmeticExpression "^" ArithmeticExpression) /
+ (ArithmeticExpression ("&" ArithmeticExpression)+) /
+ (ArithmeticExpression ("|" ArithmeticExpression)+) /
+ ArithmeticExpression
+
+ComparisonExpression = (BitwiseOpExpression "==" BitwiseOpExpression) /
+ (BitwiseOpExpression "!=" BitwiseOpExpression) /
+ (BitwiseOpExpression "<=" BitwiseOpExpression) /
+ (BitwiseOpExpression ">=" BitwiseOpExpression) /
+ (BitwiseOpExpression "<" BitwiseOpExpression) /
+ (BitwiseOpExpression ">" BitwiseOpExpression) /
+ BitwiseOpExpression
+
+Expression = (ComparisonExpression ("&&" ComparisonExpression)+) /
+ (ComparisonExpression ("||" ComparisonExpression)+) /
+ ComparisonExpression
+
+identifier = "identifier"
+constant = "constant"
+string_literal = "string_literal"
+""")
+
+
+class LegibleParseError(ParseError):
+    """ParseError whose position helpers always answer with a placeholder.
+
+    NOTE(review): presumably needed because the grammar is fed a token list
+    rather than a string, so the inherited line/column computation has nothing
+    sensible to count -- confirm against parsimonious.
+    """
+
+    # Shown wherever a line or column number would normally appear.
+    _UNKNOWN = "🤷"
+
+    def line(self):
+        """Return the placeholder used instead of a line number."""
+        return self._UNKNOWN
+
+    def column(self):
+        """Return the placeholder used instead of a column number."""
+        return self._UNKNOWN
+
+
+class LegibleIncompleteParseError(IncompleteParseError):
+    """IncompleteParseError whose position helpers answer with a placeholder.
+
+    NOTE(review): presumably needed because the grammar is fed a token list
+    rather than a string, so the inherited line/column computation has nothing
+    sensible to count -- confirm against parsimonious.
+    """
+
+    # Shown wherever a line or column number would normally appear.
+    _UNKNOWN = "🤷"
+
+    def line(self):
+        """Return the placeholder used instead of a line number."""
+        return self._UNKNOWN
+
+    def column(self):
+        """Return the placeholder used instead of a column number."""
+        return self._UNKNOWN
+
+
+def parse_from_rule(rule, tokens):
+    """Parse `tokens` with `rule`, re-raising failures as the Legible variants.
+
+    Returns whatever `rule.parse(tokens)` returns.
+
+    Raises:
+        LegibleIncompleteParseError: if parsing raised IncompleteParseError.
+        LegibleParseError: if parsing raised any other ParseError.
+    """
+    try:
+        tree = rule.parse(tokens)
+    except ParseError as failure:
+        # IncompleteParseError is itself a ParseError, so pick the more
+        # specific replacement first.
+        if isinstance(failure, IncompleteParseError):
+            legible = LegibleIncompleteParseError
+        else:
+            legible = LegibleParseError
+        raise legible(failure.text, failure.pos, failure.expr)
+    return tree
+
+
+def parse_header(tokens):
+    """Parse `tokens` starting from the grammar's HeaderFile rule."""
+    header_rule = grammar['HeaderFile']
+    return parse_from_rule(header_rule, tokens)
+
+
+def parse_implementation(tokens):
+    """Parse `tokens` starting from the grammar's ImplementationFile rule."""
+    implementation_rule = grammar['ImplementationFile']
+    return parse_from_rule(implementation_rule, tokens)
diff --git a/crowbar_reference_compiler/scanner.py b/crowbar_reference_compiler/scanner.py
new file mode 100644
index 0000000..fff8c35
--- /dev/null
+++ b/crowbar_reference_compiler/scanner.py
@@ -0,0 +1,76 @@
+from dataclasses import dataclass
+from typing import Optional, overload, List, Union
+
+import regex as re # type: ignore
+
+
+@dataclass
+class Token:
+    """One lexical token: a `type` plus optional `data` (None when absent)."""
+
+    type: str
+    data: Optional[str] = None
+
+    def __repr__(self) -> str:
+        """Render as "type: 'data'" when data is set, else as the quoted type."""
+        if self.data is None:
+            return repr(self.type)
+        return f"{self.type}: {self.data!r}"
+
+
+class GenerousTokenList(List[Token]):
+    """Token list whose indexing never raises IndexError.
+
+    NOTE(review): presumably this lets the token grammar look one position
+    past the end of the input without crashing -- confirm against the caller.
+    """
+
+    def __getitem__(self, i):
+        """Return the item at `i`, or a Token with an empty type if out of range."""
+        try:
+            item = super().__getitem__(i)
+        except IndexError:
+            # An empty type is not produced by any scanner rule.
+            return Token('')
+        return item
+
+
+# Lexical patterns used by scan().  Each one is applied with `.match()`, i.e.
+# anchored at the current position.  `re` is the third-party `regex` module,
+# which provides the \p{...} Unicode property classes used below.
+
+# Any character that may continue an identifier (same class as the tail of
+# IDENTIFIER).
+_IDENTIFIER_CONTINUE = r"[\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}\p{N}]"
+
+# A keyword only counts when no identifier character follows it.  Without the
+# lookahead, "integer" would scan as keyword "int" + identifier "eger", and
+# "double" would scan as keyword "do" + identifier "uble" because "do" is
+# listed first.
+KEYWORD = re.compile(
+    r"(?:bool|break|case|char|const|continue|default|do|double|else|enum"
+    r"|extern|float|for|function|if|include|int|long|return|short|signed"
+    r"|sizeof|struct|switch|typedef|unsigned|void|while)"
+    r"(?!" + _IDENTIFIER_CONTINUE + r")"
+)
+IDENTIFIER = re.compile(r"[\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}][\p{L}\p{Pc}\p{Cf}\p{Sk}\p{Mn}\p{N}]*")
+# Alternation takes the first branch that matches, so longer forms must come
+# before their prefixes: binary/hex before anything starting with a bare
+# digit, floats before plain decimals, and the fraction-plus-exponent float
+# before the fraction-only float.  With decimals first, "0x1F" scanned as "0"
+# and "1.5" as "1".  Group numbers differ from the original ordering; scan()
+# only uses the whole match.
+CONSTANT = re.compile(
+    r"""(0[bB][01_]+)"""
+    r"""|(0[xX][0-9a-fA-F_]+)"""
+    r"""|([0-9_]+(\.[0-9_]+[eE][0-9_]+|\.[0-9_]+|[eE][0-9_]+))"""
+    r"""|([0-9_]+)"""
+    r"""|('([^\'\\]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})')"""
+)
+STRING_LITERAL = re.compile(r'''"([^\\"]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"''')
+PUNCTUATOR = re.compile(r"->|\+\+|--|>>|<<|<=|>=|&&|\|\||[=!+\-*/%&|^]=|[\[\](){}.,+\-*/%;!&|^~><=]")
+WHITESPACE = re.compile(r"[\p{Zs}\p{Cc}]+")
+# The newline ending a line comment is optional so that a "//" comment on the
+# last line of a file without a trailing newline is still a comment rather
+# than two "/" punctuators.
+COMMENT = re.compile(r"(//[^\n]*\n?)|(/\*.*?\*/)", re.DOTALL)
+
+
+def scan(code):
+    """Split source text into tokens.
+
+    At each position the patterns are tried in a fixed order (comment,
+    whitespace, keyword, identifier, constant, string literal, punctuator)
+    and the first one that matches is consumed.  Comments and whitespace
+    produce no token.  Keywords and punctuators become a Token whose type is
+    the matched text; identifiers, constants and string literals become a
+    Token of that category carrying the matched text as data.
+
+    Args:
+        code: the source text to scan.
+
+    Returns:
+        A GenerousTokenList of the tokens in source order.
+
+    Raises:
+        ValueError: if no pattern matches at some position; the message
+            quotes up to 20 characters of the offending input.
+    """
+    # (pattern, emits a token?, fixed token type or None to use matched text)
+    rules = (
+        (COMMENT, False, None),
+        (WHITESPACE, False, None),
+        (KEYWORD, True, None),
+        (IDENTIFIER, True, 'identifier'),
+        (CONSTANT, True, 'constant'),
+        (STRING_LITERAL, True, 'string_literal'),
+        (PUNCTUATOR, True, None),
+    )
+
+    tokens = []
+    position = 0
+    length = len(code)
+
+    while position < length:
+        # Match in place at `position` instead of re-slicing the remaining
+        # text after every token, which copied the tail of the input each time.
+        for pattern, emits, token_type in rules:
+            match = pattern.match(code, position)
+            if match:
+                break
+        else:
+            raise ValueError("unrecognized code in scanner: {}".format(repr(code[position:position + 20])))
+
+        if emits:
+            text = match.group()
+            if token_type is None:
+                tokens.append(Token(text))
+            else:
+                tokens.append(Token(token_type, text))
+        position = match.end()
+
+    return GenerousTokenList(tokens)