about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- crowbar_reference_compiler/parser.py | 301
-rw-r--r-- crowbar_reference_compiler/scanner.py | 60
-rw-r--r-- tests/test_hello_world.py | 2
-rw-r--r-- tests/test_parsing.py | 34
4 files changed, 203 insertions, 194 deletions
diff --git a/crowbar_reference_compiler/parser.py b/crowbar_reference_compiler/parser.py
index 9bb3179..8400480 100644
--- a/crowbar_reference_compiler/parser.py
+++ b/crowbar_reference_compiler/parser.py
@@ -2,169 +2,154 @@ from parsimonious import TokenGrammar, ParseError, IncompleteParseError # type:
grammar = TokenGrammar(
r"""
-HeaderFile = HeaderFileElement+
-HeaderFileElement = IncludeStatement /
- TypeDeclaration /
- FunctionDeclaration
-
-ImplementationFile = ImplementationFileElement+
-ImplementationFileElement = HeaderFileElement /
- FunctionDefinition
-
-IncludeStatement = "include" string_literal ";"
-
-TypeDeclaration = StructDeclaration /
- EnumDeclaration /
- TypedefDeclaration
-StructDeclaration = "struct" identifier "{" VariableDeclaration+ "}" ";"
-EnumDeclaration = "enum" identifier "{" EnumBody "}" ";"
-EnumBody = (identifier ("=" Expression)? "," EnumBody) /
- (identifier ("=" Expression)? ","?)
-TypedefDeclaration = "typedef" identifier "=" Type ";"
-
-FunctionDeclaration = FunctionSignature ";"
-FunctionDefinition = FunctionSignature Block
-FunctionSignature = Type identifier "(" SignatureArguments? ")"
-SignatureArguments = (Type identifier "," SignatureArguments) /
- (Type identifier ","?)
-
-Block = "{" Statement* "}"
+HeaderFile <- IncludeStatement* HeaderFileElement+
+HeaderFileElement <- TypeDefinition / FunctionDeclaration / VariableDefinition / VariableDeclaration
+
+ImplementationFile <- IncludeStatement* ImplementationFileElement+
+ImplementationFileElement <- TypeDefinition / VariableDefinition / FunctionDefinition
+
+IncludeStatement <- 'include' string-literal ';'
+
+TypeDefinition <- StructDefinition / EnumDefinition / UnionDefinition
+StructDefinition <- NormalStructDefinition / OpaqueStructDefinition
+NormalStructDefinition <- 'struct' identifier '{' VariableDeclaration+ '}'
+OpaqueStructDefinition <- 'opaque' 'struct' identifier ';'
+EnumDefinition <- 'enum' identifier '{' EnumMember (',' EnumMember)* ','? '}'
+EnumMember <- identifier ('=' Expression)?
+UnionDefinition <- RobustUnionDefinition / FragileUnionDefinition
+RobustUnionDefinition <- 'union' identifier '{' VariableDeclaration UnionBody '}'
+UnionBody <- 'switch' '(' identifier ')' '{' UnionBodySet+ '}'
+UnionBodySet <- CaseSpecifier+ (VariableDeclaration / ';')
+FragileUnionDefinition <- 'fragile' 'union' identifier '{' VariableDeclaration+ '}'
+
+FunctionDeclaration <- FunctionSignature ';'
+FunctionDefinition <- FunctionSignature Block
+FunctionSignature <- Type identifier '(' SignatureArguments? ')'
+SignatureArguments <- Type identifier (',' Type identifier)* ','?
+
+Block <- '{' Statement* '}'
-Statement = VariableDefinition /
- VariableDeclaration /
- IfStatement /
- SwitchStatement /
- WhileStatement /
- DoWhileStatement /
- ForStatement /
- FlowControlStatement /
- AssignmentStatement /
- ExpressionStatement
-
-VariableDefinition = Type identifier "=" Expression ";"
-VariableDeclaration = Type identifier ";"
-
-IfStatement = ("if" Expression Block "else" Block) /
- ("if" Expression Block)
-
-SwitchStatement = "switch" Expression "{" SwitchCase+ "}"
-SwitchCase = (CaseSpecifier Block) /
- ("default" Block)
-CaseSpecifier = ("case" Expression "," CaseSpecifier) /
- ("case" Expression ","?)
-
-WhileStatement = "while" Expression Block
-DoWhileStatement = "do" Block "while" Expression ";"
-ForStatement = "for" VariableDefinition? ";" Expression ";" AssignmentStatementBody? Block
-
-FlowControlStatement = ("continue" ";") /
- ("break" ";") /
- ("return" Expression? ";")
-
-AssignmentStatement = AssignmentStatementBody ";"
-AssignmentStatementBody = (AssignmentTargetExpression "=" Expression) /
- (AssignmentTargetExpression "+=" Expression) /
- (AssignmentTargetExpression "-=" Expression) /
- (AssignmentTargetExpression "*=" Expression) /
- (AssignmentTargetExpression "/=" Expression) /
- (AssignmentTargetExpression "%=" Expression) /
- (AssignmentTargetExpression "&=" Expression) /
- (AssignmentTargetExpression "^=" Expression) /
- (AssignmentTargetExpression "|=" Expression) /
- (AssignmentTargetExpression "++") /
- (AssignmentTargetExpression "--")
-
-ExpressionStatement = Expression ";"
-
-Type = ("const" BasicType) /
- (BasicType "*") /
- (BasicType "[" Expression "]") /
- (BasicType "function" "(" (BasicType ",")* ")") /
- BasicType
-BasicType = "void" /
- IntegerType /
- ("signed" IntegerType) /
- ("unsigned" IntegerType) /
- "float" /
- "double" /
- "bool" /
- ("struct" identifier) /
- ("enum" identifier) /
- ("typedef" identifier) /
- ("(" Type ")")
-IntegerType = "char" /
- "short" /
- "int" /
- "long"
-
-AssignmentTargetExpression = identifier ATEElementSuffix*
-ATEElementSuffix = ("[" Expression "]") /
- ("." identifier) /
- ("->" identifier)
-
-AtomicExpression = identifier /
- constant /
- string_literal /
- ("(" Expression ")")
-
-ObjectExpression = (AtomicExpression ObjectSuffix*) /
- ArrayLiteralExpression /
- StructLiteralExpression
-ObjectSuffix = ("[" Expression "]") /
- ("(" CommasExpressionList? ")") /
- ("." identifier) /
- ("->" identifier)
-CommasExpressionList = (Expression "," CommasExpressionList) /
- (Expression ","?)
-ArrayLiteralExpression = "{" CommasExpressionList "}"
-StructLiteralExpression = "{" StructLiteralBody "}"
-StructLiteralBody = (StructLiteralElement "," StructLiteralBody?) /
- (StructLiteralElement ","?)
-StructLiteralElement = "." identifier "=" Expression
-
-FactorExpression = ("(" Type ")" FactorExpression) /
- ("&" FactorExpression) /
- ("*" FactorExpression) /
- ("+" FactorExpression) /
- ("-" FactorExpression) /
- ("~" FactorExpression) /
- ("!" FactorExpression) /
- ("sizeof" FactorExpression) /
- ("sizeof" Type) /
- ObjectExpression
-
-TermExpression = FactorExpression TermSuffix*
-TermSuffix = ("*" FactorExpression) /
- ("/" FactorExpression) /
- ("%" FactorExpression)
-
-ArithmeticExpression = TermExpression ArithmeticSuffix*
-ArithmeticSuffix = ("+" TermExpression) /
- ("-" TermExpression)
-
-BitwiseOpExpression = (ArithmeticExpression "<<" ArithmeticExpression) /
- (ArithmeticExpression ">>" ArithmeticExpression) /
- (ArithmeticExpression "^" ArithmeticExpression) /
- (ArithmeticExpression ("&" ArithmeticExpression)+) /
- (ArithmeticExpression ("|" ArithmeticExpression)+) /
- ArithmeticExpression
-
-ComparisonExpression = (BitwiseOpExpression "==" BitwiseOpExpression) /
- (BitwiseOpExpression "!=" BitwiseOpExpression) /
- (BitwiseOpExpression "<=" BitwiseOpExpression) /
- (BitwiseOpExpression ">=" BitwiseOpExpression) /
- (BitwiseOpExpression "<" BitwiseOpExpression) /
- (BitwiseOpExpression ">" BitwiseOpExpression) /
- BitwiseOpExpression
-
-Expression = (ComparisonExpression ("&&" ComparisonExpression)+) /
- (ComparisonExpression ("||" ComparisonExpression)+) /
- ComparisonExpression
+Statement <- VariableDefinition / StructureStatement / FlowControlStatement / AssignmentStatement / FragileStatement / ExpressionStatement / EmptyStatement
+EmptyStatement <- ';'
+FragileStatement <- 'fragile' Statement
+ExpressionStatement <- Expression ';'
+
+VariableDeclaration <- Type identifier ';'
+VariableDefinition <- Type identifier '=' Expression ';'
+
+StructureStatement <- IfStatement / SwitchStatement / WhileStatement / DoWhileStatement / ForStatement
+IfStatement <- 'if' '(' Expression ')' Block ('else' Block)?
+SwitchStatement <- 'switch' '(' Expression ')' '{' (CaseSpecifier / Statement)+ '}'
+CaseSpecifier <- ('case' Expression ':') / ('default' ':')
+WhileStatement <- 'while' '(' Expression ')' Block
+DoWhileStatement <- 'do' Block 'while' '(' Expression ')' ';'
+ForStatement <- 'for' '(' ForInit? ';' Expression ';' ForUpdate? ')' Block
+ForInit <- ForInitializer (',' ForInitializer)* ','?
+ForInitializer <- Type identifier '=' Expression
+ForUpdate <- AssignmentBody (',' AssignmentBody)* ','?
+
+FlowControlStatement <- ContinueStatement / BreakStatement / ReturnStatement
+ContinueStatement <- 'continue' ';'
+BreakStatement <- 'break' ';'
+ReturnStatement <- 'return' Expression? ';'
+
+AssignmentStatement <- AssignmentBody ';'
+AssignmentBody <- DirectAssignmentBody / UpdateAssignmentBody / CrementAssignmentBody
+DirectAssignmentBody <- Expression '=' Expression
+UpdateAssignmentBody <- Expression ('+=' / '-=' / '*=' / '/=' / '%=' / '&=' / '^=' / '|=') Expression
+CrementAssignmentBody <- Expression ('++' / '--')
+
+Type <- ConstType / PointerType / ArrayType / FunctionType / BasicType
+ConstType <- 'const' BasicType
+PointerType <- BasicType '*'
+ArrayType <- BasicType '[' Expression ']'
+FunctionType <- BasicType 'function' '(' FunctionTypeArgs? ')'
+FunctionTypeArgs <- BasicType (',' BasicType)* ','?
+BasicType <- 'void' / 'bool' / 'float32' / 'float64' /
+ 'int8' / 'int16' / 'int32' / 'int64' / 'intaddr' / 'intmax' / 'intsize' /
+ 'uint8' / 'uint16' / 'uint32' / 'uint64' / 'uintaddr' / 'uintmax' / 'uintsize' /
+ ('struct' identifier) / ('enum' identifier) / ('union' identifier) / ('(' Type ')')
+
+
+AtomicExpression <- identifier / constant / 'true' / 'false' / string-literal / ('(' Expression ')')
+
+ObjectExpression <- (AtomicExpression ObjectSuffix*) / ArrayLiteral / StructLiteral
+ObjectSuffix <- ArrayIndexSuffix / FunctionCallSuffix / StructElementSuffix / StructPointerElementSuffix
+
+ArrayIndexSuffix <- '[' Expression ']'
+
+FunctionCallSuffix <- '(' CommasExpressionList? ')'
+CommasExpressionList <- Expression (',' Expression)* ','?
+
+StructElementSuffix <- '.' identifier
+
+StructPointerElementSuffix <- '->' identifier
+
+ArrayLiteral <- '{' CommasExpressionList '}'
+
+StructLiteral <- '{' StructLiteralElement (',' StructLiteralElement)* ','? '}'
+StructLiteralElement <- '.' identifier '=' Expression
+
+FactorExpression <- CastExpression / AddressOfExpression / DerefExpression / PositiveExpression / NegativeExpression / BitwiseNotExpression / LogicalNotExpression / SizeofExpression / ObjectExpression
+
+CastExpression <- '(' Type ')' ObjectExpression
+
+AddressOfExpression <- '&' ObjectExpression
+
+DerefExpression <- '*' ObjectExpression
+
+PositiveExpression <- '+' ObjectExpression
+
+NegativeExpression <- '-' ObjectExpression
+
+BitwiseNotExpression <- '~' ObjectExpression
+
+LogicalNotExpression <- '!' ObjectExpression
+
+SizeofExpression <- ('sizeof' ObjectExpression) / ('sizeof' Type)
+
+TermExpression <- FactorExpression TermSuffix?
+TermSuffix <- ('*' FactorExpression)+ / ('/' FactorExpression)+ / ('%' FactorExpression)+
+
+ArithmeticExpression <- TermExpression ArithmeticSuffix?
+ArithmeticSuffix <- ('+' TermExpression)+ / ('-' TermExpression)+
+
+BitwiseOpExpression <- ShiftExpression / XorExpression / BitwiseAndExpression / BitwiseOrExpression / ArithmeticExpression
+
+ShiftExpression <- (ArithmeticExpression '<<' ArithmeticExpression) / (ArithmeticExpression '>>' ArithmeticExpression)
+
+XorExpression <- ArithmeticExpression '^' ArithmeticExpression
+
+BitwiseAndExpression <- ArithmeticExpression ('&' ArithmeticExpression)+
+
+BitwiseOrExpression <- ArithmeticExpression ('|' ArithmeticExpression)+
+
+ComparisonExpression <- EqualExpression / NotEqualExpression / LessEqExpression / GreaterEqExpression / LessThanExpression / GreaterThanExpression / BitwiseOpExpression
+
+EqualExpression <- BitwiseOpExpression '==' BitwiseOpExpression
+
+NotEqualExpression <- BitwiseOpExpression '!=' BitwiseOpExpression
+
+LessEqExpression <- BitwiseOpExpression '<=' BitwiseOpExpression
+
+GreaterEqExpression <- BitwiseOpExpression '>=' BitwiseOpExpression
+
+LessThanExpression <- BitwiseOpExpression '<' BitwiseOpExpression
+
+GreaterThanExpression <- BitwiseOpExpression '>' BitwiseOpExpression
+
+LogicalOpExpression <- LogicalAndExpression / LogicalOrExpression / ComparisonExpression
+
+LogicalAndExpression <- ComparisonExpression ('&&' ComparisonExpression)+
+
+LogicalOrExpression <- ComparisonExpression ('||' ComparisonExpression)+
+
+Expression <- LogicalOpExpression
identifier = "identifier"
constant = "constant"
string_literal = "string_literal"
-""")
+""".replace(' <- ', ' = ').replace('string-literal', 'string_literal'))
class LegibleParseError(ParseError):
diff --git a/crowbar_reference_compiler/scanner.py b/crowbar_reference_compiler/scanner.py
index ea34536..1d846f3 100644
--- a/crowbar_reference_compiler/scanner.py
+++ b/crowbar_reference_compiler/scanner.py
@@ -1,5 +1,5 @@
from dataclasses import dataclass
-from typing import Optional, overload, List, Union
+from typing import Optional, List
import regex as re # type: ignore
@@ -24,12 +24,33 @@ class GenerousTokenList(List[Token]):
return Token('')
-KEYWORD = re.compile("bool|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|fragile|function|if|include|int|long|return|short|signed|sizeof|struct|switch|typedef|unsigned|void|while")
+KEYWORD = re.compile(r"""
+ bool|break|
+ case|char|const|continue|
+ default|do|
+ else|enum|
+ false|float32|float64|for|fragile|function|
+ if|include|int8|int16|int32|int64|intmax|intsize|
+ opaque|
+ return|
+ sizeof|struct|switch|
+ true|
+ uint8|uint16|uint32|uint64|uintaddr|uintmax|uintsize|union|
+ void|
+ while""", re.VERBOSE)
IDENTIFIER = re.compile(r"[\p{L}\p{Pc}\p{Sk}\p{Mn}][\p{L}\p{Pc}\p{Sk}\p{Mn}\p{N}]*")
-CONSTANT = re.compile(r"""([0-9_]+)|(0[bB][01_]+)|0o[0-7_]+|(0[xX][0-9a-fA-F_]+)|([0-9_]+(\.[0-9_]+|[eE][0-9_]+|\.[0-9_]+[eE][0-9_]+))|('([^\'\\]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})')""")
-STRING_LITERAL = re.compile(r'''"([^\\"]|\\'|\\"|\\\\|\\r|\\n|\\t|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"''')
+DECIMAL_CONSTANT = re.compile(r"[0-9_]+")
+BINARY_CONSTANT = re.compile(r"0[bB][01_]+")
+OCTAL_CONSTANT = re.compile(r"0o[0-7_]+")
+HEX_CONSTANT = re.compile(r"0[xX][0-9a-fA-F]+")
+FLOAT_CONSTANT = re.compile(r"[0-9_]+\.[0-9_]+([eE][+-]?[0-9_]+)?")
+HEX_FLOAT_CONSTANT = re.compile(r"0(fx|FX)[0-9a-fA-F_]+\.[0-9a-fA-F_]+[pP][+-]?[0-9_]+")
+
+_ESCAPE_SEQUENCE = r"""\\['"\\rnt0]|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}"""
+CHAR_CONSTANT = re.compile(r"'([^'\\]|" + _ESCAPE_SEQUENCE + r")'")
+STRING_LITERAL = re.compile(r'"([^"\\]|' + _ESCAPE_SEQUENCE + r')+"')
PUNCTUATOR = re.compile(r"->|\+\+|--|>>|<<|<=|>=|&&|\|\||[=!+\-*/%&|^]=|[\[\](){}.,+\-*/%;!&|^~><=]")
-WHITESPACE = re.compile(r"[\p{Zs}\p{Cc}]+")
+WHITESPACE = re.compile(r"[\p{Z}\p{Cc}]+")
COMMENT = re.compile(r"(//[^\n]*\n)|(/\*.*?\*/)", re.DOTALL)
@@ -46,20 +67,25 @@ def scan(code):
if match:
remaining = remaining[match.end():]
continue
- match = KEYWORD.match(remaining)
- if match:
- result.append(Token(match.group()))
- remaining = remaining[match.end():]
+ kw_match = KEYWORD.match(remaining)
+ id_match = IDENTIFIER.match(remaining)
+ if kw_match and ((not id_match) or len(kw_match.group()) == len(id_match.group())):
+ result.append(Token(kw_match.group()))
+ remaining = remaining[kw_match.end():]
continue
- match = IDENTIFIER.match(remaining)
- if match:
- result.append(Token('identifier', match.group()))
- remaining = remaining[match.end():]
+ if id_match:
+ result.append(Token('identifier', id_match.group()))
+ remaining = remaining[id_match.end():]
continue
- match = CONSTANT.match(remaining)
- if match:
- result.append(Token('constant', match.group()))
- remaining = remaining[match.end():]
+ was_constant = False
+ for constant in [DECIMAL_CONSTANT, BINARY_CONSTANT, OCTAL_CONSTANT, HEX_CONSTANT, FLOAT_CONSTANT, HEX_FLOAT_CONSTANT, CHAR_CONSTANT]:
+ match = constant.match(remaining)
+ if match:
+ result.append(Token('constant', match.group()))
+ remaining = remaining[match.end():]
+ was_constant = True
+ break
+ if was_constant:
continue
match = STRING_LITERAL.match(remaining)
if match:
diff --git a/tests/test_hello_world.py b/tests/test_hello_world.py
index ec1ccdd..594ab4f 100644
--- a/tests/test_hello_world.py
+++ b/tests/test_hello_world.py
@@ -8,7 +8,7 @@ class TestHelloWorld(unittest.TestCase):
code = r"""
include "stdio.hro";
-int main() {
+int32 main() {
printf("Hello, world!\n");
return 0;
}
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 91787a6..7463fe7 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -5,41 +5,39 @@ from crowbar_reference_compiler import parse_header, parse_implementation, scan
class TestParsing(unittest.TestCase):
def test_basic(self):
- print(parse_header(scan("int x();")))
+ print(parse_header(scan("int8 x();")))
def test_scdoc_str(self):
# adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/include/str.h
print(parse_header(scan(r"""
-include "stdint.h";
-
struct str {
- char *str;
- typedef size_t len;
- typedef size_t size;
-};
+ (uint8[size])* str;
+ uintsize len;
+ uintsize size;
+}
struct str *str_create();
void str_free(struct str *str);
void str_reset(struct str *str);
-int str_append_ch(struct str *str, typedef uint32_t ch);
+intsize str_append_ch(struct str *str, uint32 ch);
""")))
# adapted from https://git.sr.ht/~sircmpwn/scdoc/tree/master/src/string.c
print(parse_implementation(scan(r"""
-include "stdlib.h";
-include "stdint.h";
-include "str.h";
-include "unicode.h";
+include "stdlib.hro";
+include "stdint.hro";
+include "str.hro";
+include "unicode.hro";
-int ensure_capacity(struct str *str, typedef size_t len) {
+bool ensure_capacity(struct str *str, intsize len) {
if (len + 1 >= str->size) {
- char *new = realloc(str->str, str->size * 2);
+ (uint8[str->size * 2])* new = realloc(str->str, str->size * 2);
if (!new) {
- return 0;
+ return false;
}
str->str = new;
str->size *= 2;
}
- return 1;
+ return true;
}
struct str *str_create() {
@@ -59,8 +57,8 @@ void str_free(struct str *str) {
free(str);
}
-int str_append_ch(struct str *str, typedef uint32_t ch) {
- int size = utf8_chsize(ch);
+intsize str_append_ch(struct str *str, uint32 ch) {
+ intsize size = utf8_chsize(ch);
if (size <= 0) {
return -1;
}