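"""Tokenizer for makefile-style strings.

Splits text into literal runs and macro expansions ($c, $(NAME), ${NAME},
including `:from=to` substitution references), using the parser combinators
from parse_util.
"""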
from dataclasses import dataclass
from typing import Iterable, Iterator, List, Optional, Tuple
from .parse_util import *
__all__ = [
'TokenString',
'Token',
'MacroToken',
'TextToken',
'tokenize',
]
class TokenString(Iterable['Token']):
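    """A parsed string: a sequence of literal-text and macro tokens."""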
    def __init__(self, my_tokens: Optional[List['Token']] = None):
        if my_tokens is None:
            my_tokens = []
        self._tokens = my_tokens
def __eq__(self, other) -> bool:
return isinstance(other, TokenString) and self._tokens == other._tokens
def __iter__(self) -> Iterator['Token']:
return iter(self._tokens)
def split_once(self, delimiter: str) -> Optional[Tuple['TokenString', 'TokenString']]:
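        """Split on the first occurrence of `delimiter` within a TextToken.

        Returns the halves before and after the delimiter, or None if no
        TextToken contains it. MacroTokens are never split.
        """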
result0 = []
self_iter = iter(self._tokens)
for t in self_iter:
if isinstance(t, TextToken) and delimiter in t.text:
before, after = t.text.split(delimiter, 1)
result0.append(TextToken(before))
result1 = [TextToken(after), *self_iter]
return TokenString(result0), TokenString(result1)
result0.append(t)
return None
    def lstrip(self):
        # Only literal text can be stripped; an empty TokenString is a no-op.
        if self._tokens and isinstance(self._tokens[0], TextToken):
            self._tokens[0].text = self._tokens[0].text.lstrip()

    def rstrip(self):
        if self._tokens and isinstance(self._tokens[-1], TextToken):
            self._tokens[-1].text = self._tokens[-1].text.rstrip()
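
# Token classes: a TokenString is a flat list of TextTokens (literal text)
# and MacroTokens (macro expansions, optionally carrying a :from=to
# substitution as a pair of TokenStrings).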
@dataclass()
class Token:
pass
@dataclass()
class TextToken(Token):
text: str
@dataclass()
class MacroToken(Token):
name: str
replacement: Optional[Tuple[TokenString, TokenString]] = None
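
# A macro name is one or more alphanumeric characters, '.', or '_'.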
macro_name = take_while1(lambda c: c.isalnum() or c in ['.', '_'])
def macro_expansion_body(end: str) -> Parser[MacroToken]:
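    """Parse the inside of a macro expansion, up to `end`: a macro name
    optionally followed by a `:from=to` substitution reference."""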
subst = preceded(tag(":"), separated_pair(tokens('='), '=', tokens(end)))
return map_parser(pair(macro_name, opt(subst)), MacroToken)
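
# The two delimited expansion forms, $(...) and ${...}.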
parens_macro_expansion = delimited(tag('$('), macro_expansion_body(')'), tag(')'))
braces_macro_expansion = delimited(tag('${'), macro_expansion_body('}'), tag('}'))
def build_tiny_expansion(name_probably: str) -> Token:
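    """`$$` is an escaped literal dollar sign; any other single character
    is a one-character macro name."""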
if name_probably == '$':
return TextToken('$')
else:
return MacroToken(name_probably)
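
# A short expansion: `$c` for any single character c other than '(' or '{'.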
tiny_macro_expansion = map_parser(preceded(tag('$'), verify(any_char, lambda c: c not in ['(', '{'])), build_tiny_expansion)
macro_expansion = alt(tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion)
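
# A run of literal text, ending at the next '$' (or at `end`, below).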
just_text = map_parser(take_till1(lambda c: c == '$'), TextToken)
def text_until(end: str) -> Parser[TextToken]:
return map_parser(take_till1(lambda c: c in ['$', end]), TextToken)
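
# One token: a run of literal text, or a macro expansion.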
def single_token(until: Optional[str] = None) -> Parser[Token]:
if until is None:
text = just_text
else:
text = text_until(until)
return alt(text, macro_expansion)
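
# Matches the empty string, yielding a single empty TextToken; this is the
# fallback branch of `tokens` when `many1` matches nothing.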
empty_tokens = map_parser(tag(''), lambda _: TokenString([TextToken('')]))
def tokens(until: Optional[str] = None) -> Parser[TokenString]:
return alt(map_parser(many1(single_token(until)), TokenString), empty_tokens)
full_text_tokens = all_consuming(tokens())
def tokenize(text: str) -> TokenString:
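    """Tokenize `text` into literal text and macro expansions.

    For example, "$(CC) -o $@" yields a TokenString holding
    MacroToken('CC'), TextToken(' -o '), and MacroToken('@').
    """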
result, _ = full_text_tokens(text)
return result
# TODO handle errors
# TODO test any of this