from dataclasses import dataclass
from typing import Iterable, Iterator, List, Optional, Tuple

from .parse_util import *

__all__ = [
    'TokenString',
    'Token',
    'MacroToken',
    'TextToken',
    'tokenize',
]


class TokenString(Iterable['Token']):
    """A sequence of Token objects with helpers for splitting and trimming."""

    def __init__(self, my_tokens: Optional[List['Token']] = None):
        if my_tokens is None:
            my_tokens = []
        self._tokens = my_tokens

    def __eq__(self, other) -> bool:
        return isinstance(other, TokenString) and self._tokens == other._tokens

    def __iter__(self) -> Iterator['Token']:
        return iter(self._tokens)

    def split_once(self, delimiter: str) -> Optional[Tuple['TokenString', 'TokenString']]:
        """Split on the first occurrence of `delimiter` inside a TextToken, or return None."""
        result0 = []
        self_iter = iter(self._tokens)
        for t in self_iter:
            if isinstance(t, TextToken) and delimiter in t.text:
                before, after = t.text.split(delimiter, 1)
                result0.append(TextToken(before))
                result1 = [TextToken(after), *self_iter]
                return TokenString(result0), TokenString(result1)
            result0.append(t)
        return None

    def lstrip(self):
        first_token = self._tokens[0]
        if isinstance(first_token, TextToken):
            first_token.text = first_token.text.lstrip()
            self._tokens[0] = first_token

    def rstrip(self):
        last_token = self._tokens[-1]
        if isinstance(last_token, TextToken):
            last_token.text = last_token.text.rstrip()
            self._tokens[-1] = last_token


@dataclass()
class Token:
    pass


@dataclass()
class TextToken(Token):
    text: str


@dataclass()
class MacroToken(Token):
    name: str
    replacement: Optional[Tuple[TokenString, TokenString]] = None


macro_name = take_while1(lambda c: c.isalnum() or c in ['.', '_'])


def _deferred(make_parser):
    # `tokens` is defined further down and the grammar is mutually recursive
    # (a macro substitution itself contains tokens), so construction of the
    # inner parser is deferred until parse time. This assumes a Parser from
    # parse_util is a plain callable on the remaining input, which is how
    # `tokenize` below uses one.
    def parse(text):
        return make_parser()(text)
    return parse


def macro_expansion_body(end: str) -> Parser[MacroToken]:
    # NAME, optionally followed by ':SUFFIX=REPLACEMENT' (a substitution reference).
    subst = preceded(tag(":"), separated_pair(_deferred(lambda: tokens('=')), '=', _deferred(lambda: tokens(end))))
    return map_parser(pair(macro_name, opt(subst)), MacroToken)


parens_macro_expansion = delimited(tag('$('), macro_expansion_body(')'), tag(')'))
braces_macro_expansion = delimited(tag('${'), macro_expansion_body('}'), tag('}'))


def build_tiny_expansion(name_probably: str) -> Token:
    # '$$' is an escaped literal dollar sign; '$X' expands the single-character macro X.
    if name_probably == '$':
        return TextToken('$')
    else:
        return MacroToken(name_probably)


tiny_macro_expansion = map_parser(
    preceded(tag('$'), verify(any_char, lambda c: c not in ['(', '{'])),
    build_tiny_expansion,
)

macro_expansion = alt(tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion)

just_text = map_parser(take_till1(lambda c: c == '$'), TextToken)


def text_until(end: str) -> Parser[TextToken]:
    return map_parser(take_till1(lambda c: c in ['$', end]), TextToken)


def single_token(until: Optional[str] = None) -> Parser[Token]:
    if until is None:
        text = just_text
    else:
        text = text_until(until)
    return alt(text, macro_expansion)


empty_tokens = map_parser(tag(''), lambda _: TokenString([TextToken('')]))


def tokens(until: Optional[str] = None) -> Parser[TokenString]:
    return alt(map_parser(many1(single_token(until)), TokenString), empty_tokens)


full_text_tokens = all_consuming(tokens())


def tokenize(text: str) -> TokenString:
    result, _ = full_text_tokens(text)
    return result  # TODO handle errors

# TODO test any of this
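

# Minimal usage sketch (untested, per the TODO above). It assumes the
# parse_util combinators behave like their nom counterparts, i.e. a parser is
# a callable on the input string returning a (value, remaining) pair, so that
# e.g. '$(CC)' is expected to yield a MacroToken('CC') and '$$' a literal '$'.
if __name__ == '__main__':
    for sample in ('plain text', 'cc $(CFLAGS) -o $@', 'price is $$5', '$(SRCS:.c=.o)'):
        print(sample, '->', list(tokenize(sample)))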