From 2ef831feb8cd1b2393196877b7b7c93ac49093d7 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Fri, 30 Apr 2021 21:53:31 -0600 Subject: overhaul parsing to match ABNF from spec --- ctec/parse_utils.py | 71 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 12 deletions(-) (limited to 'ctec/parse_utils.py') diff --git a/ctec/parse_utils.py b/ctec/parse_utils.py index 3dd5e63..fe3ed06 100644 --- a/ctec/parse_utils.py +++ b/ctec/parse_utils.py @@ -3,21 +3,27 @@ from typing import Callable, List, Optional, Tuple, TypeVar __all__ = [ 'ParseResult', 'Parser', + 'as_predicate', 'alt', 'tag', + 'itag', 'take_till1', + 'take_while0', 'take_while1', 'take_n', 'any_char', 'all_consuming', 'map_parser', + 'and_then', 'opt', 'verify', 'many0', 'many1', 'delimited', 'pair', + 'triple', 'preceded', + 'followed', 'separated_pair', 'separated_triple', 'separated_many0', @@ -31,6 +37,11 @@ T3 = TypeVar('T3') ParseResult = Optional[Tuple[T, str]] Parser = Callable[[str], ParseResult[T]] +def as_predicate(parser: Parser[T]) -> Callable[[str], bool]: + def check(text: str) -> bool: + return parser(text) is not None + return check + def alt(*parsers: Parser[T]) -> Parser[T]: def parse(text: str) -> ParseResult[T]: for parser in parsers[:-1]: @@ -40,13 +51,29 @@ def alt(*parsers: Parser[T]) -> Parser[T]: return parsers[-1](text) return parse -def tag(tag_text: str) -> Parser[None]: - def parse(text: str) -> ParseResult[None]: +def tag(tag_text: str) -> Parser[str]: + def parse(text: str) -> ParseResult[str]: if text.startswith(tag_text): - return None, text[len(tag_text):] + return tag_text, text[len(tag_text):] + return None + return parse + +# case-insensitive tag +def itag(tag_text: str) -> Parser[str]: + def parse(text: str) -> ParseResult[str]: + if text.casefold().startswith(tag_text.casefold()): + return tag_text, text[len(tag_text):] return None return parse +def take_while0(predicate: Callable[[str], bool]) -> Parser[str]: + def parse(text: str) -> ParseResult[str]: + for i in range(len(text)): + if not predicate(text[i]): + return text[:i], text[i:] + return text, "" + return parse + def take_while1(predicate: Callable[[str], bool]) -> Parser[str]: def parse(text: str) -> ParseResult[str]: if len(text) == 0 or not predicate(text[0]): @@ -96,6 +123,15 @@ def map_parser(parser: Parser[T1], mapper: Callable[[T1], T2]) -> Parser[T2]: return mapper(result), extra return parse +def and_then(first_parser: Parser[T1], get_second_parser: Callable[[T1], Parser[T2]]) -> Parser[T2]: + def parse(text: str) -> ParseResult[T2]: + parsed_result = first_parser(text) + if parsed_result is None: + return None + result, _ = parsed_result + return get_second_parser(result)(text) + return parse + def opt(parser: Parser[T]) -> Parser[Optional[T]]: def parse(text: str) -> ParseResult[Optional[T]]: result = parser(text) @@ -194,21 +230,32 @@ def pair(first_parser: Parser[T1], second_parser: Parser[T2]) -> Parser[Tuple[T1 return (first_result, second_result), extra return parse -def preceded(before_parser: Parser[T1], parser: Parser[T]) -> Parser[T]: - def parse(text: str) -> ParseResult[T]: - before_result = before_parser(text) - if before_result is None: +def triple(first_parser: Parser[T1], second_parser: Parser[T2], third_parser: Parser[T3]) -> Parser[Tuple[T1, T2, T3]]: + def parse(text: str) -> ParseResult[Tuple[T1, T2, T3]]: + first_parsed_result = first_parser(text) + if first_parsed_result is None: return None - _, extra = before_result + first_result, extra = first_parsed_result - parsed_result = parser(extra) - if parsed_result is None: + second_parsed_result = second_parser(extra) + if second_parsed_result is None: return None - result, extra = parsed_result + second_result, extra = second_parsed_result - return result, extra + third_parsed_result = third_parser(extra) + if third_parsed_result is None: + return None + third_result, extra = third_parsed_result + + return (first_result, second_result, third_result), extra return parse +def preceded(before_parser: Parser[T1], parser: Parser[T]) -> Parser[T]: + return map_parser(pair(before_parser, parser), lambda x: x[1]) + +def followed(parser: Parser[T], after_parser: Parser[T1]) -> Parser[T]: + return map_parser(pair(parser, after_parser), lambda x: x[0]) + def separated_pair(first_parser: Parser[T1], between_parser: Parser[T], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]: def parse(text: str) -> ParseResult[Tuple[T1, T2]]: first_parsed_result = first_parser(text) -- cgit v1.2.3