# From bbf01ada6b2f748618db820624bf768989a11924 Mon Sep 17 00:00:00 2001
# From: Melody Horn
# Date: Fri, 30 Apr 2021 18:52:36 -0600
# Subject: fetch folders
#
# File: ctec/parse_utils.py (created by this commit; patch rendered by cgit v1.2.3)
"""Small parser-combinator toolkit operating on plain strings.

A *parser* is any callable that takes the text still to be parsed and returns
either ``None`` (the parser failed) or a ``(value, remaining_text)`` tuple
(the parser succeeded). Note that a successful parse may carry ``None`` as its
value (see ``tag`` and ``opt``), so always test the whole result with
``is None`` rather than testing the value.
"""

from typing import Callable, List, Optional, Tuple, TypeVar

__all__ = [
    'ParseResult',
    'Parser',
    'alt',
    'tag',
    'take_till1',
    'take_while1',
    'take_n',
    'any_char',
    'all_consuming',
    'map_parser',
    'opt',
    'verify',
    'many0',
    'many1',
    'delimited',
    'pair',
    'preceded',
    'separated_pair',
    'separated_triple',
    'separated_many0',
]

T = TypeVar('T')
T1 = TypeVar('T1')
T2 = TypeVar('T2')
T3 = TypeVar('T3')

# ``None`` means failure; otherwise ``(parsed value, unconsumed text)``.
ParseResult = Optional[Tuple[T, str]]
Parser = Callable[[str], ParseResult[T]]


def alt(*parsers: Parser[T]) -> Parser[T]:
    """Try each parser in order and return the first successful result.

    Fails (returns ``None``) when every alternative fails, including the
    degenerate case where no alternatives were supplied at all.
    """
    def parse(text: str) -> ParseResult[T]:
        for parser in parsers:
            result = parser(text)
            if result is not None:
                return result
        return None
    return parse


def tag(tag_text: str) -> Parser[None]:
    """Match the literal prefix ``tag_text``.

    On success the parsed value is ``None`` and the remainder is the text
    after the prefix.
    """
    def parse(text: str) -> ParseResult[None]:
        if text.startswith(tag_text):
            return None, text[len(tag_text):]
        return None
    return parse


def take_while1(predicate: Callable[[str], bool]) -> Parser[str]:
    """Consume the longest non-empty prefix whose characters satisfy ``predicate``.

    Fails when the text is empty or its first character is rejected.
    """
    def parse(text: str) -> ParseResult[str]:
        for index, char in enumerate(text):
            if not predicate(char):
                # A rejection at index 0 means nothing matched at all.
                return (text[:index], text[index:]) if index else None
        # Every character matched (or there were no characters).
        return (text, "") if text else None
    return parse


def take_till1(predicate: Callable[[str], bool]) -> Parser[str]:
    """Consume the longest non-empty prefix of characters that do NOT satisfy ``predicate``."""
    return take_while1(lambda x: not predicate(x))


def take_n(n: int) -> Parser[str]:
    """Consume exactly ``n`` characters; fail if fewer than ``n`` remain."""
    def parse(text: str) -> ParseResult[str]:
        if len(text) < n:
            return None
        return text[:n], text[n:]
    return parse


def any_char(text: str) -> ParseResult[str]:
    """Consume a single character; fail on empty input."""
    if text:
        return text[0], text[1:]
    return None


def all_consuming(parser: Parser[T], *, debug: bool = False) -> Parser[T]:
    """Succeed only if ``parser`` succeeds and leaves no unconsumed text.

    When ``debug`` is true, the reason for a failure is printed to stdout.
    """
    def parse(text: str) -> ParseResult[T]:
        parsed_result = parser(text)
        if parsed_result is None:
            if debug:
                print('all_consuming: parser failed')
            return None
        result, extra = parsed_result
        if extra:
            if debug:
                # The placeholder must actually be formatted; passing the
                # value as a second print() argument left a literal "{}".
                print('all_consuming: leftover text {}'.format(repr(extra)))
            return None
        return result, ''
    return parse


def map_parser(parser: Parser[T1], mapper: Callable[[T1], T2]) -> Parser[T2]:
    """Apply ``mapper`` to the value produced by ``parser`` on success."""
    def parse(text: str) -> ParseResult[T2]:
        parsed_result = parser(text)
        if parsed_result is None:
            return None
        result, extra = parsed_result
        return mapper(result), extra
    return parse


def opt(parser: Parser[T]) -> Parser[Optional[T]]:
    """Make ``parser`` optional.

    Never fails: if ``parser`` fails, the result is ``(None, text)`` with the
    input left untouched.
    """
    def parse(text: str) -> ParseResult[Optional[T]]:
        result = parser(text)
        if result is None:
            return None, text
        return result
    return parse


def verify(parser: Parser[T], predicate: Callable[[T], bool]) -> Parser[T]:
    """Run ``parser`` and additionally require ``predicate(value)`` to hold."""
    def parse(text: str) -> ParseResult[T]:
        parsed_result = parser(text)
        if parsed_result is None:
            return None
        result, extra = parsed_result
        if predicate(result):
            return result, extra
        return None
    return parse


def many0(parser: Parser[T]) -> Parser[List[T]]:
    """Apply ``parser`` repeatedly, collecting values until it fails.

    Never fails; zero matches yield an empty list. Repetition also stops (and
    the match is discarded) as soon as ``parser`` succeeds without consuming
    any input, since retrying on identical text would never terminate.
    """
    def parse(text: str) -> ParseResult[List[T]]:
        results = []  # type: List[T]
        while True:
            parsed = parser(text)
            if parsed is None:
                break
            value, rest = parsed
            if rest == text:
                # Zero-width success: no progress, so stop instead of
                # looping forever.
                break
            results.append(value)
            text = rest
        return results, text
    return parse


def many1(parser: Parser[T]) -> Parser[List[T]]:
    """Like ``many0`` but fails unless ``parser`` matches at least once.

    The mandatory first match is always kept; further repetition stops when
    ``parser`` fails or succeeds without consuming any input.
    """
    def parse(text: str) -> ParseResult[List[T]]:
        first = parser(text)
        if first is None:
            return None
        value, extra = first
        results = [value]

        while True:
            parsed = parser(extra)
            if parsed is None:
                break
            value, rest = parsed
            if rest == extra:
                # Zero-width success: no progress, so stop instead of
                # looping forever.
                break
            results.append(value)
            extra = rest
        return results, extra
    return parse


def separated_many0(parser: Parser[T], separator_parser: Parser) -> Parser[List[T]]:
    """Parse zero or more ``parser`` items separated by ``separator_parser``.

    Never fails. Separator values are discarded. A separator that is not
    followed by another item is still consumed (i.e. a trailing separator is
    swallowed). Repetition stops if an item plus its separator consume no
    input at all, which would otherwise loop forever.
    """
    def parse(text: str) -> ParseResult[List[T]]:
        results = []  # type: List[T]
        while True:
            round_start = text

            parsed = parser(text)
            if parsed is None:
                break
            value, text = parsed
            results.append(value)

            separated = separator_parser(text)
            if separated is None:
                break
            _, text = separated

            if text == round_start:
                # Neither the item nor the separator consumed anything.
                break
        return results, text
    return parse


def delimited(before_parser: Parser[T1], parser: Parser[T], after_parser: Parser[T2]) -> Parser[T]:
    """Parse ``before_parser``, ``parser``, ``after_parser`` in sequence; keep only the middle value."""
    def parse(text: str) -> ParseResult[T]:
        before_result = before_parser(text)
        if before_result is None:
            return None
        _, extra = before_result

        parsed_result = parser(extra)
        if parsed_result is None:
            return None
        result, extra = parsed_result

        after_result = after_parser(extra)
        if after_result is None:
            return None
        _, extra = after_result

        return result, extra
    return parse


def pair(first_parser: Parser[T1], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]:
    """Parse two things in sequence and return both values as a tuple."""
    def parse(text: str) -> ParseResult[Tuple[T1, T2]]:
        first_parsed_result = first_parser(text)
        if first_parsed_result is None:
            return None
        first_result, extra = first_parsed_result

        second_parsed_result = second_parser(extra)
        if second_parsed_result is None:
            return None
        second_result, extra = second_parsed_result

        return (first_result, second_result), extra
    return parse


def preceded(before_parser: Parser[T1], parser: Parser[T]) -> Parser[T]:
    """Parse ``before_parser`` then ``parser``; keep only the value of ``parser``."""
    def parse(text: str) -> ParseResult[T]:
        before_result = before_parser(text)
        if before_result is None:
            return None
        _, extra = before_result

        # Whatever the main parser reports (success or failure) is the result.
        return parser(extra)
    return parse


def separated_pair(first_parser: Parser[T1], between_parser: Parser[T], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]:
    """Parse ``first``, ``between``, ``second`` in sequence; return ``(first, second)``."""
    def parse(text: str) -> ParseResult[Tuple[T1, T2]]:
        first_parsed_result = first_parser(text)
        if first_parsed_result is None:
            return None
        first_result, extra = first_parsed_result

        between_result = between_parser(extra)
        if between_result is None:
            return None
        _, extra = between_result

        second_parsed_result = second_parser(extra)
        if second_parsed_result is None:
            return None
        second_result, extra = second_parsed_result

        return (first_result, second_result), extra
    return parse


def separated_triple(first_parser: Parser[T1], between12_parser: Parser, second_parser: Parser[T2], between23_parser: Parser, third_parser: Parser[T3]) -> Parser[Tuple[T1, T2, T3]]:
    """Parse three values with a separator between each; return the three values as a tuple."""
    def parse(text: str) -> ParseResult[Tuple[T1, T2, T3]]:
        first_parsed_result = first_parser(text)
        if first_parsed_result is None:
            return None
        first_result, extra = first_parsed_result

        between_result = between12_parser(extra)
        if between_result is None:
            return None
        _, extra = between_result

        second_parsed_result = second_parser(extra)
        if second_parsed_result is None:
            return None
        second_result, extra = second_parsed_result

        between_result = between23_parser(extra)
        if between_result is None:
            return None
        _, extra = between_result

        third_parsed_result = third_parser(extra)
        if third_parsed_result is None:
            return None
        third_result, extra = third_parsed_result

        return (first_result, second_result, third_result), extra
    return parse