From 2180a5802310032fa048643da03825c82a91ea32 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Sat, 1 May 2021 04:47:02 -0600 Subject: avoid unicode fuckery wherever possible --- ctec/parse_utils.py | 80 +++++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 39 deletions(-) (limited to 'ctec/parse_utils.py') diff --git a/ctec/parse_utils.py b/ctec/parse_utils.py index 890d36f..7ba2ccf 100644 --- a/ctec/parse_utils.py +++ b/ctec/parse_utils.py @@ -37,16 +37,16 @@ T1 = TypeVar('T1') T2 = TypeVar('T2') T3 = TypeVar('T3') -ParseResult = Optional[Tuple[T, str]] -Parser = Callable[[str], ParseResult[T]] +ParseResult = Optional[Tuple[T, bytes]] +Parser = Callable[[bytes], ParseResult[T]] -def as_predicate(parser: Parser[T]) -> Callable[[str], bool]: - def check(text: str) -> bool: - return parser(text) is not None +def as_predicate(parser: Parser[T]) -> Callable[[int], bool]: + def check(text: int) -> bool: + return parser(bytes([text])) is not None return check def alt(*parsers: Parser[T]) -> Parser[T]: - def parse(text: str) -> ParseResult[T]: + def parse(text: bytes) -> ParseResult[T]: for parser in parsers[:-1]: result = parser(text) if result is not None: @@ -54,56 +54,58 @@ def alt(*parsers: Parser[T]) -> Parser[T]: return parsers[-1](text) return parse -def tag(tag_text: str) -> Parser[str]: - def parse(text: str) -> ParseResult[str]: +def tag(tag_text: bytes) -> Parser[bytes]: + def parse(text: bytes) -> ParseResult[bytes]: if text.startswith(tag_text): return tag_text, text[len(tag_text):] return None return parse # case-insensitive tag -def itag(tag_text: str) -> Parser[str]: - def parse(text: str) -> ParseResult[str]: - if text.casefold().startswith(tag_text.casefold()): +def itag(tag_text: bytes) -> Parser[bytes]: + def parse(text: bytes) -> ParseResult[bytes]: + tag_str = tag_text.decode() + text_str = text.decode() + if text_str.casefold().startswith(tag_str.casefold()): return tag_text, text[len(tag_text):] return None return parse -def take_while0(predicate: Callable[[str], bool]) -> Parser[str]: - def parse(text: str) -> ParseResult[str]: +def take_while0(predicate: Callable[[int], bool]) -> Parser[bytes]: + def parse(text: bytes) -> ParseResult[bytes]: for i in range(len(text)): if not predicate(text[i]): return text[:i], text[i:] - return text, "" + return text, b"" return parse -def take_while1(predicate: Callable[[str], bool]) -> Parser[str]: - def parse(text: str) -> ParseResult[str]: +def take_while1(predicate: Callable[[int], bool]) -> Parser[bytes]: + def parse(text: bytes) -> ParseResult[bytes]: if len(text) == 0 or not predicate(text[0]): return None for i in range(1, len(text)): if not predicate(text[i]): return text[:i], text[i:] - return text, "" + return text, b"" return parse -def take_till1(predicate: Callable[[str], bool]) -> Parser[str]: +def take_till1(predicate: Callable[[int], bool]) -> Parser[bytes]: return take_while1(lambda x: not predicate(x)) -def take_n(n: int) -> Parser[str]: - def parse(text: str) -> ParseResult[str]: +def take_n(n: int) -> Parser[bytes]: + def parse(text: bytes) -> ParseResult[bytes]: if len(text) < n: return None return text[:n], text[n:] return parse -def any_char(text: str) -> ParseResult[str]: +def any_char(text: bytes) -> ParseResult[bytes]: if len(text) > 0: - return text[0], text[1:] + return text[0:1], text[1:] return None def all_consuming(parser: Parser[T], *, debug=False) -> Parser[T]: - def parse(text: str) -> ParseResult[T]: + def parse(text: bytes) -> ParseResult[T]: parsed_result = parser(text) if parsed_result is None: if debug: @@ -114,11 +116,11 @@ def all_consuming(parser: Parser[T], *, debug=False) -> Parser[T]: if debug: print('all_consuming: leftover text {}', repr(extra)) return None - return result, '' + return result, b'' return parse def map_parser(parser: Parser[T1], mapper: Callable[[T1], T2]) -> Parser[T2]: - def parse(text: str) -> ParseResult[T2]: + def parse(text: bytes) -> ParseResult[T2]: parsed_result = parser(text) if parsed_result is None: return None @@ -127,7 +129,7 @@ def map_parser(parser: Parser[T1], mapper: Callable[[T1], T2]) -> Parser[T2]: return parse def and_then(first_parser: Parser[T1], get_second_parser: Callable[[T1], Parser[T2]]) -> Parser[T2]: - def parse(text: str) -> ParseResult[T2]: + def parse(text: bytes) -> ParseResult[T2]: parsed_result = first_parser(text) if parsed_result is None: return None @@ -136,7 +138,7 @@ def and_then(first_parser: Parser[T1], get_second_parser: Callable[[T1], Parser[ return parse def opt(parser: Parser[T]) -> Parser[Optional[T]]: - def parse(text: str) -> ParseResult[Optional[T]]: + def parse(text: bytes) -> ParseResult[Optional[T]]: result = parser(text) if result is None: return None, text @@ -144,7 +146,7 @@ def opt(parser: Parser[T]) -> Parser[Optional[T]]: return parse def verify(parser: Parser[T], predicate: Callable[[T], bool]) -> Parser[T]: - def parse(text: str) -> ParseResult[T]: + def parse(text: bytes) -> ParseResult[T]: parsed_result = parser(text) if parsed_result is None: return None @@ -155,7 +157,7 @@ def verify(parser: Parser[T], predicate: Callable[[T], bool]) -> Parser[T]: return parse def many0(parser: Parser[T]) -> Parser[List[T]]: - def parse(text: str) -> ParseResult[List[T]]: + def parse(text: bytes) -> ParseResult[List[T]]: result = [] parser_result = parser(text) while parser_result is not None: @@ -166,7 +168,7 @@ def many0(parser: Parser[T]) -> Parser[List[T]]: return parse def many1(parser: Parser[T]) -> Parser[List[T]]: - def parse(text: str) -> ParseResult[List[T]]: + def parse(text: bytes) -> ParseResult[List[T]]: parser_result = parser(text) if parser_result is None: return None @@ -182,7 +184,7 @@ def many1(parser: Parser[T]) -> Parser[List[T]]: return parse def many_m_n(parser: Parser[T], min_inclusive: int, max_inclusive: int) -> Parser[List[T]]: - def parse(text: str) -> ParseResult[List[T]]: + def parse(text: bytes) -> ParseResult[List[T]]: result: List[T] = [] while len(result) < min_inclusive: parser_result = parser(text) @@ -201,7 +203,7 @@ def many_m_n(parser: Parser[T], min_inclusive: int, max_inclusive: int) -> Parse return parse def separated_many0(parser: Parser[T], separator_parser: Parser) -> Parser[List[T]]: - def parse(text: str) -> ParseResult[List[T]]: + def parse(text: bytes) -> ParseResult[List[T]]: result = [] while True: parser_result = parser(text) @@ -221,7 +223,7 @@ def separated_many1(parser: Parser[T], separator_parser: Parser) -> Parser[List[ return verify(separated_many0(parser, separator_parser), lambda result: len(result) > 0) def delimited(before_parser: Parser[T1], parser: Parser[T], after_parser: Parser[T2]) -> Parser[T]: - def parse(text: str) -> ParseResult[T]: + def parse(text: bytes) -> ParseResult[T]: before_result = before_parser(text) if before_result is None: return None @@ -241,7 +243,7 @@ def delimited(before_parser: Parser[T1], parser: Parser[T], after_parser: Parser return parse def pair(first_parser: Parser[T1], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]: - def parse(text: str) -> ParseResult[Tuple[T1, T2]]: + def parse(text: bytes) -> ParseResult[Tuple[T1, T2]]: first_parsed_result = first_parser(text) if first_parsed_result is None: return None @@ -256,7 +258,7 @@ def pair(first_parser: Parser[T1], second_parser: Parser[T2]) -> Parser[Tuple[T1 return parse def triple(first_parser: Parser[T1], second_parser: Parser[T2], third_parser: Parser[T3]) -> Parser[Tuple[T1, T2, T3]]: - def parse(text: str) -> ParseResult[Tuple[T1, T2, T3]]: + def parse(text: bytes) -> ParseResult[Tuple[T1, T2, T3]]: first_parsed_result = first_parser(text) if first_parsed_result is None: return None @@ -286,7 +288,7 @@ def followed(parser: Parser[T], after_parser: Parser[T1]) -> Parser[T]: return map_parser(pair(parser, after_parser), first) def separated_pair(first_parser: Parser[T1], between_parser: Parser[T], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]: - def parse(text: str) -> ParseResult[Tuple[T1, T2]]: + def parse(text: bytes) -> ParseResult[Tuple[T1, T2]]: first_parsed_result = first_parser(text) if first_parsed_result is None: return None @@ -306,7 +308,7 @@ def separated_pair(first_parser: Parser[T1], between_parser: Parser[T], second_p return parse def separated_triple(first_parser: Parser[T1], between12_parser: Parser, second_parser: Parser[T2], between23_parser: Parser, third_parser: Parser[T3]) -> Parser[Tuple[T1, T2, T3]]: - def parse(text: str) -> ParseResult[Tuple[T1, T2, T3]]: + def parse(text: bytes) -> ParseResult[Tuple[T1, T2, T3]]: first_parsed_result = first_parser(text) if first_parsed_result is None: return None @@ -335,5 +337,5 @@ def separated_triple(first_parser: Parser[T1], between12_parser: Parser, second_ return (first_result, second_result, third_result), extra return parse -def string_concat(parser: Parser[Sequence[str]]) -> Parser[str]: - return map_parser(parser, ''.join) +def string_concat(parser: Parser[Sequence[bytes]]) -> Parser[bytes]: + return map_parser(parser, b''.join) -- cgit v1.2.3