From db61eecb92347f963edf9283746cdde4d88fb67b Mon Sep 17 00:00:00 2001
From: Melody Horn
Date: Mon, 5 Apr 2021 17:39:33 -0600
Subject: hit a weird compiler edge case i think

---
 src/makefile/token.rs | 138 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 114 insertions(+), 24 deletions(-)

diff --git a/src/makefile/token.rs b/src/makefile/token.rs
index b880700..a79598a 100644
--- a/src/makefile/token.rs
+++ b/src/makefile/token.rs
@@ -8,7 +8,7 @@ use nom::{
     character::complete::anychar,
     combinator::{all_consuming, map, opt, verify},
     error::{context, convert_error, ContextError, ParseError, VerboseError},
-    multi::many1,
+    multi::fold_many1,
     sequence::{delimited, pair, preceded, separated_pair},
     Finish, IResult,
 };
@@ -42,6 +42,10 @@ impl TokenString {
         }])
     }
 
+    pub fn just(token: Token) -> Self {
+        Self(vec![token])
+    }
+
     pub fn tokens(&self) -> impl Iterator<Item = &Token> {
         self.0.iter()
     }
@@ -174,27 +178,51 @@ impl fmt::Display for Token {
     }
 }
 
+#[derive(Clone, Copy)]
+enum Delimiter {
+    Parens,
+    Braces,
+}
+
+impl Delimiter {
+    fn start(&self) -> &'static str {
+        match self {
+            Self::Parens => "(",
+            Self::Braces => "{",
+        }
+    }
+
+    fn end(&self) -> &'static str {
+        match self {
+            Self::Parens => ")",
+            Self::Braces => "}",
+        }
+    }
+}
+
 fn macro_function_name<'a, E: Err<'a>>(
     end: char,
+    context: Delimiter,
 ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> + 'a {
-    tokens_but_not(vec![':', '#', '=', ' ', end])
+    tokens_but_not(vec![':', '#', '=', ' ', end], context)
 }
 
 fn macro_expansion_body<'a, E: Err<'a>>(
     end: char,
+    delim: Delimiter,
 ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> + 'a {
     let subst = preceded(
         tag(":"),
         separated_pair(
-            tokens_but_not(vec!['=']),
+            tokens_but_not(vec!['='], delim),
             tag("="),
-            tokens_but_not(vec![end]),
+            tokens_but_not(vec![end], delim),
         ),
     );
     context(
         "macro_expansion_body",
         map(
-            pair(macro_function_name(end), opt(subst)),
+            pair(macro_function_name(end, delim), opt(subst)),
             |(name, replacement)| Token::MacroExpansion { name, replacement },
         ),
     )
@@ -203,14 +231,18 @@ fn macro_expansion_body<'a, E: Err<'a>>(
 #[cfg(feature = "full")]
 fn function_call_body<'a, E: Err<'a>>(
     end: char,
+    delim: Delimiter,
 ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> {
     context(
         "function_call_body",
         map(
             separated_pair(
-                macro_function_name(end),
+                macro_function_name(end, delim),
                 space1,
-                separated_list1(pair(tag(","), space0), tokens_but_not(vec![',', end])),
+                separated_list1(
+                    pair(tag(","), space0),
+                    tokens_but_not(vec![',', end], delim),
+                ),
             ),
             |(name, args)| Token::FunctionCall { name, args },
         ),
@@ -218,8 +250,14 @@ fn function_call_body<'a, E: Err<'a>>(
 }
 
 #[cfg(feature = "full")]
-fn macro_body<'a, E: Err<'a>>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> {
-    alt((function_call_body(end), macro_expansion_body(end)))
+fn macro_body<'a, E: Err<'a>>(
+    end: char,
+    context: Delimiter,
+) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> {
+    alt((
+        function_call_body(end, context),
+        macro_expansion_body(end, context),
+    ))
 }
 
 #[cfg(not(feature = "full"))]
@@ -228,11 +266,11 @@ fn macro_body<'a, E: Err<'a>>(end: char) -> impl FnMut(&'a s
 }
 
 fn parens_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> {
-    delimited(tag("$("), macro_body(')'), tag(")"))(input)
+    delimited(tag("$("), macro_body(')', Delimiter::Parens), tag(")"))(input)
 }
 
 fn braces_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> {
-    delimited(tag("${"), macro_body('}'), tag("}"))(input)
+    delimited(tag("${"), macro_body('}', Delimiter::Braces), tag("}"))(input)
 }
 
 fn tiny_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> {
@@ -249,30 +287,53 @@ fn tiny_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Toke
     })(input)
 }
 
-fn macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> {
+fn macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
     context(
         "macro_expansion",
-        alt((
-            tiny_macro_expansion,
-            parens_macro_expansion,
-            braces_macro_expansion,
-        )),
+        map(
+            alt((
+                tiny_macro_expansion,
+                parens_macro_expansion,
+                braces_macro_expansion,
+            )),
+            TokenString::just,
+        ),
     )(input)
 }
 
 fn text_but_not<'a, E: Err<'a>>(
-    ends: Vec<char>,
-) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> {
+    ends: Vec<char>, // TODO don't allocate an entire Vec for this (just using slices creates Lifetime Fuckery)
+) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
     map(
         take_till1(move |c| c == '$' || ends.contains(&c)),
-        |x: &str| Token::Text(x.into()),
+        |x: &str| TokenString::text(x), // TODO don't allocate an entire Vec for that
+    )
+}
+
+fn nested_delimiters<'a, E: Err<'a>>(
+    ends: Vec<char>,
+    context: Delimiter,
+) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
+    delimited(
+        tag(context.start()),
+        tokens_but_not(ends, context),
+        tag(context.end()),
     )
 }
 
 fn single_token_but_not<'a, E: Err<'a>>(
     ends: Vec<char>,
-) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> {
-    alt((text_but_not(ends), macro_expansion))
+    context: Delimiter,
+) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
+    alt((
+        text_but_not(ends.clone()),
+        macro_expansion,
+        nested_delimiters(ends, context),
+    ))
+}
+
+fn single_token<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
+    alt((text_but_not(vec![]), macro_expansion))(input)
 }
 
 fn empty_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
@@ -282,17 +343,31 @@ fn empty_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString,
     )(input)
 }
 
+fn fold_tokens<'a, E: Err<'a>>(
+    parser: impl FnMut(&'a str) -> IResult<&'a str, TokenString, E>,
+) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
+    fold_many1(parser, TokenString::empty(), |mut acc, x| {
+        acc.extend(x);
+        acc
+    })
+}
+
 fn tokens_but_not<'a, E: Err<'a>>(
     ends: Vec<char>,
+    context: Delimiter,
 ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> {
     alt((
-        map(many1(single_token_but_not(ends)), TokenString),
+        fold_tokens(single_token_but_not(ends, context)),
         empty_tokens,
     ))
 }
 
+fn tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
+    alt((fold_tokens(single_token), empty_tokens))(input)
+}
+
 fn full_text_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> {
-    all_consuming(tokens_but_not(vec![]))(input)
+    all_consuming(tokens)(input)
 }
 
 pub fn tokenize(input: &str) -> eyre::Result<TokenString> {
@@ -488,4 +563,19 @@ mod test {
         );
         Ok(())
     }
+
+    #[cfg(feature = "full")]
+    #[test]
+    fn i_will_attack_and_destroy_god() -> R {
+        let text = "$(shell echo (hi) (bro) yeet)";
+        let tokens = tokenize(text)?;
+        assert_eq!(
+            tokens,
+            TokenString(vec![Token::FunctionCall {
+                name: TokenString::text("shell"),
+                args: vec![TokenString::text("echo (hi) (bro) yeet")],
+            }])
+        );
+        Ok(())
+    }
 }
-- 
cgit v1.2.3
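
For anyone reading the patch without the rest of token.rs in front of them: the new Delimiter enum plus nested_delimiters is what lets the tokenizer consume a balanced "(hi)" group inside "$(shell ...)" instead of treating its ")" as the end of the whole expansion. A rough standalone sketch of that balancing idea, written without nom and using a hypothetical nested_group helper that is not part of this patch, looks like this:

// Standalone sketch of the nested-delimiter idea from the patch (not the
// actual nom-based implementation). `Delimiter` mirrors the enum added in
// the diff; `nested_group` is a made-up helper for illustration only.
#[derive(Clone, Copy)]
enum Delimiter {
    Parens,
    Braces,
}

impl Delimiter {
    fn start(self) -> char {
        match self {
            Self::Parens => '(',
            Self::Braces => '{',
        }
    }

    fn end(self) -> char {
        match self {
            Self::Parens => ')',
            Self::Braces => '}',
        }
    }
}

/// Returns the text inside one balanced group plus the remaining input,
/// or None if the input does not start with the delimiter or is unbalanced.
fn nested_group(input: &str, delim: Delimiter) -> Option<(&str, &str)> {
    let mut chars = input.char_indices();
    match chars.next() {
        Some((_, c)) if c == delim.start() => {}
        _ => return None,
    }
    let mut depth = 1;
    for (i, c) in chars {
        if c == delim.start() {
            depth += 1;
        } else if c == delim.end() {
            depth -= 1;
            if depth == 0 {
                return Some((&input[1..i], &input[i + c.len_utf8()..]));
            }
        }
    }
    None
}

fn main() {
    // "(hi)" is consumed as one balanced group; the trailing text is left over.
    assert_eq!(
        nested_group("(hi) (bro) yeet", Delimiter::Parens),
        Some(("hi", " (bro) yeet"))
    );
    // An unbalanced group is rejected rather than silently truncated.
    assert_eq!(nested_group("(oops", Delimiter::Parens), None);
    println!("nested delimiter sketch ok");
}

The patch does the same thing compositionally: single_token_but_not tries plain text, then a macro expansion, then a balanced group via nested_delimiters, and fold_tokens uses fold_many1 to glue the resulting pieces back into a single TokenString.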