use std::fmt; use std::str::FromStr; use eyre::WrapErr; use nom::{ branch::alt, bytes::complete::{tag, take_till1}, character::complete::anychar, combinator::{all_consuming, map, opt, verify}, error::{context, convert_error, ContextError, ParseError, VerboseError}, multi::fold_many1, sequence::{delimited, pair, preceded, separated_pair, tuple}, Finish, IResult, }; #[cfg(feature = "full")] use nom::{ character::complete::{space0, space1}, multi::separated_list1, }; use regex::Regex; trait Err<'a>: 'a + ParseError<&'a str> + ContextError<&'a str> {} impl<'a, T: 'a + ParseError<&'a str> + ContextError<&'a str>> Err<'a> for T {} #[allow(clippy::module_name_repetitions)] #[derive(PartialEq, Eq, Clone, Debug)] pub struct TokenString(Vec); impl TokenString { pub const fn empty() -> Self { Self(vec![]) } pub fn text(text: impl Into) -> Self { Self(vec![Token::Text(text.into())]) } pub fn r#macro(name: impl Into) -> Self { Self(vec![Token::MacroExpansion { name: Self::text(name), replacement: None, }]) } pub fn just(token: Token) -> Self { Self(vec![token]) } pub fn tokens(&self) -> impl Iterator { self.0.iter() } pub fn first_token_mut(&mut self) -> &mut Token { &mut self.0[0] } pub fn split_once(&self, delimiter: &str) -> Option<(Self, Self)> { let mut result0 = vec![]; let mut iter = self.0.iter(); while let Some(t) = iter.next() { match t { Token::Text(text) if text.contains(delimiter) => { let split_text = text.splitn(2, delimiter); let pieces = split_text.collect::>(); assert_eq!(pieces.len(), 2, "wrong number of pieces!"); result0.push(Token::Text(pieces[0].into())); let mut result1 = vec![Token::Text(pieces[1].into())]; result1.extend(iter.cloned()); return Some((Self(result0), Self(result1))); } _ => result0.push(t.clone()), } } None } pub fn starts_with(&self, pattern: &str) -> bool { match self.0.first() { Some(Token::Text(t)) => t.starts_with(pattern), _ => false, } } pub fn ends_with(&self, pattern: &str) -> bool { match self.0.last() { Some(Token::Text(t)) => t.ends_with(pattern), _ => false, } } pub fn strip_prefix(&mut self, prefix: &str) { if let Some(Token::Text(t)) = self.0.first_mut() { if let Some(x) = t.strip_prefix(prefix) { *t = x.into() } } } pub fn strip_suffix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.last_mut() { if let Some(x) = t.strip_suffix(suffix) { *t = x.into() } } } pub fn extend(&mut self, other: Self) { let mut incoming = other.0.into_iter().peekable(); if let Some(Token::Text(text)) = self.0.last_mut() { while let Some(Token::Text(incoming_text)) = incoming.next_if(|x| matches!(x, Token::Text(_))) { text.push_str(&incoming_text); } } self.0.extend(incoming); } pub fn trim_start(&mut self) { if let Some(Token::Text(t)) = self.0.first_mut() { *t = t.trim_start().into(); } } pub fn trim_end(&mut self) { if let Some(Token::Text(t)) = self.0.last_mut() { *t = t.trim_end().into(); } } pub fn is_empty(&self) -> bool { match self.0.first() { None => true, Some(Token::Text(t)) if t.is_empty() && self.0.len() == 1 => true, _ => false, } } pub fn contains_text(&self, pattern: &str) -> bool { self.0.iter().any(|x| { if let Token::Text(x) = x { x.contains(pattern) } else { false } }) } pub fn matches_regex(&self, regex: &Regex) -> bool { self.0.iter().any(|x| { if let Token::Text(x) = x { regex.is_match(x) } else { false } }) } /// Returns (token index within string, pattern index within token). pub fn find(&self, pattern: &str) -> Option<(usize, usize)> { self.0 .iter() .enumerate() .find_map(|(token_index, token)| match token { Token::Text(text) => text .find(pattern) .map(|pattern_index| (token_index, pattern_index)), _ => None, }) } } impl fmt::Display for TokenString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for t in &self.0 { write!(f, "{}", t)?; } Ok(()) } } impl PartialEq for TokenString { fn eq(&self, other: &str) -> bool { match self.0.as_slice() { [Token::Text(x)] => x == other, _ => false, } } } #[derive(PartialEq, Eq, Clone, Debug)] pub enum Token { Text(String), MacroExpansion { name: TokenString, replacement: Option<(TokenString, TokenString)>, }, #[cfg(feature = "full")] FunctionCall { name: TokenString, args: Vec, }, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Text(t) => write!(f, "{}", t), Self::MacroExpansion { name, replacement: None, } => write!(f, "$({})", name), Self::MacroExpansion { name, replacement: Some((r1, r2)), } => write!(f, "$({}:{}={})", name, r1, r2), #[cfg(feature = "full")] Self::FunctionCall { name, args } => write!( f, "$({} {})", name, args.iter() .map(|x| format!("{}", x)) .collect::>() .join(", ") ), } } } #[derive(Clone, Copy)] enum Delimiter { Parens, Braces, } impl Delimiter { const fn start(&self) -> &'static str { match self { Self::Parens => "(", Self::Braces => "{", } } const fn start_char(&self) -> char { match self { Self::Parens => '(', Self::Braces => '{', } } const fn end(&self) -> &'static str { match self { Self::Parens => ")", Self::Braces => "}", } } const fn end_char(&self) -> char { match self { Delimiter::Parens => ')', Delimiter::Braces => '}', } } } fn macro_function_name<'a, E: Err<'a>>( end: char, context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> + 'a { tokens_but_not(vec![':', '#', '=', ' ', end], context) } fn macro_expansion_body<'a, E: Err<'a>>( end: char, delim: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> + 'a { let subst = preceded( tag(":"), separated_pair( tokens_but_not(vec!['='], delim), tag("="), tokens_but_not(vec![end], delim), ), ); context( "macro_expansion_body", map( pair(macro_function_name(end, delim), opt(subst)), |(name, replacement)| TokenString::just(Token::MacroExpansion { name, replacement }), ), ) } #[cfg(feature = "full")] fn function_call_body<'a, E: Err<'a>>( end: char, delim: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { context( "function_call_body", map( separated_pair( macro_function_name(end, delim), space1, separated_list1( pair(tag(","), space0), tokens_but_not(vec![',', end], delim), ), ), |(name, args)| TokenString::just(Token::FunctionCall { name, args }), ), ) } #[cfg(feature = "full")] fn macro_body<'a, E: Err<'a>>( end: char, context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { alt(( function_call_body(end, context), macro_expansion_body(end, context), )) } #[cfg(not(feature = "full"))] fn macro_body<'a, E: Err<'a>>( end: char, context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { macro_expansion_body(end, context) } fn parens_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { delimited(tag("$("), macro_body(')', Delimiter::Parens), tag(")"))(input) } fn braces_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { delimited(tag("${"), macro_body('}', Delimiter::Braces), tag("}"))(input) } fn tiny_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); map(raw, |c| { if c == '$' { TokenString::text("$") } else { TokenString::r#macro(c) } })(input) } fn macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { context( "macro_expansion", alt(( tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion, )), )(input) } fn text_but_not<'a, E: Err<'a>>( ends: Vec, // TODO don't allocate an entire Vec for this (just using slices creates Lifetime Fuckery) ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { map( take_till1(move |c| c == '$' || ends.contains(&c)), TokenString::text, // TODO don't allocate an entire Vec for that ) } fn nested_delimiters<'a, E: Err<'a>>( context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { map( tuple(( tag(context.start()), move |x| tokens_but_not(vec![context.end_char()], context)(x), tag(context.end()), )), |(left, center, right)| { let mut tokens = TokenString::text(left); tokens.extend(center); tokens.extend(TokenString::text(right)); tokens }, ) } fn single_token_but_not<'a, E: Err<'a>>( ends: Vec, context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { let mut tbn_ends = ends.clone(); tbn_ends.push(context.start_char()); alt(( text_but_not(tbn_ends), macro_expansion, nested_delimiters(context), )) } fn single_token<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { alt((text_but_not(vec![]), macro_expansion))(input) } fn empty_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { context( "empty_tokens", map(tag(""), |_| TokenString(vec![Token::Text(String::new())])), )(input) } fn fold_tokens<'a, E: Err<'a>>( parser: impl FnMut(&'a str) -> IResult<&'a str, TokenString, E>, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { fold_many1(parser, TokenString::empty, |mut acc, x| { acc.extend(x); acc }) } fn tokens_but_not<'a, E: Err<'a>>( ends: Vec, context: Delimiter, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { alt(( fold_tokens(single_token_but_not(ends, context)), empty_tokens, )) } fn tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { alt((fold_tokens(single_token), empty_tokens))(input) } fn full_text_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { all_consuming(tokens)(input) } pub fn tokenize(input: &str) -> eyre::Result { let (_, result) = full_text_tokens(input) .finish() .map_err(|err: VerboseError<&str>| eyre::eyre!(convert_error(input, err))) .with_context(|| format!("couldn't parse {:?}", input))?; Ok(result) } impl FromStr for TokenString { type Err = eyre::Error; fn from_str(s: &str) -> Result { tokenize(s) } } #[cfg(test)] mod test { use super::*; type R = eyre::Result<()>; impl From> for TokenString { fn from(x: Vec) -> Self { Self(x) } } fn token_text(text: impl Into) -> Token { Token::Text(text.into()) } fn token_macro_expansion(name: impl Into) -> Token { Token::MacroExpansion { name: TokenString::text(name), replacement: None, } } fn token_macro_expansion_replacement( name: impl Into, subst1: impl Into, subst2: impl Into, ) -> Token { Token::MacroExpansion { name: TokenString::text(name), replacement: Some((subst1.into(), subst2.into())), } } #[cfg(feature = "full")] fn token_function_call(name: impl Into, args: Vec>) -> Token { Token::FunctionCall { name: TokenString::text(name), args: args.into_iter().map(|x| x.into()).collect(), } } #[test] fn no_macros() -> R { let text = "This is an example sentence! There aren't macros in it at all!"; let tokens = tokenize(text)?; assert_eq!(tokens, TokenString(vec![token_text(text)])); Ok(()) } #[test] fn no_replacement() -> R { let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("This is a "), token_macro_expansion("Q"), token_text(" sentence! There are "), token_macro_expansion("BORING"), token_text(" macros in it at "), token_macro_expansion("YEET"), token_text("!"), ]) ); Ok(()) } #[test] fn escaped() -> R { let text = "This costs $$2 to run, which isn't ideal"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![token_text("This costs $2 to run, which isn't ideal"),]) ); Ok(()) } #[test] fn replacement() -> R { let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("Can I get a "), token_macro_expansion_replacement( "DATA", vec![token_text(".c")], vec![token_text(".oof")] ), token_text(" in this "), token_macro_expansion_replacement( "SWAG", vec![token_text(".yolo")], vec![token_text("")] ), ]) ); Ok(()) } #[test] fn hell() -> R { let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_macro_expansion_replacement( "OOF", vec![ token_macro_expansion_replacement( "ouch", vec![token_text("hi")], vec![token_text("hey")] ), token_text(" there"), ], vec![token_macro_expansion_replacement( "owie", vec![token_macro_expansion("my")], vec![token_macro_expansion("bones")], ),], ), token_text(", bro."), ]) ); Ok(()) } #[cfg(feature = "full")] #[test] fn function_hell() -> R { let text = "$(foo bar, $(baz))"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![token_function_call( "foo", vec![TokenString::text("bar"), tokenize("$(baz)")?] )]) ); Ok(()) } #[test] fn triple_mega_deluxe_super_hell() -> R { let text = "$($($(a)b)c)"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![Token::MacroExpansion { name: TokenString(vec![ Token::MacroExpansion { name: TokenString(vec![ Token::MacroExpansion { name: TokenString::text("a"), replacement: None, }, token_text("b"), ]), replacement: None, }, token_text("c") ]), replacement: None, }]), ); Ok(()) } #[cfg(feature = "full")] #[test] fn i_will_attack_and_destroy_god() -> R { let text = "$(shell echo (hi) (bro) yeet)"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![Token::FunctionCall { name: TokenString::text("shell"), args: vec![TokenString::text("echo (hi) (bro) yeet")], }]) ); Ok(()) } #[cfg(feature = "full")] #[test] fn quoted_function_call_comma() -> R { let text = "$(egg $$(bug a, b/c))"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString::just(Token::FunctionCall { name: TokenString::text("egg"), args: vec![TokenString::text("$(bug a, b/c)")], }) ); Ok(()) } #[cfg(feature = "full")] #[test] fn unbalanced_parentheses_rejected() -> R { let text = "$(egg ()"; assert!(tokenize(text).is_err()); Ok(()) } }