use std::fmt; use std::str::FromStr; use eyre::WrapErr; use nom::{ branch::alt, bytes::complete::{tag, take_till1, take_while1}, character::complete::anychar, combinator::{all_consuming, map, opt, verify}, error::{context, convert_error, ContextError, ParseError, VerboseError}, multi::many1, sequence::{delimited, pair, preceded, separated_pair}, Finish, IResult, }; #[cfg(feature = "full")] use nom::{character::complete::space1, multi::separated_list1}; trait Err<'a>: 'a + ParseError<&'a str> + ContextError<&'a str> {} impl<'a, T: 'a + ParseError<&'a str> + ContextError<&'a str>> Err<'a> for T {} #[allow(clippy::module_name_repetitions)] #[derive(PartialEq, Eq, Clone, Debug)] pub struct TokenString(Vec); impl TokenString { pub fn text(text: impl Into) -> Self { Self(vec![Token::Text(text.into())]) } #[cfg(feature = "full")] pub fn r#macro(name: impl Into) -> Self { Self(vec![Token::MacroExpansion { name: name.into(), replacement: None, }]) } pub fn tokens(&self) -> impl Iterator { self.0.iter() } pub fn first_token_mut(&mut self) -> &mut Token { &mut self.0[0] } pub fn split_once(&self, delimiter: char) -> Option<(Self, Self)> { let mut result0 = vec![]; let mut iter = self.0.iter(); while let Some(t) = iter.next() { match t { Token::Text(text) if text.contains(delimiter) => { let split_text = text.splitn(2, delimiter); let pieces = split_text.collect::>(); assert_eq!(pieces.len(), 2, "wrong number of pieces!"); result0.push(Token::Text(pieces[0].into())); let mut result1 = vec![Token::Text(pieces[1].into())]; result1.extend(iter.cloned()); return Some((Self(result0), Self(result1))); } _ => result0.push(t.clone()), } } None } pub fn starts_with(&self, pattern: &str) -> bool { match self.0.first() { Some(Token::Text(t)) => t.starts_with(pattern), _ => false, } } pub fn ends_with(&self, pattern: &str) -> bool { match self.0.last() { Some(Token::Text(t)) => t.ends_with(pattern), _ => false, } } pub fn strip_prefix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.first_mut() { if let Some(x) = t.strip_prefix(suffix) { *t = x.into() } } } pub fn strip_suffix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.last_mut() { if let Some(x) = t.strip_suffix(suffix) { *t = x.into() } } } pub fn extend(&mut self, other: Self) { self.0.extend(other.0); } pub fn trim_start(&mut self) { if let Some(Token::Text(t)) = self.0.first_mut() { *t = t.trim_start().into(); } } pub fn trim_end(&mut self) { if let Some(Token::Text(t)) = self.0.last_mut() { *t = t.trim_end().into(); } } pub fn is_empty(&self) -> bool { match self.0.get(0) { None => true, Some(Token::Text(t)) if t.is_empty() && self.0.len() == 1 => true, _ => false, } } } impl fmt::Display for TokenString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for t in &self.0 { write!(f, "{}", t)?; } Ok(()) } } #[derive(PartialEq, Eq, Clone, Debug)] pub enum Token { Text(String), MacroExpansion { name: String, replacement: Option<(TokenString, TokenString)>, }, #[cfg(feature = "full")] FunctionCall { name: String, args: Vec, }, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Text(t) => write!(f, "{}", t), Self::MacroExpansion { name, replacement: None, } => write!(f, "$({})", name), Self::MacroExpansion { name, replacement: Some((r1, r2)), } => write!(f, "$({}:{}={})", name, r1, r2), #[cfg(feature = "full")] Self::FunctionCall { name, args } => write!( f, "$({} {})", name, args.iter() .map(|x| format!("{}", x)) .collect::>() .join(", ") ), } } } fn macro_function_name<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, &'a str, E> { // POSIX says "periods, underscores, digits, and alphabetics from the portable character set" // one GNUism is a function with a - in the name take_while1(|c: char| c == '.' || c == '_' || c.is_alphanumeric() || c == '-')(input) } fn macro_expansion_body<'a, E: Err<'a>>( end: char, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> + 'a { let subst = preceded( tag(":"), separated_pair( tokens_but_not(vec!['=']), tag("="), tokens_but_not(vec![end]), ), ); context( "macro_expansion_body", map( pair(macro_function_name, opt(subst)), |(name, replacement)| Token::MacroExpansion { name: name.into(), replacement, }, ), ) } #[cfg(feature = "full")] fn function_call_body<'a, E: Err<'a>>( end: char, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> { context( "function_call_body", map( separated_pair( macro_function_name, space1, separated_list1(tag(","), tokens_but_not(vec![',', end])), ), |(name, args)| Token::FunctionCall { name: name.into(), args, }, ), ) } #[cfg(feature = "full")] fn macro_body<'a, E: Err<'a>>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> { alt((function_call_body(end), macro_expansion_body(end))) } #[cfg(not(feature = "full"))] fn macro_body<'a, E: Err<'a>>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> { macro_expansion_body(end) } fn parens_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> { delimited(tag("$("), macro_body(')'), tag(")"))(input) } fn braces_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> { delimited(tag("${"), macro_body('}'), tag("}"))(input) } fn tiny_macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> { let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); map(raw, |c| { if c == '$' { Token::Text("$".into()) } else { Token::MacroExpansion { name: c.to_string(), replacement: None, } } })(input) } fn macro_expansion<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, Token, E> { context( "macro_expansion", alt(( tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion, )), )(input) } fn text_but_not<'a, E: Err<'a>>( ends: Vec, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> { map( take_till1(move |c| c == '$' || ends.contains(&c)), |x: &str| Token::Text(x.into()), ) } fn single_token_but_not<'a, E: Err<'a>>( ends: Vec, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token, E> { alt((text_but_not(ends), macro_expansion)) } fn empty_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { context( "empty_tokens", map(tag(""), |_| TokenString(vec![Token::Text(String::new())])), )(input) } fn tokens_but_not<'a, E: Err<'a>>( ends: Vec, ) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString, E> { alt(( map(many1(single_token_but_not(ends)), TokenString), empty_tokens, )) } fn full_text_tokens<'a, E: Err<'a>>(input: &'a str) -> IResult<&'a str, TokenString, E> { all_consuming(tokens_but_not(vec![]))(input) } pub fn tokenize(input: &str) -> eyre::Result { let (_, result) = full_text_tokens(input) .finish() .map_err(|err: VerboseError<&str>| eyre::eyre!(convert_error(input, err))) .with_context(|| format!("couldn't parse {:?}", input))?; Ok(result) } impl FromStr for TokenString { type Err = eyre::Error; fn from_str(s: &str) -> Result { tokenize(s) } } #[cfg(test)] mod test { use super::*; type R = eyre::Result<()>; impl From> for TokenString { fn from(x: Vec) -> Self { Self(x) } } fn token_text(text: impl Into) -> Token { Token::Text(text.into()) } fn token_macro_expansion(name: impl Into) -> Token { Token::MacroExpansion { name: name.into(), replacement: None, } } fn token_macro_expansion_replacement( name: impl Into, subst1: impl Into, subst2: impl Into, ) -> Token { Token::MacroExpansion { name: name.into(), replacement: Some((subst1.into(), subst2.into())), } } #[cfg(feature = "full")] fn token_function_call(name: impl Into, args: Vec>) -> Token { Token::FunctionCall { name: name.into(), args: args.into_iter().map(|x| x.into()).collect(), } } #[test] fn no_macros() -> R { let text = "This is an example sentence! There aren't macros in it at all!"; let tokens = tokenize(text)?; assert_eq!(tokens, TokenString(vec![token_text(text)])); Ok(()) } #[test] fn no_replacement() -> R { let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("This is a "), token_macro_expansion("Q"), token_text(" sentence! There are "), token_macro_expansion("BORING"), token_text(" macros in it at "), token_macro_expansion("YEET"), token_text("!"), ]) ); Ok(()) } #[test] fn escaped() -> R { let text = "This costs $$2 to run, which isn't ideal"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("This costs "), token_text("$"), token_text("2 to run, which isn't ideal"), ]) ); Ok(()) } #[test] fn replacement() -> R { let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("Can I get a "), token_macro_expansion_replacement( "DATA", vec![token_text(".c")], vec![token_text(".oof")] ), token_text(" in this "), token_macro_expansion_replacement( "SWAG", vec![token_text(".yolo")], vec![token_text("")] ), ]) ); Ok(()) } #[test] fn hell() -> R { let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_macro_expansion_replacement( "OOF", vec![ token_macro_expansion_replacement( "ouch", vec![token_text("hi")], vec![token_text("hey")] ), token_text(" there"), ], vec![token_macro_expansion_replacement( "owie", vec![token_macro_expansion("my")], vec![token_macro_expansion("bones")], ),], ), token_text(", bro."), ]) ); Ok(()) } #[cfg(feature = "full")] #[test] fn function_hell() -> R { let text = "$(foo bar, $(baz))"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![token_function_call( "foo", vec![TokenString::text("bar"), tokenize(" $(baz)")?] )]) ); Ok(()) } }