use std::fmt; use std::str::FromStr; use anyhow::Context; use nom::{ branch::alt, bytes::complete::{tag, take_till1, take_while1}, character::complete::{anychar, space1}, combinator::{all_consuming, map, opt, verify}, multi::{many1, separated_list1}, sequence::{delimited, pair, preceded, separated_pair}, Finish, IResult, }; #[derive(PartialEq, Eq, Clone, Debug)] pub(crate) struct TokenString(Vec); impl TokenString { pub(crate) fn text(text: impl Into) -> Self { Self(vec![Token::Text(text.into())]) } pub(crate) fn r#macro(name: impl Into) -> Self { Self(vec![Token::MacroExpansion { name: name.into(), replacement: None, }]) } pub(crate) fn tokens(&self) -> impl Iterator { self.0.iter() } pub(crate) fn first_token_mut(&mut self) -> &mut Token { &mut self.0[0] } pub(crate) fn split_once(&self, delimiter: char) -> Option<(Self, Self)> { let mut result0 = vec![]; let mut iter = self.0.iter(); while let Some(t) = iter.next() { match t { Token::Text(text) if text.contains(delimiter) => { let split_text = text.splitn(2, delimiter); let pieces = split_text.collect::>(); assert_eq!(pieces.len(), 2, "wrong number of pieces!"); result0.push(Token::Text(pieces[0].into())); let mut result1 = vec![Token::Text(pieces[1].into())]; result1.extend(iter.cloned()); return Some((Self(result0), Self(result1))); } _ => result0.push(t.clone()), } } None } pub(crate) fn starts_with(&self, pattern: &str) -> bool { match self.0.first() { Some(Token::Text(t)) => t.starts_with(pattern), _ => false, } } pub(crate) fn ends_with(&self, pattern: &str) -> bool { match self.0.last() { Some(Token::Text(t)) => t.ends_with(pattern), _ => false, } } pub(crate) fn strip_prefix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.first_mut() { if let Some(x) = t.strip_prefix(suffix) { *t = x.into() } } } pub(crate) fn strip_suffix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.last_mut() { if let Some(x) = t.strip_suffix(suffix) { *t = x.into() } } } pub(crate) fn extend(&mut self, other: Self) { self.0.extend(other.0); } pub(crate) fn trim_start(&mut self) { if let Some(Token::Text(t)) = self.0.first_mut() { *t = t.trim_start().into(); } } pub(crate) fn trim_end(&mut self) { if let Some(Token::Text(t)) = self.0.last_mut() { *t = t.trim_end().into(); } } } impl fmt::Display for TokenString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for t in &self.0 { write!(f, "{}", t)?; } Ok(()) } } #[derive(PartialEq, Eq, Clone, Debug)] pub(crate) enum Token { Text(String), MacroExpansion { name: String, replacement: Option<(TokenString, TokenString)>, }, FunctionCall { name: String, args: Vec, }, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Text(t) => write!(f, "{}", t), Self::MacroExpansion { name, replacement: None, } => write!(f, "$({})", name), Self::MacroExpansion { name, replacement: Some((r1, r2)), } => write!(f, "$({}:{}={})", name, r1, r2), Self::FunctionCall { name, args } => write!( f, "$({} {})", name, args.iter() .map(|x| format!("{}", x)) .collect::>() .join(", ") ), } } } fn macro_function_name(input: &str) -> IResult<&str, &str> { // POSIX says "periods, underscores, digits, and alphabetics from the portable character set" // one GNUism is a function with a - in the name take_while1(|c: char| c == '.' || c == '_' || c.is_alphanumeric() || c == '-')(input) } fn macro_expansion_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> + 'a { let subst = preceded( tag(":"), separated_pair( tokens_but_not(vec!['=']), tag("="), tokens_but_not(vec![end]), ), ); map( pair(macro_function_name, opt(subst)), |(name, replacement)| Token::MacroExpansion { name: name.into(), replacement, }, ) } fn function_call_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { map( separated_pair( macro_function_name, space1, separated_list1(tag(","), tokens_but_not(vec![',', end])), ), |(name, args)| Token::FunctionCall { name: name.into(), args, }, ) } fn parens_macro_expansion(input: &str) -> IResult<&str, Token> { delimited( tag("$("), alt((macro_expansion_body(')'), function_call_body(')'))), tag(")"), )(input) } fn braces_macro_expansion(input: &str) -> IResult<&str, Token> { delimited( tag("${"), alt((macro_expansion_body('}'), function_call_body(')'))), tag("}"), )(input) } fn tiny_macro_expansion(input: &str) -> IResult<&str, Token> { let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); map(raw, |c| { if c == '$' { Token::Text("$".into()) } else { Token::MacroExpansion { name: c.to_string(), replacement: None, } } })(input) } fn macro_expansion(input: &str) -> IResult<&str, Token> { alt(( tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion, ))(input) } fn text_but_not<'a>(ends: Vec) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { map( take_till1(move |c| c == '$' || ends.contains(&c)), |x: &str| Token::Text(x.into()), ) } fn single_token_but_not<'a>(ends: Vec) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { alt((text_but_not(ends), macro_expansion)) } fn empty_tokens(input: &str) -> IResult<&str, TokenString> { map(tag(""), |_| TokenString(vec![Token::Text(String::new())]))(input) } fn tokens_but_not<'a>(ends: Vec) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString> { alt(( map(many1(single_token_but_not(ends)), TokenString), empty_tokens, )) } fn full_text_tokens(input: &str) -> IResult<&str, TokenString> { all_consuming(tokens_but_not(vec![]))(input) } pub(crate) fn tokenize(input: &str) -> anyhow::Result { let (_, result) = full_text_tokens(input) .finish() .map_err(|err| anyhow::anyhow!(err.to_string())) .with_context(|| format!("couldn't parse {:?}", input))?; Ok(result) } impl FromStr for TokenString { type Err = anyhow::Error; fn from_str(s: &str) -> Result { tokenize(s) } } #[cfg(test)] mod test { use super::{tokenize, Token, TokenString}; type R = anyhow::Result<()>; impl From> for TokenString { fn from(x: Vec) -> Self { TokenString(x) } } fn token_text(text: impl Into) -> Token { Token::Text(text.into()) } fn token_macro_expansion(name: impl Into) -> Token { Token::MacroExpansion { name: name.into(), replacement: None, } } fn token_macro_expansion_replacement( name: impl Into, subst1: impl Into, subst2: impl Into, ) -> Token { Token::MacroExpansion { name: name.into(), replacement: Some((subst1.into(), subst2.into())), } } #[test] fn no_macros() -> R { let text = "This is an example sentence! There aren't macros in it at all!"; let tokens = tokenize(text)?; assert_eq!(tokens, TokenString(vec![token_text(text)])); Ok(()) } #[test] fn no_replacement() -> R { let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("This is a "), token_macro_expansion("Q"), token_text(" sentence! There are "), token_macro_expansion("BORING"), token_text(" macros in it at "), token_macro_expansion("YEET"), token_text("!"), ]) ); Ok(()) } #[test] fn escaped() -> R { let text = "This costs $$2 to run, which isn't ideal"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("This costs "), token_text("$"), token_text("2 to run, which isn't ideal"), ]) ); Ok(()) } #[test] fn replacement() -> R { let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_text("Can I get a "), token_macro_expansion_replacement( "DATA", vec![token_text(".c")], vec![token_text(".oof")] ), token_text(" in this "), token_macro_expansion_replacement( "SWAG", vec![token_text(".yolo")], vec![token_text("")] ), ]) ); Ok(()) } #[test] fn hell() -> R { let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; let tokens = tokenize(text)?; assert_eq!( tokens, TokenString(vec![ token_macro_expansion_replacement( "OOF", vec![ token_macro_expansion_replacement( "ouch", vec![token_text("hi")], vec![token_text("hey")] ), token_text(" there"), ], vec![token_macro_expansion_replacement( "owie", vec![token_macro_expansion("my")], vec![token_macro_expansion("bones")], ),], ), token_text(", bro."), ]) ); Ok(()) } }