use std::fmt; use std::str::FromStr; use nom::{ Finish, IResult, branch::alt, bytes::complete::{tag, take_till1, take_while1}, character::complete::anychar, combinator::{all_consuming, map, opt, verify}, multi::many1, sequence::{delimited, pair, preceded, separated_pair}, }; #[derive(PartialEq, Eq, Clone, Debug)] pub struct TokenString(Vec); impl TokenString { pub fn text(text: impl Into) -> Self { Self(vec![Token::Text(text.into())]) } pub fn tokens(&self) -> impl Iterator { self.0.iter() } pub fn first_token_mut(&mut self) -> &mut Token { &mut self.0[0] } pub fn split_once(&self, delimiter: char) -> Option<(TokenString, TokenString)> { let mut result0 = vec![]; let mut iter = self.0.iter(); while let Some(t) = iter.next() { match t { Token::Text(text) if text.contains(delimiter) => { let split_text = text.splitn(2, delimiter); let pieces = split_text.collect::>(); assert_eq!(pieces.len(), 2, "wrong number of pieces!"); result0.push(Token::Text(pieces[0].into())); let mut result1 = vec![Token::Text(pieces[1].into())]; result1.extend(iter.cloned()); return Some((TokenString(result0), TokenString(result1))); } _ => result0.push(t.clone()), } } None } pub fn ends_with(&self, pattern: &str) -> bool { match self.0.last() { Some(Token::Text(t)) => t.ends_with(pattern), _ => false } } pub fn strip_suffix(&mut self, suffix: &str) { if let Some(Token::Text(t)) = self.0.last_mut() { if let Some(x) = t.strip_suffix(suffix) { *t = x.into() } } } pub fn extend(&mut self, other: TokenString) { self.0.extend(other.0); } pub fn trim_start(&mut self) { if let Some(Token::Text(t)) = self.0.first_mut() { *t = t.trim_start().into(); } } } impl fmt::Display for TokenString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for t in &self.0 { write!(f, "{}", t)?; } Ok(()) } } #[derive(PartialEq, Eq, Clone, Debug)] pub enum Token { Text(String), MacroExpansion { name: String, replacement: Option<(TokenString, TokenString)>, }, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Token::Text(t) => write!(f, "{}", t), Token::MacroExpansion { name, replacement: None } => write!(f, "$({})", name), Token::MacroExpansion { name, replacement: Some((r1, r2)) } => write!(f, "$({}:{}={})", name, r1, r2), } } } fn macro_name(input: &str) -> IResult<&str, &str> { // POSIX says "periods, underscores, digits, and alphabetics from the portable character set" take_while1(|c: char| { c == '.' || c == '_' || c.is_alphanumeric() })(input) } fn macro_expansion_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { let subst = preceded(tag(":"), separated_pair(tokens_but_not('='), tag("="), tokens_but_not(end))); map( pair(macro_name, opt(subst)), |(name, replacement)| Token::MacroExpansion { name: name.into(), replacement }, ) } fn parens_macro_expansion(input: &str) -> IResult<&str, Token> { delimited(tag("$("), macro_expansion_body(')'), tag(")"))(input) } fn braces_macro_expansion(input: &str) -> IResult<&str, Token> { delimited(tag("${"), macro_expansion_body('}'), tag("}"))(input) } fn tiny_macro_expansion(input: &str) -> IResult<&str, Token> { let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); map(raw, |c| { if c == '$' { Token::Text("$".into()) } else { Token::MacroExpansion { name: c.to_string(), replacement: None, } } })(input) } fn macro_expansion(input: &str) -> IResult<&str, Token> { alt((tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion))(input) } fn text(input: &str) -> IResult<&str, Token> { map(take_till1(|c| c == '$'), |x: &str| Token::Text(x.into()))(input) } fn text_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { map(take_till1(move |c| c == '$' || c == end), |x: &str| Token::Text(x.into())) } fn single_token(input: &str) -> IResult<&str, Token> { alt((text, macro_expansion))(input) } fn single_token_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { alt((text_but_not(end), macro_expansion)) } fn empty_tokens(input: &str) -> IResult<&str, TokenString> { map(tag(""), |_| TokenString(vec![Token::Text(String::new())]))(input) } fn tokens(input: &str) -> IResult<&str, TokenString> { alt((map(many1(single_token), TokenString), empty_tokens))(input) } fn tokens_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString> { alt((map(many1(single_token_but_not(end)), TokenString), empty_tokens)) } fn full_text_tokens(input: &str) -> IResult<&str, TokenString> { all_consuming(tokens)(input) } pub fn tokenize(input: &str) -> TokenString { // TODO handle errors gracefully let (_, result) = full_text_tokens(input).expect("couldn't parse"); result } impl FromStr for TokenString { // TODO figure out how to get nom errors working (Error<&str> doesn't work because lifetimes) type Err = (); fn from_str(s: &str) -> Result { full_text_tokens(s).finish() .map(|(_, x)| x) .map_err(|_| ()) } } #[cfg(test)] mod test { use super::{Token, TokenString, tokenize}; impl From> for TokenString { fn from(x: Vec) -> Self { TokenString(x) } } fn token_text(text: impl Into) -> Token { Token::Text(text.into()) } fn token_macro_expansion(name: impl Into) -> Token { Token::MacroExpansion { name: name.into(), replacement: None } } fn token_macro_expansion_replacement(name: impl Into, subst1: impl Into, subst2: impl Into) -> Token { Token::MacroExpansion { name: name.into(), replacement: Some((subst1.into(), subst2.into())) } } #[test] fn no_macros() { let text = "This is an example sentence! There aren't macros in it at all!"; let tokens = tokenize(text); assert_eq!(tokens, TokenString(vec![token_text(text)])); } #[test] fn no_replacement() { let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; let tokens = tokenize(text); assert_eq!(tokens, TokenString(vec![ token_text("This is a "), token_macro_expansion("Q"), token_text(" sentence! There are "), token_macro_expansion("BORING"), token_text(" macros in it at "), token_macro_expansion("YEET"), token_text("!"), ])); } #[test] fn escaped() { let text = "This costs $$2 to run, which isn't ideal"; let tokens = tokenize(text); assert_eq!(tokens, TokenString(vec![ token_text("This costs "), token_text("$"), token_text("2 to run, which isn't ideal"), ])); } #[test] fn replacement() { let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; let tokens = tokenize(text); assert_eq!(tokens, TokenString(vec![ token_text("Can I get a "), token_macro_expansion_replacement("DATA", vec![token_text(".c")], vec![token_text(".oof")]), token_text(" in this "), token_macro_expansion_replacement("SWAG", vec![token_text(".yolo")], vec![token_text("")]), ])); } #[test] fn hell() { let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; let tokens = tokenize(text); assert_eq!(tokens, TokenString(vec![ token_macro_expansion_replacement( "OOF", vec![ token_macro_expansion_replacement("ouch", vec![token_text("hi")], vec![token_text("hey")]), token_text(" there"), ], vec![ token_macro_expansion_replacement( "owie", vec![token_macro_expansion("my")], vec![token_macro_expansion("bones")], ), ], ), token_text(", bro."), ])); } }