From 1844cb79ae82e71610573f133c5ed7aeeb0c50b6 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Tue, 23 Mar 2021 23:27:45 -0600 Subject: man i don't even fuckin know anymore --- src/makefile/token.rs | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 src/makefile/token.rs (limited to 'src/makefile/token.rs') diff --git a/src/makefile/token.rs b/src/makefile/token.rs new file mode 100644 index 0000000..3f69b50 --- /dev/null +++ b/src/makefile/token.rs @@ -0,0 +1,236 @@ +use std::str::FromStr; + +use nom::{ + Finish, IResult, + branch::alt, + bytes::complete::{tag, take_till1, take_while1}, + character::complete::anychar, + combinator::{all_consuming, map, opt, verify}, + multi::many1, + sequence::{delimited, pair, preceded, separated_pair}, +}; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct TokenString(Vec); + +impl TokenString { + pub fn tokens(&self) -> impl Iterator { + self.0.iter() + } + + pub fn first_token_mut(&mut self) -> &mut Token { + &mut self.0[0] + } + + pub fn split_once(&self, delimiter: char) -> Option<(TokenString, TokenString)> { + let mut result0 = vec![]; + let mut iter = self.0.iter(); + while let Some(t) = iter.next() { + match t { + Token::Text(text) if text.contains(delimiter) => { + let split_text = text.splitn(2, delimiter); + let pieces = split_text.collect::>(); + assert_eq!(pieces.len(), 2, "wrong number of pieces!"); + result0.push(Token::Text(pieces[0].into())); + let mut result1 = vec![Token::Text(pieces[1].into())]; + result1.extend(iter.cloned()); + return Some((TokenString(result0), TokenString(result1))); + } + _ => result0.push(t.clone()), + } + } + None + } +} + +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum Token { + Text(String), + MacroExpansion { + name: String, + replacement: Option<(TokenString, TokenString)>, + }, +} + +fn macro_name(input: &str) -> IResult<&str, &str> { + // POSIX says "periods, underscores, digits, and alphabetics from the portable character set" + take_while1(|c: char| { + c == '.' || c == '_' || c.is_alphanumeric() + })(input) +} + +fn macro_expansion_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + let subst = preceded(tag(":"), separated_pair(tokens_but_not('='), tag("="), tokens_but_not(end))); + map( + pair(macro_name, opt(subst)), + |(name, replacement)| Token::MacroExpansion { name: name.into(), replacement }, + ) +} + +fn parens_macro_expansion(input: &str) -> IResult<&str, Token> { + delimited(tag("$("), macro_expansion_body(')'), tag(")"))(input) +} + +fn braces_macro_expansion(input: &str) -> IResult<&str, Token> { + delimited(tag("${"), macro_expansion_body('}'), tag("}"))(input) +} + +fn tiny_macro_expansion(input: &str) -> IResult<&str, Token> { + let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); + map(raw, |c| { + if c == '$' { + Token::Text("$".into()) + } else { + Token::MacroExpansion { + name: c.to_string(), + replacement: None, + } + } + })(input) +} + +fn macro_expansion(input: &str) -> IResult<&str, Token> { + alt((tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion))(input) +} + +fn text(input: &str) -> IResult<&str, Token> { + map(take_till1(|c| c == '$'), |x: &str| Token::Text(x.into()))(input) +} + +fn text_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + map(take_till1(move |c| c == '$' || c == end), |x: &str| Token::Text(x.into())) +} + +fn single_token(input: &str) -> IResult<&str, Token> { + alt((text, macro_expansion))(input) +} + +fn single_token_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + alt((text_but_not(end), macro_expansion)) +} + +fn empty_tokens(input: &str) -> IResult<&str, TokenString> { + map(tag(""), |_| TokenString(vec![Token::Text(String::new())]))(input) +} + +fn tokens(input: &str) -> IResult<&str, TokenString> { + alt((map(many1(single_token), TokenString), empty_tokens))(input) +} + +fn tokens_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString> { + alt((map(many1(single_token_but_not(end)), TokenString), empty_tokens)) +} + +fn full_text_tokens(input: &str) -> IResult<&str, TokenString> { + all_consuming(tokens)(input) +} + +pub fn tokenize(input: &str) -> TokenString { + // TODO handle errors gracefully + let (_, result) = full_text_tokens(input).expect("couldn't parse"); + result +} + +impl FromStr for TokenString { + // TODO figure out how to get nom errors working (Error<&str> doesn't work because lifetimes) + type Err = (); + + fn from_str(s: &str) -> Result { + full_text_tokens(s).finish() + .map(|(_, x)| x) + .map_err(|_| ()) + } +} + +#[cfg(test)] +mod test { + use super::{Token, TokenString, tokenize}; + + impl From> for TokenString { + fn from(x: Vec) -> Self { + TokenString(x) + } + } + + fn token_text(text: impl Into) -> Token { + Token::Text(text.into()) + } + + fn token_macro_expansion(name: impl Into) -> Token { + Token::MacroExpansion { name: name.into(), replacement: None } + } + + fn token_macro_expansion_replacement(name: impl Into, + subst1: impl Into, + subst2: impl Into) -> Token { + Token::MacroExpansion { name: name.into(), replacement: Some((subst1.into(), subst2.into())) } + } + + #[test] + fn no_macros() { + let text = "This is an example sentence! There aren't macros in it at all!"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![token_text(text)])); + } + + #[test] + fn no_replacement() { + let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("This is a "), + token_macro_expansion("Q"), + token_text(" sentence! There are "), + token_macro_expansion("BORING"), + token_text(" macros in it at "), + token_macro_expansion("YEET"), + token_text("!"), + ])); + } + + #[test] + fn escaped() { + let text = "This costs $$2 to run, which isn't ideal"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("This costs "), + token_text("$"), + token_text("2 to run, which isn't ideal"), + ])); + } + + #[test] + fn replacement() { + let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("Can I get a "), + token_macro_expansion_replacement("DATA", vec![token_text(".c")], vec![token_text(".oof")]), + token_text(" in this "), + token_macro_expansion_replacement("SWAG", vec![token_text(".yolo")], vec![token_text("")]), + ])); + } + + #[test] + fn hell() { + let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_macro_expansion_replacement( + "OOF", + vec![ + token_macro_expansion_replacement("ouch", vec![token_text("hi")], vec![token_text("hey")]), + token_text(" there"), + ], + vec![ + token_macro_expansion_replacement( + "owie", + vec![token_macro_expansion("my")], + vec![token_macro_expansion("bones")], + ), + ], + ), + token_text(", bro."), + ])); + } +} -- cgit v1.2.3