aboutsummaryrefslogtreecommitdiff
path: root/src/makefile/token.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/makefile/token.rs')
-rw-r--r--src/makefile/token.rs236
1 files changed, 236 insertions, 0 deletions
diff --git a/src/makefile/token.rs b/src/makefile/token.rs
new file mode 100644
index 0000000..3f69b50
--- /dev/null
+++ b/src/makefile/token.rs
@@ -0,0 +1,236 @@
+use std::str::FromStr;
+
+use nom::{
+ Finish, IResult,
+ branch::alt,
+ bytes::complete::{tag, take_till1, take_while1},
+ character::complete::anychar,
+ combinator::{all_consuming, map, opt, verify},
+ multi::many1,
+ sequence::{delimited, pair, preceded, separated_pair},
+};
+
+/// An ordered sequence of [`Token`]s, as produced by [`tokenize`] or by parsing a `&str`.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct TokenString(Vec<Token>);
+
+impl TokenString {
+    /// Iterates over the contained tokens, in order, by reference.
+    pub fn tokens(&self) -> impl Iterator<Item = &Token> {
+        self.0.iter()
+    }
+
+    /// Returns a mutable reference to the first token.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the token string holds no tokens at all.
+    pub fn first_token_mut(&mut self) -> &mut Token {
+        &mut self.0[0]
+    }
+
+    /// Splits the token string at the first occurrence of `delimiter`.
+    ///
+    /// Only [`Token::Text`] tokens are searched; every other token is copied to
+    /// the output unchanged. The delimiter itself is dropped. Returns `None`
+    /// when no text token contains `delimiter`.
+    pub fn split_once(&self, delimiter: char) -> Option<(TokenString, TokenString)> {
+        let mut before = Vec::new();
+        // `while let` rather than `for`: the remainder of the iterator is
+        // consumed inside the loop body once the delimiter is found.
+        let mut remaining = self.0.iter();
+        while let Some(token) = remaining.next() {
+            if let Token::Text(text) = token {
+                if let Some(at) = text.find(delimiter) {
+                    let resume = at + delimiter.len_utf8();
+                    before.push(Token::Text(text[..at].to_owned()));
+                    let mut after = vec![Token::Text(text[resume..].to_owned())];
+                    after.extend(remaining.cloned());
+                    return Some((TokenString(before), TokenString(after)));
+                }
+            }
+            before.push(token.clone());
+        }
+        None
+    }
+}
+
+/// One element of a [`TokenString`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Token {
+    /// Literal text. An escaped `$$` in the input is stored as a single `$`.
+    Text(String),
+    /// A macro reference written as `$X`, `$(NAME)` or `${NAME}`.
+    MacroExpansion {
+        /// Name of the macro being referenced.
+        name: String,
+        /// The two sides of a `:left=right` suffix inside the parentheses or
+        /// braces, or `None` when no such suffix was written.
+        replacement: Option<(TokenString, TokenString)>,
+    },
+}
+
+/// Parses a non-empty run of macro-name characters.
+fn macro_name(input: &str) -> IResult<&str, &str> {
+    // POSIX says "periods, underscores, digits, and alphabetics from the portable character set"
+    // NOTE(review): `char::is_alphanumeric` also accepts non-ASCII alphanumerics,
+    // which is broader than the portable character set — confirm this is intended.
+    let is_name_char = |c: char| matches!(c, '.' | '_') || c.is_alphanumeric();
+    take_while1(is_name_char)(input)
+}
+
+/// Builds a parser for what sits between the delimiters of `$(...)` / `${...}`:
+/// a macro name, optionally followed by `:left=right`.
+///
+/// `end` is the closing delimiter character; the right-hand side of the
+/// replacement stops when it is reached.
+fn macro_expansion_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> {
+    let left = tokens_but_not('=');
+    let right = tokens_but_not(end);
+    let replacement = preceded(tag(":"), separated_pair(left, tag("="), right));
+    map(pair(macro_name, opt(replacement)), |(name, replacement)| {
+        Token::MacroExpansion {
+            name: name.to_owned(),
+            replacement,
+        }
+    })
+}
+
+/// Parses a macro expansion of the form `$(...)`.
+fn parens_macro_expansion(input: &str) -> IResult<&str, Token> {
+    let mut parser = delimited(tag("$("), macro_expansion_body(')'), tag(")"));
+    parser(input)
+}
+
+/// Parses a macro expansion of the form `${...}`.
+fn braces_macro_expansion(input: &str) -> IResult<&str, Token> {
+    let mut parser = delimited(tag("${"), macro_expansion_body('}'), tag("}"));
+    parser(input)
+}
+
+/// Parses `$` followed by one character that is neither `(` nor `{`.
+///
+/// `$$` yields the literal text `$`; any other character becomes a
+/// single-character macro expansion without a replacement.
+fn tiny_macro_expansion(input: &str) -> IResult<&str, Token> {
+    // `$(` and `${` are left for the delimited forms to handle.
+    let name_char = verify(anychar, |c: &char| !matches!(*c, '(' | '{'));
+    let mut parser = map(preceded(tag("$"), name_char), |c: char| match c {
+        '$' => Token::Text("$".into()),
+        other => Token::MacroExpansion {
+            name: other.to_string(),
+            replacement: None,
+        },
+    });
+    parser(input)
+}
+
+/// Parses a macro expansion in any of its three spellings: `$X`, `$(...)` or `${...}`.
+fn macro_expansion(input: &str) -> IResult<&str, Token> {
+    let spellings = (tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion);
+    alt(spellings)(input)
+}
+
+/// Parses a non-empty run of literal text, stopping before the next `$`.
+fn text(input: &str) -> IResult<&str, Token> {
+    let until_dollar = take_till1(|c: char| c == '$');
+    map(until_dollar, |run: &str| Token::Text(run.to_owned()))(input)
+}
+
+/// Like [`text`], but the run of literal text also stops before `end`.
+fn text_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> {
+    let is_stop = move |c: char| c == '$' || c == end;
+    map(take_till1(is_stop), |run: &str| Token::Text(run.to_owned()))
+}
+
+/// Parses exactly one token: literal text if possible, otherwise a macro expansion.
+fn single_token(input: &str) -> IResult<&str, Token> {
+    let mut parser = alt((text, macro_expansion));
+    parser(input)
+}
+
+/// Like [`single_token`], but literal text additionally stops before `end`.
+fn single_token_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> {
+    let literal = text_but_not(end);
+    alt((literal, macro_expansion))
+}
+
+/// Consumes nothing and yields a token string holding a single empty text token.
+fn empty_tokens(input: &str) -> IResult<&str, TokenString> {
+    let make_empty = |_: &str| TokenString(vec![Token::Text(String::new())]);
+    map(tag(""), make_empty)(input)
+}
+
+/// Parses one or more tokens, falling back to [`empty_tokens`] when none can be parsed.
+fn tokens(input: &str) -> IResult<&str, TokenString> {
+    let non_empty = map(many1(single_token), TokenString);
+    alt((non_empty, empty_tokens))(input)
+}
+
+/// Like [`tokens`], but literal text stops before `end`, so the caller can
+/// consume that character itself.
+fn tokens_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, TokenString> {
+    let non_empty = map(many1(single_token_but_not(end)), TokenString);
+    alt((non_empty, empty_tokens))
+}
+
+/// Parses `input` as a token string and fails unless the whole input is consumed.
+fn full_text_tokens(input: &str) -> IResult<&str, TokenString> {
+    let mut whole_input = all_consuming(tokens);
+    whole_input(input)
+}
+
+/// Tokenizes `input` into a [`TokenString`].
+///
+/// # Panics
+///
+/// Panics if `input` cannot be parsed in full.
+pub fn tokenize(input: &str) -> TokenString {
+    // TODO handle errors gracefully
+    full_text_tokens(input)
+        .map(|(_rest, parsed)| parsed)
+        .expect("couldn't parse")
+}
+
+impl FromStr for TokenString {
+    // TODO figure out how to get nom errors working (Error<&str> doesn't work because lifetimes)
+    type Err = ();
+
+    /// Parses `s` as a token string; returns `Err(())` if it cannot be parsed in full.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match full_text_tokens(s).finish() {
+            Ok((_rest, parsed)) => Ok(parsed),
+            Err(_) => Err(()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{Token, TokenString, tokenize};
+
+    // Lets the helpers below take a plain `Vec<Token>` wherever a `TokenString` is wanted.
+    impl From<Vec<Token>> for TokenString {
+        fn from(tokens: Vec<Token>) -> Self {
+            Self(tokens)
+        }
+    }
+
+    /// Builds a literal text token.
+    fn token_text(text: impl Into<String>) -> Token {
+        Token::Text(text.into())
+    }
+
+    /// Builds a macro expansion token without a replacement.
+    fn token_macro_expansion(name: impl Into<String>) -> Token {
+        Token::MacroExpansion {
+            name: name.into(),
+            replacement: None,
+        }
+    }
+
+    /// Builds a macro expansion token carrying a `subst1=subst2` replacement.
+    fn token_macro_expansion_replacement(
+        name: impl Into<String>,
+        subst1: impl Into<TokenString>,
+        subst2: impl Into<TokenString>,
+    ) -> Token {
+        let replacement = Some((subst1.into(), subst2.into()));
+        Token::MacroExpansion {
+            name: name.into(),
+            replacement,
+        }
+    }
+
+    #[test]
+    fn no_macros() {
+        let text = "This is an example sentence! There aren't macros in it at all!";
+        let expected = TokenString(vec![token_text(text)]);
+        assert_eq!(tokenize(text), expected);
+    }
+
+    #[test]
+    fn no_replacement() {
+        let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!";
+        let expected = TokenString(vec![
+            token_text("This is a "),
+            token_macro_expansion("Q"),
+            token_text(" sentence! There are "),
+            token_macro_expansion("BORING"),
+            token_text(" macros in it at "),
+            token_macro_expansion("YEET"),
+            token_text("!"),
+        ]);
+        assert_eq!(tokenize(text), expected);
+    }
+
+    #[test]
+    fn escaped() {
+        let text = "This costs $$2 to run, which isn't ideal";
+        let expected = TokenString(vec![
+            token_text("This costs "),
+            token_text("$"),
+            token_text("2 to run, which isn't ideal"),
+        ]);
+        assert_eq!(tokenize(text), expected);
+    }
+
+    #[test]
+    fn replacement() {
+        let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}";
+        let data = token_macro_expansion_replacement(
+            "DATA",
+            vec![token_text(".c")],
+            vec![token_text(".oof")],
+        );
+        let swag = token_macro_expansion_replacement(
+            "SWAG",
+            vec![token_text(".yolo")],
+            vec![token_text("")],
+        );
+        let expected = TokenString(vec![
+            token_text("Can I get a "),
+            data,
+            token_text(" in this "),
+            swag,
+        ]);
+        assert_eq!(tokenize(text), expected);
+    }
+
+    #[test]
+    fn hell() {
+        let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro.";
+        let ouch = token_macro_expansion_replacement(
+            "ouch",
+            vec![token_text("hi")],
+            vec![token_text("hey")],
+        );
+        let owie = token_macro_expansion_replacement(
+            "owie",
+            vec![token_macro_expansion("my")],
+            vec![token_macro_expansion("bones")],
+        );
+        let oof = token_macro_expansion_replacement(
+            "OOF",
+            vec![ouch, token_text(" there")],
+            vec![owie],
+        );
+        let expected = TokenString(vec![oof, token_text(", bro.")]);
+        assert_eq!(tokenize(text), expected);
+    }
+}