From 1844cb79ae82e71610573f133c5ed7aeeb0c50b6 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Tue, 23 Mar 2021 23:27:45 -0600 Subject: man i don't even fuckin know anymore --- Cargo.lock | 122 ++++++++++++++ Cargo.toml | 7 + README.md | 8 +- src/args.rs | 26 +-- src/main.rs | 35 +++- src/makefile/mod.rs | 444 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/makefile/token.rs | 236 +++++++++++++++++++++++++++ 7 files changed, 862 insertions(+), 16 deletions(-) create mode 100644 src/makefile/mod.rs create mode 100644 src/makefile/token.rs diff --git a/Cargo.lock b/Cargo.lock index 190d96a..00bc731 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,14 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -9,6 +18,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "atty" version = "0.2.14" @@ -26,6 +41,24 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" 
version = "2.33.3" @@ -41,6 +74,12 @@ dependencies = [ "vec_map", ] +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "heck" version = "0.3.2" @@ -65,6 +104,19 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f866863575d0e1d654fbeeabdc927292fdf862873dc3c96c6f753357e13374" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.91" @@ -75,9 +127,32 @@ checksum = "8916b1f6ca17130ec6568feccee27c156ad12037880833a3b842a823236502e7" name = "makers" version = "0.1.0" dependencies = [ + "lazy_static", + "libc", + "nom", + "regex", "structopt", ] +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -120,6 +195,41 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.8.0" @@ -161,6 +271,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "textwrap" version = "0.11.0" @@ -221,3 +337,9 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" diff --git a/Cargo.toml b/Cargo.toml index 59129c2..9395ded 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,4 +10,11 @@ keywords = ["build", "make"] categories = ["development-tools"] [dependencies] +lazy_static = "1.4.0" +libc = "0.2.91" +nom = "6.1.2" +regex = "1.4.5" structopt = "0.3.21" + +# [target.'cfg(unix)'.dependencies] +# signal-hook = "0.3.7" diff --git a/README.md b/README.md index 71b3c3f..a1daa2b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # makers -A 
[POSIX-compatible](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) make implemented in Rust. +A (mostly) [POSIX-compatible](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) make implemented in Rust. + +## conformance + +- internationalization (`LANG`/`LC_ALL`/`LC_CTYPE`/`LC_MESSAGES`) not implemented +- XSI conformance (SCCS integration) not implemented +- signal handling not implemented diff --git a/src/args.rs b/src/args.rs index 1fe6731..963c01e 100644 --- a/src/args.rs +++ b/src/args.rs @@ -4,13 +4,13 @@ use std::path::PathBuf; use structopt::StructOpt; -#[derive(StructOpt, Debug, PartialEq, Eq)] +#[derive(StructOpt, Debug, PartialEq, Eq, Clone)] #[structopt(author, about)] pub struct Args { /// Cause environment variables, including those with null values, to override macro /// assignments within makefiles. #[structopt(short, long)] - environment_overrides: bool, + pub environment_overrides: bool, /// Specify a different makefile (or '-' for standard input). /// @@ -20,20 +20,20 @@ pub struct Args { /// specified. The effect of specifying the same option-argument more than once is /// unspecified. #[structopt(short = "f", long = "file", visible_alias = "makefile", number_of_values = 1, parse(from_os_str))] - makefile: Vec, + pub makefile: Vec, /// Ignore error codes returned by invoked commands. /// /// This mode is the same as if the special target .IGNORE were specified without /// prerequisites. #[structopt(short, long)] - ignore_errors: bool, + pub ignore_errors: bool, /// Continue to update other targets that do not depend on the current target if a /// non-ignored error occurs while executing the commands to bring a target /// up-to-date. #[structopt(short, long)] - keep_going: bool, + pub keep_going: bool, /// Write commands that would be executed on standard output, but do not execute them /// (but execute lines starting with '+'). 
@@ -42,14 +42,14 @@ pub struct Args { /// lines with an at-sign ( '@' ) character prefix shall be written to standard /// output. #[structopt(short = "n", long, visible_alias = "just-print", visible_alias = "recon")] - dry_run: bool, + pub dry_run: bool, /// Write to standard output the complete set of macro definitions and target /// descriptions. /// /// The output format is unspecified. #[structopt(short, long, visible_alias = "print-data-base")] - print_everything: bool, + pub print_everything: bool, /// Return a zero exit value if the target file is up-to-date; otherwise, return an /// exit value of 1. @@ -58,11 +58,11 @@ pub struct Args { /// command line (associated with the targets) with a <plus-sign> ( '+' ) prefix /// shall be executed. #[structopt(short, long)] - question: bool, + pub question: bool, /// Clear the suffix list and do not use the built-in rules. #[structopt(short = "r", long)] - no_builtin_rules: bool, + pub no_builtin_rules: bool, /// Terminate make if an error occurs while executing the commands to bring a target /// up-to-date (default behavior, required by POSIX to be also a flag for some @@ -70,7 +70,7 @@ pub struct Args { /// /// This shall be the default and the opposite of -k. #[structopt(short = "S", long, visible_alias = "stop", hidden = true, overrides_with="keep-going")] - no_keep_going: bool, + pub no_keep_going: bool, /// Do not write makefile command lines or touch messages to standard output before /// executing. @@ -78,7 +78,7 @@ pub struct Args { /// This mode shall be the same as if the special target .SILENT were specified /// without prerequisites. #[structopt(short, long, visible_alias = "quiet")] - silent: bool, + pub silent: bool, /// Update the modification time of each target as though a touch target had been /// executed. @@ -89,14 +89,14 @@ pub struct Args { /// the makefile command lines associated with each target are not executed. However, /// a command line with a <plus-sign> ( '+' ) prefix shall be executed.
#[structopt(short, long)] - touch: bool, + pub touch: bool, /// Target names or macro definitions. /// /// If no target is specified, while make is processing the makefiles, the first /// target that make encounters that is not a special target or an inference rule /// shall be used. - targets_or_macros: Vec, + pub targets_or_macros: Vec, } impl Args { diff --git a/src/main.rs b/src/main.rs index 2316546..4048886 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,40 @@ +use std::fs::metadata; +use std::io::stdin; +use std::path::PathBuf; mod args; +mod makefile; use args::Args; +use makefile::Makefile; fn main() { - let args = Args::from_env_and_args(); - dbg!(args); + let mut args = Args::from_env_and_args(); + // If no makefile is specified, try some options. + if args.makefile.is_empty() { + if metadata("./makefile").is_ok() { + args.makefile = vec!["./makefile".into()]; + } else if metadata("./Makefile").is_ok() { + args.makefile = vec!["./Makefile".into()]; + } else { + // TODO handle error gracefully + panic!("no makefile found"); + } + } + // Read in the makefile(s) specified. 
+ // TODO dump command-line args into MAKEFLAGS + // TODO dump command-line macros into environment + // TODO add SHELL macro + let mut makefile = Makefile::new(args.clone()); + if !args.no_builtin_rules { + makefile.add_builtins(); + } + makefile.add_env(); + for filename in &args.makefile { + if filename == &PathBuf::from("-") { + makefile.and_read(stdin().lock()); + } else { + makefile.and_read_file(filename); + }; + } } diff --git a/src/makefile/mod.rs b/src/makefile/mod.rs new file mode 100644 index 0000000..75873d2 --- /dev/null +++ b/src/makefile/mod.rs @@ -0,0 +1,444 @@ +use std::collections::HashMap; +use std::env; +use std::fs::{File, metadata}; +use std::io::{BufRead, BufReader}; +use std::path::Path; +use std::time::SystemTime; + +use lazy_static::lazy_static; +use regex::Regex; + +use crate::args::Args; + +mod token; + +use token::{tokenize, Token, TokenString}; + +pub enum RuleType { + Inference, + Target, +} + +#[derive(PartialEq, Eq, Clone)] +pub struct Rule { + name: String, + prerequisites: Vec, + commands: Vec, +} + +impl Rule { + pub fn r#type(&self) -> RuleType { + if self.name.contains(".") && !self.name.contains("/") { + RuleType::Inference + } else { + RuleType::Target + } + } + + fn execute_commands(&self, file: &Makefile, target: &Target) { + for command in &self.commands { + command.execute(file, target); + } + } +} + +#[derive(PartialEq, Eq, Clone)] +pub struct Target { + name: String, + prerequisites: Vec, + rule: Option, + already_updated: bool, +} + +impl Target { + fn modified_time(&self) -> Option { + metadata(&self.name) + .and_then(|metadata| metadata.modified()) + .ok() + } + + fn newer_than(&self, other: &Target) -> Option { + Some(match (self.modified_time(), other.modified_time()) { + (Some(self_mtime), Some(other_mtime)) => self_mtime >= other_mtime, + // per POSIX: "If the target does not exist after the target has been + // successfully made up-to-date, the target shall be treated as being + // newer than any target for 
which it is a prerequisite." + (None, _) if self.already_updated && other.prerequisites.contains(&self.name) => true, + (_, None) if other.already_updated && self.prerequisites.contains(&other.name) => false, + _ => return None, + }) + } + + fn is_up_to_date(&self, file: &mut Makefile) -> bool { + if self.already_updated { + return true; + } + let exists = metadata(&self.name).is_ok(); + if exists && self.rule.is_none() { + return true; + } + let newer_than_all_dependencies = self.prerequisites + .iter() + .all(|t| self.newer_than(&file.get_target(t)).unwrap_or(false)); + if exists && newer_than_all_dependencies { + return true; + } + false + } + + fn update(&mut self, file: &mut Makefile) { + for prereq in &self.prerequisites { + file.update_target(prereq); + } + if !self.is_up_to_date(file) { + match &self.rule { + Some(rule) => rule.execute_commands(file, self), + None => panic!("target doesn't exist & no rule to make it"), // TODO handle this error well + } + } + self.already_updated = true; + } +} + +#[derive(PartialEq, Eq, Clone)] +pub struct CommandLine { + /// If the command prefix contains a <hyphen-minus>, or the -i option is present, or + /// the special target .IGNORE has either the current target as a prerequisite or has + /// no prerequisites, any error found while executing the command shall be ignored. + ignore_errors: bool, + /// If the command prefix contains an at-sign and the make utility command line -n + /// option is not specified, or the -s option is present, or the special target + /// .SILENT has either the current target as a prerequisite or has no prerequisites, + /// the command shall not be written to standard output before it is executed. + silent: bool, + /// If the command prefix contains a <plus-sign>, this indicates a makefile command + /// line that shall be executed even if -n, -q, or -t is specified.
+ always_execute: bool, + execution_line: TokenString, +} + +impl CommandLine { + fn from(mut line: TokenString) -> Self { + let mut ignore_errors = false; + let mut silent = false; + let mut always_execute = false; + + if let Token::Text(text) = line.first_token_mut() { + let mut text_chars = text.chars().peekable(); + loop { + match text_chars.peek() { + Some('-') | Some('@') | Some('+') => match text_chars.next() { + Some('-') => ignore_errors = true, + Some('@') => silent = true, + Some('+') => always_execute = true, + _ => unreachable!() + }, + _ => break, + } + } + *text = text_chars.collect(); + } + + CommandLine { + ignore_errors, + silent, + always_execute, + execution_line: line, + } + } + + fn execute(&self, file: &Makefile, target: &Target) { + let avoid_execution = file.args.dry_run || file.args.question || file.args.touch; + if avoid_execution && !self.always_execute { + return; + } + + let execution_line = file.expand_macros(&self.execution_line); + + let self_silent = self.silent && !file.args.dry_run; + let special_target_silent = file.rules.get(".SILENT") + .map_or(false, |silent_target| { + silent_target.prerequisites.is_empty() || silent_target.prerequisites.contains(&target.name) + }); + let silent = self_silent || file.args.silent || special_target_silent; + if !silent { + println!("{}", execution_line); + } + + let special_target_ignore = file.rules.get(".IGNORE") + .map_or(false, |ignore_target| { + ignore_target.prerequisites.is_empty() || ignore_target.prerequisites.contains(&target.name) + }); + let ignore_error = self.ignore_errors || file.args.ignore_errors || special_target_ignore; + + // TODO don't fuck this up + let execution_line = ::std::ffi::CString::new(execution_line.as_bytes()) + .expect("execution line shouldn't have a null in the middle"); + // TODO pass shell "-e" if errors are not ignored + let return_value = unsafe { libc::system(execution_line.as_ptr()) }; + if return_value != 0 { + // apparently there was an error. 
do we care? + if !ignore_error { + // TODO handle this error gracefully + panic!("error from command execution!"); + } + } + } +} + +enum MacroSource { + File, + CommandLineOrMAKEFLAGS, + Environment, + Builtin, +} + +pub struct Makefile { + rules: HashMap, + macros: HashMap, + targets: HashMap, + args: Args, +} + +impl Makefile { + pub fn new(args: Args) -> Makefile { + Makefile { + rules: HashMap::new(), + macros: HashMap::new(), + targets: HashMap::new(), + args, + } + } + + pub fn add_builtins(&mut self) -> &mut Makefile { + self.rules.extend(BUILTIN_RULES.iter().map(|(name, rule)| (name.to_string(), rule.clone()))); + self + } + + pub fn add_env(&mut self) -> &mut Makefile { + self.macros.extend(env::vars() + .filter_map(|(name, value)| { + if name == "MAKEFLAGS" || name == "SHELL" { + None + } else { + Some((name, (MacroSource::Environment, TokenString::from(vec![Token::Text(value)])))) + } + }) + ); + self + } + + pub fn and_read_file(&mut self, path: impl AsRef) -> &mut Makefile { + let file = File::open(path); + // TODO handle errors + let file = file.expect("couldn't open makefile!"); + let file_reader = BufReader::new(file); + self.and_read(file_reader) + } + + pub fn and_read(&mut self, source: impl BufRead) -> &mut Makefile { + let mut lines_iter = source.lines().peekable(); + while lines_iter.peek().is_some() { + let line = match lines_iter.next() { + Some(x) => x, + // fancy Rust trick: break-with-an-argument to return a value from a + // `loop` expression + None => break, + }; + // TODO handle I/O errors at all + let mut line = line.expect("failed to read line of makefile!"); + + // handle escaped newlines (TODO exception for command lines) + while line.ends_with(r"\") { + let next_line = match lines_iter.next() { + Some(x) => x, + None => Ok("".into()), + }; + let next_line = next_line.expect("failed to read line of makefile!"); + let next_line = next_line.trim_start(); + line.push(' '); + line.push_str(next_line); + } + // handle comments + 
lazy_static! { + static ref COMMENT: Regex = Regex::new("#.*$").unwrap(); + } + let line = COMMENT.replace(&line, "").into_owned(); + + // handle include lines + if let Some(line) = line.strip_prefix("include ") { + // remove extra leading space + let line = line.trim_start(); + let line = self.expand_macros(&tokenize(line)); + let fields = line.split_whitespace(); + // POSIX says we only have to handle a single filename, but GNU make + // handles arbitrarily many filenames, and it's not like that's more work + // TODO have some way of linting for non-portable constructs + for field in fields { + self.and_read_file(field); + } + } else if line.trim().is_empty() { + // handle blank lines + continue; + } else { + // unfortunately, rules vs macros can't be determined until after + // macro tokenizing. so that's suboptimal. + // TODO errors + let line_tokens: TokenString = line.parse().unwrap(); + + enum LineType { + Rule, + Macro, + Unknown, + } + + fn get_line_type(line_tokens: &TokenString) -> LineType { + for token in line_tokens.tokens() { + if let Token::Text(text) = token { + let colon_idx = text.find(":"); + let equals_idx = text.find("="); + match (colon_idx, equals_idx) { + (Some(_), None) => { + return LineType::Rule; + } + (Some(c), Some(e)) if c < e => { + return LineType::Rule; + } + (None, Some(_)) => { + return LineType::Macro; + } + (Some(c), Some(e)) if e < c => { + return LineType::Macro; + } + _ => {} + } + } + } + LineType::Unknown + } + + let line_type = get_line_type(&line_tokens); + + match line_type { + LineType::Rule => { + let (targets, not_targets) = line_tokens.split_once(':').unwrap(); + let targets = self.expand_macros(&targets); + let targets = targets.split_whitespace().map(|x| x.into()).collect::>(); + let (prerequisites, mut commands) = match not_targets.split_once(';') { + Some((prerequisites, commands)) => (prerequisites, vec![commands]), + None => (not_targets, vec![]), + }; + let prerequisites = self.expand_macros(&prerequisites); 
+ let prerequisites = prerequisites.split_whitespace().map(|x| x.into()).collect::>(); + + while lines_iter.peek().and_then(|x| x.as_ref().ok()).map_or(false, |line| line.starts_with('\t')) { + let line = lines_iter.next().unwrap().unwrap(); + let line = line.strip_prefix("\t").unwrap(); + commands.push(line.parse().unwrap()); + } + + let commands = commands.into_iter() + .map(CommandLine::from) + .collect::>(); + + for target in targets { + match self.rules.get_mut(&target) { + Some(old_rule) if commands.is_empty() => { + old_rule.prerequisites.extend(prerequisites.clone()); + } + _ => { + self.rules.insert(target.clone(), Rule { + name: target, + prerequisites: prerequisites.clone(), + commands: commands.clone(), + }); + } + } + } + }, + LineType::Macro => { + let (name, value) = line_tokens.split_once('=').unwrap(); + let name = self.expand_macros(&name); + match self.macros.get(&name) { + // We always let command line or MAKEFLAGS macros override macros from the file. + Some((MacroSource::CommandLineOrMAKEFLAGS, _)) => continue, + // We let environment variables override macros from the file only if the command-line argument to do that was given + Some((MacroSource::Environment, _)) if self.args.environment_overrides => continue, + _ => {} + } + self.macros.insert(name, (MacroSource::File, value)); + } + LineType::Unknown => { + panic!("Unknown line {:?}", line_tokens); + } + } + } + } + self + } + + fn get_target(&mut self, name: impl Into) -> &mut Target { + let name = name.into(); + { + let rules_get_name = self.rules.get(&name); + let make_target = || { + if let Some(target_rule) = rules_get_name { + return Target { + name: name.clone(), + prerequisites: target_rule.prerequisites.clone(), + already_updated: false, + rule: Some(target_rule.clone()) + }; + } + panic!("uhhhhh i don't even know anymore bro"); + }; + self.targets + .entry(name.clone()) + .or_insert_with(make_target); + } + self.targets.get_mut(&name).unwrap() + } + + fn update_target(&mut self, 
name: impl Into<String>) { + // This is the dumbest fucking thing I've ever had to do. + // We can't leave it in the map, because then we have overlapping mutable borrows of self, + // so we have to remove it from the map, do the work, and then re-insert it into the map. + // Fuck this so much. + // Why the goddamn hell do I even write Rust. + let name = name.into(); + { + let _ = self.get_target(name.clone()); + } + let mut target = self.targets.remove(&name).unwrap(); + target.update(self); + self.targets.insert(name.clone(), target); + } + + fn expand_macros(&self, text: &TokenString) -> String { + let mut result = String::new(); + for token in text.tokens() { + match token { + Token::Text(t) => result.push_str(t), + Token::MacroExpansion { name, replacement } => { + let (_, macro_value) = &self.macros[name]; + let macro_value = self.expand_macros(macro_value); + let macro_value = match replacement { + Some((subst1, subst2)) => { + let subst1 = self.expand_macros(subst1); + let subst1_suffix = regex::escape(&subst1); + let subst1_suffix = Regex::new(&format!(r"{}\b", subst1_suffix)).unwrap(); + let subst2 = self.expand_macros(subst2); + subst1_suffix.replace_all(&macro_value, subst2).to_string() + }, + None => macro_value, + }; + result.push_str(&macro_value); + } + } + } + return result; + } +} + +const BUILTIN_RULES: &'static [(&'static str, Rule)] = &[]; +const BUILTIN_SUFFIX_LIST: &'static [&'static str] = &[]; diff --git a/src/makefile/token.rs b/src/makefile/token.rs new file mode 100644 index 0000000..3f69b50 --- /dev/null +++ b/src/makefile/token.rs @@ -0,0 +1,236 @@ +use std::str::FromStr; + +use nom::{ + Finish, IResult, + branch::alt, + bytes::complete::{tag, take_till1, take_while1}, + character::complete::anychar, + combinator::{all_consuming, map, opt, verify}, + multi::many1, + sequence::{delimited, pair, preceded, separated_pair}, +}; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct TokenString(Vec<Token>); + +impl TokenString { + pub fn tokens(&self) -> impl
Iterator { + self.0.iter() + } + + pub fn first_token_mut(&mut self) -> &mut Token { + &mut self.0[0] + } + + pub fn split_once(&self, delimiter: char) -> Option<(TokenString, TokenString)> { + let mut result0 = vec![]; + let mut iter = self.0.iter(); + while let Some(t) = iter.next() { + match t { + Token::Text(text) if text.contains(delimiter) => { + let split_text = text.splitn(2, delimiter); + let pieces = split_text.collect::>(); + assert_eq!(pieces.len(), 2, "wrong number of pieces!"); + result0.push(Token::Text(pieces[0].into())); + let mut result1 = vec![Token::Text(pieces[1].into())]; + result1.extend(iter.cloned()); + return Some((TokenString(result0), TokenString(result1))); + } + _ => result0.push(t.clone()), + } + } + None + } +} + +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum Token { + Text(String), + MacroExpansion { + name: String, + replacement: Option<(TokenString, TokenString)>, + }, +} + +fn macro_name(input: &str) -> IResult<&str, &str> { + // POSIX says "periods, underscores, digits, and alphabetics from the portable character set" + take_while1(|c: char| { + c == '.' 
|| c == '_' || c.is_alphanumeric() + })(input) +} + +fn macro_expansion_body<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + let subst = preceded(tag(":"), separated_pair(tokens_but_not('='), tag("="), tokens_but_not(end))); + map( + pair(macro_name, opt(subst)), + |(name, replacement)| Token::MacroExpansion { name: name.into(), replacement }, + ) +} + +fn parens_macro_expansion(input: &str) -> IResult<&str, Token> { + delimited(tag("$("), macro_expansion_body(')'), tag(")"))(input) +} + +fn braces_macro_expansion(input: &str) -> IResult<&str, Token> { + delimited(tag("${"), macro_expansion_body('}'), tag("}"))(input) +} + +fn tiny_macro_expansion(input: &str) -> IResult<&str, Token> { + let raw = preceded(tag("$"), verify(anychar, |&c| c != '(' && c != '{')); + map(raw, |c| { + if c == '$' { + Token::Text("$".into()) + } else { + Token::MacroExpansion { + name: c.to_string(), + replacement: None, + } + } + })(input) +} + +fn macro_expansion(input: &str) -> IResult<&str, Token> { + alt((tiny_macro_expansion, parens_macro_expansion, braces_macro_expansion))(input) +} + +fn text(input: &str) -> IResult<&str, Token> { + map(take_till1(|c| c == '$'), |x: &str| Token::Text(x.into()))(input) +} + +fn text_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + map(take_till1(move |c| c == '$' || c == end), |x: &str| Token::Text(x.into())) +} + +fn single_token(input: &str) -> IResult<&str, Token> { + alt((text, macro_expansion))(input) +} + +fn single_token_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, Token> { + alt((text_but_not(end), macro_expansion)) +} + +fn empty_tokens(input: &str) -> IResult<&str, TokenString> { + map(tag(""), |_| TokenString(vec![Token::Text(String::new())]))(input) +} + +fn tokens(input: &str) -> IResult<&str, TokenString> { + alt((map(many1(single_token), TokenString), empty_tokens))(input) +} + +fn tokens_but_not<'a>(end: char) -> impl FnMut(&'a str) -> IResult<&'a str, 
TokenString> { + alt((map(many1(single_token_but_not(end)), TokenString), empty_tokens)) +} + +fn full_text_tokens(input: &str) -> IResult<&str, TokenString> { + all_consuming(tokens)(input) +} + +pub fn tokenize(input: &str) -> TokenString { + // TODO handle errors gracefully + let (_, result) = full_text_tokens(input).expect("couldn't parse"); + result +} + +impl FromStr for TokenString { + // TODO figure out how to get nom errors working (Error<&str> doesn't work because lifetimes) + type Err = (); + + fn from_str(s: &str) -> Result { + full_text_tokens(s).finish() + .map(|(_, x)| x) + .map_err(|_| ()) + } +} + +#[cfg(test)] +mod test { + use super::{Token, TokenString, tokenize}; + + impl From> for TokenString { + fn from(x: Vec) -> Self { + TokenString(x) + } + } + + fn token_text(text: impl Into) -> Token { + Token::Text(text.into()) + } + + fn token_macro_expansion(name: impl Into) -> Token { + Token::MacroExpansion { name: name.into(), replacement: None } + } + + fn token_macro_expansion_replacement(name: impl Into, + subst1: impl Into, + subst2: impl Into) -> Token { + Token::MacroExpansion { name: name.into(), replacement: Some((subst1.into(), subst2.into())) } + } + + #[test] + fn no_macros() { + let text = "This is an example sentence! There aren't macros in it at all!"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![token_text(text)])); + } + + #[test] + fn no_replacement() { + let text = "This is a $Q sentence! There are $(BORING) macros in it at ${YEET}!"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("This is a "), + token_macro_expansion("Q"), + token_text(" sentence! 
There are "), + token_macro_expansion("BORING"), + token_text(" macros in it at "), + token_macro_expansion("YEET"), + token_text("!"), + ])); + } + + #[test] + fn escaped() { + let text = "This costs $$2 to run, which isn't ideal"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("This costs "), + token_text("$"), + token_text("2 to run, which isn't ideal"), + ])); + } + + #[test] + fn replacement() { + let text = "Can I get a $(DATA:.c=.oof) in this ${SWAG:.yolo=}"; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_text("Can I get a "), + token_macro_expansion_replacement("DATA", vec![token_text(".c")], vec![token_text(".oof")]), + token_text(" in this "), + token_macro_expansion_replacement("SWAG", vec![token_text(".yolo")], vec![token_text("")]), + ])); + } + + #[test] + fn hell() { + let text = "$(OOF:${ouch:hi=hey} there=$(owie:$(my)=${bones})), bro."; + let tokens = tokenize(text); + assert_eq!(tokens, TokenString(vec![ + token_macro_expansion_replacement( + "OOF", + vec![ + token_macro_expansion_replacement("ouch", vec![token_text("hi")], vec![token_text("hey")]), + token_text(" there"), + ], + vec![ + token_macro_expansion_replacement( + "owie", + vec![token_macro_expansion("my")], + vec![token_macro_expansion("bones")], + ), + ], + ), + token_text(", bro."), + ])); + } +} -- cgit v1.2.3