From 50b6d0f63329900ed9e6730096a293aebd44e452 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Fri, 2 Apr 2021 22:30:15 -0600 Subject: refactor makefile reading into a separate module --- src/makefile/input.rs | 648 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 648 insertions(+) create mode 100644 src/makefile/input.rs (limited to 'src/makefile/input.rs') diff --git a/src/makefile/input.rs b/src/makefile/input.rs new file mode 100644 index 0000000..fda81d0 --- /dev/null +++ b/src/makefile/input.rs @@ -0,0 +1,648 @@ +use std::cell::{Cell, RefCell}; +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::iter::Peekable; +use std::path::Path; +use std::rc::Rc; + +use eyre::{bail, eyre, Context, Result}; +use lazy_static::lazy_static; +use regex::Regex; + +use crate::args::Args; + +use super::command_line::CommandLine; +#[cfg(feature = "full")] +use super::conditional::{Line as ConditionalLine, State as ConditionalState}; +use super::inference_rules::InferenceRule; +use super::r#macro::{Set as MacroSet, Source as MacroSource}; +use super::target::Target; +use super::token::{tokenize, Token, TokenString}; + +enum LineType { + Rule, + Macro, + Unknown, +} + +impl LineType { + fn of(line_tokens: &TokenString) -> Self { + #[cfg(feature = "full")] + if line_tokens.starts_with("define ") { + return Self::Macro; + } + for token in line_tokens.tokens() { + if let Token::Text(text) = token { + let colon_idx = text.find(':'); + #[cfg(not(feature = "full"))] + let equals_idx = text.find('='); + #[cfg(feature = "full")] + let equals_idx = ["=", ":=", "::=", "?=", "+="] + .iter() + .filter_map(|p| text.find(p)) + .min(); + match (colon_idx, equals_idx) { + (Some(_), None) => { + return Self::Rule; + } + (Some(c), Some(e)) if c < e => { + return Self::Rule; + } + (None, Some(_)) => { + return Self::Macro; + } + (Some(c), Some(e)) if e <= c => { + return Self::Macro; + } + _ => {} + } + } + } + Self::Unknown + } +} + +fn inference_match<'a>( + targets: &[&'a str], + prerequisites: &[String], +) -> Option> { + lazy_static! { + static ref INFERENCE_RULE: Regex = + Regex::new(r"^(?P(\.[^/.]+)?)(?P\.[^/.]+)$").unwrap(); + static ref SPECIAL_TARGET: Regex = Regex::new(r"^\.[A-Z]+$").unwrap(); + } + + let inference_match = INFERENCE_RULE.captures(targets[0]); + let special_target_match = SPECIAL_TARGET.captures(targets[0]); + + let inference_rule = targets.len() == 1 + && prerequisites.is_empty() + && inference_match.is_some() + && special_target_match.is_none(); + if inference_rule { + inference_match + } else { + None + } +} + +pub struct MakefileReader<'a> { + inference_rules: Vec, + macros: MacroSet<'static, 'static>, + targets: RefCell>>>, + pub first_non_special_target: Option, + args: &'a Args, + // TODO borrow warnings from Python version +} + +impl<'a> MakefileReader<'a> { + pub fn new(args: &'a Args) -> Self { + let mut inference_rules = vec![]; + let mut macros = MacroSet::new(); + let mut targets = HashMap::new(); + let first_non_special_target = None; + + if !args.no_builtin_rules { + inference_rules.extend(builtin_inference_rules()); + macros.add_builtins(); + targets.extend( + builtin_targets() + .into_iter() + .map(|t| (t.name.clone(), Rc::new(RefCell::new(t)))), + ); + } + + macros.add_env(); + + for r#macro in args.macros() { + if let [name, value] = *r#macro.splitn(2, '=').collect::>() { + macros.set( + name.into(), + MacroSource::CommandLineOrMakeflags, + TokenString::text(value), + ); + } + } + + MakefileReader { + inference_rules, + macros, + targets: RefCell::new(targets), + first_non_special_target, + args, + } + } + + pub fn and_read_file(&mut self, path: impl AsRef) -> Result<()> { + let file = File::open(path); + // TODO handle errors + let file = file.context("couldn't open makefile!")?; + let file_reader = BufReader::new(file); + self.and_read(file_reader) + } + + pub fn and_read(&mut self, source: impl BufRead) -> Result<()> { + let mut lines_iter = source + .lines() + .enumerate() + .map(|(number, line)| (number.saturating_add(1), line)) + .map(|(line, x)| { + ( + line, + x.with_context(|| format!("failed to read line {} of makefile", line)), + ) + }) + .peekable(); + #[cfg(feature = "full")] + let mut conditional_stack: Vec = vec![]; + while let Some((line_number, line)) = lines_iter.next() { + let mut line = line?; + + // handle escaped newlines + while line.ends_with('\\') { + line.pop(); + line.push(' '); + if let Some((_, x)) = lines_iter.next() { + line.push_str(x?.trim_start()) + } + } + + // handle comments + lazy_static! { + static ref COMMENT: Regex = Regex::new("#.*$").unwrap(); + } + let line = COMMENT.replace(&line, "").into_owned(); + + #[cfg(feature = "full")] + if let Some(line) = ConditionalLine::from(&line, |t| self.expand_macros(t))? { + line.action( + conditional_stack.last(), + |name| self.macros.is_defined(name), + |t| self.expand_macros(t), + )? + .apply_to(&mut conditional_stack); + continue; + } + + // skip lines if we need to + #[cfg(feature = "full")] + if conditional_stack + .last() + .map_or(false, ConditionalState::skipping) + { + continue; + } + + // handle include lines + if let Some(line) = line.strip_prefix("include ") { + // remove extra leading space + let line = line.trim_start(); + let line = self.expand_macros(&tokenize(line)?)?; + let fields = line.split_whitespace(); + // POSIX says we only have to handle a single filename, but GNU make + // handles arbitrarily many filenames, and it's not like that's more work + for field in fields { + self.and_read_file(field)?; + } + continue; + } + + if line.trim().is_empty() { + // handle blank lines + continue; + } + // unfortunately, rules vs macros can't be determined until after + // macro tokenizing. so that's suboptimal. + + // TODO errors + let line_tokens: TokenString = line + .parse() + .with_context(|| format!("failed to parse line {}", line_number))?; + + let line_type = LineType::of(&line_tokens); + + // before we actually test it, see if it's only visible after expanding macros + let (line_tokens, line_type) = if let LineType::Unknown = line_type { + let line_tokens = TokenString::text(self.expand_macros(&line_tokens)?); + let line_type = LineType::of(&line_tokens); + (line_tokens, line_type) + } else { + (line_tokens, line_type) + }; + + match line_type { + LineType::Rule => self.read_rule(&line_tokens, line_number, &mut lines_iter)?, + LineType::Macro => self.read_macro(line_tokens, line_number, &mut lines_iter)?, + LineType::Unknown => { + if !line_tokens.is_empty() { + bail!( + "error: line {}: unknown line \"{}\"", + line_number, + line_tokens + ); + } + } + } + } + + Ok(()) + } + + fn read_rule( + &mut self, + line_tokens: &TokenString, + line_number: usize, + lines_iter: &mut Peekable)>>, + ) -> Result<()> { + let (targets, not_targets) = line_tokens + .split_once(':') + .ok_or_else(|| eyre!("read_rule couldn't find a ':' on line {}", line_number))?; + let targets = self.expand_macros(&targets)?; + let targets = targets.split_whitespace().collect::>(); + let (prerequisites, mut commands) = match not_targets.split_once(';') { + Some((prerequisites, mut command)) => { + while command.ends_with("\\") { + if let Some((_, next_line)) = lines_iter.next() { + command.strip_suffix("\\"); + command.extend(tokenize(&next_line?)?); + } else { + break; + } + } + (prerequisites, vec![command]) + } + None => (not_targets, vec![]), + }; + let prerequisites = self.expand_macros(&prerequisites)?; + let prerequisites = prerequisites + .split_whitespace() + .map(|x| x.into()) + .collect::>(); + + while let Some((_, x)) = lines_iter.next_if(|(_, x)| { + x.as_ref() + .ok() + .map_or(false, |line| line.starts_with('\t') || line.is_empty()) + }) { + let mut line = x?; + if !line.is_empty() { + line.remove(0); + } + if line.is_empty() { + continue; + } + while line.ends_with('\\') { + match lines_iter.next() { + Some((_, Ok(next_line))) => { + let next_line = next_line.strip_prefix("\t").unwrap_or(&next_line); + line.push('\n'); + line.push_str(next_line); + } + _ => break, + } + } + commands.push( + line.parse() + .with_context(|| format!("failed to parse line {}", line_number))?, + ); + } + + let commands = commands + .into_iter() + .map(CommandLine::from) + .collect::>(); + + if targets.is_empty() { + return Ok(()); + } + + // we don't know yet if it's a target rule or an inference rule + let inference_match = inference_match(&targets, &prerequisites); + + if let Some(inference_match) = inference_match { + let new_rule = InferenceRule { + product: inference_match.name("s1").unwrap().as_str().to_owned(), + prereq: inference_match.name("s2").unwrap().as_str().to_owned(), + commands, + }; + + self.inference_rules.retain(|existing_rule| { + (&existing_rule.prereq, &existing_rule.product) + != (&new_rule.prereq, &new_rule.product) + }); + self.inference_rules.push(new_rule); + } else { + for target in targets { + if self.first_non_special_target.is_none() && !target.starts_with('.') { + self.first_non_special_target = Some(target.into()); + } + let mut targets = self.targets.borrow_mut(); + match targets.get_mut(target) { + Some(old_target) + if commands.is_empty() + && !(target == ".SUFIXES" && prerequisites.is_empty()) => + { + let mut old_target = old_target.borrow_mut(); + let new_prerequisites = prerequisites + .iter() + .filter(|x| !old_target.prerequisites.contains(x)) + .cloned() + .collect::>(); + old_target.prerequisites.extend(new_prerequisites); + } + _ => { + let new_target = Target { + name: target.into(), + prerequisites: prerequisites.clone(), + commands: commands.clone(), + already_updated: Cell::new(false), + }; + targets.insert(target.into(), Rc::new(RefCell::new(new_target))); + } + } + } + } + + Ok(()) + } + + fn read_macro( + &mut self, + mut line_tokens: TokenString, + line_number: usize, + lines_iter: &mut Peekable)>>, + ) -> Result<()> { + let (name, mut value) = if cfg!(feature = "full") && line_tokens.starts_with("define ") { + line_tokens.strip_prefix("define "); + if line_tokens.ends_with("=") { + line_tokens.strip_suffix("="); + line_tokens.trim_end(); + } + let mut value = TokenString::empty(); + for (_, line) in lines_iter { + let line = line?; + if line == "endef" { + break; + } + if !value.is_empty() { + value.extend(TokenString::text("\n")); + } + value.extend(line.parse()?); + } + (line_tokens, value) + } else { + line_tokens + .split_once('=') + .ok_or_else(|| eyre!("read_rule couldn't find a ':' on line {}", line_number))? + }; + let name = self.expand_macros(&name)?; + // GNUisms are annoying, but popular + let mut expand_value = false; + let mut skip_if_defined = false; + let mut append = false; + + #[cfg(feature = "full")] + let name = if let Some(real_name) = name.strip_suffix("::") { + expand_value = true; + real_name + } else if let Some(real_name) = name.strip_suffix(":") { + expand_value = true; + real_name + } else if let Some(real_name) = name.strip_suffix("?") { + skip_if_defined = true; + real_name + } else if let Some(real_name) = name.strip_suffix("+") { + append = true; + real_name + } else { + &name + }; + + let name = name.trim_end(); + value.trim_start(); + + let value = if expand_value { + TokenString::text(self.expand_macros(&value)?) + } else { + value + }; + + match self.macros.get(name) { + // We always let command line or MAKEFLAGS macros override macros from the file. + Some((MacroSource::CommandLineOrMakeflags, _)) => return Ok(()), + // We let environment variables override macros from the file only if the command-line argument to do that was given + Some((MacroSource::Environment, _)) if self.args.environment_overrides => return Ok(()), + _ if skip_if_defined => return Ok(()), + _ => {} + } + + let value = match self.macros.pop(name) { + Some((_, mut old_value)) if append => { + // TODO eagerly expand if appending to eagerly-expanded macro + old_value.extend(TokenString::text(" ")); + old_value.extend(value); + old_value + } + _ => value, + }; + self.macros.set(name.into(), MacroSource::File, value); + + Ok(()) + } + + fn expand_macros(&self, text: &TokenString) -> Result { + self.macros.expand(text) + } +} + +impl<'a> From> for super::Makefile<'a> { + fn from(reader: MakefileReader<'a>) -> Self { + Self { + inference_rules: reader.inference_rules, + macros: reader.macros, + targets: reader.targets, + first_non_special_target: reader.first_non_special_target, + args: reader.args, + } + } +} + +fn builtin_inference_rules() -> Vec { + // This is a terrible idea. + macro_rules! prepend_dot { + ($x:tt) => { + concat!(".", stringify!($x)) + }; + () => { + "" + }; + } + + macro_rules! make { + {$(.$first:tt$(.$second:tt)?: + $($cmd:literal)+)+} => { + vec![$( + InferenceRule { + product: prepend_dot!($($second)?).into(), + prereq: concat!(".", stringify!($first)).into(), + commands: vec![$(CommandLine::from($cmd.parse().unwrap())),+], + } + ),+] + }; + } + + make! { + .c: + "$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<" + .f: + "$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $<" + .sh: + "cp $< $@" + "chmod a+x $@" + + .c.o: + "$(CC) $(CFLAGS) -c $<" + .f.o: + "$(FC) $(FFLAGS) -c $<" + .y.o: + "$(YACC) $(YFLAGS) $<" + "$(CC) $(CFLAGS) -c y.tab.c" + "rm -f y.tab.c" + "mv y.tab.o $@" + .l.o: + "$(LEX) $(LFLAGS) $<" + "$(CC) $(CFLAGS) -c lex.yy.c" + "rm -f lex.yy.c" + "mv lex.yy.o $@" + .y.c: + "$(YACC) $(YFLAGS) $<" + "mv y.tab.c $@" + .l.c: + "$(LEX) $(LFLAGS) $<" + "mv lex.yy.c $@" + .c.a: + "$(CC) -c $(CFLAGS) $<" + "$(AR) $(ARFLAGS) $@ $*.o" + "rm -f $*.o" + .f.a: + "$(FC) -c $(FFLAGS) $<" + "$(AR) $(ARFLAGS) $@ $*.o" + "rm -f $*.o" + } +} +fn builtin_targets() -> Vec { + // even i'm not going to do that just for this + vec![Target { + name: ".SUFFIXES".into(), + prerequisites: vec![".o", ".c", ".y", ".l", ".a", ".sh", ".f"] + .into_iter() + .map(String::from) + .collect(), + commands: vec![], + already_updated: Cell::new(false), + }] +} + +#[cfg(test)] +mod test { + use std::io::Cursor; + + use super::*; + + type R = Result<()>; + + fn empty_makefile(args: &Args) -> MakefileReader { + MakefileReader { + inference_rules: vec![], + macros: MacroSet::new(), + targets: RefCell::new(HashMap::new()), + first_non_special_target: None, + args, + } + } + + #[cfg(feature = "full")] + #[test] + fn basic_conditionals() -> R { + let file = " +ifeq (1,1) +worked = yes +else ifeq (2,2) +worked = no +else +worked = perhaps +endif + "; + let args = Args::empty(); + let mut makefile = empty_makefile(&args); + makefile.and_read(Cursor::new(file))?; + assert_eq!( + makefile.expand_macros(&TokenString::r#macro("worked"))?, + "yes" + ); + Ok(()) + } + + #[cfg(feature = "full")] + #[test] + fn define_syntax() -> R { + let file = " +define foo = +bar +baz +endef + "; + let args = Args::empty(); + let mut makefile = empty_makefile(&args); + makefile.and_read(Cursor::new(file))?; + assert_eq!( + makefile.expand_macros(&TokenString::r#macro("foo"))?, + "bar\nbaz" + ); + Ok(()) + } + + #[test] + #[ignore = "I still haven't implemented `eval` or %-based macro substitution."] + fn eval() -> R { + // This, for the record, is a terrible misfeature. + // If you need this, you probably shouldn't be using Make. + // But a lot of people are using this and still use Make anyway, so here we go, + // I guess. + + let file = " +PROGRAMS = server client + +server_OBJS = server.o server_priv.o server_access.o +server_LIBS = priv protocol + +client_OBJS = client.o client_api.o client_mem.o +client_LIBS = protocol + +# Everything after this is generic + +.PHONY: all +all: $(PROGRAMS) + +define PROGRAM_template = + $(1): $$($(1)_OBJS) $$($(1)_LIBS:%=-l%) + ALL_OBJS += $$($(1)_OBJS) +endef + +$(foreach prog,$(PROGRAMS),$(eval $(call PROGRAM_template,$(prog)))) + +$(PROGRAMS): + $(LINK.o) $^ $(LDLIBS) -o $@ + +clean: + rm -f $(ALL_OBJS) $(PROGRAMS) + "; + + let args = Args::empty(); + let mut makefile = empty_makefile(&args); + makefile.and_read(Cursor::new(file))?; + assert!(makefile.targets.borrow().contains_key("server")); + Ok(()) + } +} -- cgit v1.2.3