diff options
Diffstat (limited to 'bird-machine-macros/src/dfa.rs')
-rw-r--r-- | bird-machine-macros/src/dfa.rs | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/bird-machine-macros/src/dfa.rs b/bird-machine-macros/src/dfa.rs new file mode 100644 index 0000000..0aec059 --- /dev/null +++ b/bird-machine-macros/src/dfa.rs @@ -0,0 +1,156 @@ +use std::collections::{BTreeMap, BTreeSet, VecDeque}; + +use proc_macro2::TokenStream; +use quote::{quote, ToTokens}; + +use super::nfa::{Range, NFA}; + +/// A nondeterministic finite automaton. +/// By convention, always starts in state 0. +pub struct DFA { + transition_table: BTreeMap<usize, BTreeMap<Range, usize>>, + accept_states: BTreeSet<usize>, +} + +struct LabeledDFA<Label: Ord> { + start_state: Label, + transition_table: BTreeMap<Label, BTreeMap<Range, Label>>, + accept_states: BTreeSet<Label>, +} + +impl From<NFA> for LabeledDFA<BTreeSet<usize>> { + fn from(nfa: NFA) -> Self { + type StateSet = BTreeSet<usize>; + + let mut transition_table: BTreeMap<StateSet, BTreeMap<Range, StateSet>> = BTreeMap::new(); + + let mut queue: VecDeque<StateSet> = VecDeque::new(); + + let start_state = nfa.epsilon_closure(0); + queue.push_back(start_state.clone()); + + while let Some(state_set) = queue.pop_front() { + let state_set = nfa.epsilon_closure_all(state_set); + + let mut next_table: BTreeMap<Range, StateSet> = BTreeMap::new(); + for &state in &state_set { + let table_entry = nfa.transition_table.get(&state); + let non_epsilon_table_entries = table_entry + .iter() + .flat_map(|x| x.iter()) + .filter_map(|(c, s)| c.as_ref().map(|c| (c.clone(), s))); + for (c, next_states) in non_epsilon_table_entries { + // TODO check for non-disjoint ranges + next_table + .entry(c) + .or_default() + .extend(nfa.epsilon_closure_all(next_states.iter().copied())) + } + } + + for states in next_table.values() { + if !transition_table.contains_key(states) { + queue.push_back(states.clone()); + } + } + transition_table.insert(state_set, next_table); + } + + let accept_states = transition_table + .keys() + .chain(transition_table.values().flat_map(|v| v.values())) + .filter(|s| s.iter().any(|s| nfa.accept_states.contains(s))) + .cloned() + .collect(); + + dbg!(&accept_states); + + Self { + start_state, + transition_table, + accept_states, + } + } +} + +impl<Label: Ord> From<LabeledDFA<Label>> for DFA { + fn from(dfa: LabeledDFA<Label>) -> Self { + let mut state_names = BTreeMap::new(); + state_names.insert(&dfa.start_state, 0usize); + let mut next_name = 1usize; + + for reachable_state in dfa.transition_table.values().flat_map(|x| x.values()) { + state_names.entry(reachable_state).or_insert_with(|| { + let result = next_name; + next_name = next_name.saturating_add(1); + result + }); + } + + let transition_table = dfa + .transition_table + .iter() + .map(|(state, table)| { + ( + state_names[&state], + table + .iter() + .map(|(c, s)| (c.clone(), state_names[&s])) + .collect(), + ) + }) + .collect(); + let accept_states = state_names + .iter() + .filter(|(s, _)| dfa.accept_states.contains(s)) + .map(|(_, n)| n) + .copied() + .collect(); + + Self { + transition_table, + accept_states, + } + } +} + +impl From<NFA> for DFA { + fn from(a: NFA) -> Self { + let intermediate = LabeledDFA::from(a); + Self::from(intermediate) + } +} + +impl ToTokens for Range { + fn to_tokens(&self, tokens: &mut TokenStream) { + let start = self.0.start(); + let end = self.0.end(); + let result = quote!(#start..=#end); + result.to_tokens(tokens); + } +} + +impl ToTokens for DFA { + fn to_tokens(&self, tokens: &mut TokenStream) { + let transition = self + .transition_table + .iter() + .flat_map(|(current_state, table)| { + table + .iter() + .map(move |(ch, next_state)| quote!((#current_state, #ch) => #next_state,)) + }); + let accepts = self.accept_states.iter(); + let result = quote! { + let mut state = 0usize; + for c in input.chars() { + state = match (state, c) { + #(#transition)* + _ => return false, + }; + } + matches!(state, #(#accepts)|*) + }; + result.to_tokens(tokens); + } +} |