aboutsummaryrefslogtreecommitdiff
path: root/bird-machine-macros/src/dfa.rs
diff options
context:
space:
mode:
Diffstat (limited to 'bird-machine-macros/src/dfa.rs')
-rw-r--r--bird-machine-macros/src/dfa.rs156
1 files changed, 156 insertions, 0 deletions
diff --git a/bird-machine-macros/src/dfa.rs b/bird-machine-macros/src/dfa.rs
new file mode 100644
index 0000000..0aec059
--- /dev/null
+++ b/bird-machine-macros/src/dfa.rs
@@ -0,0 +1,156 @@
+use std::collections::{BTreeMap, BTreeSet, VecDeque};
+
+use proc_macro2::TokenStream;
+use quote::{quote, ToTokens};
+
+use super::nfa::{Range, NFA};
+
/// A deterministic finite automaton.
/// By convention, always starts in state 0.
pub struct DFA {
    // Maps source state -> (input character range -> successor state).
    transition_table: BTreeMap<usize, BTreeMap<Range, usize>>,
    // States in which the automaton accepts the input.
    accept_states: BTreeSet<usize>,
}
+
+struct LabeledDFA<Label: Ord> {
+ start_state: Label,
+ transition_table: BTreeMap<Label, BTreeMap<Range, Label>>,
+ accept_states: BTreeSet<Label>,
+}
+
+impl From<NFA> for LabeledDFA<BTreeSet<usize>> {
+ fn from(nfa: NFA) -> Self {
+ type StateSet = BTreeSet<usize>;
+
+ let mut transition_table: BTreeMap<StateSet, BTreeMap<Range, StateSet>> = BTreeMap::new();
+
+ let mut queue: VecDeque<StateSet> = VecDeque::new();
+
+ let start_state = nfa.epsilon_closure(0);
+ queue.push_back(start_state.clone());
+
+ while let Some(state_set) = queue.pop_front() {
+ let state_set = nfa.epsilon_closure_all(state_set);
+
+ let mut next_table: BTreeMap<Range, StateSet> = BTreeMap::new();
+ for &state in &state_set {
+ let table_entry = nfa.transition_table.get(&state);
+ let non_epsilon_table_entries = table_entry
+ .iter()
+ .flat_map(|x| x.iter())
+ .filter_map(|(c, s)| c.as_ref().map(|c| (c.clone(), s)));
+ for (c, next_states) in non_epsilon_table_entries {
+ // TODO check for non-disjoint ranges
+ next_table
+ .entry(c)
+ .or_default()
+ .extend(nfa.epsilon_closure_all(next_states.iter().copied()))
+ }
+ }
+
+ for states in next_table.values() {
+ if !transition_table.contains_key(states) {
+ queue.push_back(states.clone());
+ }
+ }
+ transition_table.insert(state_set, next_table);
+ }
+
+ let accept_states = transition_table
+ .keys()
+ .chain(transition_table.values().flat_map(|v| v.values()))
+ .filter(|s| s.iter().any(|s| nfa.accept_states.contains(s)))
+ .cloned()
+ .collect();
+
+ dbg!(&accept_states);
+
+ Self {
+ start_state,
+ transition_table,
+ accept_states,
+ }
+ }
+}
+
+impl<Label: Ord> From<LabeledDFA<Label>> for DFA {
+ fn from(dfa: LabeledDFA<Label>) -> Self {
+ let mut state_names = BTreeMap::new();
+ state_names.insert(&dfa.start_state, 0usize);
+ let mut next_name = 1usize;
+
+ for reachable_state in dfa.transition_table.values().flat_map(|x| x.values()) {
+ state_names.entry(reachable_state).or_insert_with(|| {
+ let result = next_name;
+ next_name = next_name.saturating_add(1);
+ result
+ });
+ }
+
+ let transition_table = dfa
+ .transition_table
+ .iter()
+ .map(|(state, table)| {
+ (
+ state_names[&state],
+ table
+ .iter()
+ .map(|(c, s)| (c.clone(), state_names[&s]))
+ .collect(),
+ )
+ })
+ .collect();
+ let accept_states = state_names
+ .iter()
+ .filter(|(s, _)| dfa.accept_states.contains(s))
+ .map(|(_, n)| n)
+ .copied()
+ .collect();
+
+ Self {
+ transition_table,
+ accept_states,
+ }
+ }
+}
+
+impl From<NFA> for DFA {
+ fn from(a: NFA) -> Self {
+ let intermediate = LabeledDFA::from(a);
+ Self::from(intermediate)
+ }
+}
+
+impl ToTokens for Range {
+ fn to_tokens(&self, tokens: &mut TokenStream) {
+ let start = self.0.start();
+ let end = self.0.end();
+ let result = quote!(#start..=#end);
+ result.to_tokens(tokens);
+ }
+}
+
+impl ToTokens for DFA {
+ fn to_tokens(&self, tokens: &mut TokenStream) {
+ let transition = self
+ .transition_table
+ .iter()
+ .flat_map(|(current_state, table)| {
+ table
+ .iter()
+ .map(move |(ch, next_state)| quote!((#current_state, #ch) => #next_state,))
+ });
+ let accepts = self.accept_states.iter();
+ let result = quote! {
+ let mut state = 0usize;
+ for c in input.chars() {
+ state = match (state, c) {
+ #(#transition)*
+ _ => return false,
+ };
+ }
+ matches!(state, #(#accepts)|*)
+ };
+ result.to_tokens(tokens);
+ }
+}