From 24207feb7726bd2db97693eb8fdd155d33612574 Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Wed, 31 Mar 2021 23:04:09 -0600 Subject: basic sketch of general vibe --- .gitignore | 3 + Cargo.toml | 11 +++ README.md | 59 ++++++++++++++ bird-machine-macros/Cargo.toml | 15 ++++ bird-machine-macros/src/lib.rs | 179 +++++++++++++++++++++++++++++++++++++++++ bird-machine-macros/src/nfa.rs | 15 ++++ src/lib.rs | 105 ++++++++++++++++++++++++ 7 files changed, 387 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 bird-machine-macros/Cargo.toml create mode 100644 bird-machine-macros/src/lib.rs create mode 100644 bird-machine-macros/src/nfa.rs create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6936990 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..de204ce --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "bird-machine" +version = "0.1.0" +authors = ["Melody Horn / boringcactus "] +description = "Compile your regular expressions at compile time." +keywords = ["regex", "proc_macro"] +edition = "2018" + +[dependencies] +bird-machine-macros = { path = "./bird-machine-macros", version = "0.1.0" } +regex = "1.4" diff --git a/README.md b/README.md new file mode 100644 index 0000000..91b8e71 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# bird-machine + +Compile your regular expressions at compile time. 
+ +## Example: find a date + +```rust +use bird_machine::{bird_machine, Machine}; + +#[bird_machine(r"^\d{4}-\d{2}-\d{2}$")] +struct Date; + +assert!(Date::is_match("2014-01-01")); +``` + +## Example: iterating over capture groups + +```rust +use bird_machine::{bird_machine, Machine}; + +#[bird_machine(r"(\d{4})-(\d{2})-(\d{2})")] +struct Date<'a>(&'a str, &'a str, &'a str); +let input = "2012-03-14, 2013-01-01 and 2014-07-05"; +let match_info = Date::captures_iter(input) + .map(|x: Date| format!("Month: {} Day: {} Year: {}", x.1, x.2, x.0)); +let expected = [ + "Month: 03 Day: 14 Year: 2012", + "Month: 01 Day: 01 Year: 2013", + "Month: 07 Day: 05 Year: 2014", +]; +for (actual, expected) in match_info.zip(expected) { + assert_eq!(actual, expected); +} +``` + +# Example: replacement with named capture groups + +```rust +use bird_machine::{bird_machine, Machine}; + +#[bird_machine(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})")] +struct Date<'a> { + y: &'a str, + m: &'a str, + d: &'a str, +} +let before = "2012-03-14, 2013-01-01 and 2014-07-05"; +let after = Date::replace_all(before, "$m/$d/$y"); +assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); +``` + +# Example: compile-time rejection of invalid regular expressions + +```rust,compile_fail +use bird_machine::bird_machine; + +#[bird_machine(r"(oops i left this group open")] +struct Bad; +``` \ No newline at end of file diff --git a/bird-machine-macros/Cargo.toml b/bird-machine-macros/Cargo.toml new file mode 100644 index 0000000..e45569b --- /dev/null +++ b/bird-machine-macros/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "bird-machine-macros" +version = "0.1.0" +authors = ["boringcactus / Melody Horn "] +description = "proc macros for bird-machine" +edition = "2018" + +[lib] +proc-macro = true + +[dependencies] +regex-syntax = "0.6" +proc-macro2 = "1.0" +quote = "1" +syn = "1.0" diff --git a/bird-machine-macros/src/lib.rs b/bird-machine-macros/src/lib.rs new file mode 100644 index 0000000..628ee91 --- /dev/null +++ 
b/bird-machine-macros/src/lib.rs @@ -0,0 +1,179 @@ +extern crate proc_macro; +use proc_macro::TokenStream; + +use syn::{parse_macro_input, DeriveInput, LitStr}; +use quote::{quote, ToTokens}; + +mod nfa; + +#[proc_macro_attribute] +pub fn bird_machine(args: TokenStream, input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let input_regex = parse_macro_input!(args as LitStr); + + let input_type_name = &input.ident; + let input_lifetimes: Vec<_> = input.generics.lifetimes().collect(); + let lifetime = match input_lifetimes.as_slice() { + [] => quote!{ 'unrestricted }, + [lt] => quote!{ #lt }, + _ => panic!("multiple lifetime generics, what is this, pls to halp"), + }; + + let machine = build_machine(&input_regex); + dbg!(&machine); + + let (_, ty_generics, where_clause) = input.generics.split_for_impl(); + + let impl_decl = quote! { + impl<#lifetime> ::bird_machine::Machine<#lifetime> for #input_type_name #ty_generics #where_clause + }; + let original_regex = quote! { + const ORIGINAL_REGEX: &'static str = #input_regex; + }; + let captures = quote! { + fn captures(text: &#lifetime str) -> Option<Self> { + todo!() + } + }; + let captures_iter = quote! { + type CaptureIterator = ::std::iter::Empty<Self>; + fn captures_iter(text: &#lifetime str) -> Self::CaptureIterator { + todo!() + } + }; + let find = quote! { + fn find(text: &#lifetime str) -> Option<::bird_machine::Match<#lifetime>> { + todo!() + } + }; + let find_at = quote! { + fn find_at(text: &#lifetime str, start: usize) -> Option<::bird_machine::Match<#lifetime>> { + todo!() + } + }; + let find_iter = quote! { + type FindIterator = ::std::iter::Empty<::bird_machine::Match<#lifetime>>; + fn find_iter(text: &#lifetime str) -> Self::FindIterator { + todo!() + } + }; + let is_match = quote! { + fn is_match(text: &#lifetime str) -> bool { + todo!() + } + }; + let is_match_at = quote! 
{ + fn is_match_at(text: &#lifetime str, start: usize) -> bool { + todo!() + } + }; + let replace = quote! { + fn replace( + text: &#lifetime str, + rep: impl ::bird_machine::Replacer<#lifetime, Self>, + ) -> ::std::borrow::Cow<#lifetime, str> { + todo!() + } + }; + let replace_all = quote! { + fn replace_all( + text: &#lifetime str, + rep: impl ::bird_machine::Replacer<#lifetime, Self>, + ) -> ::std::borrow::Cow<#lifetime, str> { + todo!() + } + }; + let replacen = quote! { + fn replacen( + text: &#lifetime str, + limit: usize, + rep: impl ::bird_machine::Replacer<#lifetime, Self>, + ) -> ::std::borrow::Cow<#lifetime, str> { + todo!() + } + }; + let split = quote! { + type SplitIterator = ::std::iter::Empty<&#lifetime str>; + fn split(text: &#lifetime str) -> Self::SplitIterator { + todo!() + } + }; + let splitn = quote! { + type SplitNIterator = ::std::iter::Empty<&#lifetime str>; + fn splitn(text: &#lifetime str, limit: usize) -> Self::SplitNIterator { + todo!() + } + }; + + let tokens = quote! 
{ + #input + + #machine + + #impl_decl { + #original_regex + #captures + #captures_iter + #find + #find_at + #find_iter + #is_match + #is_match_at + #replace + #replace_all + #replacen + #split + #splitn + } + }; + + eprintln!( + "{impl_decl} {{\n\n\ + {original_regex}\n\n\ + {captures}\n\n\ + {captures_iter}\n\n\ + {find}\n\n\ + {find_at}\n\n\ + {find_iter}\n\n\ + {is_match}\n\n\ + {is_match_at}\n\n\ + {replace}\n\n\ + {replace_all}\n\n\ + {replacen}\n\n\ + {split}\n\n\ + {splitn}\n\n\ + }}", + impl_decl = impl_decl, + original_regex = original_regex, + captures = captures, + captures_iter = captures_iter, + find = find, + find_at = find_at, + find_iter = find_iter, + is_match = is_match, + is_match_at = is_match_at, + replace = replace, + replace_all = replace_all, + replacen = replacen, + split = split, + splitn = splitn, + ); + + tokens.into() +} + +fn build_machine(regex: &LitStr) -> proc_macro2::TokenStream { + let regex_text = regex.value(); + let regex_ir = regex_syntax::Parser::new() + .parse(&regex_text); + let regex_ir = match regex_ir { + Ok(x) => x, + Err(err ) => panic!("error compiling regex {}: {}", regex.to_token_stream(), err), + }; + dbg!(&regex_ir); + + // shout out to all the professors who've taught me how to do this + let mut built_nfa = nfa::NFA::default(); + + todo!() +} diff --git a/bird-machine-macros/src/nfa.rs b/bird-machine-macros/src/nfa.rs new file mode 100644 index 0000000..b7b3612 --- /dev/null +++ b/bird-machine-macros/src/nfa.rs @@ -0,0 +1,15 @@ +use std::collections::{HashMap, HashSet}; + +#[derive(Default)] +pub struct NFA { + state_count: usize, + transition_table: HashMap, HashSet>, +} + +impl NFA { + pub fn new_state(&mut self) -> usize { + let result = self.state_count; + self.state_count += 1; + result + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..a886aa0 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,105 @@ +//! # bird-machine +//! +//! Compile your regular expressions at compile time. +//! +//! 
## Example: find a date +//! +//! ```rust +//! use bird_machine::{bird_machine, Machine}; +//! +//! #[bird_machine(r"^\d{4}-\d{2}-\d{2}$")] +//! struct Date; +//! +//! assert!(Date::is_match("2014-01-01")); +//! ``` +//! +//! ## Example: iterating over capture groups +//! +//! ```rust +//! use bird_machine::{bird_machine, Machine}; +//! +//! #[bird_machine(r"(\d{4})-(\d{2})-(\d{2})")] +//! struct Date<'a>(&'a str, &'a str, &'a str); +//! let input = "2012-03-14, 2013-01-01 and 2014-07-05"; +//! let match_info = Date::captures_iter(input) +//! .map(|x: Date| format!("Month: {} Day: {} Year: {}", x.1, x.2, x.0)); +//! let expected = [ +//! "Month: 03 Day: 14 Year: 2012", +//! "Month: 01 Day: 01 Year: 2013", +//! "Month: 07 Day: 05 Year: 2014", +//! ]; +//! for (actual, expected) in match_info.zip(expected) { +//! assert_eq!(actual, expected); +//! } +//! ``` +//! +//! # Example: replacement with named capture groups +//! +//! ```rust +//! use bird_machine::{bird_machine, Machine}; +//! +//! #[bird_machine(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})")] +//! struct Date<'a> { +//! y: &'a str, +//! m: &'a str, +//! d: &'a str, +//! } +//! let before = "2012-03-14, 2013-01-01 and 2014-07-05"; +//! let after = Date::replace_all(before, "$m/$d/$y"); +//! assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); +//! ``` +//! +//! # Example: compile-time rejection of invalid regular expressions +//! +//! ```rust,compile_fail +//! use bird_machine::bird_machine; +//! +//! #[bird_machine(r"(oops i left this group open")] +//! struct Bad; +//! 
``` +use std::borrow::Cow; + +pub use regex::Match; + +pub use bird_machine_macros::bird_machine; + +pub trait Machine<'t>: Sized { + const ORIGINAL_REGEX: &'static str; + + fn captures(text: &'t str) -> Option<Self>; + + // rust smdh why can this not just return impl Iterator + type CaptureIterator: Iterator<Item = Self>; + fn captures_iter(text: &'t str) -> Self::CaptureIterator; + + fn find(text: &'t str) -> Option<Match<'t>>; + fn find_at(text: &'t str, start: usize) -> Option<Match<'t>>; + + // once again i am asking why trait methods can't return impl Iterator + type FindIterator: Iterator<Item = Match<'t>>; + fn find_iter(text: &'t str) -> Self::FindIterator; + + fn is_match(text: &'t str) -> bool; + fn is_match_at(text: &'t str, start: usize) -> bool; + + fn replace(text: &'t str, rep: impl Replacer<'t, Self>) -> Cow<'t, str>; + fn replace_all(text: &'t str, rep: impl Replacer<'t, Self>) -> Cow<'t, str>; + fn replacen(text: &'t str, limit: usize, rep: impl Replacer<'t, Self>) -> Cow<'t, str>; + + type SplitIterator: Iterator<Item = &'t str>; + fn split(text: &'t str) -> Self::SplitIterator; + type SplitNIterator: Iterator<Item = &'t str>; + fn splitn(text: &'t str, limit: usize) -> Self::SplitNIterator; +} + +pub trait Replacer<'t, M: Machine<'t>> { + fn replace_append(&mut self, r#match: &M, dst: &mut String); +} + +impl<'t, M: Machine<'t>, S: AsRef<str>, F: FnMut(&M) -> S> Replacer<'t, M> for F { + fn replace_append(&mut self, r#match: &M, dst: &mut String) { + let replaced_with = self(r#match); + let replaced_with: &str = replaced_with.as_ref(); + dst.push_str(replaced_with); + } +} -- cgit v1.2.3