aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMelody Horn <melody@boringcactus.com>2021-03-31 23:04:09 -0600
committerMelody Horn <melody@boringcactus.com>2021-03-31 23:04:09 -0600
commit24207feb7726bd2db97693eb8fdd155d33612574 (patch)
treeaa0ee9c2deb20105db7239c2de75593f8b25256f
downloadbird-machine-24207feb7726bd2db97693eb8fdd155d33612574.tar.gz
bird-machine-24207feb7726bd2db97693eb8fdd155d33612574.zip
basic sketch of general vibe
-rw-r--r--.gitignore3
-rw-r--r--Cargo.toml11
-rw-r--r--README.md59
-rw-r--r--bird-machine-macros/Cargo.toml15
-rw-r--r--bird-machine-macros/src/lib.rs179
-rw-r--r--bird-machine-macros/src/nfa.rs15
-rw-r--r--src/lib.rs105
7 files changed, 387 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6936990
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..de204ce
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "bird-machine"
+version = "0.1.0"
+authors = ["Melody Horn / boringcactus <melody@boringcactus.com>"]
+description = "Compile your regular expressions at compile time."
+keywords = ["regex", "proc_macro"]
+edition = "2018"
+
+[dependencies]
+bird-machine-macros = { path = "./bird-machine-macros", version = "0.1.0" }
+regex = "1.4"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..91b8e71
--- /dev/null
+++ b/README.md
@@ -0,0 +1,59 @@
+# bird-machine
+
+Compile your regular expressions at compile time.
+
+## Example: find a date
+
+```rust
+use bird_machine::{bird_machine, Machine};
+
+#[bird_machine(r"^\d{4}-\d{2}-\d{2}$")]
+struct Date;
+
+assert!(Date::is_match("2014-01-01"));
+```
+
+## Example: iterating over capture groups
+
+```rust
+use bird_machine::{bird_machine, Machine};
+
+#[bird_machine(r"(\d{4})-(\d{2})-(\d{2})")]
+struct Date<'a>(&'a str, &'a str, &'a str);
+let input = "2012-03-14, 2013-01-01 and 2014-07-05";
+let match_info = Date::captures_iter(input)
+ .map(|x: Date| format!("Month: {} Day: {} Year: {}", x.1, x.2, x.0));
+let expected = [
+ "Month: 03 Day: 14 Year: 2012",
+ "Month: 01 Day: 01 Year: 2013",
+ "Month: 07 Day: 05 Year: 2014",
+];
+for (actual, expected) in match_info.zip(expected) {
+ assert_eq!(actual, expected);
+}
+```
+
+## Example: replacement with named capture groups
+
+```rust
+use bird_machine::{bird_machine, Machine};
+
+#[bird_machine(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})")]
+struct Date<'a> {
+ y: &'a str,
+ m: &'a str,
+ d: &'a str,
+}
+let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+let after = Date::replace_all(before, "$m/$d/$y");
+assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+```
+
+## Example: compile-time rejection of invalid regular expressions
+
+```rust,compile_fail
+use bird_machine::bird_machine;
+
+#[bird_machine(r"(oops i left this group open")]
+struct Bad;
+```
\ No newline at end of file
diff --git a/bird-machine-macros/Cargo.toml b/bird-machine-macros/Cargo.toml
new file mode 100644
index 0000000..e45569b
--- /dev/null
+++ b/bird-machine-macros/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "bird-machine-macros"
+version = "0.1.0"
+authors = ["boringcactus / Melody Horn <melody@boringcactus.com>"]
+description = "proc macros for bird-machine"
+edition = "2018"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+regex-syntax = "0.6"
+proc-macro2 = "1.0"
+quote = "1"
+syn = "1.0"
diff --git a/bird-machine-macros/src/lib.rs b/bird-machine-macros/src/lib.rs
new file mode 100644
index 0000000..628ee91
--- /dev/null
+++ b/bird-machine-macros/src/lib.rs
@@ -0,0 +1,179 @@
+extern crate proc_macro;
+use proc_macro::TokenStream;
+
+use syn::{parse_macro_input, DeriveInput, LitStr};
+use quote::{quote, ToTokens};
+
+mod nfa;
+
+/// Attribute macro that implements `::bird_machine::Machine` for the annotated type.
+///
+/// `args` must be a single string literal holding the regular expression, and
+/// `input` must parse as a `syn::DeriveInput` (struct, enum or union). The
+/// expansion re-emits the item unchanged, followed by the tokens returned by
+/// `build_machine` and an `impl` block whose methods are currently all
+/// `todo!()` stubs.
+///
+/// The item may declare at most one lifetime parameter, which is reused as the
+/// trait's text lifetime. When it declares none, a fresh `'unrestricted`
+/// lifetime is introduced on the `impl`. More than one lifetime is reported as
+/// a compile error spanned on the item's generics (the item itself is still
+/// emitted so later code referring to it does not produce follow-up errors).
+#[proc_macro_attribute]
+pub fn bird_machine(args: TokenStream, input: TokenStream) -> TokenStream {
+    let input = parse_macro_input!(input as DeriveInput);
+    let input_regex = parse_macro_input!(args as LitStr);
+
+    let input_type_name = &input.ident;
+    let input_lifetimes: Vec<_> = input.generics.lifetimes().collect();
+    let lifetime = match input_lifetimes.as_slice() {
+        [] => quote! { 'unrestricted },
+        [lt] => quote! { #lt },
+        _ => {
+            // Report a proper, spanned diagnostic instead of panicking inside
+            // the macro, which would only show "custom attribute panicked".
+            let error = syn::Error::new_spanned(
+                &input.generics,
+                "multiple lifetime generics, what is this, pls to halp",
+            )
+            .to_compile_error();
+            return quote!(#input #error).into();
+        }
+    };
+
+    let machine = build_machine(&input_regex);
+    // Development aid: dumps the generated machine tokens to stderr.
+    dbg!(&machine);
+
+    let (_, ty_generics, where_clause) = input.generics.split_for_impl();
+
+    // Each trait item is built as its own token stream so it can be both
+    // spliced into the impl below and printed individually for debugging.
+    let impl_decl = quote! {
+        impl<#lifetime> ::bird_machine::Machine<#lifetime> for #input_type_name #ty_generics #where_clause
+    };
+    let original_regex = quote! {
+        const ORIGINAL_REGEX: &'static str = #input_regex;
+    };
+    let captures = quote! {
+        fn captures(text: &#lifetime str) -> Option<Self> {
+            todo!()
+        }
+    };
+    let captures_iter = quote! {
+        type CaptureIterator = ::std::iter::Empty<Self>;
+        fn captures_iter(text: &#lifetime str) -> Self::CaptureIterator {
+            todo!()
+        }
+    };
+    let find = quote! {
+        fn find(text: &#lifetime str) -> Option<::bird_machine::Match<#lifetime>> {
+            todo!()
+        }
+    };
+    let find_at = quote! {
+        fn find_at(text: &#lifetime str, start: usize) -> Option<::bird_machine::Match<#lifetime>> {
+            todo!()
+        }
+    };
+    let find_iter = quote! {
+        type FindIterator = ::std::iter::Empty<::bird_machine::Match<#lifetime>>;
+        fn find_iter(text: &#lifetime str) -> Self::FindIterator {
+            todo!()
+        }
+    };
+    let is_match = quote! {
+        fn is_match(text: &#lifetime str) -> bool {
+            todo!()
+        }
+    };
+    let is_match_at = quote! {
+        fn is_match_at(text: &#lifetime str, start: usize) -> bool {
+            todo!()
+        }
+    };
+    let replace = quote! {
+        fn replace(
+            text: &#lifetime str,
+            rep: impl ::bird_machine::Replacer<#lifetime, Self>,
+        ) -> ::std::borrow::Cow<#lifetime, str> {
+            todo!()
+        }
+    };
+    let replace_all = quote! {
+        fn replace_all(
+            text: &#lifetime str,
+            rep: impl ::bird_machine::Replacer<#lifetime, Self>,
+        ) -> ::std::borrow::Cow<#lifetime, str> {
+            todo!()
+        }
+    };
+    let replacen = quote! {
+        fn replacen(
+            text: &#lifetime str,
+            limit: usize,
+            rep: impl ::bird_machine::Replacer<#lifetime, Self>,
+        ) -> ::std::borrow::Cow<#lifetime, str> {
+            todo!()
+        }
+    };
+    let split = quote! {
+        type SplitIterator = ::std::iter::Empty<&#lifetime str>;
+        fn split(text: &#lifetime str) -> Self::SplitIterator {
+            todo!()
+        }
+    };
+    let splitn = quote! {
+        type SplitNIterator = ::std::iter::Empty<&#lifetime str>;
+        fn splitn(text: &#lifetime str, limit: usize) -> Self::SplitNIterator {
+            todo!()
+        }
+    };
+
+    let tokens = quote! {
+        #input
+
+        #machine
+
+        #impl_decl {
+            #original_regex
+            #captures
+            #captures_iter
+            #find
+            #find_at
+            #find_iter
+            #is_match
+            #is_match_at
+            #replace
+            #replace_all
+            #replacen
+            #split
+            #splitn
+        }
+    };
+
+    // Development aid: prints the generated impl, one trait item per
+    // paragraph, to stderr on every expansion.
+    eprintln!(
+        "{impl_decl} {{\n\n\
+        {original_regex}\n\n\
+        {captures}\n\n\
+        {captures_iter}\n\n\
+        {find}\n\n\
+        {find_at}\n\n\
+        {find_iter}\n\n\
+        {is_match}\n\n\
+        {is_match_at}\n\n\
+        {replace}\n\n\
+        {replace_all}\n\n\
+        {replacen}\n\n\
+        {split}\n\n\
+        {splitn}\n\n\
+        }}",
+        impl_decl = impl_decl,
+        original_regex = original_regex,
+        captures = captures,
+        captures_iter = captures_iter,
+        find = find,
+        find_at = find_at,
+        find_iter = find_iter,
+        is_match = is_match,
+        is_match_at = is_match_at,
+        replace = replace,
+        replace_all = replace_all,
+        replacen = replacen,
+        split = split,
+        splitn = splitn,
+    );
+
+    tokens.into()
+}
+
+/// Parses the regex literal and (eventually) turns it into generated code.
+///
+/// The literal's value is parsed with `regex_syntax`; a parse failure aborts
+/// the macro with a panic naming the offending literal and the parser error.
+/// The parsed IR is dumped with `dbg!`. Machine construction itself is not
+/// written yet, so the function currently always ends in `todo!()`.
+fn build_machine(regex: &LitStr) -> proc_macro2::TokenStream {
+    let pattern = regex.value();
+    let mut parser = regex_syntax::Parser::new();
+    let regex_ir = parser.parse(&pattern).unwrap_or_else(|err| {
+        panic!("error compiling regex {}: {}", regex.to_token_stream(), err)
+    });
+    dbg!(&regex_ir);
+
+    // shout out to all the professors who've taught me how to do this
+    let mut built_nfa = nfa::NFA::default();
+
+    todo!()
+}
diff --git a/bird-machine-macros/src/nfa.rs b/bird-machine-macros/src/nfa.rs
new file mode 100644
index 0000000..b7b3612
--- /dev/null
+++ b/bird-machine-macros/src/nfa.rs
@@ -0,0 +1,15 @@
+use std::collections::{HashMap, HashSet};
+
+/// Work-in-progress nondeterministic finite automaton.
+///
+/// A `Default` value has no states and an empty transition table.
+#[derive(Default)]
+pub struct NFA {
+    /// Number of states handed out so far; also the id the next state gets.
+    state_count: usize,
+    /// Maps an optional input character to a set of state ids.
+    // NOTE(review): presumably `None` stands for an epsilon transition and the
+    // set holds target states; the key carries no source state -- confirm the
+    // intended shape before building on this.
+    transition_table: HashMap<Option<char>, HashSet<usize>>,
+}
+
+impl NFA {
+    /// Allocates a new state and returns its id.
+    ///
+    /// Ids are handed out sequentially, starting at 0 for a default `NFA`.
+    pub fn new_state(&mut self) -> usize {
+        let fresh = self.state_count;
+        self.state_count = fresh + 1;
+        fresh
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..a886aa0
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,105 @@
+//! # bird-machine
+//!
+//! Compile your regular expressions at compile time.
+//!
+//! ## Example: find a date
+//!
+//! ```rust
+//! use bird_machine::{bird_machine, Machine};
+//!
+//! #[bird_machine(r"^\d{4}-\d{2}-\d{2}$")]
+//! struct Date;
+//!
+//! assert!(Date::is_match("2014-01-01"));
+//! ```
+//!
+//! ## Example: iterating over capture groups
+//!
+//! ```rust
+//! use bird_machine::{bird_machine, Machine};
+//!
+//! #[bird_machine(r"(\d{4})-(\d{2})-(\d{2})")]
+//! struct Date<'a>(&'a str, &'a str, &'a str);
+//! let input = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! let match_info = Date::captures_iter(input)
+//! .map(|x: Date| format!("Month: {} Day: {} Year: {}", x.1, x.2, x.0));
+//! let expected = [
+//! "Month: 03 Day: 14 Year: 2012",
+//! "Month: 01 Day: 01 Year: 2013",
+//! "Month: 07 Day: 05 Year: 2014",
+//! ];
+//! for (actual, expected) in match_info.zip(expected) {
+//! assert_eq!(actual, expected);
+//! }
+//! ```
+//!
+//! ## Example: replacement with named capture groups
+//!
+//! ```rust
+//! use bird_machine::{bird_machine, Machine};
+//!
+//! #[bird_machine(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})")]
+//! struct Date<'a> {
+//! y: &'a str,
+//! m: &'a str,
+//! d: &'a str,
+//! }
+//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! let after = Date::replace_all(before, "$m/$d/$y");
+//! assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+//! ```
+//!
+//! ## Example: compile-time rejection of invalid regular expressions
+//!
+//! ```rust,compile_fail
+//! use bird_machine::bird_machine;
+//!
+//! #[bird_machine(r"(oops i left this group open")]
+//! struct Bad;
+//! ```
+use std::borrow::Cow;
+
+pub use regex::Match;
+
+pub use bird_machine_macros::bird_machine;
+
+/// A type with a regular expression attached at compile time.
+///
+/// Implementations are generated by the `bird_machine` attribute macro. Every
+/// operation is an associated function taking the text to search; `'t` is the
+/// lifetime of that text, and the results that borrow (`Match<'t>`, `&'t str`,
+/// `Cow<'t, str>`) are tied to it.
+///
+// NOTE(review): the method names and signatures appear modelled on
+// `regex::Regex`; the per-method notes below assume the same semantics --
+// confirm once the generated bodies are no longer stubs.
+pub trait Machine<'t>: Sized {
+ /// The regular expression source this machine was declared with.
+ const ORIGINAL_REGEX: &'static str;
+
+ /// Builds a `Self` from `text`, or returns `None`
+ /// (presumably from the first match's capture groups).
+ fn captures(text: &'t str) -> Option<Self>;
+
+ // rust smdh why can this not just return impl Iterator
+ /// Iterator type returned by `captures_iter`; yields `Self` values.
+ type CaptureIterator: Iterator<Item = Self>;
+ /// Returns an iterator of `Self` values built from `text`
+ /// (presumably one per successive match).
+ fn captures_iter(text: &'t str) -> Self::CaptureIterator;
+
+ /// Returns a `Match` in `text`, if any (presumably the first one).
+ fn find(text: &'t str) -> Option<Match<'t>>;
+ /// Like `find`, with an extra `start` position
+ /// (presumably the offset in `text` at which the search begins).
+ fn find_at(text: &'t str, start: usize) -> Option<Match<'t>>;
+
+ // once again i am asking why trait methods can't return impl Iterator
+ /// Iterator type returned by `find_iter`; yields `Match<'t>` values.
+ type FindIterator: Iterator<Item = Match<'t>>;
+ /// Returns an iterator over matches in `text`.
+ fn find_iter(text: &'t str) -> Self::FindIterator;
+
+ /// Reports whether `text` matches.
+ fn is_match(text: &'t str) -> bool;
+ /// Like `is_match`, with an extra `start` position
+ /// (presumably the offset in `text` at which the search begins).
+ fn is_match_at(text: &'t str, start: usize) -> bool;
+
+ /// Produces a possibly-borrowed copy of `text` with replacement text from
+ /// `rep` (presumably applied to the first match only).
+ fn replace(text: &'t str, rep: impl Replacer<'t, Self>) -> Cow<'t, str>;
+ /// Like `replace` (presumably applied to every match).
+ fn replace_all(text: &'t str, rep: impl Replacer<'t, Self>) -> Cow<'t, str>;
+ /// Like `replace`, with a `limit` (presumably the maximum number of
+ /// matches replaced).
+ fn replacen(text: &'t str, limit: usize, rep: impl Replacer<'t, Self>) -> Cow<'t, str>;
+
+ /// Iterator type returned by `split`; yields substrings of the text.
+ type SplitIterator: Iterator<Item = &'t str>;
+ /// Returns an iterator over pieces of `text`
+ /// (presumably the substrings between matches).
+ fn split(text: &'t str) -> Self::SplitIterator;
+ /// Iterator type returned by `splitn`; yields substrings of the text.
+ type SplitNIterator: Iterator<Item = &'t str>;
+ /// Like `split`, with a `limit` (presumably the maximum number of pieces).
+ fn splitn(text: &'t str, limit: usize) -> Self::SplitNIterator;
+}
+
+/// Something that can produce replacement text for a match of machine `M`.
+///
+/// Used as the `rep` argument of `Machine::replace`, `Machine::replace_all`
+/// and `Machine::replacen`.
+pub trait Replacer<'t, M: Machine<'t>> {
+ /// Appends the replacement text for `r#match` to the end of `dst`.
+ fn replace_append(&mut self, r#match: &M, dst: &mut String);
+}
+
+/// Lets any `FnMut(&M) -> S` with string-like `S` act as a `Replacer`: the
+/// callable is invoked with the match and its result is appended verbatim.
+impl<'t, M, S, F> Replacer<'t, M> for F
+where
+    M: Machine<'t>,
+    S: AsRef<str>,
+    F: FnMut(&M) -> S,
+{
+    fn replace_append(&mut self, r#match: &M, dst: &mut String) {
+        // Run the callable, then copy the text it produced onto the output.
+        dst.push_str((*self)(r#match).as_ref());
+    }
+}