| // Copyright 2014 The Rust Project Developers. See the COPYRIGHT |
| // file at the top-level directory of this distribution and at |
| // http://rust-lang.org/COPYRIGHT. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| // Enable the benchmarking harness. |
| #![feature(test)] |
| |
| #[macro_use] |
| extern crate lazy_static; |
| #[cfg(not(any(feature = "re-rust", feature = "re-rust-bytes")))] |
| extern crate libc; |
| #[cfg(feature = "re-pcre1")] |
| extern crate libpcre_sys; |
| #[cfg(feature = "re-onig")] |
| extern crate onig; |
| #[cfg(any( |
| feature = "re-rust", |
| feature = "re-rust-bytes", |
| ))] |
| extern crate regex; |
| #[cfg(feature = "re-rust")] |
| extern crate regex_syntax; |
| extern crate test; |
| |
| #[cfg(feature = "re-onig")] |
| pub use ffi::onig::Regex; |
| #[cfg(feature = "re-pcre1")] |
| pub use ffi::pcre1::Regex; |
| #[cfg(feature = "re-pcre2")] |
| pub use ffi::pcre2::Regex; |
| #[cfg(any( |
| feature = "re-stdcpp", |
| feature = "re-boost", |
| ))] |
| pub use ffi::stdcpp::Regex; |
| #[cfg(feature = "re-re2")] |
| pub use ffi::re2::Regex; |
| #[cfg(feature = "re-dphobos")] |
| pub use ffi::d_phobos::Regex; |
| #[cfg(feature = "re-rust")] |
| pub use regex::{Regex, RegexSet}; |
| #[cfg(feature = "re-rust-bytes")] |
| pub use regex::bytes::{Regex, RegexSet}; |
| #[cfg(feature = "re-tcl")] |
| pub use ffi::tcl::Regex; |
| |
| // Usage: regex!(pattern) |
| // |
| // Builds a ::Regex from a borrowed string. |
| // |
| // Due to macro scoping rules, this definition only applies for the modules |
| // defined below. Effectively, it allows us to use the same tests for both |
| // native and dynamic regexes. |
| macro_rules! regex { |
| ($re:expr) => { ::Regex::new(&$re.to_owned()).unwrap() } |
| } |
| |
| // Usage: text!(haystack) |
| // |
| // Builds a ::Text from an owned string. |
| // |
| // This macro is called on every input searched in every benchmark. It is |
| // called exactly once per benchmark and its time is not included in the |
| // benchmark timing. |
| // |
| // The text given to the macro is always a String, which is guaranteed to be |
| // valid UTF-8. |
| // |
| // The return type should be an owned value that can deref to whatever the |
| // regex accepts in its `is_match` and `find_iter` methods. |
| #[cfg(feature = "re-tcl")] |
| macro_rules! text { |
| ($text:expr) => {{ |
| use ffi::tcl::Text; |
| Text::new($text) |
| }} |
| } |
| |
| #[cfg(feature = "re-rust-bytes")] |
| macro_rules! text { |
| ($text:expr) => {{ |
| let text: String = $text; |
| text.into_bytes() |
| }} |
| } |
| |
| #[cfg(any( |
| feature = "re-onig", |
| feature = "re-pcre1", |
| feature = "re-pcre2", |
| feature = "re-stdcpp", |
| feature = "re-boost", |
| feature = "re-re2", |
| feature = "re-dphobos", |
| feature = "re-rust", |
| ))] |
| macro_rules! text { |
| ($text:expr) => { $text } |
| } |
| |
| // The type of the value yielded by the `text!` macro defined above. |
| #[cfg(feature = "re-tcl")] |
| type Text = ffi::tcl::Text; |
| #[cfg(feature = "re-rust-bytes")] |
| type Text = Vec<u8>; |
| #[cfg(any( |
| feature = "re-onig", |
| feature = "re-pcre1", |
| feature = "re-pcre2", |
| feature = "re-stdcpp", |
| feature = "re-boost", |
| feature = "re-re2", |
| feature = "re-dphobos", |
| feature = "re-rust", |
| ))] |
| type Text = String; |
| |
| // Macros for writing benchmarks easily. We provide macros for benchmarking |
| // matches, non-matches and for finding all successive non-overlapping matches |
| // in a string (including a check that the count is correct). |
| |
| // USAGE: bench_match!(name, pattern, haystack) |
| // |
| // This benchmarks how fast a regular expression can report whether it matches |
| // a particular haystack. If the regex doesn't match, then the benchmark fails. |
| // Regexes are compiled exactly once. |
| // |
| // name is an identifier for the benchmark. |
| // |
| // pattern should be a &'static str representing the regular expression. |
| // |
| // haystack should be a String. |
| macro_rules! bench_match { |
| ($name:ident, $pattern:expr, $haystack:expr) => { |
| bench_is_match!($name, true, regex!($pattern), $haystack); |
| } |
| } |
| |
| // USAGE: bench_not_match!(name, pattern, haystack) |
| // |
| // This benchmarks how fast a regular expression can report whether it matches |
| // a particular haystack. If the regex matches, then the benchmark fails. |
| // Regexes are compiled exactly once. |
| // |
| // name is an identifier for the benchmark. |
| // |
| // pattern should be a &'static str representing the regular expression. |
| // |
| // haystack should be a String. |
| macro_rules! bench_not_match { |
| ($name:ident, $pattern:expr, $haystack:expr) => { |
| bench_is_match!($name, false, regex!($pattern), $haystack); |
| } |
| } |
| |
| // USAGE: bench_is_match!(name, is_match, regex, haystack) |
| // |
| // This benchmarks how fast a regular expression can report whether it matches |
| // a particular haystack. If the regex match status doesn't match is_match, |
| // then the benchmark fails. Regexes are compiled exactly once. |
| // |
| // name is an identifier for the benchmark. |
| // |
| // is_match reports whether the regex is expected to match the haystack or not. |
| // |
| // regex should be a ::Regex. |
| // |
| // haystack should be a String. |
| macro_rules! bench_is_match { |
| ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { |
| #[bench] |
| fn $name(b: &mut Bencher) { |
| use std::sync::Mutex; |
| |
| // Why do we use lazy_static here? It seems sensible to just |
| // compile a regex outside of the b.iter() call and be done with |
| // it. However, it seems like Rust's benchmark harness actually |
| // calls the entire benchmark function multiple times. This doesn't |
| // factor into the timings reported in the benchmarks, but it does |
| // make the benchmarks take substantially longer to run because |
| // they're spending a lot of time recompiling regexes. |
| lazy_static! { |
| static ref RE: Mutex<Regex> = Mutex::new($re); |
| static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); |
| }; |
| let re = RE.lock().unwrap(); |
| let text = TEXT.lock().unwrap(); |
| b.bytes = text.len() as u64; |
| b.iter(|| { |
| if re.is_match(&text) != $is_match { |
| if $is_match { |
| panic!("expected match, got not match"); |
| } else { |
| panic!("expected no match, got match"); |
| } |
| } |
| }); |
| } |
| } |
| } |
| |
| // USAGE: bench_find!(name, pattern, count, haystack) |
| // |
| // This benchmarks how fast a regular expression can count all successive |
| // non-overlapping matches in haystack. If the count reported does not match |
| // the count given, then the benchmark fails. |
| // |
| // name is an identifier for the benchmark. |
| // |
| // pattern should be a &'static str representing the regular expression. |
| // |
| // haystack should be a String. |
| macro_rules! bench_find { |
| ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { |
| #[bench] |
| fn $name(b: &mut Bencher) { |
| use std::sync::Mutex; |
| |
| lazy_static! { |
| static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern)); |
| static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); |
| }; |
| let re = RE.lock().unwrap(); |
| let text = TEXT.lock().unwrap(); |
| b.bytes = text.len() as u64; |
| b.iter(|| { |
| let count = re.find_iter(&text).count(); |
| assert_eq!($count, count) |
| }); |
| } |
| } |
| } |
| |
| // USAGE: bench_captures!(name, pattern, groups, haystack); |
| // |
| // CONTRACT: |
| // Given: |
| // ident, the desired benchmarking function name |
| // pattern : ::Regex, the regular expression to be executed |
| // groups : usize, the number of capture groups |
| // haystack : String, the string to search |
| // bench_captures will benchmark how fast re.captures() produces |
| // the capture groups in question. |
| macro_rules! bench_captures { |
| ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { |
| |
| #[cfg(feature = "re-rust")] |
| #[bench] |
| fn $name(b: &mut Bencher) { |
| use std::sync::Mutex; |
| |
| lazy_static! { |
| static ref RE: Mutex<Regex> = Mutex::new($pattern); |
| static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); |
| }; |
| let re = RE.lock().unwrap(); |
| let text = TEXT.lock().unwrap(); |
| b.bytes = text.len() as u64; |
| b.iter(|| { |
| match re.captures(&text) { |
| None => assert!(false, "no captures"), |
| Some(caps) => assert_eq!($count + 1, caps.len()), |
| } |
| }); |
| } |
| } |
| } |
| |
| // USAGE: bench_is_match_set!(name, is_match, regex, haystack) |
| macro_rules! bench_is_match_set { |
| ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { |
| #[bench] |
| fn $name(b: &mut Bencher) { |
| use std::sync::Mutex; |
| lazy_static! { |
| static ref RE: Mutex<RegexSet> = Mutex::new($re); |
| static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); |
| }; |
| let re = RE.lock().unwrap(); |
| let text = TEXT.lock().unwrap(); |
| b.bytes = text.len() as u64; |
| b.iter(|| { |
| if re.is_match(&text) != $is_match { |
| if $is_match { |
| panic!("expected match, got not match"); |
| } else { |
| panic!("expected no match, got match"); |
| } |
| } |
| }); |
| } |
| } |
| } |
| |
| // USAGE: bench_matches_set!(name, is_match, regex, haystack) |
| macro_rules! bench_matches_set { |
| ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { |
| #[bench] |
| fn $name(b: &mut Bencher) { |
| use std::sync::Mutex; |
| lazy_static! { |
| static ref RE: Mutex<RegexSet> = Mutex::new($re); |
| static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); |
| }; |
| let re = RE.lock().unwrap(); |
| let text = TEXT.lock().unwrap(); |
| b.bytes = text.len() as u64; |
| b.iter(|| { |
| if re.matches(&text).matched_any() != $is_match { |
| if $is_match { |
| panic!("expected match, got not match"); |
| } else { |
| panic!("expected no match, got match"); |
| } |
| } |
| }); |
| } |
| } |
| } |
| |
| mod ffi; |
| mod misc; |
| mod regexdna; |
| mod sherlock; |