Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Engine: Regex: Avoid compiling and storing the same regex multiple times #98

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions engine/src/rhs_types/regex/imp_real.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,66 @@
use crate::{FilterParser, RegexFormat};
use std::borrow::Borrow;
use std::collections::HashSet;
use std::sync::{Arc, Mutex, OnceLock};

pub use regex::Error;

/// Wrapper around [`regex::bytes::Regex`]
#[derive(Clone)]
pub struct Regex {
compiled_regex: regex::bytes::Regex,
// Reference-counted handle to the compiled regex: cloning a `Regex` clones
// the `Arc`, so every clone points at the same compiled program.
compiled_regex: Arc<regex::bytes::Regex>,
// The `RegexFormat` value that was passed to `Regex::new` for this pattern.
format: RegexFormat,
}

/// Returns the process-wide pool of already-compiled regexes.
///
/// The pool lives in a function-local static and is created empty on the
/// first call; every later call hands back a reference to the same mutex.
fn get_regex_pool() -> &'static Mutex<HashSet<Regex>> {
    static POOL: OnceLock<Mutex<HashSet<Regex>>> = OnceLock::new();
    // `Mutex::default` builds a mutex around an empty `HashSet`.
    POOL.get_or_init(Mutex::default)
}

/// Removes this regex's entry from the shared pool once the handle being
/// dropped is the last one besides the pooled copy, so the pool does not keep
/// compiled regexes alive after their last user is gone.
impl Drop for Regex {
    fn drop(&mut self) {
        // A strong count of exactly 2 (and no weak references) is treated as
        // "only `self` and the pooled clone remain". Any other count means
        // there is nothing to evict from here.
        let cached_regex = if Arc::strong_count(&self.compiled_regex) == 2
            && Arc::weak_count(&self.compiled_regex) == 0
        {
            // Never panic inside `drop`: if another thread panicked while
            // holding the pool lock, recover the guard from the poison error
            // instead of unwrapping. Unwrapping here would panic on every
            // later drop and abort the process when already unwinding.
            let mut pool = get_regex_pool()
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            pool.take(self.as_str())
        } else {
            None
        };

        // The pool guard went out of scope at the end of the branch above.
        // Dropping the evicted entry runs this same `drop` again, which may
        // try to take the pool lock, so it must only happen once the lock has
        // been released.
        drop(cached_regex);
    }
}

impl Regex {
/// Compiles a regular expression.
pub fn new(
pattern: &str,
format: RegexFormat,
parser: &FilterParser<'_>,
) -> Result<Self, Error> {
::regex::bytes::RegexBuilder::new(pattern)
let mut pool = get_regex_pool().lock().unwrap();
if let Some(regex) = pool.get(pattern) {
return Ok(regex.clone());
}

let compiled_regex = ::regex::bytes::RegexBuilder::new(pattern)
.unicode(false)
.size_limit(parser.regex_compiled_size_limit)
.dfa_size_limit(parser.regex_dfa_size_limit)
.build()
.map(|r| Regex {
compiled_regex: r,
format,
})
.build()?;

let regex = Self {
compiled_regex: Arc::from(compiled_regex),
format,
};

pool.insert(regex.clone());
Ok(regex)
}

/// Returns true if and only if the regex matches the string given.
Expand All @@ -43,9 +79,9 @@ impl Regex {
}
}

impl From<Regex> for regex::bytes::Regex {
fn from(regex: Regex) -> Self {
regex.compiled_regex
impl Borrow<str> for Regex {
fn borrow(&self) -> &str {
self.compiled_regex.as_str()
}
}

Expand Down