Skip to content

Commit

Permalink
benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
bnkc committed Jul 19, 2024
1 parent 7387609 commit d02a53a
Show file tree
Hide file tree
Showing 3 changed files with 249 additions and 34 deletions.
189 changes: 189 additions & 0 deletions scripts/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import timeit
import statistics
import random
import string
from emv import EmailValidator
from email_validator import validate_email, EmailNotValidError, EmailUndeliverableError


# Generate random email addresses
def generate_random_email(valid=True):
    """Return a random email address for use as benchmark input.

    The local part is 1-64 characters drawn from ASCII letters, digits and
    ``._%+-``; it never starts or ends with a dot and never contains two
    consecutive dots.

    Args:
        valid: When true, the domain is one of a few fixed host names or a
            bracketed IPv4/IPv6 address literal. When false, the domain is a
            random run of 1-10 letters and digits with no dot in it.

    Returns:
        The address as ``"<local part>@<domain>"``.
    """
    domains = ["example.com", "test.org", "sample.net", "email.co"]
    # Address literals are only well-formed when bracketed, and IPv6 literals
    # additionally carry the "IPv6:" tag (RFC 5321, section 4.1.3). A bare
    # "127.0.0.1" after the "@" is not a domain literal at all.
    ipv4 = ["[127.0.0.1]", "[192.168.0.1]"]
    ipv6 = ["[IPv6:::1]", "[IPv6:2001:db8::1]"]
    local_alphabet = string.ascii_letters + string.digits + "._%+-"

    # Generate a valid local part
    def generate_local_part():
        local_part_length = random.randint(1, 64)
        # Redraw (keeping the same length) until the dot rules are satisfied
        while True:
            candidate = "".join(random.choices(local_alphabet, k=local_part_length))
            has_bad_dots = (
                candidate.startswith(".")
                or candidate.endswith(".")
                or ".." in candidate
            )
            if not has_bad_dots:
                return candidate

    local_part = generate_local_part()

    if valid:
        # Half of the valid addresses use an address literal (split evenly
        # between IPv4 and IPv6); the other half use a plain host name.
        if random.choice([True, False]):
            literals = ipv4 if random.choice([True, False]) else ipv6
            domain = random.choice(literals)
        else:
            domain = random.choice(domains)
    else:
        domain = "".join(
            random.choices(
                string.ascii_letters + string.digits, k=random.randint(1, 10)
            )
        )

    return f"{local_part}@{domain}"


# Fixed sample sets, generated once at import time so that every timed run
# validates exactly the same addresses.
valid_emails = [generate_random_email(True) for _sample in range(10)]
invalid_emails = [generate_random_email(False) for _sample in range(10)]

# The emv validator under test, with domain literals enabled.
levs_validator = EmailValidator(allow_domain_literal=True)


# Define the test functions
def test_levs_validator_valid():
    """Run the emv validator over every address in ``valid_emails``.

    Nothing is caught here: if the validator raises for one of the
    addresses, the exception propagates to the caller.
    """
    for address in valid_emails:
        levs_validator.email(address)


def test_levs_validator_invalid():
    """Run the emv validator over every address in ``invalid_emails``.

    Any exception raised for an address is discarded so that the loop
    always visits the whole sample set.
    """
    for address in invalid_emails:
        try:
            levs_validator.email(address)
        except Exception:
            # A rejection is the expected outcome for this sample set.
            continue


def test_python_validator_valid():
    """Run python-email-validator over every address in ``valid_emails``.

    The call is configured to do the same kind of work as the emv validator
    it is compared against:

    * ``check_deliverability=False`` keeps the measurement to syntax
      validation. The library default performs DNS lookups, which would make
      the timing depend on network latency.
    * ``allow_domain_literal=True`` mirrors how ``levs_validator`` is
      constructed, so bracketed IP-literal domains are handled alike.

    Validation errors are swallowed so the whole sample set is always
    processed.
    """
    for email in valid_emails:
        try:
            validate_email(
                email,
                check_deliverability=False,
                allow_domain_literal=True,
            )
        except (EmailNotValidError, EmailUndeliverableError):
            pass


def test_python_validator_invalid():
    """Run python-email-validator over every address in ``invalid_emails``.

    The call is configured to do the same kind of work as the emv validator
    it is compared against:

    * ``check_deliverability=False`` keeps the measurement to syntax
      validation. The library default performs DNS lookups, which would make
      the timing depend on network latency.
    * ``allow_domain_literal=True`` mirrors how ``levs_validator`` is
      constructed, so both validators run with the same options.

    Validation errors are the expected outcome here and are swallowed so the
    whole sample set is always processed.
    """
    for email in invalid_emails:
        try:
            validate_email(
                email,
                check_deliverability=False,
                allow_domain_literal=True,
            )
        except (EmailNotValidError, EmailUndeliverableError):
            pass


# Function to run the benchmark
def run_benchmark(func, num_iterations, repeat=5):
    """Time ``func`` with ``timeit.repeat`` and summarise the measurements.

    Args:
        func: Zero-argument callable to time.
        num_iterations: Number of calls to ``func`` per timing round.
        repeat: Number of timing rounds. Defaults to 5, the value that was
            previously hard-coded.

    Returns:
        A tuple ``(times, avg_time, stddev_time)``. ``times`` is the list of
        total seconds for each round; ``avg_time`` and ``stddev_time`` are
        the mean and sample standard deviation of those totals, each divided
        by ``num_iterations``. ``stddev_time`` is ``0.0`` when there is only
        one round.

    Raises:
        ValueError: If ``num_iterations`` or ``repeat`` is not positive.
    """
    if num_iterations <= 0:
        raise ValueError("num_iterations must be a positive integer")
    if repeat <= 0:
        raise ValueError("repeat must be a positive integer")

    times = timeit.repeat(func, repeat=repeat, number=num_iterations)
    avg_time = statistics.mean(times) / num_iterations
    # statistics.stdev needs at least two samples; a single round has no spread.
    if len(times) > 1:
        stddev_time = statistics.stdev(times) / num_iterations
    else:
        stddev_time = 0.0
    return times, avg_time, stddev_time


# Benchmarking
if __name__ == "__main__":
    # How many times each test function is called per timing round
    num_iterations = 100

    # Every run_benchmark call returns (times, average, stddev).
    # Order is unchanged: the emv validator first, then python-email-validator.
    levs_valid = run_benchmark(test_levs_validator_valid, num_iterations)
    levs_invalid = run_benchmark(test_levs_validator_invalid, num_iterations)
    python_valid = run_benchmark(test_python_validator_valid, num_iterations)
    python_invalid = run_benchmark(test_python_validator_invalid, num_iterations)

    # (label, python-email-validator average, emv average) per sample set
    averages = (
        ("valid", python_valid[1], levs_valid[1]),
        ("invalid", python_invalid[1], levs_invalid[1]),
    )

    # All derived figures are computed before anything is printed.
    # Percentage of the python-email-validator time that emv saves
    percentage_differences = [
        (label, ((baseline - candidate) / baseline) * 100)
        for label, baseline, candidate in averages
    ]
    # How many times faster emv is than python-email-validator
    speedup_factors = [
        (label, baseline / candidate) for label, baseline, candidate in averages
    ]

    # Print the results
    print(f"Number of iterations: {num_iterations}")
    print()

    print("Benchmarking Results (in seconds):")
    print("===================================")
    sections = (
        ("Levs EmailValidator (valid email):", levs_valid),
        ("Levs EmailValidator (invalid email):", levs_invalid),
        ("Python EmailValidator (valid email):", python_valid),
        ("Python EmailValidator (invalid email):", python_invalid),
    )
    for heading, (round_times, average, stddev) in sections:
        print(heading)
        print(f" Average time: {average:.10f} ± {stddev:.10f} (stddev)")
        print(f" Times: {round_times}")
        print()

    print("Performance Comparison:")
    print("=======================")
    for label, percentage in percentage_differences:
        print(
            f"Levs EmailValidator is {percentage:.2f}% faster than Python EmailValidator for {label} emails."
        )
    for label, factor in speedup_factors:
        print(
            f"Levs EmailValidator has a speedup factor of {factor:.2f} times for {label} emails."
        )
65 changes: 35 additions & 30 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,39 @@ pub struct EmailParts {
pub local_part: String,
pub domain: String,
}

#[derive(Debug, Default)]
#[pyclass]
pub struct EmailValidator {
allow_smtputf8: bool,
allow_empty_local: bool,
allow_quoted_local: bool,
allow_domain_literal: bool,
allow_display_name: bool,
check_deliverability: bool,
globally_deliverable: bool,
timeout: Option<u64>,
// allow_display_name: bool,
// check_deliverability: bool,
// globally_deliverable: bool,
// timeout: Option<u64>,
}

#[pymethods]
impl EmailValidator {
#[new]
fn new() -> Self {
Self {
allow_smtputf8: false,
allow_empty_local: false,
allow_quoted_local: false,
allow_domain_literal: false,
allow_display_name: false,
check_deliverability: false,
globally_deliverable: false,
timeout: None,
#[pyo3(signature = (
allow_smtputf8 = false,
allow_empty_local = false,
allow_quoted_local = false,
allow_domain_literal = false
))]
pub fn new(
allow_smtputf8: bool,
allow_empty_local: bool,
allow_quoted_local: bool,
allow_domain_literal: bool,
) -> Self {
EmailValidator {
allow_smtputf8,
allow_empty_local,
allow_quoted_local,
allow_domain_literal,
}
}

Expand All @@ -101,16 +108,14 @@ impl EmailValidator {
// Validate domain
self.domain(&domain)?;

let validated_email = ValidatedEmail {
// Validate length
validate_length(&local_part, &domain)?;

Ok(ValidatedEmail {
local_part,
domain,
is_valid: true,
};

// Validate length
validate_length(&validated_email)?;

Ok(validated_email)
})
}

pub fn local_part(&self, local_part: &str) -> PyResult<()> {
Expand Down Expand Up @@ -351,9 +356,9 @@ fn split_email(email: &str) -> Result<EmailParts, PyErr> {
})
}

fn validate_length(email: &ValidatedEmail) -> Result<(), PyErr> {
fn validate_length(local_part: &str, domain: &str) -> Result<(), PyErr> {
// Validate email length
if email.local_part.len() + email.domain.len() + 1 > MAX_EMAIL_ADDRESS_LENGTH {
if local_part.len() + domain.len() + 1 > MAX_EMAIL_ADDRESS_LENGTH {
return Err(LengthError::new_err("The email is too long".to_string()));
}
Ok(())
Expand Down Expand Up @@ -420,7 +425,7 @@ mod tests {

#[test]
fn test_validate_email() {
let validate = EmailValidator::new();
let validate = EmailValidator::default();

// Valid email addresses
assert!(validate.email("[email protected]").is_ok());
Expand All @@ -440,7 +445,7 @@ mod tests {

#[test]
fn test_validate_domain() {
let mut validate = EmailValidator::new();
let mut validate = EmailValidator::default();
assert!(validate.domain("domain.com").is_ok());
assert!(validate.domain("invali*d.com").is_err()); // Invalid character
validate.allow_domain_literal = true;
Expand All @@ -456,7 +461,7 @@ mod tests {

#[test]
fn test_validate_local_part() {
let validate = EmailValidator::new();
let validate = EmailValidator::default();

assert!(validate.local_part("example").is_ok());
assert!(validate.local_part("user.name").is_ok());
Expand Down Expand Up @@ -495,7 +500,7 @@ mod tests {
// Valid internationalized local parts
let validate_with_smtputf8 = EmailValidator {
allow_smtputf8: true,
..EmailValidator::new()
..EmailValidator::default()
};

assert!(validate_with_smtputf8.local_part("用户").is_ok());
Expand All @@ -507,7 +512,7 @@ mod tests {
// Valid internationalized local parts
let validate_no_smtputf8 = EmailValidator {
allow_smtputf8: false,
..EmailValidator::new()
..EmailValidator::default()
};

assert!(validate_no_smtputf8.local_part("üsername").is_err());
Expand All @@ -520,7 +525,7 @@ mod tests {
// Valid quoted local parts
let validate_with_quoted = EmailValidator {
allow_quoted_local: true,
..EmailValidator::new()
..EmailValidator::default()
};

assert!(validate_with_quoted.local_part("\"user name\"").is_ok());
Expand Down
29 changes: 25 additions & 4 deletions tests/test_emv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,29 @@
from emv import EmailValidator


def test_sum_as_string():
validator = EmailValidator()
assert validator.email("[email protected]")
def test_validate_email():
validate = EmailValidator(allow_domain_literal=True)
assert validate.email("[email protected]")
with pytest.raises(Exception):
validator.email("plainaddress")
validate.email("plainaddress")


# def test_domain_literal() -> None:
# validate = EmailValidator(allow_domain_literal=True)

# Check parsing IPv4 addresses.
# validated = validate.email("me@[127.0.0.1]")
# assert validated.domain == "[127.0.0.1]"
# assert repr(validated.domain_address) == "IPv4Address('127.0.0.1')"

# # Check parsing IPv6 addresses.
# validated = validate_email("me@[IPv6:::1]", allow_domain_literal=True)
# assert validated.domain == "[IPv6:::1]"
# assert repr(validated.domain_address) == "IPv6Address('::1')"

# # Check that IPv6 addresses are normalized.
# validated = validate_email(
# "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", allow_domain_literal=True
# )
# assert validated.domain == "[IPv6:::1]"
# assert repr(validated.domain_address) == "IPv6Address('::1')"

0 comments on commit d02a53a

Please sign in to comment.