diff --git a/scripts/benchmark.py b/scripts/benchmark.py
new file mode 100644
index 0000000..56412f8
--- /dev/null
+++ b/scripts/benchmark.py
@@ -0,0 +1,201 @@
+"""Benchmark emv's EmailValidator against python-email-validator.
+
+Both validators run on the same seeded, randomly generated addresses with
+domain literals enabled and DNS deliverability checks disabled, so the
+timings compare syntax validation only.
+"""
+
+import timeit
+import statistics
+import random
+import string
+from emv import EmailValidator
+from email_validator import validate_email, EmailNotValidError, EmailUndeliverableError
+
+
+# Generate random email addresses
+def generate_random_email(valid=True):
+    """Return a random address for the benchmark.
+
+    Args:
+        valid: If true, the domain is one of a few fixed hostnames or a
+            bracketed IPv4/IPv6 address literal. If false, the domain is
+            1-10 random alphanumeric characters with no dot.
+
+    Returns:
+        The address as a "local_part@domain" string.
+    """
+    domains = ["example.com", "test.org", "sample.net", "email.co"]
+    # Address literals are written in brackets, and IPv6 ones carry an
+    # "IPv6:" tag (RFC 5321, section 4.1.3)
+    ipv4 = ["[127.0.0.1]", "[192.168.0.1]"]
+    ipv6 = ["[IPv6:::1]", "[IPv6:2001:db8::1]"]
+    local_part_chars = string.ascii_letters + string.digits + "._%+-"
+
+    # Generate a valid local part
+    def generate_local_part():
+        local_part_length = random.randint(1, 64)
+        # Redraw until the local part does not start or end with a dot, or
+        # contain consecutive dots
+        while True:
+            local_part = "".join(
+                random.choices(local_part_chars, k=local_part_length)
+            )
+            if not (
+                local_part.startswith(".")
+                or local_part.endswith(".")
+                or ".." in local_part
+            ):
+                return local_part
+
+    local_part = generate_local_part()
+
+    if valid:
+        domain = random.choice(domains)
+        if random.choice([True, False]):
+            domain = (
+                random.choice(ipv4)
+                if random.choice([True, False])
+                else random.choice(ipv6)
+            )
+    else:
+        domain = "".join(
+            random.choices(
+                string.ascii_letters + string.digits, k=random.randint(1, 10)
+            )
+        )
+
+    return f"{local_part}@{domain}"
+
+
+# Seed the generator so every run benchmarks the same addresses
+random.seed(0)
+
+# Setup the email addresses to be validated
+valid_emails = [generate_random_email(valid=True) for _ in range(10)]
+invalid_emails = [generate_random_email(valid=False) for _ in range(10)]
+
+# Initialize your EmailValidator
+levs_validator = EmailValidator(allow_domain_literal=True)
+
+
+# Define the test functions
+def test_levs_validator_valid():
+    """Validate every address in valid_emails with emv."""
+    for email in valid_emails:
+        levs_validator.email(email)
+
+
+def test_levs_validator_invalid():
+    """Validate every address in invalid_emails with emv, ignoring errors."""
+    for email in invalid_emails:
+        try:
+            levs_validator.email(email)
+        except Exception:
+            # Rejections are expected here; only the elapsed time matters
+            pass
+
+
+def test_python_validator_valid():
+    """Validate every address in valid_emails with email_validator."""
+    for email in valid_emails:
+        try:
+            # No DNS lookups, and the same literal support as levs_validator
+            validate_email(
+                email, check_deliverability=False, allow_domain_literal=True
+            )
+        except (EmailNotValidError, EmailUndeliverableError):
+            pass
+
+
+def test_python_validator_invalid():
+    """Validate every address in invalid_emails with email_validator."""
+    for email in invalid_emails:
+        try:
+            validate_email(
+                email, check_deliverability=False, allow_domain_literal=True
+            )
+        except (EmailNotValidError, EmailUndeliverableError):
+            pass
+
+
+# Function to run the benchmark
+def run_benchmark(func, num_iterations):
+    """Time func and return (times, avg_time, stddev_time).
+
+    times holds the total seconds of each of 5 runs of num_iterations
+    calls; avg_time and stddev_time are their mean and sample standard
+    deviation divided by num_iterations.
+    """
+    times = timeit.repeat(func, repeat=5, number=num_iterations)
+    avg_time = statistics.mean(times) / num_iterations
+    stddev_time = statistics.stdev(times) / num_iterations
+    return times, avg_time, stddev_time
+
+
+# Print one block of the results section
+def _print_result(label, times, avg_time, stddev_time):
+    """Print the label, average ± stddev and raw times of one benchmark."""
+    print(f"{label}:")
+    print(f" Average time: {avg_time:.10f} ± {stddev_time:.10f} (stddev)")
+    print(f" Times: {times}")
+    print()
+
+
+# Benchmarking
+if __name__ == "__main__":
+    num_iterations = 100  # Number of iterations for benchmarking
+
+    # Benchmark your EmailValidator
+    levs_valid = run_benchmark(test_levs_validator_valid, num_iterations)
+    levs_invalid = run_benchmark(test_levs_validator_invalid, num_iterations)
+
+    # Benchmark python-email-validator
+    python_valid = run_benchmark(test_python_validator_valid, num_iterations)
+    python_invalid = run_benchmark(test_python_validator_invalid, num_iterations)
+
+    # Element 1 of each result is the average time per call
+    avg_levs_validator_valid = levs_valid[1]
+    avg_levs_validator_invalid = levs_invalid[1]
+    avg_python_validator_valid = python_valid[1]
+    avg_python_validator_invalid = python_invalid[1]
+
+    # Calculate percentage differences
+    valid_percentage_difference = (
+        (avg_python_validator_valid - avg_levs_validator_valid)
+        / avg_python_validator_valid
+    ) * 100
+    invalid_percentage_difference = (
+        (avg_python_validator_invalid - avg_levs_validator_invalid)
+        / avg_python_validator_invalid
+    ) * 100
+
+    # Calculate speedup factors
+    valid_speedup_factor = avg_python_validator_valid / avg_levs_validator_valid
+    invalid_speedup_factor = avg_python_validator_invalid / avg_levs_validator_invalid
+
+    # Print the results
+    print(f"Number of iterations: {num_iterations}")
+    print()
+
+    print("Benchmarking Results (in seconds):")
+    print("===================================")
+    _print_result("Levs EmailValidator (valid email)", *levs_valid)
+    _print_result("Levs EmailValidator (invalid email)", *levs_invalid)
+    _print_result("Python EmailValidator (valid email)", *python_valid)
+    _print_result("Python EmailValidator (invalid email)", *python_invalid)
+
+    print("Performance Comparison:")
+    print("=======================")
+    print(
+        f"Levs EmailValidator is {valid_percentage_difference:.2f}% faster than Python EmailValidator for valid emails."
+    )
+    print(
+        f"Levs EmailValidator is {invalid_percentage_difference:.2f}% faster than Python EmailValidator for invalid emails."
+    )
+    print(
+        f"Levs EmailValidator has a speedup factor of {valid_speedup_factor:.2f} times for valid emails."
+    )
+    print(
+        f"Levs EmailValidator has a speedup factor of {invalid_speedup_factor:.2f} times for invalid emails."
+    )
diff --git a/src/lib.rs b/src/lib.rs
index 3578f91..dadfa97 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -60,32 +60,39 @@ pub struct EmailParts {
     pub local_part: String,
     pub domain: String,
 }
-
+#[derive(Debug, Default)]
 #[pyclass]
 pub struct EmailValidator {
     allow_smtputf8: bool,
     allow_empty_local: bool,
     allow_quoted_local: bool,
     allow_domain_literal: bool,
-    allow_display_name: bool,
-    check_deliverability: bool,
-    globally_deliverable: bool,
-    timeout: Option,
+    // allow_display_name: bool,
+    // check_deliverability: bool,
+    // globally_deliverable: bool,
+    // timeout: Option,
 }
 
 #[pymethods]
 impl EmailValidator {
     #[new]
-    fn new() -> Self {
-        Self {
-            allow_smtputf8: false,
-            allow_empty_local: false,
-            allow_quoted_local: false,
-            allow_domain_literal: false,
-            allow_display_name: false,
-            check_deliverability: false,
-            globally_deliverable: false,
-            timeout: None,
+    #[pyo3(signature = (
+        allow_smtputf8 = false,
+        allow_empty_local = false,
+        allow_quoted_local = false,
+        allow_domain_literal = false
+    ))]
+    pub fn new(
+        allow_smtputf8: bool,
+        allow_empty_local: bool,
+        allow_quoted_local: bool,
+        allow_domain_literal: bool,
+    ) -> Self {
+        EmailValidator {
+            allow_smtputf8,
+            allow_empty_local,
+            allow_quoted_local,
+            allow_domain_literal,
         }
     }
 
@@ -101,16 +108,14 @@ impl EmailValidator {
         // Validate domain
         self.domain(&domain)?;
 
-        let validated_email = ValidatedEmail {
+        // Validate length
+        validate_length(&local_part, &domain)?;
+
+        Ok(ValidatedEmail {
             local_part,
             domain,
             is_valid: true,
-        };
-
-        // Validate length
-        validate_length(&validated_email)?;
-
-        Ok(validated_email)
+        })
     }
 
     pub fn local_part(&self, local_part: &str) -> PyResult<()> {
@@ -351,9 +356,9 @@ fn split_email(email: &str) -> Result {
     })
 }
 
-fn validate_length(email: &ValidatedEmail) -> Result<(), PyErr> {
+fn validate_length(local_part: &str, domain: &str) -> Result<(), PyErr> {
     // Validate email length
-    if email.local_part.len() + email.domain.len() + 1 > MAX_EMAIL_ADDRESS_LENGTH {
+    if local_part.len() + domain.len() + 1 > MAX_EMAIL_ADDRESS_LENGTH {
         return Err(LengthError::new_err("The email is too long".to_string()));
     }
     Ok(())
@@ -420,7 +425,7 @@ mod tests {
 
     #[test]
     fn test_validate_email() {
-        let validate = EmailValidator::new();
+        let validate = EmailValidator::default();
 
         // Valid email addresses
         assert!(validate.email("example@domain.com").is_ok());
@@ -440,7 +445,7 @@ mod tests {
 
     #[test]
     fn test_validate_domain() {
-        let mut validate = EmailValidator::new();
+        let mut validate = EmailValidator::default();
         assert!(validate.domain("domain.com").is_ok());
         assert!(validate.domain("invali*d.com").is_err()); // Invalid character
         validate.allow_domain_literal = true;
@@ -456,7 +461,7 @@ mod tests {
 
     #[test]
     fn test_validate_local_part() {
-        let validate = EmailValidator::new();
+        let validate = EmailValidator::default();
 
         assert!(validate.local_part("example").is_ok());
         assert!(validate.local_part("user.name").is_ok());
@@ -495,7 +500,7 @@ mod tests {
         // Valid internationalized local parts
         let validate_with_smtputf8 = EmailValidator {
             allow_smtputf8: true,
-            ..EmailValidator::new()
+            ..EmailValidator::default()
         };
 
         assert!(validate_with_smtputf8.local_part("用户").is_ok());
@@ -507,7 +512,7 @@ mod tests {
         // Valid internationalized local parts
         let validate_no_smtputf8 = EmailValidator {
             allow_smtputf8: false,
-            ..EmailValidator::new()
+            ..EmailValidator::default()
         };
 
         assert!(validate_no_smtputf8.local_part("üsername").is_err());
@@ -520,7 +525,7 @@ mod tests {
         // Valid quoted local parts
         let validate_with_quoted = EmailValidator {
             allow_quoted_local: true,
-            ..EmailValidator::new()
+            ..EmailValidator::default()
         };
 
         assert!(validate_with_quoted.local_part("\"user name\"").is_ok());
diff --git a/tests/test_emv.py b/tests/test_emv.py
index cc31efe..bb00cbd 100644
--- a/tests/test_emv.py
+++ b/tests/test_emv.py
@@ -2,8 +2,29 @@ from emv import EmailValidator
 
 
 
-def test_sum_as_string():
-    validator = EmailValidator()
-    assert validator.email("example@domain.com")
+def test_validate_email():
+    validate = EmailValidator(allow_domain_literal=True)
+    assert validate.email("example@domain.com")
     with pytest.raises(Exception):
-        validator.email("plainaddress")
+        validate.email("plainaddress")
+
+
+# def test_domain_literal() -> None:
+#     validate = EmailValidator(allow_domain_literal=True)
+
+# Check parsing IPv4 addresses.
+#     validated = validate.email("me@[127.0.0.1]")
+#     assert validated.domain == "[127.0.0.1]"
+#     assert repr(validated.domain_address) == "IPv4Address('127.0.0.1')"
+
+#     # Check parsing IPv6 addresses.
+#     validated = validate_email("me@[IPv6:::1]", allow_domain_literal=True)
+#     assert validated.domain == "[IPv6:::1]"
+#     assert repr(validated.domain_address) == "IPv6Address('::1')"
+
+#     # Check that IPv6 addresses are normalized.
+#     validated = validate_email(
+#         "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", allow_domain_literal=True
+#     )
+#     assert validated.domain == "[IPv6:::1]"
+#     assert repr(validated.domain_address) == "IPv6Address('::1')"