From 118ba7c5f425fbfe5787189dc03f7ba2069eee91 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:25:54 +0000 Subject: [PATCH 01/11] Normalize equivalent domain names --- CHANGELOG.md | 2 ++ .../com/maxmind/minfraud/request/Email.java | 22 ++++++++++++++++--- .../maxmind/minfraud/request/EmailTest.java | 17 +++++++++++++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2011003f..2692293d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ CHANGELOG ------------------ * Added `PXP_FINANCIAL` and `TRUSTPAY` to the `Payment.Processor` enum. +* Equivalent domain names are now normalized when `hashAddress` is used. + For example, `googlemail.com` will become `gmail.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index b2abd7bc..2c65d56a 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -21,9 +21,10 @@ public final class Email extends AbstractModel { private final boolean hashAddress; private final String domain; private static final Map typoDomains; + private static final Map equivalentDomains; static { - HashMap m = new HashMap<>() {{ + HashMap typoDomainsMap = new HashMap<>() {{ // gmail.com put("35gmai.com", "gmail.com"); put("636gmail.com", "gmail.com"); @@ -35,8 +36,19 @@ public final class Email extends AbstractModel { // outlook.com put("putlook.com", "outlook.com"); }}; - - typoDomains = Collections.unmodifiableMap(m); + typoDomains = Collections.unmodifiableMap(typoDomainsMap); + + HashMap equivalentDomainsMap = new HashMap<>() {{ + put("googlemail.com", "gmail.com"); + put("pm.me", "protonmail.com"); + put("proton.me", "protonmail.com"); + put("yandex.by", "yandex.ru"); + put("yandex.com", "yandex.ru"); + put("yandex.kz", "yandex.ru"); + put("yandex.ua", "yandex.ru"); + put("ya.ru", "yandex.ru"); + }}; + equivalentDomains = Collections.unmodifiableMap(equivalentDomainsMap); } private Email(Email.Builder builder) { @@ -204,6 +216,10 @@ private String cleanDomain(String domain) { domain = typoDomains.get(domain); } + if (equivalentDomains.containsKey(domain)) { + domain = equivalentDomains.get(domain); + } + return domain; } diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index cf3543c3..e481631f 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -5,6 +5,10 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.maxmind.minfraud.request.Email.Builder; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; @@ -89,7 +93,7 @@ public void testGetAddressWithoutSettingIt() { } @Test - public void testNormalizing() { + public void testNormalizing() throws NoSuchAlgorithmException { Email e; e = new Builder().address("test@maxmind.com").hashAddress().build(); @@ -171,6 +175,17 @@ public void testNormalizing() { e = new Builder(false).address("test@.").hashAddress().build(); assertEquals("246a848af2f8394e3adbc738dbe43720", e.getAddress(), "MD5"); assertEquals(".", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@googlemail.com").hashAddress().build(); + assertEquals(toMD5("foo@gmail.com"), e.getAddress(), "MD5"); + assertEquals("googlemail.com", e.getDomain(), "domain"); + } + + private String toMD5(String s) throws NoSuchAlgorithmException { + MessageDigest d = MessageDigest.getInstance("MD5"); + d.update(s.getBytes(StandardCharsets.UTF_8)); + BigInteger i = new BigInteger(1, d.digest()); + return String.format("%032x", i); } @Test From 965f60a7897349c4fc1a0200b453051441952381 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:28:40 +0000 Subject: [PATCH 02/11] Remove periods from gmail.com local parts --- CHANGELOG.md | 3 +++ src/main/java/com/maxmind/minfraud/request/Email.java | 4 ++++ src/test/java/com/maxmind/minfraud/request/EmailTest.java | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2692293d..5a983838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ CHANGELOG * Added `PXP_FINANCIAL` and `TRUSTPAY` to the `Payment.Processor` enum. * Equivalent domain names are now normalized when `hashAddress` is used. For example, `googlemail.com` will become `gmail.com`. +* Periods are now removed from `gmail.com` email address local parts when + `hashAddress` is used. For example, `f.o.o@gmail.com` will become + `foo@gmail.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 2c65d56a..cfad158c 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -196,6 +196,10 @@ private String cleanAddress(String address) { localPart = localPart.substring(0, stopCharIndex); } + if (domain.equals("gmail.com")) { + localPart = localPart.replace(".", ""); + } + return localPart + "@" + domain; } diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index e481631f..388ad201 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -179,6 +179,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo@googlemail.com").hashAddress().build(); assertEquals(toMD5("foo@gmail.com"), e.getAddress(), "MD5"); assertEquals("googlemail.com", e.getDomain(), "domain"); + + e = new Builder(false).address("foo.bar@gmail.com").hashAddress().build(); + assertEquals(toMD5("foobar@gmail.com"), e.getAddress(), "MD5"); + assertEquals("gmail.com", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From 66c2f56ec61c83644cc8c9c8ef96a8e0c0869ae3 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:42:34 +0000 Subject: [PATCH 03/11] Normalize fastmail alias subdomains --- CHANGELOG.md | 3 + .../com/maxmind/minfraud/request/Email.java | 141 +++++++++++++++++- .../maxmind/minfraud/request/EmailTest.java | 8 + 3 files changed, 150 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a983838..74a97457 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ CHANGELOG * Periods are now removed from `gmail.com` email address local parts when `hashAddress` is used. For example, `f.o.o@gmail.com` will become `foo@gmail.com`. +* Fastmail alias subdomain email addresses are now normalized when + `hashAddress` is used. For example, `alias@user.fastmail.com` will become + `user@fastmail.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index cfad158c..8a0824cd 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -7,6 +7,7 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -22,6 +23,7 @@ public final class Email extends AbstractModel { private final String domain; private static final Map typoDomains; private static final Map equivalentDomains; + private static final Map fastmailDomains; static { HashMap typoDomainsMap = new HashMap<>() {{ @@ -34,8 +36,8 @@ public final class Email extends AbstractModel { put("gmil.com", "gmail.com"); put("yahoogmail.com", "gmail.com"); // outlook.com - put("putlook.com", "outlook.com"); - }}; + put("putlook.com", "outlook.com"); + }}; typoDomains = Collections.unmodifiableMap(typoDomainsMap); HashMap equivalentDomainsMap = new HashMap<>() {{ @@ -49,6 +51,127 @@ public final class Email extends AbstractModel { put("ya.ru", "yandex.ru"); }}; equivalentDomains = Collections.unmodifiableMap(equivalentDomainsMap); + + HashMap fastmailDomainsMap = new HashMap<>() {{ + put("123mail.org", true); + put("150mail.com", true); + put("150ml.com", true); + put("16mail.com", true); + put("2-mail.com", true); + put("4email.net", true); + put("50mail.com", true); + put("airpost.net", true); + put("allmail.net", true); + put("bestmail.us", true); + put("cluemail.com", true); + put("elitemail.org", true); + put("emailcorner.net", true); + put("emailengine.net", true); + put("emailengine.org", true); + put("emailgroups.net", true); + put("emailplus.org", true); + put("emailuser.net", true); + put("eml.cc", true); + put("f-m.fm", true); + put("fast-email.com", true); + put("fast-mail.org", true); + put("fastem.com", true); + put("fastemail.us", true); + put("fastemailer.com", true); + put("fastest.cc", true); + put("fastimap.com", true); + put("fastmail.cn", true); + put("fastmail.co.uk", true); + put("fastmail.com", true); + put("fastmail.com.au", true); + put("fastmail.de", true); + put("fastmail.es", true); + put("fastmail.fm", true); + put("fastmail.fr", true); + put("fastmail.im", true); + put("fastmail.in", true); + put("fastmail.jp", true); + put("fastmail.mx", true); + put("fastmail.net", true); + put("fastmail.nl", true); + put("fastmail.org", true); + put("fastmail.se", true); + put("fastmail.to", true); + put("fastmail.tw", true); + put("fastmail.uk", true); + put("fastmail.us", true); + put("fastmailbox.net", true); + put("fastmessaging.com", true); + put("fea.st", true); + put("fmail.co.uk", true); + put("fmailbox.com", true); + put("fmgirl.com", true); + put("fmguy.com", true); + put("ftml.net", true); + put("h-mail.us", true); + put("hailmail.net", true); + put("imap-mail.com", true); + put("imap.cc", true); + put("imapmail.org", true); + put("inoutbox.com", true); + put("internet-e-mail.com", true); + put("internet-mail.org", true); + put("internetemails.net", true); + put("internetmailing.net", true); + put("jetemail.net", true); + put("justemail.net", true); + put("letterboxes.org", true); + put("mail-central.com", true); + put("mail-page.com", true); + put("mailandftp.com", true); + put("mailas.com", true); + put("mailbolt.com", true); + put("mailc.net", true); + put("mailcan.com", true); + put("mailforce.net", true); + put("mailftp.com", true); + put("mailhaven.com", true); + put("mailingaddress.org", true); + put("mailite.com", true); + put("mailmight.com", true); + put("mailnew.com", true); + put("mailsent.net", true); + put("mailservice.ms", true); + put("mailup.net", true); + put("mailworks.org", true); + put("ml1.net", true); + put("mm.st", true); + put("myfastmail.com", true); + put("mymacmail.com", true); + put("nospammail.net", true); + put("ownmail.net", true); + put("petml.com", true); + put("postinbox.com", true); + put("postpro.net", true); + put("proinbox.com", true); + put("promessage.com", true); + put("realemail.net", true); + put("reallyfast.biz", true); + put("reallyfast.info", true); + put("rushpost.com", true); + put("sent.as", true); + put("sent.at", true); + put("sent.com", true); + put("speedpost.net", true); + put("speedymail.org", true); + put("ssl-mail.com", true); + put("swift-mail.com", true); + put("the-fastest.net", true); + put("the-quickest.com", true); + put("theinternetemail.com", true); + put("veryfast.biz", true); + put("veryspeedy.net", true); + put("warpmail.net", true); + put("xsmail.com", true); + put("yepmail.net", true); + put("your-mail.com", true); + }}; + fastmailDomains = Collections.unmodifiableMap(fastmailDomainsMap); } private Email(Email.Builder builder) { @@ -200,6 +323,20 @@ private String cleanAddress(String address) { localPart = localPart.replace(".", ""); } + String[] domainParts = domain.split("\\."); + if (domainParts.length > 2) { + String possibleDomain = String.join( + ".", + Arrays.copyOfRange(domainParts, 1, domainParts.length) + ); + if (fastmailDomains.containsKey(possibleDomain)) { + domain = possibleDomain; + if (!localPart.equals("")) { + localPart = domainParts[0]; + } + } + } + return localPart + "@" + domain; } diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index 388ad201..e04825e4 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -183,6 +183,14 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo.bar@gmail.com").hashAddress().build(); assertEquals(toMD5("foobar@gmail.com"), e.getAddress(), "MD5"); assertEquals("gmail.com", e.getDomain(), "domain"); + + e = new Builder(false).address("alias@user.fastmail.com").hashAddress().build(); + assertEquals(toMD5("user@fastmail.com"), e.getAddress(), "MD5"); + assertEquals("user.fastmail.com", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@bar.example.com").hashAddress().build(); + assertEquals(toMD5("foo@bar.example.com"), e.getAddress(), "MD5"); + assertEquals("bar.example.com", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From 3bc3f10f37d9a2f76619479f69e2e9ecb2b676ac Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:47:35 +0000 Subject: [PATCH 04/11] Remove alias parts from additional yahoo domains --- CHANGELOG.md | 4 ++ .../com/maxmind/minfraud/request/Email.java | 60 ++++++++++++++++++- .../maxmind/minfraud/request/EmailTest.java | 4 ++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74a97457..17653925 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ CHANGELOG * Fastmail alias subdomain email addresses are now normalized when `hashAddress` is used. For example, `alias@user.fastmail.com` will become `user@fastmail.com`. +* Additional `yahoo.com` email addresses now have aliases removed from + their local part when `hashAddress` is used. For example, + `foo-bar@yahoo.com` will become `foo@yahoo.com` for additional + `yahoo.com` domains. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 8a0824cd..f69dc022 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -24,6 +24,7 @@ public final class Email extends AbstractModel { private static final Map typoDomains; private static final Map equivalentDomains; private static final Map fastmailDomains; + private static final Map yahooDomains; static { HashMap typoDomainsMap = new HashMap<>() {{ @@ -172,6 +173,63 @@ public final class Email extends AbstractModel { put("your-mail.com", true); }}; fastmailDomains = Collections.unmodifiableMap(fastmailDomainsMap); + + HashMap yahooDomainsMap = new HashMap<>() {{ + put("y7mail.com", true); + put("yahoo.at", true); + put("yahoo.be", true); + put("yahoo.bg", true); + put("yahoo.ca", true); + put("yahoo.cl", true); + put("yahoo.co.id", true); + put("yahoo.co.il", true); + put("yahoo.co.in", true); + put("yahoo.co.kr", true); + put("yahoo.co.nz", true); + put("yahoo.co.th", true); + put("yahoo.co.uk", true); + put("yahoo.co.za", true); + put("yahoo.com", true); + put("yahoo.com.ar", true); + put("yahoo.com.au", true); + put("yahoo.com.br", true); + put("yahoo.com.co", true); + put("yahoo.com.hk", true); + put("yahoo.com.hr", true); + put("yahoo.com.mx", true); + put("yahoo.com.my", true); + put("yahoo.com.pe", true); + put("yahoo.com.ph", true); + put("yahoo.com.sg", true); + put("yahoo.com.tr", true); + put("yahoo.com.tw", true); + put("yahoo.com.ua", true); + put("yahoo.com.ve", true); + put("yahoo.com.vn", true); + put("yahoo.cz", true); + put("yahoo.de", true); + put("yahoo.dk", true); + put("yahoo.ee", true); + put("yahoo.es", true); + put("yahoo.fi", true); + put("yahoo.fr", true); + put("yahoo.gr", true); + put("yahoo.hu", true); + put("yahoo.ie", true); + put("yahoo.in", true); + put("yahoo.it", true); + put("yahoo.lt", true); + put("yahoo.lv", true); + put("yahoo.nl", true); + put("yahoo.no", true); + put("yahoo.pl", true); + put("yahoo.pt", true); + put("yahoo.ro", true); + put("yahoo.se", true); + put("yahoo.sk", true); + put("ymail.com", true); + }}; + yahooDomains = Collections.unmodifiableMap(yahooDomainsMap); } private Email(Email.Builder builder) { @@ -309,7 +367,7 @@ private String cleanAddress(String address) { domain = cleanDomain(domain); int stopChar; - if (domain.equals("yahoo.com")) { + if (yahooDomains.containsKey(domain)) { stopChar = '-'; } else { stopChar = '+'; diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index e04825e4..9a539e9e 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -191,6 +191,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo@bar.example.com").hashAddress().build(); assertEquals(toMD5("foo@bar.example.com"), e.getAddress(), "MD5"); assertEquals("bar.example.com", e.getDomain(), "domain"); + + e = new Builder(false).address("foo-bar@ymail.com").hashAddress().build(); + assertEquals(toMD5("foo@ymail.com"), e.getAddress(), "MD5"); + assertEquals("ymail.com", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From 0a664b49e4f7168e0344a2f8e6241f9bdc7d2836 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:57:28 +0000 Subject: [PATCH 05/11] Remove duplicate .com strings --- CHANGELOG.md | 3 +++ src/main/java/com/maxmind/minfraud/request/Email.java | 2 ++ src/test/java/com/maxmind/minfraud/request/EmailTest.java | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17653925..87028df4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ CHANGELOG their local part when `hashAddress` is used. For example, `foo-bar@yahoo.com` will become `foo@yahoo.com` for additional `yahoo.com` domains. +* Duplicate `.com`s are now removed from email domain names when + `hashAddress` is used. For example, `example.com.com` will become + `example.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index f69dc022..26365892 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -411,6 +411,8 @@ private String cleanDomain(String domain) { domain = IDN.toASCII(domain); + domain = domain.replaceAll("(?:\\.com){2,}$", ".com"); + if (typoDomains.containsKey(domain)) { domain = typoDomains.get(domain); } diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index 9a539e9e..b8cee2da 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -195,6 +195,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo-bar@ymail.com").hashAddress().build(); assertEquals(toMD5("foo@ymail.com"), e.getAddress(), "MD5"); assertEquals("ymail.com", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@example.com.com").hashAddress().build(); + assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); + assertEquals("example.com.com", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From f3f39e206e0a50746ffccf048e97674002a63af5 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 22:59:45 +0000 Subject: [PATCH 06/11] Remove extraneous characters after .com --- CHANGELOG.md | 3 +++ src/main/java/com/maxmind/minfraud/request/Email.java | 1 + src/test/java/com/maxmind/minfraud/request/EmailTest.java | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87028df4..6d87df8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ CHANGELOG * Duplicate `.com`s are now removed from email domain names when `hashAddress` is used. For example, `example.com.com` will become `example.com`. +* Extraneous characters after `.com` are now removed from email domain + names when `hashAddress` is used. For example, `example.comfoo` will + become `example.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 26365892..0da5b0e2 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -412,6 +412,7 @@ private String cleanDomain(String domain) { domain = IDN.toASCII(domain); domain = domain.replaceAll("(?:\\.com){2,}$", ".com"); + domain = domain.replaceAll("\\.com[^.]+$", ".com"); if (typoDomains.containsKey(domain)) { domain = typoDomains.get(domain); diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index b8cee2da..eaf8d623 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -199,6 +199,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo@example.com.com").hashAddress().build(); assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); assertEquals("example.com.com", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@example.comfoo").hashAddress().build(); + assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); + assertEquals("example.comfoo", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From c1b7ddc63b7406f70edfa4d5322f137250a7e5fd Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 23:01:10 +0000 Subject: [PATCH 07/11] Normalize some .com typos --- CHANGELOG.md | 2 ++ src/main/java/com/maxmind/minfraud/request/Email.java | 1 + src/test/java/com/maxmind/minfraud/request/EmailTest.java | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d87df8a..f6362210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ CHANGELOG * Extraneous characters after `.com` are now removed from email domain names when `hashAddress` is used. For example, `example.comfoo` will become `example.com`. +* Certain `.com` typos are now normalized to `.com` when `hashAddress` is + used. For example, `example.cam` will become `example.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 0da5b0e2..0857aadc 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -413,6 +413,7 @@ private String cleanDomain(String domain) { domain = domain.replaceAll("(?:\\.com){2,}$", ".com"); domain = domain.replaceAll("\\.com[^.]+$", ".com"); + domain = domain.replaceAll("(?:\\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$", ".com"); if (typoDomains.containsKey(domain)) { domain = typoDomains.get(domain); diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index eaf8d623..0dfaf07f 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -203,6 +203,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo@example.comfoo").hashAddress().build(); assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); assertEquals("example.comfoo", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@example.cam").hashAddress().build(); + assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); + assertEquals("example.cam", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From 17aad1d1eb48aad0697ba28e5f4985496a9ae1ca Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 23:02:35 +0000 Subject: [PATCH 08/11] Normalize more gmail leading digit domains --- CHANGELOG.md | 3 +++ src/main/java/com/maxmind/minfraud/request/Email.java | 1 + src/test/java/com/maxmind/minfraud/request/EmailTest.java | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6362210..9210ec99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ CHANGELOG become `example.com`. * Certain `.com` typos are now normalized to `.com` when `hashAddress` is used. For example, `example.cam` will become `example.com`. +* Additional `gmail.com` domain names with leading digits are now + normalized when `hashAddress` is used. For example, `100gmail.com` will + become `gmail.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 0857aadc..960d4754 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -414,6 +414,7 @@ private String cleanDomain(String domain) { domain = domain.replaceAll("(?:\\.com){2,}$", ".com"); domain = domain.replaceAll("\\.com[^.]+$", ".com"); domain = domain.replaceAll("(?:\\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$", ".com"); + domain = domain.replaceAll("^\\d+(?:gmail?\\.com)$", "gmail.com"); if (typoDomains.containsKey(domain)) { domain = typoDomains.get(domain); diff --git a/src/test/java/com/maxmind/minfraud/request/EmailTest.java b/src/test/java/com/maxmind/minfraud/request/EmailTest.java index 0dfaf07f..f3cb89b4 100644 --- a/src/test/java/com/maxmind/minfraud/request/EmailTest.java +++ b/src/test/java/com/maxmind/minfraud/request/EmailTest.java @@ -207,6 +207,10 @@ public void testNormalizing() throws NoSuchAlgorithmException { e = new Builder(false).address("foo@example.cam").hashAddress().build(); assertEquals(toMD5("foo@example.com"), e.getAddress(), "MD5"); assertEquals("example.cam", e.getDomain(), "domain"); + + e = new Builder(false).address("foo@10000gmail.com").hashAddress().build(); + assertEquals(toMD5("foo@gmail.com"), e.getAddress(), "MD5"); + assertEquals("10000gmail.com", e.getDomain(), "domain"); } private String toMD5(String s) throws NoSuchAlgorithmException { From 4ebd2ee7980a64614ae961fe0fa5ee6815ba7031 Mon Sep 17 00:00:00 2001 From: William Storey Date: Wed, 6 Mar 2024 23:05:02 +0000 Subject: [PATCH 09/11] Update typo domains map --- CHANGELOG.md | 2 ++ src/main/java/com/maxmind/minfraud/request/Email.java | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9210ec99..0490a9c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ CHANGELOG * Additional `gmail.com` domain names with leading digits are now normalized when `hashAddress` is used. For example, `100gmail.com` will become `gmail.com`. +* Additional `gmail.com` typos are now normalized when `hashAddress` is + used. For example, `gmali.com` will become `gmail.com`. 3.3.0 (2023-12-05) ------------------ diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 960d4754..14f11f51 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -29,13 +29,16 @@ public final class Email extends AbstractModel { static { HashMap typoDomainsMap = new HashMap<>() {{ // gmail.com - put("35gmai.com", "gmail.com"); - put("636gmail.com", "gmail.com"); + put("gmai.com", "gmail.com"); put("gamil.com", "gmail.com"); - put("gmail.comu", "gmail.com"); + put("gmali.com", "gmail.com"); put("gmial.com", "gmail.com"); put("gmil.com", "gmail.com"); - put("yahoogmail.com", "gmail.com"); + put("gmaill.com", "gmail.com"); + put("gmailm.com", "gmail.com"); + put("gmailo.com", "gmail.com"); + put("gmailyhoo.com", "gmail.com"); + put("yahoogmail.com", "gmail.com"); // outlook.com put("putlook.com", "outlook.com"); }}; From c3489708af97f01b5afaff5e230a5ddbf1e57a00 Mon Sep 17 00:00:00 2001 From: William Storey Date: Thu, 7 Mar 2024 16:20:10 +0000 Subject: [PATCH 10/11] Improve indentation --- checkstyle-suppressions.xml | 1 - .../com/maxmind/minfraud/request/Email.java | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml index b8e79da8..962f9579 100644 --- a/checkstyle-suppressions.xml +++ b/checkstyle-suppressions.xml @@ -5,5 +5,4 @@ "https://checkstyle.org/dtds/suppressions_1_0.dtd"> - diff --git a/src/main/java/com/maxmind/minfraud/request/Email.java b/src/main/java/com/maxmind/minfraud/request/Email.java index 14f11f51..f27b4123 100644 --- a/src/main/java/com/maxmind/minfraud/request/Email.java +++ b/src/main/java/com/maxmind/minfraud/request/Email.java @@ -28,18 +28,18 @@ public final class Email extends AbstractModel { static { HashMap typoDomainsMap = new HashMap<>() {{ - // gmail.com - put("gmai.com", "gmail.com"); - put("gamil.com", "gmail.com"); - put("gmali.com", "gmail.com"); - put("gmial.com", "gmail.com"); - put("gmil.com", "gmail.com"); - put("gmaill.com", "gmail.com"); - put("gmailm.com", "gmail.com"); + // gmail.com + put("gmai.com", "gmail.com"); + put("gamil.com", "gmail.com"); + put("gmali.com", "gmail.com"); + put("gmial.com", "gmail.com"); + put("gmil.com", "gmail.com"); + put("gmaill.com", "gmail.com"); + put("gmailm.com", "gmail.com"); put("gmailo.com", "gmail.com"); put("gmailyhoo.com", "gmail.com"); put("yahoogmail.com", "gmail.com"); - // outlook.com + // outlook.com put("putlook.com", "outlook.com"); }}; typoDomains = Collections.unmodifiableMap(typoDomainsMap); From 804e9c1ba7ece28f2b8f986703de4fc83ff5b559 Mon Sep 17 00:00:00 2001 From: William Storey Date: Thu, 7 Mar 2024 17:10:23 +0000 Subject: [PATCH 11/11] Remove now empty suppressions file --- checkstyle-suppressions.xml | 8 -------- checkstyle.xml | 6 ------ pom.xml | 1 - 3 files changed, 15 deletions(-) delete mode 100644 checkstyle-suppressions.xml diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml deleted file mode 100644 index 962f9579..00000000 --- a/checkstyle-suppressions.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/checkstyle.xml b/checkstyle.xml index d1ff3394..dfcfa6f6 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -29,12 +29,6 @@ - - - - - diff --git a/pom.xml b/pom.xml index f6cca119..7cb86899 100644 --- a/pom.xml +++ b/pom.xml @@ -127,7 +127,6 @@ true checkstyle.xml - checkstyle-suppressions.xml warning