From e7720426885bc1b1f3c90f90b125c9f8dee9decd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Silvio=20Br=C3=A4ndle?= Date: Fri, 10 Jan 2025 16:51:41 +0000 Subject: [PATCH] Refactor phone context parsing for RFC3966 numbers. --- .../i18n/phonenumbers/AsYouTypeFormatter.java | 6 +- .../google/i18n/phonenumbers/Constants.java | 212 ++++++++++++ .../i18n/phonenumbers/PhoneContextParser.java | 166 ++++++++++ .../i18n/phonenumbers/PhoneNumberMatcher.java | 11 +- .../phonenumbers/PhoneNumberNormalizer.java | 40 +++ .../i18n/phonenumbers/PhoneNumberUtil.java | 310 +++--------------- .../i18n/phonenumbers/ShortNumberInfo.java | 2 +- .../phonenumbers/PhoneContextParserTest.java | 109 ++++++ .../phonenumbers/PhoneNumberUtilTest.java | 2 +- 9 files changed, 577 insertions(+), 281 deletions(-) create mode 100644 java/libphonenumber/src/com/google/i18n/phonenumbers/Constants.java create mode 100644 java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneContextParser.java create mode 100644 java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberNormalizer.java create mode 100644 java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneContextParserTest.java diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java index be11be57ea..a3642c6e23 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java @@ -573,7 +573,7 @@ private String removeNationalPrefixFromNationalNumber() { */ private boolean attemptToExtractIdd() { Pattern internationalPrefix = - regexCache.getPatternForRegex("\\" + PhoneNumberUtil.PLUS_SIGN + "|" + regexCache.getPatternForRegex("\\" + Constants.PLUS_SIGN + "|" + currentMetadata.getInternationalPrefix()); Matcher iddMatcher = internationalPrefix.matcher(accruedInputWithoutFormatting); if (iddMatcher.lookingAt()) { @@ -584,7 +584,7 
@@ private boolean attemptToExtractIdd() { prefixBeforeNationalNumber.setLength(0); prefixBeforeNationalNumber.append( accruedInputWithoutFormatting.substring(0, startOfCountryCallingCode)); - if (accruedInputWithoutFormatting.charAt(0) != PhoneNumberUtil.PLUS_SIGN) { + if (accruedInputWithoutFormatting.charAt(0) != Constants.PLUS_SIGN) { prefixBeforeNationalNumber.append(SEPARATOR_BEFORE_NATIONAL_NUMBER); } return true; @@ -631,7 +631,7 @@ private boolean attemptToExtractCountryCallingCode() { // digit or the plus sign. private char normalizeAndAccrueDigitsAndPlusSign(char nextChar, boolean rememberPosition) { char normalizedChar; - if (nextChar == PhoneNumberUtil.PLUS_SIGN) { + if (nextChar == Constants.PLUS_SIGN) { normalizedChar = nextChar; accruedInputWithoutFormatting.append(nextChar); } else { diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/Constants.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/Constants.java new file mode 100644 index 0000000000..6d285debf8 --- /dev/null +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/Constants.java @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2025 The Libphonenumber Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.i18n.phonenumbers; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + +/** Constants shared by PhoneNumberUtil and other classes in this package. */ +final class Constants { + // The maximum length of the country calling code. + static final int MAX_LENGTH_COUNTRY_CODE = 3; + + // Map of country calling codes that use a mobile token before the area code. One example of when + // this is relevant is when determining the length of the national destination code, which should + // be the length of the area code plus the length of the mobile token. + static final Map MOBILE_TOKEN_MAPPINGS; + + // Set of country codes that have geographically assigned mobile numbers (see GEO_MOBILE_COUNTRIES + // below) which are not based on *area codes*. For example, in China mobile numbers start with a + // carrier indicator, and beyond that are geographically assigned: this carrier indicator is not + // considered to be an area code. + static final Set GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES; + + // Set of country codes that do not have a national prefix but do have area codes. + static final Set COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES; + + // Set of country calling codes that have geographically assigned mobile numbers. This may not be + // complete; we add calling codes case by case, as we find geographical mobile numbers or hear + // from user reports. Note that countries like the US, where we can't distinguish between + // fixed-line or mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE to be + // a possibly geographically-related type anyway (like FIXED_LINE). + static final Set GEO_MOBILE_COUNTRIES; + + // The PLUS_SIGN signifies the international prefix. 
+ static final char PLUS_SIGN = '+'; + + static final String RFC3966_PHONE_CONTEXT = ";phone-context="; + + // A map that contains characters that are essential when dialling. That means any of the + // characters in this map must not be removed from a number when dialling, otherwise the call + // will not reach the intended destination. + static final Map DIALLABLE_CHAR_MAPPINGS; + + + // Only upper-case variants of alpha characters are stored. + static final Map ALPHA_MAPPINGS; + + // For performance reasons, amalgamate the alpha and ASCII digit mappings into one map. + static final Map ALPHA_PHONE_MAPPINGS; + + // Separate map of all symbols that we wish to retain when formatting alpha numbers. This + // includes digits, ASCII letters and number grouping symbols such as "-" and " ". + static final Map ALL_PLUS_NUMBER_GROUPING_SYMBOLS; + + static { + HashMap mobileTokenMap = new HashMap<>(); + mobileTokenMap.put(54, "9"); + MOBILE_TOKEN_MAPPINGS = Collections.unmodifiableMap(mobileTokenMap); + + HashSet geoMobileCountriesWithoutMobileAreaCodes = new HashSet<>(); + geoMobileCountriesWithoutMobileAreaCodes.add(86); // China + GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES = + Collections.unmodifiableSet(geoMobileCountriesWithoutMobileAreaCodes); + + HashSet countriesWithoutNationalPrefixWithAreaCodes = new HashSet<>(); + countriesWithoutNationalPrefixWithAreaCodes.add(52); // Mexico + COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES = + Collections.unmodifiableSet(countriesWithoutNationalPrefixWithAreaCodes); + + HashSet geoMobileCountries = new HashSet<>(); + geoMobileCountries.add(52); // Mexico + geoMobileCountries.add(54); // Argentina + geoMobileCountries.add(55); // Brazil + geoMobileCountries.add(62); // Indonesia: some prefixes only (fixed CDMA wireless) + geoMobileCountries.addAll(geoMobileCountriesWithoutMobileAreaCodes); + GEO_MOBILE_COUNTRIES = Collections.unmodifiableSet(geoMobileCountries); + + // Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and + // 
ALL_PLUS_NUMBER_GROUPING_SYMBOLS. + HashMap asciiDigitMappings = new HashMap<>(); + asciiDigitMappings.put('0', '0'); + asciiDigitMappings.put('1', '1'); + asciiDigitMappings.put('2', '2'); + asciiDigitMappings.put('3', '3'); + asciiDigitMappings.put('4', '4'); + asciiDigitMappings.put('5', '5'); + asciiDigitMappings.put('6', '6'); + asciiDigitMappings.put('7', '7'); + asciiDigitMappings.put('8', '8'); + asciiDigitMappings.put('9', '9'); + + HashMap alphaMap = new HashMap<>(40); + alphaMap.put('A', '2'); + alphaMap.put('B', '2'); + alphaMap.put('C', '2'); + alphaMap.put('D', '3'); + alphaMap.put('E', '3'); + alphaMap.put('F', '3'); + alphaMap.put('G', '4'); + alphaMap.put('H', '4'); + alphaMap.put('I', '4'); + alphaMap.put('J', '5'); + alphaMap.put('K', '5'); + alphaMap.put('L', '5'); + alphaMap.put('M', '6'); + alphaMap.put('N', '6'); + alphaMap.put('O', '6'); + alphaMap.put('P', '7'); + alphaMap.put('Q', '7'); + alphaMap.put('R', '7'); + alphaMap.put('S', '7'); + alphaMap.put('T', '8'); + alphaMap.put('U', '8'); + alphaMap.put('V', '8'); + alphaMap.put('W', '9'); + alphaMap.put('X', '9'); + alphaMap.put('Y', '9'); + alphaMap.put('Z', '9'); + ALPHA_MAPPINGS = Collections.unmodifiableMap(alphaMap); + + HashMap combinedMap = new HashMap<>(100); + combinedMap.putAll(ALPHA_MAPPINGS); + combinedMap.putAll(asciiDigitMappings); + ALPHA_PHONE_MAPPINGS = Collections.unmodifiableMap(combinedMap); + + HashMap diallableCharMap = new HashMap<>(); + diallableCharMap.putAll(asciiDigitMappings); + diallableCharMap.put(PLUS_SIGN, PLUS_SIGN); + diallableCharMap.put('*', '*'); + diallableCharMap.put('#', '#'); + DIALLABLE_CHAR_MAPPINGS = Collections.unmodifiableMap(diallableCharMap); + + HashMap allPlusNumberGroupings = new HashMap<>(); + // Put (lower letter -> upper letter) and (upper letter -> upper letter) mappings. 
+ for (char c : ALPHA_MAPPINGS.keySet()) { + allPlusNumberGroupings.put(Character.toLowerCase(c), c); + allPlusNumberGroupings.put(c, c); + } + allPlusNumberGroupings.putAll(asciiDigitMappings); + // Put grouping symbols. + allPlusNumberGroupings.put('-', '-'); + allPlusNumberGroupings.put('\uFF0D', '-'); + allPlusNumberGroupings.put('\u2010', '-'); + allPlusNumberGroupings.put('\u2011', '-'); + allPlusNumberGroupings.put('\u2012', '-'); + allPlusNumberGroupings.put('\u2013', '-'); + allPlusNumberGroupings.put('\u2014', '-'); + allPlusNumberGroupings.put('\u2015', '-'); + allPlusNumberGroupings.put('\u2212', '-'); + allPlusNumberGroupings.put('/', '/'); + allPlusNumberGroupings.put('\uFF0F', '/'); + allPlusNumberGroupings.put(' ', ' '); + allPlusNumberGroupings.put('\u3000', ' '); + allPlusNumberGroupings.put('\u2060', ' '); + allPlusNumberGroupings.put('.', '.'); + allPlusNumberGroupings.put('\uFF0E', '.'); + ALL_PLUS_NUMBER_GROUPING_SYMBOLS = Collections.unmodifiableMap(allPlusNumberGroupings); + } + + static final String DIGITS = "\\p{Nd}"; + // We accept alpha characters in phone numbers, ASCII only, upper and lower case. + static final String VALID_ALPHA = + Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "") + + Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()) + .toLowerCase().replaceAll("[, \\[\\]]", ""); + + // We use this pattern to check if the phone number has at least three letters in it - if so, then + // we treat it as a number where some phone-number digits are represented by letters. + static final Pattern VALID_ALPHA_PHONE_PATTERN = Pattern.compile("(?:.*?[A-Za-z]){3}.*"); + + // Regular expression of valid global-number-digits for the phone-context parameter, following the + // syntax defined in RFC3966. 
+ static final String RFC3966_VISUAL_SEPARATOR = "[\\-\\.\\(\\)]?"; + static final String RFC3966_PHONE_DIGIT = + "(" + DIGITS + "|" + RFC3966_VISUAL_SEPARATOR + ")"; + static final String RFC3966_GLOBAL_NUMBER_DIGITS = + "^\\" + PLUS_SIGN + RFC3966_PHONE_DIGIT + "*" + DIGITS + RFC3966_PHONE_DIGIT + "*$"; + static final Pattern RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN = + Pattern.compile(RFC3966_GLOBAL_NUMBER_DIGITS); + + // Regular expression of valid domainname for the phone-context parameter, following the syntax + // defined in RFC3966. + static final String ALPHANUM = VALID_ALPHA + DIGITS; + static final String RFC3966_DOMAINLABEL = + "[" + ALPHANUM + "]+((\\-)*[" + ALPHANUM + "])*"; + static final String RFC3966_TOPLABEL = + "[" + VALID_ALPHA + "]+((\\-)*[" + ALPHANUM + "])*"; + static final String RFC3966_DOMAINNAME = + "^(" + RFC3966_DOMAINLABEL + "\\.)*" + RFC3966_TOPLABEL + "\\.?$"; + static final Pattern RFC3966_DOMAINNAME_PATTERN = Pattern.compile(RFC3966_DOMAINNAME); + + private Constants() {} +} diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneContextParser.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneContextParser.java new file mode 100644 index 0000000000..de1c7e8fae --- /dev/null +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneContextParser.java @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2025 The Libphonenumber Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.i18n.phonenumbers; + +import java.util.Set; + +/** Utility class for parsing the phone-context parameter of a phone number. */ +final class PhoneContextParser { + private final Set countryCallingCodeSet; + + PhoneContextParser(Set countryCallingCodeSet) { + this.countryCallingCodeSet = countryCallingCodeSet; + } + + /** + * Extracts the value of the phone-context parameter of {@code number}, following the + * syntax defined in RFC3966. + * + * @return the extracted string (possibly empty), or null if no phone-context parameter is + * found. + */ + private static String extractPhoneContext(String number) { + int indexOfPhoneContext = number.indexOf(Constants.RFC3966_PHONE_CONTEXT); + + // If no phone-context parameter is present + if (indexOfPhoneContext == -1) { + return null; + } + + int phoneContextStart = indexOfPhoneContext + Constants.RFC3966_PHONE_CONTEXT.length(); + // If the phone-context parameter is present but nothing follows the '=' + if (phoneContextStart >= number.length()) { + return ""; + } + + int phoneContextEnd = number.indexOf(';', phoneContextStart); + // If phone-context is the last parameter + if (phoneContextEnd < 0) { + return number.substring(phoneContextStart); + } else { + return number.substring(phoneContextStart, phoneContextEnd); + } + } + + /** + * Returns whether phoneContext is non-empty and follows the syntax defined in RFC3966. + */ + private static boolean isValid(String phoneContext) { + if (phoneContext.equals("")) { + return false; + } + + // Does phone-context value match pattern of global-number-digits or domain name + return Constants.RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN.matcher(phoneContext).matches() + || Constants.RFC3966_DOMAINNAME_PATTERN.matcher(phoneContext).matches(); + } + + /** Checks whether countryCode is in the country calling code set given at construction. 
+ */ + private boolean isValidCountryCode(int countryCode) { + return countryCallingCodeSet.contains(countryCode); + } + + /** + * Parses a previously extracted, non-empty phone-context value, following the syntax defined in + * RFC3966. + * + * @return a new PhoneContext holding the raw value; never null. Its country code is only set + * if the value starts with a plus sign and its digits are a valid country calling code. + */ + private PhoneContext parsePhoneContext(String phoneContext) { + // Ignore phone-context values that do not start with a plus sign. Could be a domain name. + if (phoneContext.charAt(0) != Constants.PLUS_SIGN) { + return new PhoneContext().setRawContext(phoneContext).setCountryCode(null); + } + + // Remove the plus sign from the phone context and normalize the digits. + String normalizedPhoneContext = + PhoneNumberNormalizer.normalizeDigitsOnly(phoneContext.substring(1)); + + // Check if the phone context is a valid country calling code. + if (!normalizedPhoneContext.equals("") + && normalizedPhoneContext.length() <= Constants.MAX_LENGTH_COUNTRY_CODE) { + int potentialCountryCode = Integer.parseInt(normalizedPhoneContext); + if (isValidCountryCode(potentialCountryCode)) { + return new PhoneContext().setRawContext(phoneContext).setCountryCode(potentialCountryCode); + } + } + + // If the country code is not valid, return the phone context as is. + return new PhoneContext().setRawContext(phoneContext).setCountryCode(null); + } + + /** + * Parses the phone-context parameter of number, following the syntax defined in RFC3966. + * + * @return the parsed phone-context parameter as a PhoneContext object, or null if no + * phone-context parameter is found. + * @throws NumberParseException if the phone-context parameter is invalid. 
+ */ + PhoneContext parse(String number) throws NumberParseException { + String phoneContext = extractPhoneContext(number); + + if (phoneContext == null) { + return null; + } + + if (!isValid(phoneContext)) { + throw new NumberParseException(NumberParseException.ErrorType.NOT_A_NUMBER, + "The phone-context value is invalid."); + } + + return parsePhoneContext(phoneContext); + } + + /** Represents the parsed phone-context parameter of an RFC3966 tel-URI. */ + static class PhoneContext { + /** The raw value of the phone-context parameter. */ + private String rawContext_ = null; + + /** + * The country code of the phone-context parameter if the phone-context parameter is exactly + * and only a + followed by a valid country code. + * + *

+ * For example, if the phone-context parameter is "+1", the country code is 1. If the + * phone-context parameter is "+123", the country code is null. + */ + private Integer countryCode_ = null; + + /** Returns {@link #rawContext_}, which is null until it has been set. */ + String getRawContext() { + return rawContext_; + } + + /** Sets {@link #rawContext_} and returns this instance to allow call chaining. */ + PhoneContext setRawContext(String value) { + rawContext_ = value; + return this; + } + + /** Returns {@link #countryCode_}, which is null if no country code was recognised. */ + Integer getCountryCode() { + return countryCode_; + } + + /** Sets {@link #countryCode_} and returns this instance to allow call chaining. */ + PhoneContext setCountryCode(Integer value) { + countryCode_ = value; + return this; + } + } +} diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java index b812551db0..359171bd57 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java @@ -151,8 +151,7 @@ final class PhoneNumberMatcher implements Iterator { /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a * single block, set high enough to accommodate the entire national number and the international * country code. */ - int digitBlockLimit = - PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; + int digitBlockLimit = PhoneNumberUtil.MAX_LENGTH_FOR_NSN + Constants.MAX_LENGTH_COUNTRY_CODE; /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some * formats use spaces to separate each digit. 
*/ String blockLimit = limit(0, digitBlockLimit); @@ -569,7 +568,7 @@ boolean checkNumberGroupingIsValid( PhoneNumber number, CharSequence candidate, PhoneNumberUtil util, NumberGroupingChecker checker) { StringBuilder normalizedCandidate = - PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); + PhoneNumberNormalizer.normalizeDigits(candidate, true /* keep non-digits */); String[] formattedNumberGroups = getNationalNumberGroups(util, number); if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { return true; @@ -618,7 +617,7 @@ static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, Stri (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN || number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN); if (candidateHasCountryCode - && PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex)) + && PhoneNumberNormalizer.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex)) .equals(Integer.toString(number.getCountryCode()))) { // Any more slashes and this is illegal. return candidate.substring(secondSlashInBodyIndex + 1).contains("/"); @@ -646,7 +645,7 @@ static boolean containsOnlyValidXChars( } // This is the extension sign case, in which the 'x' or 'X' should always precede the // extension number. - } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( + } else if (!PhoneNumberNormalizer.normalizeDigitsOnly(candidate.substring(index)).equals( number.getExtension())) { return false; } @@ -685,7 +684,7 @@ static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumber return true; } // Normalize the remainder. 
- String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); + String rawInputCopy = PhoneNumberNormalizer.normalizeDigitsOnly(number.getRawInput()); StringBuilder rawInput = new StringBuilder(rawInputCopy); // Check if we found a national prefix and/or carrier code at the start of the raw input, and // return the result. diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberNormalizer.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberNormalizer.java new file mode 100644 index 0000000000..8a5493d39b --- /dev/null +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberNormalizer.java @@ -0,0 +1,40 @@ +package com.google.i18n.phonenumbers; + +/** Utility class for normalizing phone numbers. */ +final class PhoneNumberNormalizer { + private PhoneNumberNormalizer() {} + + /** + * Normalizes a string of characters representing a phone number. This converts wide-ascii and + * arabic-indic numerals to European numerals, and strips punctuation and alpha characters. + * + * @param number a string of characters representing a phone number + * @return the normalized string version of the phone number + */ + static String normalizeDigitsOnly(CharSequence number) { + return normalizeDigits(number, false /* strip non-digits */).toString(); + } + + /** + * Helper method for normalizing a string of characters representing a phone number. See {@link + * PhoneNumberUtil#normalize(StringBuilder)} and {@link #normalizeDigitsOnly(CharSequence)} for + * more details. 
+ * + * @param number a string of characters representing a phone number + * @param keepNonDigits whether to keep non-digits in the normalized string + * @return the normalized string version of the phone number + */ + static StringBuilder normalizeDigits(CharSequence number, boolean keepNonDigits) { + StringBuilder normalizedDigits = new StringBuilder(number.length()); + for (int i = 0; i < number.length(); i++) { + char c = number.charAt(i); + int digit = Character.digit(c, 10); + if (digit != -1) { + normalizedDigits.append(digit); + } else if (keepNonDigits) { + normalizedDigits.append(c); + } + } + return normalizedDigits; + } +} diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java index 6d8004f182..ea289fab46 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java @@ -16,6 +16,12 @@ package com.google.i18n.phonenumbers; +import com.google.i18n.phonenumbers.PhoneContextParser.PhoneContext; +import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency; +import com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType; +import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat; +import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberType; +import com.google.i18n.phonenumbers.PhoneNumberUtil.ValidationResult; import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat; import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata; import com.google.i18n.phonenumbers.Phonemetadata.PhoneNumberDesc; @@ -63,8 +69,6 @@ public class PhoneNumberUtil { private static final int MIN_LENGTH_FOR_NSN = 2; // The ITU says the maximum length should be 15, but we have found longer numbers in Germany. static final int MAX_LENGTH_FOR_NSN = 17; - // The maximum length of the country calling code. 
- static final int MAX_LENGTH_COUNTRY_CODE = 3; // We don't allow input strings for parsing to be longer than 250 chars. This prevents malicious // input from overflowing the regular-expression engine. private static final int MAX_INPUT_STRING_LENGTH = 250; @@ -74,157 +78,12 @@ public class PhoneNumberUtil { private static final int NANPA_COUNTRY_CODE = 1; - // Map of country calling codes that use a mobile token before the area code. One example of when - // this is relevant is when determining the length of the national destination code, which should - // be the length of the area code plus the length of the mobile token. - private static final Map MOBILE_TOKEN_MAPPINGS; - - // Set of country codes that have geographically assigned mobile numbers (see GEO_MOBILE_COUNTRIES - // below) which are not based on *area codes*. For example, in China mobile numbers start with a - // carrier indicator, and beyond that are geographically assigned: this carrier indicator is not - // considered to be an area code. - private static final Set GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES; - - // Set of country codes that doesn't have national prefix, but it has area codes. - private static final Set COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES; - - // Set of country calling codes that have geographically assigned mobile numbers. This may not be - // complete; we add calling codes case by case, as we find geographical mobile numbers or hear - // from user reports. Note that countries like the US, where we can't distinguish between - // fixed-line or mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE to be - // a possibly geographically-related type anyway (like FIXED_LINE). - private static final Set GEO_MOBILE_COUNTRIES; - - // The PLUS_SIGN signifies the international prefix. 
- static final char PLUS_SIGN = '+'; - private static final char STAR_SIGN = '*'; private static final String RFC3966_EXTN_PREFIX = ";ext="; private static final String RFC3966_PREFIX = "tel:"; - private static final String RFC3966_PHONE_CONTEXT = ";phone-context="; private static final String RFC3966_ISDN_SUBADDRESS = ";isub="; - // A map that contains characters that are essential when dialling. That means any of the - // characters in this map must not be removed from a number when dialling, otherwise the call - // will not reach the intended destination. - private static final Map DIALLABLE_CHAR_MAPPINGS; - - // Only upper-case variants of alpha characters are stored. - private static final Map ALPHA_MAPPINGS; - - // For performance reasons, amalgamate both into one map. - private static final Map ALPHA_PHONE_MAPPINGS; - - // Separate map of all symbols that we wish to retain when formatting alpha numbers. This - // includes digits, ASCII letters and number grouping symbols such as "-" and " ". 
- private static final Map ALL_PLUS_NUMBER_GROUPING_SYMBOLS; - - static { - HashMap mobileTokenMap = new HashMap<>(); - mobileTokenMap.put(54, "9"); - MOBILE_TOKEN_MAPPINGS = Collections.unmodifiableMap(mobileTokenMap); - - HashSet geoMobileCountriesWithoutMobileAreaCodes = new HashSet<>(); - geoMobileCountriesWithoutMobileAreaCodes.add(86); // China - GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES = - Collections.unmodifiableSet(geoMobileCountriesWithoutMobileAreaCodes); - - HashSet countriesWithoutNationalPrefixWithAreaCodes = new HashSet<>(); - countriesWithoutNationalPrefixWithAreaCodes.add(52); // Mexico - COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES = - Collections.unmodifiableSet(countriesWithoutNationalPrefixWithAreaCodes); - - HashSet geoMobileCountries = new HashSet<>(); - geoMobileCountries.add(52); // Mexico - geoMobileCountries.add(54); // Argentina - geoMobileCountries.add(55); // Brazil - geoMobileCountries.add(62); // Indonesia: some prefixes only (fixed CMDA wireless) - geoMobileCountries.addAll(geoMobileCountriesWithoutMobileAreaCodes); - GEO_MOBILE_COUNTRIES = Collections.unmodifiableSet(geoMobileCountries); - - // Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and - // ALL_PLUS_NUMBER_GROUPING_SYMBOLS. 
- HashMap asciiDigitMappings = new HashMap<>(); - asciiDigitMappings.put('0', '0'); - asciiDigitMappings.put('1', '1'); - asciiDigitMappings.put('2', '2'); - asciiDigitMappings.put('3', '3'); - asciiDigitMappings.put('4', '4'); - asciiDigitMappings.put('5', '5'); - asciiDigitMappings.put('6', '6'); - asciiDigitMappings.put('7', '7'); - asciiDigitMappings.put('8', '8'); - asciiDigitMappings.put('9', '9'); - - HashMap alphaMap = new HashMap<>(40); - alphaMap.put('A', '2'); - alphaMap.put('B', '2'); - alphaMap.put('C', '2'); - alphaMap.put('D', '3'); - alphaMap.put('E', '3'); - alphaMap.put('F', '3'); - alphaMap.put('G', '4'); - alphaMap.put('H', '4'); - alphaMap.put('I', '4'); - alphaMap.put('J', '5'); - alphaMap.put('K', '5'); - alphaMap.put('L', '5'); - alphaMap.put('M', '6'); - alphaMap.put('N', '6'); - alphaMap.put('O', '6'); - alphaMap.put('P', '7'); - alphaMap.put('Q', '7'); - alphaMap.put('R', '7'); - alphaMap.put('S', '7'); - alphaMap.put('T', '8'); - alphaMap.put('U', '8'); - alphaMap.put('V', '8'); - alphaMap.put('W', '9'); - alphaMap.put('X', '9'); - alphaMap.put('Y', '9'); - alphaMap.put('Z', '9'); - ALPHA_MAPPINGS = Collections.unmodifiableMap(alphaMap); - - HashMap combinedMap = new HashMap<>(100); - combinedMap.putAll(ALPHA_MAPPINGS); - combinedMap.putAll(asciiDigitMappings); - ALPHA_PHONE_MAPPINGS = Collections.unmodifiableMap(combinedMap); - - HashMap diallableCharMap = new HashMap<>(); - diallableCharMap.putAll(asciiDigitMappings); - diallableCharMap.put(PLUS_SIGN, PLUS_SIGN); - diallableCharMap.put('*', '*'); - diallableCharMap.put('#', '#'); - DIALLABLE_CHAR_MAPPINGS = Collections.unmodifiableMap(diallableCharMap); - - HashMap allPlusNumberGroupings = new HashMap<>(); - // Put (lower letter -> upper letter) and (upper letter -> upper letter) mappings. 
- for (char c : ALPHA_MAPPINGS.keySet()) { - allPlusNumberGroupings.put(Character.toLowerCase(c), c); - allPlusNumberGroupings.put(c, c); - } - allPlusNumberGroupings.putAll(asciiDigitMappings); - // Put grouping symbols. - allPlusNumberGroupings.put('-', '-'); - allPlusNumberGroupings.put('\uFF0D', '-'); - allPlusNumberGroupings.put('\u2010', '-'); - allPlusNumberGroupings.put('\u2011', '-'); - allPlusNumberGroupings.put('\u2012', '-'); - allPlusNumberGroupings.put('\u2013', '-'); - allPlusNumberGroupings.put('\u2014', '-'); - allPlusNumberGroupings.put('\u2015', '-'); - allPlusNumberGroupings.put('\u2212', '-'); - allPlusNumberGroupings.put('/', '/'); - allPlusNumberGroupings.put('\uFF0F', '/'); - allPlusNumberGroupings.put(' ', ' '); - allPlusNumberGroupings.put('\u3000', ' '); - allPlusNumberGroupings.put('\u2060', ' '); - allPlusNumberGroupings.put('.', '.'); - allPlusNumberGroupings.put('\uFF0E', '.'); - ALL_PLUS_NUMBER_GROUPING_SYMBOLS = Collections.unmodifiableMap(allPlusNumberGroupings); - } - // Pattern that makes it easy to distinguish whether a region has a single international dialing // prefix or not. If a region has a single international prefix (e.g. 011 in USA), it will be // represented as a string that contains a sequence of ASCII digits, and possibly a tilde, which @@ -243,16 +102,10 @@ public class PhoneNumberUtil { static final String VALID_PUNCTUATION = "-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F " + "\u00A0\u00AD\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E"; - private static final String DIGITS = "\\p{Nd}"; - // We accept alpha characters in phone numbers, ASCII only, upper and lower case. 
- private static final String VALID_ALPHA = - Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "") - + Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()) - .toLowerCase().replaceAll("[, \\[\\]]", ""); static final String PLUS_CHARS = "+\uFF0B"; static final Pattern PLUS_CHARS_PATTERN = Pattern.compile("[" + PLUS_CHARS + "]+"); private static final Pattern SEPARATOR_PATTERN = Pattern.compile("[" + VALID_PUNCTUATION + "]+"); - private static final Pattern CAPTURING_DIGIT_PATTERN = Pattern.compile("(" + DIGITS + ")"); + private static final Pattern CAPTURING_DIGIT_PATTERN = Pattern.compile("(" + Constants.DIGITS + ")"); // Regular expression of acceptable characters that may start a phone number for the purposes of // parsing. This allows us to strip away meaningless prefixes to phone numbers that may be @@ -260,7 +113,7 @@ public class PhoneNumberUtil { // does not contain alpha characters, although they may be used later in the number. It also does // not include other punctuation, as this will be stripped later during parsing and is of no // information value when parsing a number. - private static final String VALID_START_CHAR = "[" + PLUS_CHARS + DIGITS + "]"; + private static final String VALID_START_CHAR = "[" + PLUS_CHARS + Constants.DIGITS + "]"; private static final Pattern VALID_START_CHAR_PATTERN = Pattern.compile(VALID_START_CHAR); // Regular expression of characters typically used to start a second phone number for the purposes @@ -277,10 +130,6 @@ public class PhoneNumberUtil { private static final String UNWANTED_END_CHARS = "[[\\P{N}&&\\P{L}]&&[^#]]+$"; static final Pattern UNWANTED_END_CHAR_PATTERN = Pattern.compile(UNWANTED_END_CHARS); - // We use this pattern to check if the phone number has at least three letters in it - if so, then - // we treat it as a number where some phone-number digits are represented by letters. 
- private static final Pattern VALID_ALPHA_PHONE_PATTERN = Pattern.compile("(?:.*?[A-Za-z]){3}.*"); - // Regular expression of viable phone numbers. This is location independent. Checks we have at // least three leading digits, and only valid punctuation, alpha characters and // digits in the phone number. Does not include extension data. @@ -298,9 +147,9 @@ public class PhoneNumberUtil { // // Note VALID_PUNCTUATION starts with a -, so must be the first in the range. private static final String VALID_PHONE_NUMBER = - DIGITS + "{" + MIN_LENGTH_FOR_NSN + "}" + "|" - + "[" + PLUS_CHARS + "]*+(?:[" + VALID_PUNCTUATION + STAR_SIGN + "]*" + DIGITS + "){3,}[" - + VALID_PUNCTUATION + STAR_SIGN + VALID_ALPHA + DIGITS + "]*"; + Constants.DIGITS + "{" + MIN_LENGTH_FOR_NSN + "}" + "|" + + "[" + PLUS_CHARS + "]*+(?:[" + VALID_PUNCTUATION + STAR_SIGN + "]*" + Constants.DIGITS + "){3,}[" + + VALID_PUNCTUATION + STAR_SIGN + Constants.VALID_ALPHA + Constants.DIGITS + "]*"; // Default extension prefix to use when formatting. This will be put in front of any extension // component of the number, after the main national number is formatted. For example, if you wish @@ -314,33 +163,12 @@ public class PhoneNumberUtil { private static final String EXTN_PATTERNS_FOR_PARSING = createExtnPattern(true); static final String EXTN_PATTERNS_FOR_MATCHING = createExtnPattern(false); - // Regular expression of valid global-number-digits for the phone-context parameter, following the - // syntax defined in RFC3966. 
- private static final String RFC3966_VISUAL_SEPARATOR = "[\\-\\.\\(\\)]?"; - private static final String RFC3966_PHONE_DIGIT = - "(" + DIGITS + "|" + RFC3966_VISUAL_SEPARATOR + ")"; - private static final String RFC3966_GLOBAL_NUMBER_DIGITS = - "^\\" + PLUS_SIGN + RFC3966_PHONE_DIGIT + "*" + DIGITS + RFC3966_PHONE_DIGIT + "*$"; - static final Pattern RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN = - Pattern.compile(RFC3966_GLOBAL_NUMBER_DIGITS); - - // Regular expression of valid domainname for the phone-context parameter, following the syntax - // defined in RFC3966. - private static final String ALPHANUM = VALID_ALPHA + DIGITS; - private static final String RFC3966_DOMAINLABEL = - "[" + ALPHANUM + "]+((\\-)*[" + ALPHANUM + "])*"; - private static final String RFC3966_TOPLABEL = - "[" + VALID_ALPHA + "]+((\\-)*[" + ALPHANUM + "])*"; - private static final String RFC3966_DOMAINNAME = - "^(" + RFC3966_DOMAINLABEL + "\\.)*" + RFC3966_TOPLABEL + "\\.?$"; - static final Pattern RFC3966_DOMAINNAME_PATTERN = Pattern.compile(RFC3966_DOMAINNAME); - /** * Helper method for constructing regular expressions for parsing. Creates an expression that * captures up to maxLength digits. 
*/ private static String extnDigits(int maxLength) { - return "(" + DIGITS + "{1," + maxLength + "})"; + return "(" + Constants.DIGITS + "{1," + maxLength + "})"; } /** @@ -453,6 +281,8 @@ private static String createExtnPattern(boolean forParsing) { private static PhoneNumberUtil instance = null; + private final PhoneContextParser phoneContextParser; + public static final String REGION_CODE_FOR_NON_GEO_ENTITY = "001"; /** @@ -707,6 +537,7 @@ abstract boolean verify( // @VisibleForTesting PhoneNumberUtil(MetadataSource metadataSource, Map> countryCallingCodeToRegionCodeMap) { + this.phoneContextParser = new PhoneContextParser(countryCallingCodeToRegionCodeMap.keySet()); this.metadataSource = metadataSource; this.countryCallingCodeToRegionCodeMap = countryCallingCodeToRegionCodeMap; for (Map.Entry> entry : countryCallingCodeToRegionCodeMap.entrySet()) { @@ -802,11 +633,11 @@ static boolean isViablePhoneNumber(CharSequence number) { * normalized in place */ static StringBuilder normalize(StringBuilder number) { - Matcher m = VALID_ALPHA_PHONE_PATTERN.matcher(number); + Matcher m = Constants.VALID_ALPHA_PHONE_PATTERN.matcher(number); if (m.matches()) { - number.replace(0, number.length(), normalizeHelper(number, ALPHA_PHONE_MAPPINGS, true)); + number.replace(0, number.length(), normalizeHelper(number, Constants.ALPHA_PHONE_MAPPINGS, true)); } else { - number.replace(0, number.length(), normalizeDigitsOnly(number)); + number.replace(0, number.length(), PhoneNumberNormalizer.normalizeDigitsOnly(number)); } return number; } @@ -819,21 +650,7 @@ static StringBuilder normalize(StringBuilder number) { * @return the normalized string version of the phone number */ public static String normalizeDigitsOnly(CharSequence number) { - return normalizeDigits(number, false /* strip non-digits */).toString(); - } - - static StringBuilder normalizeDigits(CharSequence number, boolean keepNonDigits) { - StringBuilder normalizedDigits = new StringBuilder(number.length()); - for (int i = 
0; i < number.length(); i++) { - char c = number.charAt(i); - int digit = Character.digit(c, 10); - if (digit != -1) { - normalizedDigits.append(digit); - } else if (keepNonDigits) { - normalizedDigits.append(c); - } - } - return normalizedDigits; + return PhoneNumberNormalizer.normalizeDigitsOnly(number); } /** @@ -844,7 +661,7 @@ static StringBuilder normalizeDigits(CharSequence number, boolean keepNonDigits) * @return the normalized string version of the phone number */ public static String normalizeDiallableCharsOnly(CharSequence number) { - return normalizeHelper(number, DIALLABLE_CHAR_MAPPINGS, true /* remove non matches */); + return normalizeHelper(number, Constants.DIALLABLE_CHAR_MAPPINGS, true /* remove non matches */); } /** @@ -852,7 +669,7 @@ public static String normalizeDiallableCharsOnly(CharSequence number) { * existing formatting. */ public static String convertAlphaCharactersInNumber(CharSequence number) { - return normalizeHelper(number, ALPHA_PHONE_MAPPINGS, false); + return normalizeHelper(number, Constants.ALPHA_PHONE_MAPPINGS, false); } /** @@ -909,7 +726,7 @@ public int getLengthOfGeographicalAreaCode(PhoneNumber number) { // Note:this is our general assumption, but there are exceptions which are tracked in // COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES. if (!metadata.hasNationalPrefix() && !number.isItalianLeadingZero() - && !COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES.contains(countryCallingCode)) { + && !Constants.COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES.contains(countryCallingCode)) { return 0; } @@ -917,7 +734,7 @@ public int getLengthOfGeographicalAreaCode(PhoneNumber number) { // Note this is a rough heuristic; it doesn't cover Indonesia well, for example, where area // codes are present for some mobile phones but not for others. We have no better way of // representing this in the metadata at this point. 
- && GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES.contains(countryCallingCode)) { + && Constants.GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES.contains(countryCallingCode)) { return 0; } @@ -1011,8 +828,8 @@ public int getLengthOfNationalDestinationCode(PhoneNumber number) { * @return the mobile token, as a string, for the given country calling code */ public static String getCountryMobileToken(int countryCallingCode) { - if (MOBILE_TOKEN_MAPPINGS.containsKey(countryCallingCode)) { - return MOBILE_TOKEN_MAPPINGS.get(countryCallingCode); + if (Constants.MOBILE_TOKEN_MAPPINGS.containsKey(countryCallingCode)) { + return Constants.MOBILE_TOKEN_MAPPINGS.get(countryCallingCode); } return ""; } @@ -1254,7 +1071,7 @@ public boolean isNumberGeographical(PhoneNumber phoneNumber) { public boolean isNumberGeographical(PhoneNumberType phoneNumberType, int countryCallingCode) { return phoneNumberType == PhoneNumberType.FIXED_LINE || phoneNumberType == PhoneNumberType.FIXED_LINE_OR_MOBILE - || (GEO_MOBILE_COUNTRIES.contains(countryCallingCode) + || (Constants.GEO_MOBILE_COUNTRIES.contains(countryCallingCode) && phoneNumberType == PhoneNumberType.MOBILE); } @@ -1722,7 +1539,7 @@ public String formatInOriginalFormat(PhoneNumber number, String regionCallingFro } candidateNationalPrefixRule = candidateNationalPrefixRule.substring(0, indexOfFirstGroup); - candidateNationalPrefixRule = normalizeDigitsOnly(candidateNationalPrefixRule); + candidateNationalPrefixRule = PhoneNumberNormalizer.normalizeDigitsOnly(candidateNationalPrefixRule); if (candidateNationalPrefixRule.length() == 0) { // National prefix not used when formatting this number. formattedNumber = nationalFormat; @@ -1754,7 +1571,7 @@ public String formatInOriginalFormat(PhoneNumber number, String regionCallingFro // national prefix is assumed to be in digits-only form. 
private boolean rawInputContainsNationalPrefix(String rawInput, String nationalPrefix, String regionCode) { - String normalizedNationalNumber = normalizeDigitsOnly(rawInput); + String normalizedNationalNumber = PhoneNumberNormalizer.normalizeDigitsOnly(rawInput); if (normalizedNationalNumber.startsWith(nationalPrefix)) { try { // Some Japanese numbers (e.g. 00777123) might be mistaken to contain the national prefix @@ -1824,7 +1641,7 @@ public String formatOutOfCountryKeepingAlphaChars(PhoneNumber number, // the number in raw_input with the parsed number. // To do this, first we normalize punctuation. We retain number grouping symbols such as " " // only. - rawInput = normalizeHelper(rawInput, ALL_PLUS_NUMBER_GROUPING_SYMBOLS, true); + rawInput = normalizeHelper(rawInput, Constants.ALL_PLUS_NUMBER_GROUPING_SYMBOLS, true); // Now we trim everything before the first three digits in the parsed number. We choose three // because all valid alpha numbers have 3 digits at the start - if it does not, then we don't // trim anything at all. 
Similarly, if the national number was less than three digits, we don't @@ -1929,13 +1746,13 @@ private void prefixNumberWithCountryCallingCode(int countryCallingCode, StringBuilder formattedNumber) { switch (numberFormat) { case E164: - formattedNumber.insert(0, countryCallingCode).insert(0, PLUS_SIGN); + formattedNumber.insert(0, countryCallingCode).insert(0, Constants.PLUS_SIGN); return; case INTERNATIONAL: - formattedNumber.insert(0, " ").insert(0, countryCallingCode).insert(0, PLUS_SIGN); + formattedNumber.insert(0, " ").insert(0, countryCallingCode).insert(0, Constants.PLUS_SIGN); return; case RFC3966: - formattedNumber.insert(0, "-").insert(0, countryCallingCode).insert(0, PLUS_SIGN) + formattedNumber.insert(0, "-").insert(0, countryCallingCode).insert(0, Constants.PLUS_SIGN) .insert(0, RFC3966_PREFIX); return; case NATIONAL: @@ -2569,7 +2386,7 @@ public boolean isAlphaNumber(CharSequence number) { } StringBuilder strippedNumber = new StringBuilder(number); maybeStripExtension(strippedNumber); - return VALID_ALPHA_PHONE_PATTERN.matcher(strippedNumber).matches(); + return Constants.VALID_ALPHA_PHONE_PATTERN.matcher(strippedNumber).matches(); } /** @@ -2848,7 +2665,7 @@ int extractCountryCode(StringBuilder fullNumber, StringBuilder nationalNumber) { } int potentialCountryCode; int numberLength = fullNumber.length(); - for (int i = 1; i <= MAX_LENGTH_COUNTRY_CODE && i <= numberLength; i++) { + for (int i = 1; i <= Constants.MAX_LENGTH_COUNTRY_CODE && i <= numberLength; i++) { potentialCountryCode = Integer.parseInt(fullNumber.substring(0, i)); if (countryCallingCodeToRegionCodeMap.containsKey(potentialCountryCode)) { nationalNumber.append(fullNumber.substring(i)); @@ -2970,7 +2787,7 @@ private boolean parsePrefixAsIdd(Pattern iddPattern, StringBuilder number) { // cannot begin with 0. 
Matcher digitMatcher = CAPTURING_DIGIT_PATTERN.matcher(number.substring(matchEnd)); if (digitMatcher.find()) { - String normalizedGroup = normalizeDigitsOnly(digitMatcher.group(1)); + String normalizedGroup = PhoneNumberNormalizer.normalizeDigitsOnly(digitMatcher.group(1)); if (normalizedGroup.equals("0")) { return false; } @@ -3385,71 +3202,24 @@ private void parseHelper(CharSequence numberToParse, String defaultRegion, phoneNumber.setNationalNumber(Long.parseLong(normalizedNationalNumber.toString())); } - /** - * Extracts the value of the phone-context parameter of numberToExtractFrom where the index of - * ";phone-context=" is the parameter indexOfPhoneContext, following the syntax defined in - * RFC3966. - * - * @return the extracted string (possibly empty), or null if no phone-context parameter is found. - */ - private String extractPhoneContext(String numberToExtractFrom, int indexOfPhoneContext) { - // If no phone-context parameter is present - if (indexOfPhoneContext == -1) { - return null; - } - - int phoneContextStart = indexOfPhoneContext + RFC3966_PHONE_CONTEXT.length(); - // If phone-context parameter is empty - if (phoneContextStart >= numberToExtractFrom.length()) { - return ""; - } - - int phoneContextEnd = numberToExtractFrom.indexOf(';', phoneContextStart); - // If phone-context is not the last parameter - if (phoneContextEnd != -1) { - return numberToExtractFrom.substring(phoneContextStart, phoneContextEnd); - } else { - return numberToExtractFrom.substring(phoneContextStart); - } - } - - /** - * Returns whether the value of phoneContext follows the syntax defined in RFC3966. 
- */ - private boolean isPhoneContextValid(String phoneContext) { - if (phoneContext == null) { - return true; - } - if (phoneContext.length() == 0) { - return false; - } - - // Does phone-context value match pattern of global-number-digits or domainname - return RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN.matcher(phoneContext).matches() - || RFC3966_DOMAINNAME_PATTERN.matcher(phoneContext).matches(); - } - /** * Converts numberToParse to a form that we can parse and write it to nationalNumber if it is * written in RFC3966; otherwise extract a possible number out of it and write to nationalNumber. */ private void buildNationalNumberForParsing(String numberToParse, StringBuilder nationalNumber) throws NumberParseException { - int indexOfPhoneContext = numberToParse.indexOf(RFC3966_PHONE_CONTEXT); + int indexOfPhoneContext = numberToParse.indexOf(Constants.RFC3966_PHONE_CONTEXT); + + PhoneContext phoneContext = phoneContextParser.parse(numberToParse); - String phoneContext = extractPhoneContext(numberToParse, indexOfPhoneContext); - if (!isPhoneContextValid(phoneContext)) { - throw new NumberParseException(NumberParseException.ErrorType.NOT_A_NUMBER, - "The phone-context value is invalid."); - } if (phoneContext != null) { // If the phone context contains a phone number prefix, we need to capture it, whereas domains // will be ignored. - if (phoneContext.charAt(0) == PLUS_SIGN) { + if (phoneContext.getRawContext().charAt(0) == Constants.PLUS_SIGN) { // Additional parameters might follow the phone context. If so, we will remove them here // because the parameters after phone context are not important for parsing the phone // number. - nationalNumber.append(phoneContext); + nationalNumber.append(phoneContext.getRawContext()); } // Now append everything between the "tel:" prefix and the phone-context. 
This should include diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/ShortNumberInfo.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/ShortNumberInfo.java index 7ce2972bac..ffc6b49b2e 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/ShortNumberInfo.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/ShortNumberInfo.java @@ -458,7 +458,7 @@ private boolean matchesEmergencyNumberHelper(CharSequence number, String regionC return false; } - String normalizedNumber = PhoneNumberUtil.normalizeDigitsOnly(possibleNumber); + String normalizedNumber = PhoneNumberNormalizer.normalizeDigitsOnly(possibleNumber); boolean allowPrefixMatchForRegion = allowPrefixMatch && !REGIONS_WHERE_EMERGENCY_NUMBERS_MUST_BE_EXACT.contains(regionCode); return matcherApi.matchNationalNumber(normalizedNumber, metadata.getEmergency(), diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneContextParserTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneContextParserTest.java new file mode 100644 index 0000000000..ae34ad936d --- /dev/null +++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneContextParserTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2025 The Libphonenumber Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.google.i18n.phonenumbers;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import org.junit.function.ThrowingRunnable;
+import com.google.i18n.phonenumbers.PhoneContextParser.PhoneContext;
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for {@link PhoneContextParser}, which parses the RFC3966 phone-context parameter.
+ */
+public class PhoneContextParserTest extends TestCase {
+
+  private static final Set<Integer> countryCallingCodeSet;
+  static {
+    Set<Integer> tempSet = new HashSet<>();
+    tempSet.add(64);
+    countryCallingCodeSet = Collections.unmodifiableSet(tempSet);
+  }
+
+  /**
+   * Parser under test; 64 is the only country calling code it is constructed with.
+   */
+  protected final PhoneContextParser phoneContextParser =
+      new PhoneContextParser(countryCallingCodeSet);
+
+  public void testParseShouldWorkAsExpected() throws NumberParseException {
+    PhoneContext actual;
+    // "+64" context on a "tel:" URI: raw context "+64", country code 64.
+    actual = phoneContextParser.parse("tel:03-331-6005;phone-context=+64");
+    assertEquals("+64", actual.getRawContext());
+    assertEquals(Integer.valueOf(64), actual.getCountryCode());
+    // A domain-name context is returned raw and carries no country code.
+    actual = phoneContextParser.parse("tel:03-331-6005;phone-context=example.com");
+    assertEquals("example.com", actual.getRawContext());
+    assertNull(actual.getCountryCode());
+    // No "tel:" prefix, and a trailing ';' after the context value.
+    actual = phoneContextParser.parse("03-331-6005;phone-context=+64;");
+    assertEquals("+64", actual.getRawContext());
+    assertEquals(Integer.valueOf(64), actual.getCountryCode());
+    // Number already written with a leading "+64"; the context is still parsed.
+    actual = phoneContextParser.parse("+64-3-331-6005;phone-context=+64;");
+    assertEquals("+64", actual.getRawContext());
+    assertEquals(Integer.valueOf(64), actual.getCountryCode());
+    // phone-context sits between other parameters.
+    actual = phoneContextParser.parse("tel:03-331-6005;foo=bar;phone-context=+64;baz=qux");
+    assertEquals("+64", actual.getRawContext());
+    assertEquals(Integer.valueOf(64), actual.getCountryCode());
+    // No phone-context parameter at all: parse returns null.
+    actual = phoneContextParser.parse("tel:03-331-6005");
+    assertNull(actual);
+    // "+0" is kept as the raw context but yields no country code.
+    actual = phoneContextParser.parse("tel:03-331-6005;phone-context=+0");
+    assertEquals("+0", actual.getRawContext());
+    assertNull(actual.getCountryCode());
+    // "+1234" is kept as the raw context but yields no country code.
+    actual = phoneContextParser.parse("tel:03-331-6005;phone-context=+1234");
+    assertEquals("+1234", actual.getRawContext());
+    assertNull(actual.getCountryCode());
+  }
+
+  public void testParseShouldFailForInvalidPhoneContext() throws NumberParseException {
+    assertThrows(
+        NumberParseException.class,
+        new ThrowingRunnable() {
+          @Override
+          public void run() throws NumberParseException {
+            phoneContextParser.parse("tel:03-331-6005;phone-context="); // empty value
+          }
+        });
+    assertThrows(
+        NumberParseException.class,
+        new ThrowingRunnable() {
+          @Override
+          public void run() throws NumberParseException {
+            phoneContextParser.parse("tel:03-331-6005;phone-context=;"); // empty value before ';'
+          }
+        });
+    assertThrows(
+        NumberParseException.class,
+        new ThrowingRunnable() {
+          @Override
+          public void run() throws NumberParseException {
+            phoneContextParser.parse("tel:03-331-6005;phone-context=0"); // digit without '+'
+          }
+        });
+  }
+
+}
diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
index 2b9345c4c7..f3bdf445d3 100644
--- a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
+++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
@@ -481,7 +481,7 @@ public void testNormaliseStripAlphaCharacters() {
     String expectedOutput = "03456234";
     assertEquals("Conversion did not correctly remove alpha character",
                  expectedOutput,
-                 PhoneNumberUtil.normalizeDigitsOnly(inputNumber));
+                 PhoneNumberNormalizer.normalizeDigitsOnly(inputNumber));
   }
 
   public void testNormaliseStripNonDiallableCharacters() {