diff --git a/.eslintrc.json b/.eslintrc.json index ee74712..2797b0d 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -24,7 +24,7 @@ "import/no-cycle": "error", "import/no-duplicates": ["error", { "prefer-inline": true }], "import/no-extraneous-dependencies": ["error"], - "import/no-internal-modules": ["error", { "allow": ["*/lib/*"] }], + "import/no-internal-modules": ["error", { "allow": ["*/lib/*", "world_countries_lists/data/**/*.json"] }], "import/no-named-as-default": "off", "import/no-named-as-default-member": "off", "quotes": ["error", "single", { "avoidEscape": true }] diff --git a/package-lock.json b/package-lock.json index 62f740f..b06a80c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,14 +12,17 @@ "@observablehq/framework": "^1.9.0", "d3-dsv": "^3.0.1", "d3-time-format": "^4.1.0", + "diacritics": "^1.3.0", "effect": "^3.3.5", "i18n-iso-countries": "^7.11.2", - "iso-639-1": "^3.1.2" + "iso-639-1": "^3.1.2", + "world_countries_lists": "^2.9.0" }, "devDependencies": { "@dotenvx/dotenvx": "^1.5.0", "@trivago/prettier-plugin-sort-imports": "^4.3.0", "@tsconfig/node20": "^20.1.4", + "@types/diacritics": "^1.3.3", "@types/node": "^20.14.9", "@typescript-eslint/eslint-plugin": "^7.14.1", "@typescript-eslint/parser": "^7.14.1", @@ -2018,6 +2021,12 @@ "integrity": "sha512-sqgsT69YFeLWf5NtJ4Xq/xAF8p4ZQHlmGW74Nu2tD4+g5fAsposc4ZfaaPixVu4y01BEiDCWLRDCvDM5JOsRxg==", "dev": true }, + "node_modules/@types/diacritics": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@types/diacritics/-/diacritics-1.3.3.tgz", + "integrity": "sha512-wt0tBItmBsOUVZ8+MCrkBMoVfH/EUZeTXwYSekVVYilZlGDYssREUR+sX72mHvl2IrbdCKgpYARXKh3awD2how==", + "dev": true + }, "node_modules/@types/estree": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", @@ -8870,6 +8879,11 @@ "node": ">=0.10.0" } }, + "node_modules/world_countries_lists": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/world_countries_lists/-/world_countries_lists-2.9.0.tgz", + "integrity": "sha512-VAS+QcV3khDD120VhFOYx6ntSKHWdigp0+MSwj2wxdAnHpbPnKhDWkLOw8A1zxkMnFmpieG9pp30FW4etwHPWw==" + }, "node_modules/wrap-ansi": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.0.tgz", diff --git a/package.json b/package.json index fbf9cf2..83b3fff 100644 --- a/package.json +++ b/package.json @@ -9,14 +9,17 @@ "@observablehq/framework": "^1.9.0", "d3-dsv": "^3.0.1", "d3-time-format": "^4.1.0", + "diacritics": "^1.3.0", "effect": "^3.3.5", "i18n-iso-countries": "^7.11.2", - "iso-639-1": "^3.1.2" + "iso-639-1": "^3.1.2", + "world_countries_lists": "^2.9.0" }, "devDependencies": { "@dotenvx/dotenvx": "^1.5.0", "@trivago/prettier-plugin-sort-imports": "^4.3.0", "@tsconfig/node20": "^20.1.4", + "@types/diacritics": "^1.3.3", "@types/node": "^20.14.9", "@typescript-eslint/eslint-plugin": "^7.14.1", "@typescript-eslint/parser": "^7.14.1", diff --git a/src/lib/Iso3166.ts b/src/lib/Iso3166.ts index de36d08..4298463 100644 --- a/src/lib/Iso3166.ts +++ b/src/lib/Iso3166.ts @@ -1,6 +1,8 @@ import { Schema } from '@effect/schema' +import diacritics from 'diacritics' import { Array, Option, type Predicate, String, flow } from 'effect' import iso3166 from 'i18n-iso-countries' +import subDivisions from 'world_countries_lists/data/subdivisions/subdivisions.json' export type Alpha2Code = iso3166.Alpha2Code @@ -14,6 +16,18 @@ export const guessCountry: (location: string) => Option.Option = flo String.replaceAll(/\((.+?)\)/g, ', $1'), String.replaceAll(/( and | - )/gi, ', '), location => Array.prepend(Array.map(location.split(',').reverse(), String.trim), location), - Array.findFirst(Option.liftNullable(location => iso3166.getAlpha2Code(location, 'en'))), + Array.findFirst(location => + Option.fromNullable(iso3166.getAlpha2Code(location, 'en')).pipe( + Option.orElse(() => + Option.map( + Array.findFirst( + subDivisions, + subDivision => diacritics.remove(location).toLowerCase() === subDivision.name.toLowerCase(), + ), + subDivision => subDivision.country, + ), + ), + ), + ), Option.filter(isAlpha2Code), ) diff --git a/test/Iso3166.test.ts b/test/Iso3166.test.ts index b0d8cef..002ffe8 100644 --- a/test/Iso3166.test.ts +++ b/test/Iso3166.test.ts @@ -5,18 +5,23 @@ import * as _ from '../src/lib/Iso3166.js' describe('guessCountry', () => { test.for([ ['Algiers ( Algeria)', 'DZ'], + ['Baylor College of Medicine, Houston, Texas', 'US'], ['Chicago, IL, USA', 'US'], ['Colorado - United States', 'US'], ['Czech Republic', 'CZ'], ['Czechia', 'CZ'], ['GHANA', 'GH'], + ['London, Ontario', 'CA'], ['London, UK', 'GB'], ['London, United Kingdom', 'GB'], + ['Québec', 'CA'], ['Rio de Janeiro, Brazil.', 'BR'], + ['Sunnyvale, California', 'US'], ['UK', 'GB'], ['U.K.', 'GB'], ['United Kingdom', 'GB'], ['united states', 'US'], + ['Washington, DC', 'US'], ])('guesses %s', ([input, expected]) => { const actual = _.guessCountry(input) @@ -24,18 +29,13 @@ describe('guessCountry', () => { }) test.for([ - 'Baylor College of Medicine, Houston, Texas', 'Beijing University of Technology, Beijing 100124, PR China', 'Fayetteville GA (near Atlanta)', 'London', - 'London, Ontario', 'Mars', 'New York City and Los Angeles', - 'Québec', 'Southeast Asia', - 'Sunnyvale, California', 'The UK', - 'Washington, DC', ])("doesn't guess %s", input => { const actual = _.guessCountry(input) diff --git a/tsconfig.json b/tsconfig.json index 923b79b..7410bb5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,7 +1,8 @@ { "extends": "@tsconfig/node20/tsconfig.json", "compilerOptions": { - "exactOptionalPropertyTypes": true + "exactOptionalPropertyTypes": true, + "resolveJsonModule": true }, "include": ["src", "test", "*.ts"] }