import assert from 'node:assert';
import { describe, it } from 'node:test';

import { slugify } from '../slugify.js';

/**
 * Specification for `slugify`: which characters pass through untouched, which are
 * normalised (case, spaces, diacritics, "ø"), and how everything else is rejected.
 */
describe('slugify', () => {
  it('should pass through output alphabet unchanged', () => {
    const outputAlphabet = 'abcdefghijklmnopqrstuvwxyz0123456789_.-';
    assert.equal(slugify(outputAlphabet), outputAlphabet);
  });

  it('should lowercase uppercase ASCII characters', () => {
    assert.equal(slugify('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), 'abcdefghijklmnopqrstuvwxyz');
  });

  it('should replace spaces with hyphens', () => {
    assert.equal(slugify('Upper North Island'), 'upper-north-island');
  });

  it('should remove diacritics', () => {
    // Each expected base letter paired with the accented variants that must collapse to it.
    const variantsByBaseLetter: ReadonlyArray<readonly [string, readonly string[]]> = [
      ['a', ['á', 'Á', 'ä', 'Ä', 'ā', 'Ā']],
      ['e', ['é', 'É', 'ē', 'Ē']],
      ['i', ['ì', 'Ì', 'ī', 'Ī']],
      ['o', ['ó', 'Ó', 'ô', 'Ô', 'ö', 'Ö', 'ō', 'Ō']],
      ['u', ['ü', 'Ü', 'ū', 'Ū']],
    ];

    for (const [baseLetter, variants] of variantsByBaseLetter) {
      for (const variant of variants) {
        assert.equal(slugify(variant), baseLetter);
      }
    }
  });

  it('should convert "ø" (U+00F8) and "Ø" (U+00D8) to "o"', () => {
    for (const strokedO of ['ø', 'Ø']) {
      assert.equal(slugify(strokedO), 'o');
    }
  });

  it('should handle decomposed characters', () => {
    // "A" followed by U+0304 COMBINING MACRON, i.e. "Ā" in already-decomposed form.
    assert.equal(slugify('\u0041\u0304'), 'a');
  });

  it('should treat any unhandled characters as an error', () => {
    const slugifyUnsupportedInput = (): void => {
      slugify('“a\\b//c—;\n”');
    };
    const expectedError = {
      name: 'Error',
      message: 'Unhandled characters: "\\n", "/", ";", "\\", "—", "“", "”"',
      cause: { characters: ['\n', '/', ';', '\\', '—', '“', '”'] },
    };

    assert.throws(slugifyUnsupportedInput, expectedError);
  });
});
/**
 * Converts a human-readable string into a slug consisting solely of the characters
 * `a`-`z`, `0`-`9`, `_`, `.` and `-`.
 *
 * Combining diacritical marks are stripped, "ø"/"Ø" become "o", every space becomes a hyphen and the
 * result is lowercased. Any character still outside the output alphabet is rejected instead of being
 * silently dropped.
 *
 * Relies on `String.prototype.replaceAll` and the `cause` option of `Error`, so the project has to be
 * compiled with an ES2022 `lib`/`target`.
 *
 * @param input Human-readable string
 * @returns String slug. See src/utils/__test__/slugify.test.ts for examples.
 * @throws Error when unhandled characters remain. The message lists each distinct offending character
 *   (sorted, quoted); the same characters are available unformatted as `cause.characters`.
 */
export function slugify(input: string): string {
  const result = removeDiacritics(input).replaceAll('ø', 'o').replaceAll('Ø', 'O').replaceAll(' ', '-').toLowerCase();

  // The `u` flag makes the negated class consume whole code points, so a character outside the Basic
  // Multilingual Plane (e.g. an emoji) is reported once and intact rather than as two lone surrogates.
  const unhandledCharacters = result.match(/[^a-z0-9_.-]/gu);
  if (unhandledCharacters === null) {
    return result;
  }

  // Default sort (UTF-16 code unit order) keeps the report deterministic.
  const sortedUniqueCharacters = [...new Set(unhandledCharacters)].sort();
  const formattedCharacters = sortedUniqueCharacters.map((character) =>
    // JSON.stringify makes control characters visible (a newline prints as "\n") but also doubles a
    // literal backslash; collapse that doubling so a backslash is shown as typed.
    JSON.stringify(character).replaceAll('\\\\', '\\'),
  );

  throw new Error(`Unhandled characters: ${formattedCharacters.join(', ')}`, {
    cause: { characters: sortedUniqueCharacters },
  });
}

/**
 * Normalization form decomposition (NFD) splits characters like "ā" into their
 * [combining diacritical mark](https://www.unicode.org/charts/PDF/U0300.pdf) and the character which is being modified
 * by the diacritic. This way we can remove the macron from "ā", the accent from "é", and the like.
 *
 * Only marks in the U+0300-U+036F block are removed; the base characters are returned in decomposed form.
 */
function removeDiacritics(input: string): string {
  const combiningDiacriticalMarks = /[\u0300-\u036F]/g;
  return input.normalize('NFD').replaceAll(combiningDiacriticalMarks, '');
}