-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransform.js
116 lines (98 loc) · 2.83 KB
/
transform.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import fs from 'fs/promises'
import words from './data/words.json' assert { type: 'json' }
// Type labels that disqualify a dictionary entry. `isNotExcludedType` splits an
// entry's `tipo` field on '/' and rejects the entry when any resulting label
// appears in this list.
// NOTE(review): the labels look like the Spanish grammatical/lexical categories
// used by ./data/words.json — confirm against the data set before editing.
const excludedTypes = [
'abreviatura',
'adverbio',
'anticuado',
'artículo',
'aumentativodiminutivo',
'compuesto',
'conjugado',
'conjunción',
'contracción',
'expresión',
'frase',
'interjección',
'locución',
'onomatopeya',
'preposición',
'pronombre',
'propio',
'topónimo',
]
/**
 * Builds the word data set and writes it to the `output/` directory.
 *
 * Steps:
 *  1. Keep only the entries of `words` that pass every filter
 *     (minimum length, single word, not plural, not an excluded type).
 *  2. Map each kept word (accents removed) to its last syllable (accents
 *     removed) and track the longest original word.
 *  3. Collect the last syllables that are also the beginning of at least one
 *     kept word.
 *  4. Print a summary and write `output/matching-silabes.json` and
 *     `output/words.json`.
 *
 * @returns {Promise<undefined[]>} Resolves once both files have been written.
 */
async function run() {
  const output = {
    words: {},
    largestWordLength: 0,
    largestWord: '',
  }
  const lastSilabesWithMatchingWord = new Set()

  for (const word of words) {
    const isEligible =
      hasMinCharsLength(word) && isSingleWord(word) && isNotPlural(word) && isNotExcludedType(word)
    if (!isEligible) {
      continue
    }

    const lastSilabe = removeAccents(getLastSilabe(word))
    lastSilabesWithMatchingWord.add(lastSilabe)
    output.words[removeAccents(word.palabra)] = lastSilabe

    if (word.palabra.length > output.largestWordLength) {
      output.largestWordLength = word.palabra.length
      output.largestWord = word.palabra
    }
  }

  const wordList = Object.keys(output.words)
  // `some` stops at the first word that starts with the syllable, instead of
  // scanning the whole list for every syllable. Order follows Set insertion order.
  const matches = [...lastSilabesWithMatchingWord].filter((silabe) =>
    wordList.some((candidate) => candidate.startsWith(silabe)),
  )

  printInfo(output, lastSilabesWithMatchingWord, matches)

  // writeFile does not create missing parent directories; make sure the target
  // directory exists so a fresh checkout does not fail with ENOENT.
  await fs.mkdir('output', { recursive: true })

  return Promise.all([
    fs.writeFile('output/matching-silabes.json', JSON.stringify(matches), 'utf-8'),
    fs.writeFile('output/words.json', JSON.stringify(output), 'utf-8'),
  ])
}
/**
 * Extracts the final syllable of a dictionary entry.
 *
 * The entry's `sílabas` string is split on single spaces and the last piece is
 * taken. The first `*` found in that piece is removed, and a leading "rr" is
 * reduced to "r".
 * NOTE(review): `*` presumably marks the stressed syllable in the data — confirm.
 *
 * @param {object} word - Entry with a space-separated `sílabas` string.
 * @returns {string} The cleaned last syllable.
 */
function getLastSilabe(word) {
  const syllables = word['sílabas'].split(' ')
  const finalSyllable = syllables[syllables.length - 1]
  const withoutMarker = finalSyllable.replace('*', '')
  return withoutMarker.replace(/^rr/, 'r')
}
/**
 * Tells whether the entry's `palabra` has at least four characters
 * (as counted by `String.prototype.length`).
 *
 * @param {object} word - Entry with a `palabra` string.
 * @returns {boolean} `true` when `palabra` is longer than three characters.
 */
function hasMinCharsLength(word) {
  const minLength = 4
  const { length } = word.palabra
  return length >= minLength
}
/**
 * Tells whether the entry's `palabra` is free of space characters.
 *
 * @param {object} word - Entry with a `palabra` string.
 * @returns {boolean} `false` when `palabra` contains at least one ' '.
 */
function isSingleWord(word) {
  const containsSpace = word.palabra.includes(' ')
  if (containsSpace) {
    return false
  }
  return true
}
/**
 * Tells whether the entry is anything other than plural.
 *
 * @param {object} word - Entry whose `número` field is compared to 'plural'.
 * @returns {boolean} `false` only when `número` is exactly the string 'plural'.
 */
function isNotPlural(word) {
  const grammaticalNumber = word['número']
  if (grammaticalNumber === 'plural') {
    return false
  }
  return true
}
/**
 * Tells whether none of the entry's types is in `excludedTypes`.
 *
 * The entry's `tipo` string is split on '/' and every resulting label is
 * looked up in the module-level `excludedTypes` list.
 *
 * @param {object} word - Entry with a '/'-separated `tipo` string.
 * @returns {boolean} `true` when no label is excluded.
 */
function isNotExcludedType(word) {
  const types = word.tipo.split('/')
  return types.every((type) => !excludedTypes.includes(type))
}
/**
 * Strips diacritics from `str` while keeping the letters ñ and ü.
 *
 * The string is decomposed (NFD) so every accent becomes a separate mark.
 * "n + combining tilde" and "u + combining diaeresis" (either case) are
 * recomposed and kept; every other character with the Unicode `Diacritic`
 * property is removed.
 *
 * Working on the decomposed form, rather than swapping ñ/ü for text
 * placeholders, means that:
 *  - input that happens to contain the placeholder text is left untouched;
 *  - ñ/ü are preserved whether the input is precomposed or decomposed;
 *  - uppercase Ñ/Ü are preserved as well.
 *
 * @param {string} str - Text to clean.
 * @returns {string} `str` without diacritics other than the tilde of ñ/Ñ and
 *   the diaeresis of ü/Ü.
 */
function removeAccents(str) {
  return str
    .normalize('NFD')
    .replace(/([nN]\u0303|[uU]\u0308)|\p{Diacritic}/gu, (match, preserved) =>
      // The capture group is only defined for the base+mark pairs we keep.
      preserved === undefined ? '' : preserved.normalize('NFC'),
    )
}
/**
 * Logs a short summary of the generated data set to the console.
 *
 * @param {object} output - Result object; reads `words`, `largestWord` and
 *   `largestWordLength`.
 * @param {Set<string>} lastSilabesWithMatchingWord - All collected endings;
 *   only its `size` is reported.
 * @param {string[]} matches - Endings that matched a word; only its `length`
 *   is reported.
 * @returns {void}
 */
function printInfo(output, lastSilabesWithMatchingWord, matches) {
  const { words: keptWords, largestWord, largestWordLength } = output
  const divider = '==============================================='

  console.log(divider)
  console.log('Total words:', Object.keys(keptWords).length)
  console.log('Total endings:', lastSilabesWithMatchingWord.size)
  console.log('Total endings with matching word:', matches.length)
  console.log(`Largest word: ${largestWord} (${largestWordLength} letters)`)
  console.log(`${divider}\n`)
}
// Script entry point: generate the output files, report success, and log any
// failure to stderr instead of letting the rejection go unhandled.
try {
  await run()
  console.log('🎉 Output generated!')
} catch (error) {
  console.error(error)
}