Skip to content

Commit

Permalink
fix(performance): #201 segment text before call to hunspell and cache…
Browse files Browse the repository at this point in the history
… results
  • Loading branch information
rainer-exxcellent committed Feb 7, 2025
1 parent 1c27551 commit 5ed7383
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 12 deletions.
52 changes: 42 additions & 10 deletions lib/informativeTests/informativeTest_6_3_8.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { execFile } from 'node:child_process'
import bcp47 from 'bcp47'

const ajv = new Ajv()
// Cache spell-check results per word to improve performance
const spellCheckedWords2Result = new Map()

const inputSchema = /** @type {const} */ ({
additionalProperties: true,
Expand Down Expand Up @@ -241,10 +243,15 @@ export default async function informativeTest_6_3_8(
}

const lang = bcp47.parse(doc.document.lang)
if (!lang?.langtag.language.language) return ctx
if (!lang?.langtag.language.language) {
return ctx
}
const dictionary = `${lang.langtag.language.language}${
typeof lang.langtag.region === 'string' ? `_${lang.langtag.region}` : ''
}`
// @ts-ignore
const segmenter = new Intl.Segmenter([dictionary], { granularity: 'word' })
const urlPattern = /(https?|ftp):\/\/[^\s/$.?#].[^\s]*/i

for (const path of [
'/document/acknowledgments[]/names[]',
Expand Down Expand Up @@ -357,17 +364,42 @@ export default async function informativeTest_6_3_8(
*/
async function checkField(instancePath, text) {
if (typeof text !== 'string') return
const result = await spellCheckString({
text,
dictionary,
hunspell: params.hunspell,
})
if (!result.ok) {

// URLs are not segmented properly, so strip the URL before segmentation
// (urlPattern has no `g` flag, so only the first match is removed)
const textWithOutUrl = text.replace(urlPattern, '')

const segmentedText = segmenter.segment(textWithOutUrl)
const segments = [...segmentedText]
.filter((s) => s.isWordLike)
.map((s) => s.segment)

const checkResults = []
for (const segment of segments) {
let spellCheckResult = spellCheckedWords2Result.get(segment)

if (!spellCheckResult) {
spellCheckResult = await spellCheckString({
// @ts-ignore
text: segment,
dictionary: dictionary,
hunspell: params.hunspell,
})
spellCheckedWords2Result.set(segment, spellCheckResult)
}
if (!spellCheckResult.ok) {
checkResults.push(spellCheckResult)
}
}

if (checkResults.length > 0) {
const words = checkResults.flatMap((result) =>
result.mistakes.map((/** @type {{ word: any; }} */ m) => m.word)
)
ctx.infos.push({
instancePath,
message: `there are spelling mistakes in: ${result.mistakes
.map((m) => m.word)
.join(', ')}`,
message: `there are spelling mistakes in: ${[...new Set(words)].join(
', '
)}`,
})
}
}
Expand Down
1 change: 1 addition & 0 deletions scripts/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { fileURLToPath } from 'url'

spawn('mocha', ['tests', ...process.argv.slice(2)], {
stdio: 'inherit',
shell: true,
env: {
...process.env,
DICPATH: fileURLToPath(new URL('../tests/dicts', import.meta.url)),
Expand Down
38 changes: 36 additions & 2 deletions tests/informativeTest_6_3_8.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,47 @@ const failingExamples = await readExampleFiles(
new URL('informativeTest_6_3_8/failing', import.meta.url)
)

// Words must match the test data in csaf/csaf_2.0/test/validator/data/informative/oasis_csaf_tc-csaf_2_0-2021-6-3-08-11.json
// and csaf/csaf_2.0/test/validator/data/informative/oasis_csaf_tc-csaf_2_0-2021-6-3-08-01.json

/**
 * Canned answers for the hunspell mock, keyed by the single word the mock
 * receives as `input`. Each value is the raw line the mock appends after its
 * 'Hunspell vMOCK' banner.
 *
 * NOTE(review): presumably '*' marks a word as accepted and a line starting
 * with '#' marks it as misspelled - confirm against hunspell's pipe-mode
 * output format.
 *
 * Every key is listed exactly once: a Map keeps only one entry per key, so
 * repeating a word (e.g. '1') has no effect.
 *
 * @type {Map<string, string>}
 */
const hunspellMap = new Map([
['Security', '*'],
['researchers', '*'],
['found', '*'],
['multiple', '*'],
['vulnerabilities', '*'],
['in', '*'],
['XYZ', '*'],
// the one deliberately misspelled word in the fixtures
['Secruity', '# error'],
['OASIS', '*'],
['CSAF', '*'],
['TC', '*'],
['Informative', '*'],
['test', '*'],
['Spell', '*'],
['check', '*'],
['valid', '*'],
['example', '*'],
['failing', '*'],
['1', '*'],
['Initial', '*'],
['version', '*'],
])

describe('Informative test 6.3.8', function () {
describe('failing examples', function () {
for (const [title, failingExample] of failingExamples) {
it(title, async function () {
const result = await informativeTest_6_3_8(failingExample, {
async hunspell() {
return 'Hunspell vMOCK\n\n# wrongword 1'
async hunspell({ dictionary, input }) {
const answer = hunspellMap.get(input)
if (answer) {
return 'Hunspell vMOCK\n\n' + answer
} else {
throw new Error('Hunspell vMOCK. Unknoww word ' + input)
}
},
})

Expand Down

0 comments on commit 5ed7383

Please sign in to comment.