Skip to content

Commit

Permalink
feat(perfromance): #201 add error handling for hunspell, spellcheck a…
Browse files Browse the repository at this point in the history
…lso URLs
  • Loading branch information
rainer-exxcellent committed Feb 14, 2025
1 parent 2ea9269 commit e34f2f0
Showing 1 changed file with 43 additions and 20 deletions.
63 changes: 43 additions & 20 deletions lib/informativeTests/informativeTest_6_3_8.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const ajv = new Ajv()
// cache spell-check results to improve performance
const spellCheckedWords2Result = new Map()
/**
* @type {import("child_process").ChildProcessWithoutNullStreams}
* @type {import("child_process").ChildProcessWithoutNullStreams | null }
*/
let hunspellSpawn
/**
Expand Down Expand Up @@ -263,7 +263,7 @@ export default async function informativeTest_6_3_8(
const segmenter = new Intl.Segmenter(lang.langtag.language, {
granularity: 'word',
})
const urlPattern = /(https?|ftp):\/\/[^\s/$.?#].[^\s]*/i
const urlPattern = /(https?|ftp):\/\/[^\s/$.?#].[^\s]*/gi

for (const path of [
'/document/acknowledgments[]/names[]',
Expand Down Expand Up @@ -370,20 +370,40 @@ export default async function informativeTest_6_3_8(
}
}

/**
 * Split a text into the tokens that are handed to the spell check.
 *
 * URLs are not properly segmented by the word segmenter, so every match of
 * `urlPattern` is cut out of the text before segmentation and appended
 * afterwards as one token per URL.
 *
 * @param {string} [text] text to tokenize
 * @returns {string[]} the word-like segments of the URL-free text, followed
 *   by the matched URLs; an empty array when `text` is missing or empty
 */
function segmentString(text) {
  if (!text) {
    return []
  }

  // collect the URLs first, then strip them from the text that gets segmented
  const urls = Array.from(text.matchAll(urlPattern), (urlMatch) => urlMatch[0])
  const urlFreeText = text.replace(urlPattern, '')

  const words = []
  for (const part of segmenter.segment(urlFreeText)) {
    if (part.isWordLike) {
      words.push(part.segment)
    }
  }

  return [...words, ...urls]
}

/**
* @param {string} instancePath
* @param {string} [text]
*/
async function checkField(instancePath, text) {
if (typeof text !== 'string') return

// URLs are not properly segmented. Remove them before segmentation
const textWithOutUrl = text.replace(urlPattern, '')
if (typeof text !== 'string') {
return
}

const segmentedText = segmenter.segment(textWithOutUrl)
const segments = [...segmentedText]
.filter((s) => s.isWordLike)
.map((s) => s.segment)
const segments = segmentString(text)

const checkResults = []
for (const segment of segments) {
Expand Down Expand Up @@ -418,7 +438,7 @@ export default async function informativeTest_6_3_8(

if (hunspellSpawn) {
hunspellInterface.close()
hunspellSpawn.kill('SIGINT')
hunspellSpawn.kill()
}
return ctx
}
Expand Down Expand Up @@ -466,6 +486,16 @@ async function runHunspell({ dictionary, input }) {
shell: true,
})
hunspellInterface = createInterface(hunspellSpawn.stdout)
hunspellSpawn.on('exit', function () {
hunspellSpawn = null
})
hunspellSpawn.stdin.on('error', function() {
hunspellSpawn = null
});
hunspellSpawn.on('error', function () {
hunspellSpawn = null
}
)
}

// @ts-ignore
Expand All @@ -481,19 +511,12 @@ async function runHunspell({ dictionary, input }) {
}
}
hunspellInterface.on('line', handler)
hunspellSpawn.stdin.write(input + EOL)
if( hunspellSpawn) {
hunspellSpawn.stdin.write(input + EOL)
}
})

/** @type {string} */
return result
}

/**
 * Mock taking the same parameter object as runHunspell. Both properties are
 * accepted but ignored; the result is always the empty string.
 *
 * @param {object} params
 * @param {string} params.dictionary not used
 * @param {string} params.input not used
 * @returns {Promise<string>} resolves to ''
 */
async function runHunspellMock({ dictionary, input }) {
  const noOutput = ''
  return noOutput
}

0 comments on commit e34f2f0

Please sign in to comment.