secvisogram · rainer-exxcellent · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025
diff --git a/hunspell.js b/hunspell.js
diff --git a/hunspell/getHunspellAvailableLangs.js b/hunspell/getHunspellAvailableLangs.js
diff --git a/lib/informativeTests/informativeTest_6_3_8.js b/lib/informativeTests/informativeTest_6_3_8.js
@@ -1,8 +1,26 @@
 import Ajv from 'ajv/dist/jtd.js'
-import { execFile } from 'node:child_process'
 import bcp47 from 'bcp47'
+import { spawn } from 'child_process'
+import { createInterface } from 'node:readline/promises'
+import { EOL } from 'node:os'
+
+/**
+ * Timeout in milliseconds after which a hunspell request that has not
+ * produced any output line is treated as a lost connection.
+ */
+const HUNSPELL_TIMEOUT = 1000
 
 const ajv = new Ajv()
+// Cache of spell-check results to improve performance
+const spellCheckedWords2Result = new Map()
+/** Shared hunspell child process; spawned on first use in `runHunspell`.
+ * @type {import("child_process").ChildProcessWithoutNullStreams | null }
+ */
+let hunspellSpawn
+/** Line reader created on the stdout of `hunspellSpawn`.
+ * @type {import("readline/promises").Interface}
+ */
+let hunspellInterface
 
 const inputSchema = /** @type {const} */ ({
   additionalProperties: true,
@@ -241,10 +259,17 @@ export default async function informativeTest_6_3_8(
   }
 
   const lang = bcp47.parse(doc.document.lang)
-  if (!lang?.langtag.language.language) return ctx
+  if (!lang?.langtag.language.language) {
+    return ctx
+  }
   const dictionary = `${lang.langtag.language.language}${
     typeof lang.langtag.region === 'string' ? `_${lang.langtag.region}` : ''
   }`
+  // @ts-ignore -- word segmenter for the document's primary language subtag
+  const segmenter = new Intl.Segmenter(lang.langtag.language.language, {
+    granularity: 'word',
+  })
+  const urlPattern = /(https?|ftp):\/\/[^\s/$.?#].[^\s]*/gi
 
   for (const path of [
     '/document/acknowledgments[]/names[]',
@@ -304,7 +329,7 @@ export default async function informativeTest_6_3_8(
         `${prefix}${branchIndex}/product/name`,
         branch.product?.name
       )
-      checkBranches(
+      await checkBranches(
         `${prefix}${branchIndex}/branches/`,
         Array.isArray(branch.branches) ? branch.branches : []
       )
@@ -351,27 +376,74 @@ export default async function informativeTest_6_3_8(
     }
   }
 
+  /**
+   * Splits `text` into the tokens handed to the spell checker: the word-like
+   * segments of the URL-free text, followed by each URL as a single token.
+   * URLs are cut out first because the segmenter does not split them properly.
+   *
+   * @param {string} [text]
+   * @returns {string[]} empty when `text` is missing or empty
+   */
+  function segmentString(text) {
+    if (!text) return []
+
+    // Collect the URLs before they are stripped from the text.
+    const urls = Array.from(text.matchAll(urlPattern), (hit) => hit[0])
+    const words = []
+    for (const part of segmenter.segment(text.replace(urlPattern, ''))) {
+      if (part.isWordLike) words.push(part.segment)
+    }
+
+    // URLs are appended after the words, each as one unsplit token.
+    return words.concat(urls)
+  }
+
   /**
    * @param {string} instancePath
    * @param {string} [text]
    */
   async function checkField(instancePath, text) {
-    if (typeof text !== 'string') return
-    const result = await spellCheckString({
-      text,
-      dictionary,
-      hunspell: params.hunspell,
-    })
-    if (!result.ok) {
+    if (typeof text !== 'string') {
+      return
+    }
+
+    // The result cache is module-wide, so key it by dictionary and word: a word
+    // valid in one language must not be judged by another language's result.
+    const checkResults = []
+    for (const segment of segmentString(text)) {
+      const cacheKey = `${dictionary}:${segment}`
+      let spellCheckResult = spellCheckedWords2Result.get(cacheKey)
+      if (!spellCheckResult) {
+        spellCheckResult = await spellCheckString({
+          // @ts-ignore
+          text: segment,
+          dictionary: dictionary,
+          hunspell: params.hunspell,
+        })
+        spellCheckedWords2Result.set(cacheKey, spellCheckResult)
+      }
+      if (!spellCheckResult.ok) {
+        checkResults.push(spellCheckResult)
+      }
+    }
+
+    if (checkResults.length > 0) {
+      const words = checkResults.flatMap((result) =>
+        result.mistakes.map((/** @type {{ word: any; }} */ m) => m.word)
+      )
       ctx.infos.push({
         instancePath,
-        message: `there are spelling mistakes in: ${result.mistakes
-          .map((m) => m.word)
-          .join(', ')}`,
+        message: `there are spelling mistakes in: ${[...new Set(words)].join(
+          ', '
+        )}`,
       })
     }
   }
 
+  if (hunspellSpawn) {
+    hunspellInterface.close()
+    hunspellSpawn.kill()
+  }
   return ctx
 }
 
@@ -384,7 +456,8 @@ export default async function informativeTest_6_3_8(
 async function spellCheckString({ text, dictionary, hunspell }) {
   /** @type {string} */
   const result = await hunspell({ dictionary, input: text })
-  const lines = result.split('\n').slice(1)
+
+  const lines = result.split('\n')
   const errors = lines
     .filter((l) => l.startsWith('# ') || l.startsWith('& '))
     .map((l) => {
@@ -410,13 +483,54 @@ async function spellCheckString({ text, dictionary, hunspell }) {
  * @returns
  */
 async function runHunspell({ dictionary, input }) {
-  /** @type {string} */
+  // Send one input line to the shared hunspell process and collect its reply.
   const result = await new Promise((resolve, reject) => {
-    const child = execFile('hunspell', ['-d', dictionary], (err, stdout) => {
-      if (err) return reject(err)
-      resolve(stdout)
-    })
-    child.stdin?.end(input)
+    const abortController = new AbortController()
+    const { signal } = abortController
+    let resolved = false
+
+    const abortOnHunspellError = () => {
+      if (!signal.aborted && !resolved) {
+        abortController.abort()
+        return reject(new Error('Lost hunspell connection'))
+      }
+    }
+
+    // informativeTest_6_3_8 kills the process when it finishes but keeps the
+    // handle, so a killed process must be replaced instead of being reused.
+    if (!hunspellSpawn || hunspellSpawn.killed) {
+      hunspellSpawn = spawn('hunspell', ['-d', dictionary], {
+        stdio: 'pipe',
+        shell: true,
+      })
+        .on('error', abortOnHunspellError)
+        .on('exit', abortOnHunspellError)
+      hunspellSpawn.stdin.on('error', abortOnHunspellError)
+      hunspellSpawn.stdout.on('error', abortOnHunspellError)
+      hunspellInterface = createInterface(hunspellSpawn.stdout)
+    }
+
+    // Watchdog: reject when hunspell has not answered with any line within
+    // HUNSPELL_TIMEOUT ms; the first received line disarms it.
+    const timeout = setTimeout(abortOnHunspellError, HUNSPELL_TIMEOUT)
+
+    /** @type {string[]} */
+    const buffer = []
+    /** @param {string} line */
+    const handler = (line) => {
+      clearTimeout(timeout)
+      if (signal.aborted || resolved) return
+      if (line !== '') buffer.push(line)
+      else {
+        hunspellInterface.off('line', handler)
+        resolved = true
+        resolve(buffer.join(EOL))
+      }
+    }
+    hunspellInterface.on('line', handler)
+    hunspellSpawn.stdin.write(input + EOL)
   })
+
+  /** @type {string} */
   return result
 }
diff --git a/scripts/test.js b/scripts/test.js
@@ -5,6 +5,7 @@ import { fileURLToPath } from 'url'
 
 spawn('mocha', ['tests', ...process.argv.slice(2)], {
   stdio: 'inherit',
+  shell: true,
   env: {
     ...process.env,
     DICPATH: fileURLToPath(new URL('../tests/dicts', import.meta.url)),

diff --git a/tests/informativeTest_6_3_8.js b/tests/informativeTest_6_3_8.js
@@ -6,13 +6,47 @@ const failingExamples = await readExampleFiles(
   new URL('informativeTest_6_3_8/failing', import.meta.url)
 )
 
+// The words must match the test documents csaf/csaf_2.0/test/validator/data/informative/oasis_csaf_tc-csaf_2_0-2021-6-3-08-11.json
+// and csaf/csaf_2.0/test/validator/data/informative/oasis_csaf_tc-csaf_2_0-2021-6-3-08-01.json
+
+// Mocked hunspell answer per input word: a line starting with '# ' is reported
+// as a spelling mistake, '*' is not.
+const hunspellMap = new Map([
+  ['Security', '*'],
+  ['researchers', '*'],
+  ['found', '*'],
+  ['multiple', '*'],
+  ['vulnerabilities', '*'],
+  ['in', '*'],
+  ['XYZ', '*'],
+  ['Secruity', '# error'],
+  ['OASIS', '*'],
+  ['CSAF', '*'],
+  ['TC', '*'],
+  ['Informative', '*'],
+  ['test', '*'],
+  ['Spell', '*'],
+  ['check', '*'],
+  ['valid', '*'],
+  ['example', '*'],
+  ['failing', '*'],
+  ['1', '*'],
+  ['Initial', '*'],
+  ['version', '*'],
+])
+
 describe('Informative test 6.3.8', function () {
   describe('failing examples', function () {
     for (const [title, failingExample] of failingExamples) {
       it(title, async function () {
         const result = await informativeTest_6_3_8(failingExample, {
-          async hunspell() {
-            return 'Hunspell vMOCK\n\n# wrongword 1'
+          async hunspell({ dictionary, input }) {
+            const answer = hunspellMap.get(input)
+            if (answer) {
+              return 'Hunspell vMOCK\n\n' + answer
+            } else {
+              throw new Error('Hunspell vMOCK. Unknoww word ' + input)
+            }
           },
         })