Ignore punctuation characters when looking for mentions

hypothesis · Feb 7, 2025 · 521e24a · 521e24a
1 parent e9d9845
commit 521e24a
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 20 deletions.
diff --git a/src/sidebar/helpers/mentions.ts b/src/sidebar/helpers/mentions.ts
@@ -11,19 +11,24 @@ import { buildAccountID } from './account-id';
  */
 export function wrapMentions(text: string, authority: string): string {
   return text.replace(
-    // Capture both the potential empty character before the mention (space, tab
-    // or new line), and the term following the `@` character.
-    // When we build the mention tag, we need to prepend that empty character to
+    // Capture both the potential empty character (space, tab or new line) or
+    // punctuation char before the mention, and the term following the `@`
+    // character.
+    // When we build the mention tag, we need to prepend that prev character to
     // avoid altering the spacing and structure of the text.
-    /(^|\s)@(\w+)(?=\s|$)/g,
-    (match, precedingWhitespace, username) => {
+    //
+    // To match the username, we only look for `A-Za-z0-9._` characters, which
+    // is what the server allows.
+    // See: https://github.com/hypothesis/h/blob/b8d0d4c/h/schemas/api/user.py#L21
+    /(^|[\s,.;:|?!'"\-()[\]{}])@([A-Za-z0-9._]+)(?=[\s,.;:|?!'"\-()[\]{}]|$)/g,
+    (match, precedingChar, username) => {
       const tag = document.createElement('a');
 
       tag.setAttribute('data-hyp-mention', '');
       tag.setAttribute('data-userid', buildAccountID(username, authority));
       tag.textContent = `@${username}`;
 
-      return `${precedingWhitespace}${tag.outerHTML}`;
+      return `${precedingChar}${tag.outerHTML}`;
     },
   );
 }

diff --git a/src/sidebar/helpers/test/mentions-test.js b/src/sidebar/helpers/test/mentions-test.js
@@ -40,9 +40,21 @@ look at ${mentionTag('foo', 'example.com')} comment`,
   },
   // Multiple mentions
   {
-    text: 'Hey @jane look at this quote from @rob',
+    text: 'Hey @jane, look at this quote from @rob',
     authority: 'example.com',
-    textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
+    textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
+  },
+  // Mentions wrapped in punctuation chars
+  {
+    text: '(@jane) {@rob} and @john?',
+    authority: 'example.com',
+    textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
+  },
+  // username-like patterns with invalid chars should be ignored
+  {
+    text: 'Hello @not+a/user=name',
+    authority: 'example.com',
+    textWithTags: `Hello @not+a/user=name`,
   },
   // Email addresses should be ignored
   {

diff --git a/src/sidebar/util/term-before-position.ts b/src/sidebar/util/term-before-position.ts
@@ -1,11 +1,9 @@
 /**
- * Returns the "word" right before a specific position in an input string.
- *
- * In this context, a word is anything between a space, newline or tab, and
- * provided position.
+ * Returns the text before a specific position in a string until an empty
+ * character or punctuation character appears.
  */
 export function termBeforePosition(text: string, position: number): string {
-  return text.slice(0, position).match(/\S+$/)?.[0] ?? '';
+  return text.slice(0, position).match(/[^\s,.;:|?!'"\-()[\]{}]+$/)?.[0] ?? '';
 }
 
 export type WordOffsets = {
@@ -28,16 +26,20 @@ export function getContainingWordOffsets(
   text: string,
   referencePosition: number,
 ): WordOffsets {
-  const precedingEmptyCharPos = text
-    .slice(0, referencePosition)
-    .search(/\s\S*$/);
-  const subsequentEmptyCharPos = text.slice(referencePosition).search(/\s/);
+  const precedingText = text.slice(0, referencePosition);
+  const matches = [...precedingText.matchAll(/[\s,.;:|?!'"\-()[\]{}]/g)];
+  const precedingCharPos =
+    matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1;
+
+  const subsequentCharPos = text
+    .slice(referencePosition)
+    .search(/[\s,.;:|?!'"\-()[\]{}]/);
 
   return {
-    start: (precedingEmptyCharPos === -1 ? -1 : precedingEmptyCharPos) + 1,
+    start: precedingCharPos + 1,
     end:
-      subsequentEmptyCharPos === -1
+      subsequentCharPos === -1
         ? text.length
-        : referencePosition + subsequentEmptyCharPos,
+        : referencePosition + subsequentCharPos,
   };
 }
diff --git a/src/sidebar/util/test/term-before-position-test.js b/src/sidebar/util/test/term-before-position-test.js
@@ -68,6 +68,42 @@ describe('term-before-position', () => {
       expectedTerm: 'li',
       expectedOffsets: { start: 32, end: 37 },
     },
+
+    // Including punctuation characters
+    ...[
+      ',',
+      '.',
+      ';',
+      ':',
+      '|',
+      '?',
+      '!',
+      "'",
+      '"',
+      '-',
+      '(',
+      ')',
+      '[',
+      ']',
+      '{',
+      '}',
+    ].flatMap(char => [
+      {
+        text: `Foo${char}$ bar`,
+        expectedTerm: '',
+        expectedOffsets: { start: 4, end: 4 },
+      },
+      {
+        text: `${char}Foo$ bar`,
+        expectedTerm: 'Foo',
+        expectedOffsets: { start: 1, end: 4 },
+      },
+      {
+        text: `hello ${char}fo$o${char} bar`,
+        expectedTerm: 'fo',
+        expectedOffsets: { start: 7, end: 10 },
+      },
+    ]),
   ].forEach(({ text, expectedTerm, expectedOffsets }) => {
     // Get the position of the `$` sign in the text, then remove it
     const position = text.indexOf('$');