Ignore punctuation characters when looking for mentions

hypothesis · Feb 7, 2025 · 6573a3a · 6573a3a
1 parent e9d9845
commit 6573a3a
Show file tree

Hide file tree

Showing 5 changed files with 207 additions and 146 deletions.
diff --git a/src/sidebar/components/MarkdownEditor.tsx b/src/sidebar/components/MarkdownEditor.tsx
@@ -29,17 +29,17 @@ import {
 } from 'preact/hooks';
 
 import { isMacOS } from '../../shared/user-agent';
+import {
+  getContainingWordOffsets,
+  termBeforePosition,
+} from '../helpers/mentions';
 import {
   LinkType,
   convertSelectionToLink,
   toggleBlockStyle,
   toggleSpanStyle,
 } from '../markdown-commands';
 import type { EditorState } from '../markdown-commands';
-import {
-  getContainingWordOffsets,
-  termBeforePosition,
-} from '../util/term-before-position';
 import MarkdownView from './MarkdownView';
 import MentionPopover from './MentionPopover';
 

diff --git a/src/sidebar/helpers/mentions.ts b/src/sidebar/helpers/mentions.ts
@@ -11,19 +11,24 @@ import { buildAccountID } from './account-id';
  */
 export function wrapMentions(text: string, authority: string): string {
   return text.replace(
-    // Capture both the potential empty character before the mention (space, tab
-    // or new line), and the term following the `@` character.
-    // When we build the mention tag, we need to prepend that empty character to
+    // Capture both the potential empty char (space, tab or new line) or
+    // punctuation char before the mention, and the term following the `@`
+    // character.
+    // When we build the mention tag, we need to prepend that prev character to
     // avoid altering the spacing and structure of the text.
-    /(^|\s)@(\w+)(?=\s|$)/g,
-    (match, precedingWhitespace, username) => {
+    //
+    // To match the username, we only look for `A-Za-z0-9._` characters, which
+    // is what the server allows.
+    // See: https://github.com/hypothesis/h/blob/b8d0d4c/h/schemas/api/user.py#L21
+    /(^|[\s,.;:|?!'"\-()[\]{}])@([A-Za-z0-9._]+)(?=[\s,.;:|?!'"\-()[\]{}]|$)/g,
+    (match, precedingChar, username) => {
       const tag = document.createElement('a');
 
       tag.setAttribute('data-hyp-mention', '');
       tag.setAttribute('data-userid', buildAccountID(username, authority));
       tag.textContent = `@${username}`;
 
-      return `${precedingWhitespace}${tag.outerHTML}`;
+      return `${precedingChar}${tag.outerHTML}`;
     },
   );
 }
@@ -115,3 +120,50 @@ export function renderMentionTags(
 
   return foundMentions;
 }
+
+/**
+ * Returns the word at a specific position in a string, surrounded by empty
+ * characters or punctuation characters.
+ */
+export function termBeforePosition(text: string, position: number): string {
+  const { start } = getContainingWordOffsets(text, position);
+  return text.slice(start, position);
+}
+
+export type WordOffsets = {
+  start: number;
+  end: number;
+};
+
+/**
+ * Returns the `start` and `end` positions for the word that overlaps with
+ * provided reference position.
+ *
+ * For example, given the text "hello hypothesis", and the reference position 9
+ * (which corresponds to the `p` character) it will return the start and end of
+ * the word `hypothesis`, hence { start: 6, end: 16 }.
+ *
+ * Useful to get the offsets of the word matching the caret position in text
+ * inputs and textareas.
+ */
+export function getContainingWordOffsets(
+  text: string,
+  referencePosition: number,
+): WordOffsets {
+  const precedingText = text.slice(0, referencePosition);
+  const matches = [...precedingText.matchAll(/[\s,.;:|?!'"\-()[\]{}]/g)];
+  const precedingCharPos =
+    matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1;
+
+  const subsequentCharPos = text
+    .slice(referencePosition)
+    .search(/[\s,.;:|?!'"\-()[\]{}]/);
+
+  return {
+    start: precedingCharPos + 1,
+    end:
+      subsequentCharPos === -1
+        ? text.length
+        : referencePosition + subsequentCharPos,
+  };
+}
diff --git a/src/sidebar/helpers/test/mentions-test.js b/src/sidebar/helpers/test/mentions-test.js
@@ -1,4 +1,10 @@
-import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions';
+import {
+  renderMentionTags,
+  unwrapMentions,
+  wrapMentions,
+  getContainingWordOffsets,
+  termBeforePosition,
+} from '../mentions';
 
 const mentionTag = (username, authority) =>
   `<a data-hyp-mention="" data-userid="acct:${username}@${authority}">@${username}</a>`;
@@ -40,9 +46,21 @@ look at ${mentionTag('foo', 'example.com')} comment`,
   },
   // Multiple mentions
   {
-    text: 'Hey @jane look at this quote from @rob',
+    text: 'Hey @jane, look at this quote from @rob',
     authority: 'example.com',
-    textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
+    textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
+  },
+  // Mentions wrapped in punctuation chars
+  {
+    text: '(@jane) {@rob} and @john?',
+    authority: 'example.com',
+    textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
+  },
+  // username-like patterns with invalid chars should be ignored
+  {
+    text: 'Hello @not+a/user=name',
+    authority: 'example.com',
+    textWithTags: `Hello @not+a/user=name`,
   },
   // Email addresses should be ignored
   {
@@ -119,3 +137,127 @@ describe('renderMentionTags', () => {
     assert.equal(fourthMention, '@user_id_missing');
   });
 });
+
+// To make these tests more predictable, we place the `$` sign in the position
+// to be checked. That way it's easier to see what is the "word" preceding it.
+// The test will then get the `$` sign index and remove it from the text
+// before passing it to `termBeforePosition`.
+[
+  // First and last positions
+  {
+    text: '$Hello world',
+    expectedTerm: '',
+    expectedOffsets: { start: 0, end: 5 },
+  },
+  {
+    text: 'Hello world$',
+    expectedTerm: 'world',
+    expectedOffsets: { start: 6, end: 11 },
+  },
+
+  // Position in the middle of words
+  {
+    text: 'Hell$o world',
+    expectedTerm: 'Hell',
+    expectedOffsets: { start: 0, end: 5 },
+  },
+  {
+    text: 'Hello wor$ld',
+    expectedTerm: 'wor',
+    expectedOffsets: { start: 6, end: 11 },
+  },
+
+  // Position preceded by "empty space"
+  {
+    text: 'Hello $world',
+    expectedTerm: '',
+    expectedOffsets: { start: 6, end: 11 },
+  },
+  {
+    text: `Text with
+      multiple
+      $
+      lines
+      `,
+    expectedTerm: '',
+    expectedOffsets: { start: 31, end: 31 },
+  },
+
+  // Position preceded by/in the middle of a word for multi-line text
+  {
+    text: `Text with$
+      multiple
+
+      lines
+      `,
+    expectedTerm: 'with',
+    expectedOffsets: { start: 5, end: 9 },
+  },
+  {
+    text: `Text with
+      multiple
+
+      li$nes
+      `,
+    expectedTerm: 'li',
+    expectedOffsets: { start: 32, end: 37 },
+  },
+
+  // Including punctuation characters
+  ...[
+    ',',
+    '.',
+    ';',
+    ':',
+    '|',
+    '?',
+    '!',
+    "'",
+    '"',
+    '-',
+    '(',
+    ')',
+    '[',
+    ']',
+    '{',
+    '}',
+  ].flatMap(char => [
+    {
+      text: `Foo${char}$ bar`,
+      expectedTerm: '',
+      expectedOffsets: { start: 4, end: 4 },
+    },
+    {
+      text: `${char}Foo$ bar`,
+      expectedTerm: 'Foo',
+      expectedOffsets: { start: 1, end: 4 },
+    },
+    {
+      text: `hello ${char}fo$o${char} bar`,
+      expectedTerm: 'fo',
+      expectedOffsets: { start: 7, end: 10 },
+    },
+  ]),
+].forEach(({ text, expectedTerm, expectedOffsets }) => {
+  // Get the position of the `$` sign in the text, then remove it
+  const position = text.indexOf('$');
+  const textWithoutDollarSign = text.replace('$', '');
+
+  describe('termBeforePosition', () => {
+    it('returns the term right before provided position', () => {
+      assert.equal(
+        termBeforePosition(textWithoutDollarSign, position),
+        expectedTerm,
+      );
+    });
+  });
+
+  describe('getContainingWordOffsets', () => {
+    it('returns expected offsets', () => {
+      assert.deepEqual(
+        getContainingWordOffsets(textWithoutDollarSign, position),
+        expectedOffsets,
+      );
+    });
+  });
+});
diff --git a/src/sidebar/util/term-before-position.ts b/src/sidebar/util/term-before-position.ts