From 6573a3a02aaaf0e3a097d1f49f7fd82037f8c8c1 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Thu, 6 Feb 2025 15:17:31 +0100 Subject: [PATCH] Ignore punctuation characters when looking for mentions --- src/sidebar/components/MarkdownEditor.tsx | 8 +- src/sidebar/helpers/mentions.ts | 64 +++++++- src/sidebar/helpers/test/mentions-test.js | 148 +++++++++++++++++- src/sidebar/util/term-before-position.ts | 43 ----- .../util/test/term-before-position-test.js | 90 ----------- 5 files changed, 207 insertions(+), 146 deletions(-) delete mode 100644 src/sidebar/util/term-before-position.ts delete mode 100644 src/sidebar/util/test/term-before-position-test.js diff --git a/src/sidebar/components/MarkdownEditor.tsx b/src/sidebar/components/MarkdownEditor.tsx index 809c7e9a60c..52a7d7fd2a0 100644 --- a/src/sidebar/components/MarkdownEditor.tsx +++ b/src/sidebar/components/MarkdownEditor.tsx @@ -29,6 +29,10 @@ import { } from 'preact/hooks'; import { isMacOS } from '../../shared/user-agent'; +import { + getContainingWordOffsets, + termBeforePosition, +} from '../helpers/mentions'; import { LinkType, convertSelectionToLink, @@ -36,10 +40,6 @@ import { toggleSpanStyle, } from '../markdown-commands'; import type { EditorState } from '../markdown-commands'; -import { - getContainingWordOffsets, - termBeforePosition, -} from '../util/term-before-position'; import MarkdownView from './MarkdownView'; import MentionPopover from './MentionPopover'; diff --git a/src/sidebar/helpers/mentions.ts b/src/sidebar/helpers/mentions.ts index a09cdca1846..e2e39b1a493 100644 --- a/src/sidebar/helpers/mentions.ts +++ b/src/sidebar/helpers/mentions.ts @@ -11,19 +11,24 @@ import { buildAccountID } from './account-id'; */ export function wrapMentions(text: string, authority: string): string { return text.replace( - // Capture both the potential empty character before the mention (space, tab - // or new line), and the term following the `@` character. - // When we build the mention tag, we need to prepend that empty character to + // Capture both the potential empty char (space, tab or new line) or + // punctuation char before the mention, and the term following the `@` + // character. + // When we build the mention tag, we need to prepend that prev character to // avoid altering the spacing and structure of the text. - /(^|\s)@(\w+)(?=\s|$)/g, - (match, precedingWhitespace, username) => { + // + // To match the username, we only look for `A-Za-z0-9._` characters, which + // is what the server allows. + // See: https://github.com/hypothesis/h/blob/b8d0d4c/h/schemas/api/user.py#L21 + /(^|[\s,.;:|?!'"\-()[\]{}])@([A-Za-z0-9._]+)(?=[\s,.;:|?!'"\-()[\]{}]|$)/g, + (match, precedingChar, username) => { const tag = document.createElement('a'); tag.setAttribute('data-hyp-mention', ''); tag.setAttribute('data-userid', buildAccountID(username, authority)); tag.textContent = `@${username}`; - return `${precedingWhitespace}${tag.outerHTML}`; + return `${precedingChar}${tag.outerHTML}`; }, ); } @@ -115,3 +120,50 @@ export function renderMentionTags( return foundMentions; } + +/** + * Returns the word at a specific position in a string, surrounded by empty + * characters or punctuation characters. + */ +export function termBeforePosition(text: string, position: number): string { + const { start } = getContainingWordOffsets(text, position); + return text.slice(start, position); +} + +export type WordOffsets = { + start: number; + end: number; +}; + +/** + * Returns the `start` and `end` positions for the word that overlaps with + * provided reference position. + * + * For example, given the text "hello hypothesis", and the reference position 9 + * (which corresponds to the `p` character) it will return the start and end of + * the word `hypothesis`, hence { start: 6, end: 16 }. + * + * Useful to get the offsets of the word matching the caret position in text + * inputs and textareas. + */ +export function getContainingWordOffsets( + text: string, + referencePosition: number, +): WordOffsets { + const precedingText = text.slice(0, referencePosition); + const matches = [...precedingText.matchAll(/[\s,.;:|?!'"\-()[\]{}]/g)]; + const precedingCharPos = + matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1; + + const subsequentCharPos = text + .slice(referencePosition) + .search(/[\s,.;:|?!'"\-()[\]{}]/); + + return { + start: precedingCharPos + 1, + end: + subsequentCharPos === -1 + ? text.length + : referencePosition + subsequentCharPos, + }; +} diff --git a/src/sidebar/helpers/test/mentions-test.js b/src/sidebar/helpers/test/mentions-test.js index 802a3aebc1c..5ca676a8bb3 100644 --- a/src/sidebar/helpers/test/mentions-test.js +++ b/src/sidebar/helpers/test/mentions-test.js @@ -1,4 +1,10 @@ -import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions'; +import { + renderMentionTags, + unwrapMentions, + wrapMentions, + getContainingWordOffsets, + termBeforePosition, +} from '../mentions'; const mentionTag = (username, authority) => `@${username}`; @@ -40,9 +46,21 @@ look at ${mentionTag('foo', 'example.com')} comment`, }, // Multiple mentions { - text: 'Hey @jane look at this quote from @rob', + text: 'Hey @jane, look at this quote from @rob', authority: 'example.com', - textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`, + textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`, + }, + // Mentions wrapped in punctuation chars + { + text: '(@jane) {@rob} and @john?', + authority: 'example.com', + textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`, + }, + // username-like patterns with invalid chars should be ignored + { + text: 'Hello @not+a/user=name', + authority: 'example.com', + textWithTags: `Hello @not+a/user=name`, }, // Email addresses should be ignored { @@ -119,3 +137,127 @@ describe('renderMentionTags', () => { assert.equal(fourthMention, '@user_id_missing'); }); }); + +// To make these tests more predictable, we place the `$` sign in the position +// to be checked. That way it's easier to see what is the "word" preceding it. +// The test will then get the `$` sign index and remove it from the text +// before passing it to `termBeforePosition`. +[ + // First and last positions + { + text: '$Hello world', + expectedTerm: '', + expectedOffsets: { start: 0, end: 5 }, + }, + { + text: 'Hello world$', + expectedTerm: 'world', + expectedOffsets: { start: 6, end: 11 }, + }, + + // Position in the middle of words + { + text: 'Hell$o world', + expectedTerm: 'Hell', + expectedOffsets: { start: 0, end: 5 }, + }, + { + text: 'Hello wor$ld', + expectedTerm: 'wor', + expectedOffsets: { start: 6, end: 11 }, + }, + + // Position preceded by "empty space" + { + text: 'Hello $world', + expectedTerm: '', + expectedOffsets: { start: 6, end: 11 }, + }, + { + text: `Text with + multiple + $ + lines + `, + expectedTerm: '', + expectedOffsets: { start: 31, end: 31 }, + }, + + // Position preceded by/in the middle of a word for multi-line text + { + text: `Text with$ + multiple + + lines + `, + expectedTerm: 'with', + expectedOffsets: { start: 5, end: 9 }, + }, + { + text: `Text with + multiple + + li$nes + `, + expectedTerm: 'li', + expectedOffsets: { start: 32, end: 37 }, + }, + + // Including punctuation characters + ...[ + ',', + '.', + ';', + ':', + '|', + '?', + '!', + "'", + '"', + '-', + '(', + ')', + '[', + ']', + '{', + '}', + ].flatMap(char => [ + { + text: `Foo${char}$ bar`, + expectedTerm: '', + expectedOffsets: { start: 4, end: 4 }, + }, + { + text: `${char}Foo$ bar`, + expectedTerm: 'Foo', + expectedOffsets: { start: 1, end: 4 }, + }, + { + text: `hello ${char}fo$o${char} bar`, + expectedTerm: 'fo', + expectedOffsets: { start: 7, end: 10 }, + }, + ]), +].forEach(({ text, expectedTerm, expectedOffsets }) => { + // Get the position of the `$` sign in the text, then remove it + const position = text.indexOf('$'); + const textWithoutDollarSign = text.replace('$', ''); + + describe('termBeforePosition', () => { + it('returns the term right before provided position', () => { + assert.equal( + termBeforePosition(textWithoutDollarSign, position), + expectedTerm, + ); + }); + }); + + describe('getContainingWordOffsets', () => { + it('returns expected offsets', () => { + assert.deepEqual( + getContainingWordOffsets(textWithoutDollarSign, position), + expectedOffsets, + ); + }); + }); +}); diff --git a/src/sidebar/util/term-before-position.ts b/src/sidebar/util/term-before-position.ts deleted file mode 100644 index 4540dec916f..00000000000 --- a/src/sidebar/util/term-before-position.ts +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Returns the "word" right before a specific position in an input string. - * - * In this context, a word is anything between a space, newline or tab, and - * provided position. - */ -export function termBeforePosition(text: string, position: number): string { - return text.slice(0, position).match(/\S+$/)?.[0] ?? ''; -} - -export type WordOffsets = { - start: number; - end: number; -}; - -/** - * Returns the `start` and `end` positions for the word that overlaps with - * provided reference position. - * - * For example, given the text "hello hypothesis", and the reference position 9 - * (which corresponds to the `p` character) it will return the start and end of - * the word `hypothesis`, hence { start: 6, end: 16 }. - * - * Useful to get the offsets of the word matching the caret position in text - * inputs and textareas. - */ -export function getContainingWordOffsets( - text: string, - referencePosition: number, -): WordOffsets { - const precedingEmptyCharPos = text - .slice(0, referencePosition) - .search(/\s\S*$/); - const subsequentEmptyCharPos = text.slice(referencePosition).search(/\s/); - - return { - start: (precedingEmptyCharPos === -1 ? -1 : precedingEmptyCharPos) + 1, - end: - subsequentEmptyCharPos === -1 - ? text.length - : referencePosition + subsequentEmptyCharPos, - }; -} diff --git a/src/sidebar/util/test/term-before-position-test.js b/src/sidebar/util/test/term-before-position-test.js deleted file mode 100644 index f1f6a8c9a01..00000000000 --- a/src/sidebar/util/test/term-before-position-test.js +++ /dev/null @@ -1,90 +0,0 @@ -import { - getContainingWordOffsets, - termBeforePosition, -} from '../term-before-position'; - -describe('term-before-position', () => { - // To make these tests more predictable, we place the `$` sign in the position - // to be checked. That way it's easier to see what is the "word" preceding it. - // The test will then get the `$` sign index and remove it from the text - // before passing it to `termBeforePosition`. - [ - // First and last positions - { - text: '$Hello world', - expectedTerm: '', - expectedOffsets: { start: 0, end: 5 }, - }, - { - text: 'Hello world$', - expectedTerm: 'world', - expectedOffsets: { start: 6, end: 11 }, - }, - - // Position in the middle of words - { - text: 'Hell$o world', - expectedTerm: 'Hell', - expectedOffsets: { start: 0, end: 5 }, - }, - { - text: 'Hello wor$ld', - expectedTerm: 'wor', - expectedOffsets: { start: 6, end: 11 }, - }, - - // Position preceded by "empty space" - { - text: 'Hello $world', - expectedTerm: '', - expectedOffsets: { start: 6, end: 11 }, - }, - { - text: `Text with - multiple - $ - lines - `, - expectedTerm: '', - expectedOffsets: { start: 31, end: 31 }, - }, - - // Position preceded by/in the middle of a word for multi-line text - { - text: `Text with$ - multiple - - lines - `, - expectedTerm: 'with', - expectedOffsets: { start: 5, end: 9 }, - }, - { - text: `Text with - multiple - - li$nes - `, - expectedTerm: 'li', - expectedOffsets: { start: 32, end: 37 }, - }, - ].forEach(({ text, expectedTerm, expectedOffsets }) => { - // Get the position of the `$` sign in the text, then remove it - const position = text.indexOf('$'); - const textWithoutDollarSign = text.replace('$', ''); - - it('`termBeforePosition` returns the term right before provided position', () => { - assert.equal( - termBeforePosition(textWithoutDollarSign, position), - expectedTerm, - ); - }); - - it('`getContainingWordOffsets` returns expected offsets', () => { - assert.deepEqual( - getContainingWordOffsets(textWithoutDollarSign, position), - expectedOffsets, - ); - }); - }); -});