Skip to content

Commit

Permalink
Ignore punctuation characters when trying to match mentions
Browse files Browse the repository at this point in the history
  • Loading branch information
acelaya committed Feb 10, 2025
1 parent 0a589c3 commit aa75693
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 157 deletions.
10 changes: 5 additions & 5 deletions src/sidebar/components/MarkdownEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ import {
} from 'preact/hooks';

import { isMacOS } from '../../shared/user-agent';
import {
getContainingMentionOffsets,
termBeforePosition,
} from '../helpers/mentions';
import {
LinkType,
convertSelectionToLink,
toggleBlockStyle,
toggleSpanStyle,
} from '../markdown-commands';
import type { EditorState } from '../markdown-commands';
import {
getContainingWordOffsets,
termBeforePosition,
} from '../util/term-before-position';
import { getCaretCoordinates } from '../util/textarea-caret-position';
import MarkdownView from './MarkdownView';
import MentionPopover from './MentionPopover';
Expand Down Expand Up @@ -264,7 +264,7 @@ function TextArea({
(suggestion: UserItem) => {
const textarea = textareaRef.current!;
const { value } = textarea;
const { start, end } = getContainingWordOffsets(
const { start, end } = getContainingMentionOffsets(
value,
textarea.selectionStart,
);
Expand Down
85 changes: 69 additions & 16 deletions src/sidebar/helpers/mentions.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
import type { Mention } from '../../types/api';
import { buildAccountID } from './account-id';

// Pattern that matches characters treated as the boundary of a mention.
const BOUNDARY_CHARS = String.raw`[\s,.;:|?!'"\-()[\]{}]`;

// Pattern that matches Hypothesis usernames.
// See https://github.com/hypothesis/h/blob/797d9a4/h/models/user.py#L25
const USERNAME_PAT = '[A-Za-z0-9_][A-Za-z0-9._]+[A-Za-z0-9_]';

// Pattern that finds user mentions in text.
const MENTIONS_PAT = new RegExp(
`(^|${BOUNDARY_CHARS})@(${USERNAME_PAT})(?=${BOUNDARY_CHARS}|$)`,
'g',
);

/**
* Wrap all occurrences of @mentions in provided text into the corresponding
* special tag, as long as they are surrounded by "empty" space (space, tab, new
Expand All @@ -10,22 +23,15 @@ import { buildAccountID } from './account-id';
* `<a data-hyp-mention data-userid="acct:[email protected]">@someuser</a>`
*/
export function wrapMentions(text: string, authority: string): string {
return text.replace(
// Capture both the potential empty character before the mention (space, tab
// or new line), and the term following the `@` character.
// When we build the mention tag, we need to prepend that empty character to
// avoid altering the spacing and structure of the text.
/(^|\s)@(\w+)(?=\s|$)/g,
(match, precedingWhitespace, username) => {
const tag = document.createElement('a');

tag.setAttribute('data-hyp-mention', '');
tag.setAttribute('data-userid', buildAccountID(username, authority));
tag.textContent = `@${username}`;

return `${precedingWhitespace}${tag.outerHTML}`;
},
);
return text.replace(MENTIONS_PAT, (match, precedingChar, username) => {
const tag = document.createElement('a');

tag.setAttribute('data-hyp-mention', '');
tag.setAttribute('data-userid', buildAccountID(username, authority));
tag.textContent = `@${username}`;

return `${precedingChar}${tag.outerHTML}`;
});
}

/**
Expand Down Expand Up @@ -115,3 +121,50 @@ export function renderMentionTags(

return foundMentions;
}

/**
* Returns the word at a specific position in a string, surrounded by empty
* characters or punctuation characters.
*/
export function termBeforePosition(text: string, position: number): string {
const { start } = getContainingMentionOffsets(text, position);
return text.slice(start, position);
}

export type WordOffsets = {
start: number;
end: number;
};

/**
* Returns the `start` and `end` positions for the word or mention that overlaps
* with provided reference position.
*
* For example, given the text "hello @hypothesis", and the reference position 9
* (which corresponds to the `y` character) it will return the start and end of
* the `@hypothesis` mention, hence { start: 6, end: 17 }.
*
* Useful to get the offsets of the mention matching the caret position in text
* inputs and textareas.
*/
export function getContainingMentionOffsets(
text: string,
referencePosition: number,
): WordOffsets {
const precedingText = text.slice(0, referencePosition);
const matches = [...precedingText.matchAll(new RegExp(BOUNDARY_CHARS, 'g'))];
const precedingCharPos =
matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1;

const subsequentCharPos = text
.slice(referencePosition)
.search(new RegExp(BOUNDARY_CHARS));

return {
start: precedingCharPos + 1,
end:
subsequentCharPos === -1
? text.length
: referencePosition + subsequentCharPos,
};
}
155 changes: 152 additions & 3 deletions src/sidebar/helpers/test/mentions-test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions';
import {
renderMentionTags,
unwrapMentions,
wrapMentions,
getContainingMentionOffsets,
termBeforePosition,
} from '../mentions';

const mentionTag = (username, authority) =>
`<a data-hyp-mention="" data-userid="acct:${username}@${authority}">@${username}</a>`;
Expand Down Expand Up @@ -40,16 +46,35 @@ look at ${mentionTag('foo', 'example.com')} comment`,
},
// Multiple mentions
{
text: 'Hey @jane look at this quote from @rob',
text: 'Hey @jane, look at this quote from @rob',
authority: 'example.com',
textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
},
// Mentions wrapped in punctuation chars
{
text: '(@jane) {@rob} and @john?',
authority: 'example.com',
textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
},
// username-like patterns with invalid chars should be ignored
{
text: 'Hello @not+a/user=name',
authority: 'example.com',
textWithTags: `Hello @not+a/user=name`,
},
// Email addresses should be ignored
{
text: 'Ignore email: [email protected]',
authority: 'example.com',
textWithTags: 'Ignore email: [email protected]',
},
// Trailing dots should not be considered part of the mention, but dots
// in-between should
{
text: 'Hello @jane.doe.',
authority: 'example.com',
textWithTags: `Hello ${mentionTag('jane.doe', 'example.com')}.`,
},
].forEach(({ text, authority, textWithTags }) => {
describe('wrapMentions', () => {
it('wraps every mention in a mention tag', () => {
Expand Down Expand Up @@ -119,3 +144,127 @@ describe('renderMentionTags', () => {
assert.equal(fourthMention, '@user_id_missing');
});
});

// To make these tests more predictable, we place the `$` sign in the position
// to be checked. That way it's easier to see what is the "word" preceding it.
// The test will then get the `$` sign index and remove it from the text
// before passing it to `termBeforePosition`.
[
// First and last positions
{
text: '$Hello world',
expectedTerm: '',
expectedOffsets: { start: 0, end: 5 },
},
{
text: 'Hello world$',
expectedTerm: 'world',
expectedOffsets: { start: 6, end: 11 },
},

// Position in the middle of words
{
text: 'Hell$o world',
expectedTerm: 'Hell',
expectedOffsets: { start: 0, end: 5 },
},
{
text: 'Hello wor$ld',
expectedTerm: 'wor',
expectedOffsets: { start: 6, end: 11 },
},

// Position preceded by "empty space"
{
text: 'Hello $world',
expectedTerm: '',
expectedOffsets: { start: 6, end: 11 },
},
{
text: `Text with
multiple
$
lines
`,
expectedTerm: '',
expectedOffsets: { start: 31, end: 31 },
},

// Position preceded by/in the middle of a word for multi-line text
{
text: `Text with$
multiple
lines
`,
expectedTerm: 'with',
expectedOffsets: { start: 5, end: 9 },
},
{
text: `Text with
multiple
li$nes
`,
expectedTerm: 'li',
expectedOffsets: { start: 32, end: 37 },
},

// Including punctuation characters
...[
',',
'.',
';',
':',
'|',
'?',
'!',
"'",
'"',
'-',
'(',
')',
'[',
']',
'{',
'}',
].flatMap(char => [
{
text: `Foo${char}$ bar`,
expectedTerm: '',
expectedOffsets: { start: 4, end: 4 },
},
{
text: `${char}Foo$ bar`,
expectedTerm: 'Foo',
expectedOffsets: { start: 1, end: 4 },
},
{
text: `hello ${char}fo$o${char} bar`,
expectedTerm: 'fo',
expectedOffsets: { start: 7, end: 10 },
},
]),
].forEach(({ text, expectedTerm, expectedOffsets }) => {
// Get the position of the `$` sign in the text, then remove it
const position = text.indexOf('$');
const textWithoutDollarSign = text.replace('$', '');

describe('termBeforePosition', () => {
it('returns the term right before provided position', () => {
assert.equal(
termBeforePosition(textWithoutDollarSign, position),
expectedTerm,
);
});
});

describe('getContainingMentionOffsets', () => {
it('returns expected offsets', () => {
assert.deepEqual(
getContainingMentionOffsets(textWithoutDollarSign, position),
expectedOffsets,
);
});
});
});
43 changes: 0 additions & 43 deletions src/sidebar/util/term-before-position.ts

This file was deleted.

Loading

0 comments on commit aa75693

Please sign in to comment.