Skip to content

Commit

Permalink
Ignore punctuation characters when looking for mentions
Browse files Browse the repository at this point in the history
  • Loading branch information
acelaya committed Feb 7, 2025
1 parent e9d9845 commit 521e24a
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 20 deletions.
17 changes: 11 additions & 6 deletions src/sidebar/helpers/mentions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,24 @@ import { buildAccountID } from './account-id';
*/
export function wrapMentions(text: string, authority: string): string {
return text.replace(
// Capture both the potential empty character before the mention (space, tab
// or new line), and the term following the `@` character.
// When we build the mention tag, we need to prepend that empty character to
// Capture both the potential empty character (space, tab or new line) or
// punctuation char before the mention, and the term following the `@`
// character.
// When we build the mention tag, we need to prepend that prev character to
// avoid altering the spacing and structure of the text.
/(^|\s)@(\w+)(?=\s|$)/g,
(match, precedingWhitespace, username) => {
//
// To match the username, we only look for `A-Za-z0-9._` characters, which
// is what the server allows.
// See: https://github.com/hypothesis/h/blob/b8d0d4c/h/schemas/api/user.py#L21
/(^|[\s,.;:|?!'"\-()[\]{}])@([A-Za-z0-9._]+)(?=[\s,.;:|?!'"\-()[\]{}]|$)/g,
(match, precedingChar, username) => {
const tag = document.createElement('a');

tag.setAttribute('data-hyp-mention', '');
tag.setAttribute('data-userid', buildAccountID(username, authority));
tag.textContent = `@${username}`;

return `${precedingWhitespace}${tag.outerHTML}`;
return `${precedingChar}${tag.outerHTML}`;
},
);
}
Expand Down
16 changes: 14 additions & 2 deletions src/sidebar/helpers/test/mentions-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,21 @@ look at ${mentionTag('foo', 'example.com')} comment`,
},
// Multiple mentions
{
text: 'Hey @jane look at this quote from @rob',
text: 'Hey @jane, look at this quote from @rob',
authority: 'example.com',
textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
},
// Mentions wrapped in punctuation chars
{
text: '(@jane) {@rob} and @john?',
authority: 'example.com',
textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
},
// username-like patterns with invalid chars should be ignored
{
text: 'Hello @not+a/user=name',
authority: 'example.com',
textWithTags: `Hello @not+a/user=name`,
},
// Email addresses should be ignored
{
Expand Down
26 changes: 14 additions & 12 deletions src/sidebar/util/term-before-position.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
/**
* Returns the "word" right before a specific position in an input string.
*
* In this context, a word is anything between a space, newline or tab, and
* provided position.
* Returns the text before a specific position in a string until an empty
* character or punctuation character appears.
*/
export function termBeforePosition(text: string, position: number): string {
return text.slice(0, position).match(/\S+$/)?.[0] ?? '';
return text.slice(0, position).match(/[^\s,.;:|?!'"\-()[\]{}]+$/)?.[0] ?? '';
}

export type WordOffsets = {
Expand All @@ -28,16 +26,20 @@ export function getContainingWordOffsets(
text: string,
referencePosition: number,
): WordOffsets {
const precedingEmptyCharPos = text
.slice(0, referencePosition)
.search(/\s\S*$/);
const subsequentEmptyCharPos = text.slice(referencePosition).search(/\s/);
const precedingText = text.slice(0, referencePosition);
const matches = [...precedingText.matchAll(/[\s,.;:|?!'"\-()[\]{}]/g)];
const precedingCharPos =
matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1;

const subsequentCharPos = text
.slice(referencePosition)
.search(/[\s,.;:|?!'"\-()[\]{}]/);

return {
start: (precedingEmptyCharPos === -1 ? -1 : precedingEmptyCharPos) + 1,
start: precedingCharPos + 1,
end:
subsequentEmptyCharPos === -1
subsequentCharPos === -1
? text.length
: referencePosition + subsequentEmptyCharPos,
: referencePosition + subsequentCharPos,
};
}
36 changes: 36 additions & 0 deletions src/sidebar/util/test/term-before-position-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,42 @@ describe('term-before-position', () => {
expectedTerm: 'li',
expectedOffsets: { start: 32, end: 37 },
},

// Including punctuation characters
...[
',',
'.',
';',
':',
'|',
'?',
'!',
"'",
'"',
'-',
'(',
')',
'[',
']',
'{',
'}',
].flatMap(char => [
{
text: `Foo${char}$ bar`,
expectedTerm: '',
expectedOffsets: { start: 4, end: 4 },
},
{
text: `${char}Foo$ bar`,
expectedTerm: 'Foo',
expectedOffsets: { start: 1, end: 4 },
},
{
text: `hello ${char}fo$o${char} bar`,
expectedTerm: 'fo',
expectedOffsets: { start: 7, end: 10 },
},
]),
].forEach(({ text, expectedTerm, expectedOffsets }) => {
// Get the position of the `$` sign in the text, then remove it
const position = text.indexOf('$');
Expand Down

0 comments on commit 521e24a

Please sign in to comment.