Skip to content

Commit

Permalink
Ignore punctuation characters when looking for mentions
Browse files Browse the repository at this point in the history
  • Loading branch information
acelaya committed Feb 7, 2025
1 parent e9d9845 commit 6573a3a
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 146 deletions.
8 changes: 4 additions & 4 deletions src/sidebar/components/MarkdownEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ import {
} from 'preact/hooks';

import { isMacOS } from '../../shared/user-agent';
import {
getContainingWordOffsets,
termBeforePosition,
} from '../helpers/mentions';
import {
LinkType,
convertSelectionToLink,
toggleBlockStyle,
toggleSpanStyle,
} from '../markdown-commands';
import type { EditorState } from '../markdown-commands';
import {
getContainingWordOffsets,
termBeforePosition,
} from '../util/term-before-position';
import MarkdownView from './MarkdownView';
import MentionPopover from './MentionPopover';

Expand Down
64 changes: 58 additions & 6 deletions src/sidebar/helpers/mentions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,24 @@ import { buildAccountID } from './account-id';
*/
export function wrapMentions(text: string, authority: string): string {
return text.replace(
// Capture both the potential empty character before the mention (space, tab
// or new line), and the term following the `@` character.
// When we build the mention tag, we need to prepend that empty character to
// Capture both the potential empty char (space, tab or new line) or
// punctuation char before the mention, and the term following the `@`
// character.
// When we build the mention tag, we need to prepend that prev character to
// avoid altering the spacing and structure of the text.
/(^|\s)@(\w+)(?=\s|$)/g,
(match, precedingWhitespace, username) => {
//
// To match the username, we only look for `A-Za-z0-9._` characters, which
// is what the server allows.
// See: https://github.com/hypothesis/h/blob/b8d0d4c/h/schemas/api/user.py#L21
/(^|[\s,.;:|?!'"\-()[\]{}])@([A-Za-z0-9._]+)(?=[\s,.;:|?!'"\-()[\]{}]|$)/g,
(match, precedingChar, username) => {
const tag = document.createElement('a');

tag.setAttribute('data-hyp-mention', '');
tag.setAttribute('data-userid', buildAccountID(username, authority));
tag.textContent = `@${username}`;

return `${precedingWhitespace}${tag.outerHTML}`;
return `${precedingChar}${tag.outerHTML}`;
},
);
}
Expand Down Expand Up @@ -115,3 +120,50 @@ export function renderMentionTags(

return foundMentions;
}

/**
* Returns the word at a specific position in a string, surrounded by empty
* characters or punctuation characters.
*/
export function termBeforePosition(text: string, position: number): string {
const { start } = getContainingWordOffsets(text, position);
return text.slice(start, position);
}

export type WordOffsets = {
start: number;
end: number;
};

/**
* Returns the `start` and `end` positions for the word that overlaps with
* provided reference position.
*
* For example, given the text "hello hypothesis", and the reference position 9
* (which corresponds to the `p` character) it will return the start and end of
* the word `hypothesis`, hence { start: 6, end: 16 }.
*
* Useful to get the offsets of the word matching the caret position in text
* inputs and textareas.
*/
export function getContainingWordOffsets(
text: string,
referencePosition: number,
): WordOffsets {
const precedingText = text.slice(0, referencePosition);
const matches = [...precedingText.matchAll(/[\s,.;:|?!'"\-()[\]{}]/g)];
const precedingCharPos =
matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1;

const subsequentCharPos = text
.slice(referencePosition)
.search(/[\s,.;:|?!'"\-()[\]{}]/);

return {
start: precedingCharPos + 1,
end:
subsequentCharPos === -1
? text.length
: referencePosition + subsequentCharPos,
};
}
148 changes: 145 additions & 3 deletions src/sidebar/helpers/test/mentions-test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions';
import {
renderMentionTags,
unwrapMentions,
wrapMentions,
getContainingWordOffsets,
termBeforePosition,
} from '../mentions';

const mentionTag = (username, authority) =>
`<a data-hyp-mention="" data-userid="acct:${username}@${authority}">@${username}</a>`;
Expand Down Expand Up @@ -40,9 +46,21 @@ look at ${mentionTag('foo', 'example.com')} comment`,
},
// Multiple mentions
{
text: 'Hey @jane look at this quote from @rob',
text: 'Hey @jane, look at this quote from @rob',
authority: 'example.com',
textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`,
textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`,
},
// Mentions wrapped in punctuation chars
{
text: '(@jane) {@rob} and @john?',
authority: 'example.com',
textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`,
},
// username-like patterns with invalid chars should be ignored
{
text: 'Hello @not+a/user=name',
authority: 'example.com',
textWithTags: `Hello @not+a/user=name`,
},
// Email addresses should be ignored
{
Expand Down Expand Up @@ -119,3 +137,127 @@ describe('renderMentionTags', () => {
assert.equal(fourthMention, '@user_id_missing');
});
});

// To make these tests more predictable, we place the `$` sign in the position
// to be checked. That way it's easier to see what is the "word" preceding it.
// The test will then get the `$` sign index and remove it from the text
// before passing it to `termBeforePosition`.
[
// First and last positions
{
text: '$Hello world',
expectedTerm: '',
expectedOffsets: { start: 0, end: 5 },
},
{
text: 'Hello world$',
expectedTerm: 'world',
expectedOffsets: { start: 6, end: 11 },
},

// Position in the middle of words
{
text: 'Hell$o world',
expectedTerm: 'Hell',
expectedOffsets: { start: 0, end: 5 },
},
{
text: 'Hello wor$ld',
expectedTerm: 'wor',
expectedOffsets: { start: 6, end: 11 },
},

// Position preceded by "empty space"
{
text: 'Hello $world',
expectedTerm: '',
expectedOffsets: { start: 6, end: 11 },
},
{
text: `Text with
multiple
$
lines
`,
expectedTerm: '',
expectedOffsets: { start: 31, end: 31 },
},

// Position preceded by/in the middle of a word for multi-line text
{
text: `Text with$
multiple
lines
`,
expectedTerm: 'with',
expectedOffsets: { start: 5, end: 9 },
},
{
text: `Text with
multiple
li$nes
`,
expectedTerm: 'li',
expectedOffsets: { start: 32, end: 37 },
},

// Including punctuation characters
...[
',',
'.',
';',
':',
'|',
'?',
'!',
"'",
'"',
'-',
'(',
')',
'[',
']',
'{',
'}',
].flatMap(char => [
{
text: `Foo${char}$ bar`,
expectedTerm: '',
expectedOffsets: { start: 4, end: 4 },
},
{
text: `${char}Foo$ bar`,
expectedTerm: 'Foo',
expectedOffsets: { start: 1, end: 4 },
},
{
text: `hello ${char}fo$o${char} bar`,
expectedTerm: 'fo',
expectedOffsets: { start: 7, end: 10 },
},
]),
].forEach(({ text, expectedTerm, expectedOffsets }) => {
// Get the position of the `$` sign in the text, then remove it
const position = text.indexOf('$');
const textWithoutDollarSign = text.replace('$', '');

describe('termBeforePosition', () => {
it('returns the term right before provided position', () => {
assert.equal(
termBeforePosition(textWithoutDollarSign, position),
expectedTerm,
);
});
});

describe('getContainingWordOffsets', () => {
it('returns expected offsets', () => {
assert.deepEqual(
getContainingWordOffsets(textWithoutDollarSign, position),
expectedOffsets,
);
});
});
});
43 changes: 0 additions & 43 deletions src/sidebar/util/term-before-position.ts

This file was deleted.

Loading

0 comments on commit 6573a3a

Please sign in to comment.