-
Notifications
You must be signed in to change notification settings - Fork 203
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ignore punctuation characters when trying to match mentions
- Loading branch information
Showing
5 changed files
with
226 additions
and
157 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,19 @@ | ||
import type { Mention } from '../../types/api'; | ||
import { buildAccountID } from './account-id'; | ||
|
||
// Pattern that matches characters treated as the boundary of a mention. | ||
const BOUNDARY_CHARS = String.raw`[\s,.;:|?!'"\-()[\]{}]`; | ||
|
||
// Pattern that matches Hypothesis usernames. | ||
// See https://github.com/hypothesis/h/blob/797d9a4/h/models/user.py#L25 | ||
const USERNAME_PAT = '[A-Za-z0-9_][A-Za-z0-9._]+[A-Za-z0-9_]'; | ||
|
||
// Pattern that finds user mentions in text. | ||
const MENTIONS_PAT = new RegExp( | ||
`(^|${BOUNDARY_CHARS})@(${USERNAME_PAT})(?=${BOUNDARY_CHARS}|$)`, | ||
'g', | ||
); | ||
|
||
/** | ||
* Wrap all occurrences of @mentions in provided text into the corresponding | ||
* special tag, as long as they are surrounded by "empty" space (space, tab, new | ||
|
@@ -10,22 +23,15 @@ import { buildAccountID } from './account-id'; | |
* `<a data-hyp-mention data-userid="acct:[email protected]">@someuser</a>` | ||
*/ | ||
export function wrapMentions(text: string, authority: string): string { | ||
return text.replace( | ||
// Capture both the potential empty character before the mention (space, tab | ||
// or new line), and the term following the `@` character. | ||
// When we build the mention tag, we need to prepend that empty character to | ||
// avoid altering the spacing and structure of the text. | ||
/(^|\s)@(\w+)(?=\s|$)/g, | ||
(match, precedingWhitespace, username) => { | ||
const tag = document.createElement('a'); | ||
|
||
tag.setAttribute('data-hyp-mention', ''); | ||
tag.setAttribute('data-userid', buildAccountID(username, authority)); | ||
tag.textContent = `@${username}`; | ||
|
||
return `${precedingWhitespace}${tag.outerHTML}`; | ||
}, | ||
); | ||
return text.replace(MENTIONS_PAT, (match, precedingChar, username) => { | ||
const tag = document.createElement('a'); | ||
|
||
tag.setAttribute('data-hyp-mention', ''); | ||
tag.setAttribute('data-userid', buildAccountID(username, authority)); | ||
tag.textContent = `@${username}`; | ||
|
||
return `${precedingChar}${tag.outerHTML}`; | ||
}); | ||
} | ||
|
||
/** | ||
|
@@ -115,3 +121,50 @@ export function renderMentionTags( | |
|
||
return foundMentions; | ||
} | ||
|
||
/** | ||
* Returns the word at a specific position in a string, surrounded by empty | ||
* characters or punctuation characters. | ||
*/ | ||
export function termBeforePosition(text: string, position: number): string { | ||
const { start } = getContainingMentionOffsets(text, position); | ||
return text.slice(start, position); | ||
} | ||
|
||
export type WordOffsets = { | ||
start: number; | ||
end: number; | ||
}; | ||
|
||
/** | ||
* Returns the `start` and `end` positions for the word or mention that overlaps | ||
* with provided reference position. | ||
* | ||
* For example, given the text "hello @hypothesis", and the reference position 9 | ||
* (which corresponds to the `y` character) it will return the start and end of | ||
* the `@hypothesis` mention, hence { start: 6, end: 17 }. | ||
* | ||
* Useful to get the offsets of the mention matching the caret position in text | ||
* inputs and textareas. | ||
*/ | ||
export function getContainingMentionOffsets( | ||
text: string, | ||
referencePosition: number, | ||
): WordOffsets { | ||
const precedingText = text.slice(0, referencePosition); | ||
const matches = [...precedingText.matchAll(new RegExp(BOUNDARY_CHARS, 'g'))]; | ||
const precedingCharPos = | ||
matches.length > 0 ? Math.max(...matches.map(match => match.index)) : -1; | ||
|
||
const subsequentCharPos = text | ||
.slice(referencePosition) | ||
.search(new RegExp(BOUNDARY_CHARS)); | ||
|
||
return { | ||
start: precedingCharPos + 1, | ||
end: | ||
subsequentCharPos === -1 | ||
? text.length | ||
: referencePosition + subsequentCharPos, | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
import { renderMentionTags, unwrapMentions, wrapMentions } from '../mentions'; | ||
import { | ||
renderMentionTags, | ||
unwrapMentions, | ||
wrapMentions, | ||
getContainingMentionOffsets, | ||
termBeforePosition, | ||
} from '../mentions'; | ||
|
||
const mentionTag = (username, authority) => | ||
`<a data-hyp-mention="" data-userid="acct:${username}@${authority}">@${username}</a>`; | ||
|
@@ -40,16 +46,35 @@ look at ${mentionTag('foo', 'example.com')} comment`, | |
}, | ||
// Multiple mentions | ||
{ | ||
text: 'Hey @jane look at this quote from @rob', | ||
text: 'Hey @jane, look at this quote from @rob', | ||
authority: 'example.com', | ||
textWithTags: `Hey ${mentionTag('jane', 'example.com')} look at this quote from ${mentionTag('rob', 'example.com')}`, | ||
textWithTags: `Hey ${mentionTag('jane', 'example.com')}, look at this quote from ${mentionTag('rob', 'example.com')}`, | ||
}, | ||
// Mentions wrapped in punctuation chars | ||
{ | ||
text: '(@jane) {@rob} and @john?', | ||
authority: 'example.com', | ||
textWithTags: `(${mentionTag('jane', 'example.com')}) {${mentionTag('rob', 'example.com')}} and ${mentionTag('john', 'example.com')}?`, | ||
}, | ||
// username-like patterns with invalid chars should be ignored | ||
{ | ||
text: 'Hello @not+a/user=name', | ||
authority: 'example.com', | ||
textWithTags: `Hello @not+a/user=name`, | ||
}, | ||
// Email addresses should be ignored | ||
{ | ||
text: 'Ignore email: [email protected]', | ||
authority: 'example.com', | ||
textWithTags: 'Ignore email: [email protected]', | ||
}, | ||
// Trailing dots should not be considered part of the mention, but dots | ||
// in-between should | ||
{ | ||
text: 'Hello @jane.doe.', | ||
authority: 'example.com', | ||
textWithTags: `Hello ${mentionTag('jane.doe', 'example.com')}.`, | ||
}, | ||
].forEach(({ text, authority, textWithTags }) => { | ||
describe('wrapMentions', () => { | ||
it('wraps every mention in a mention tag', () => { | ||
|
@@ -119,3 +144,127 @@ describe('renderMentionTags', () => { | |
assert.equal(fourthMention, '@user_id_missing'); | ||
}); | ||
}); | ||
|
||
// To make these tests more predictable, we place the `$` sign in the position | ||
// to be checked. That way it's easier to see what is the "word" preceding it. | ||
// The test will then get the `$` sign index and remove it from the text | ||
// before passing it to `termBeforePosition`. | ||
[ | ||
// First and last positions | ||
{ | ||
text: '$Hello world', | ||
expectedTerm: '', | ||
expectedOffsets: { start: 0, end: 5 }, | ||
}, | ||
{ | ||
text: 'Hello world$', | ||
expectedTerm: 'world', | ||
expectedOffsets: { start: 6, end: 11 }, | ||
}, | ||
|
||
// Position in the middle of words | ||
{ | ||
text: 'Hell$o world', | ||
expectedTerm: 'Hell', | ||
expectedOffsets: { start: 0, end: 5 }, | ||
}, | ||
{ | ||
text: 'Hello wor$ld', | ||
expectedTerm: 'wor', | ||
expectedOffsets: { start: 6, end: 11 }, | ||
}, | ||
|
||
// Position preceded by "empty space" | ||
{ | ||
text: 'Hello $world', | ||
expectedTerm: '', | ||
expectedOffsets: { start: 6, end: 11 }, | ||
}, | ||
{ | ||
text: `Text with | ||
multiple | ||
$ | ||
lines | ||
`, | ||
expectedTerm: '', | ||
expectedOffsets: { start: 31, end: 31 }, | ||
}, | ||
|
||
// Position preceded by/in the middle of a word for multi-line text | ||
{ | ||
text: `Text with$ | ||
multiple | ||
lines | ||
`, | ||
expectedTerm: 'with', | ||
expectedOffsets: { start: 5, end: 9 }, | ||
}, | ||
{ | ||
text: `Text with | ||
multiple | ||
li$nes | ||
`, | ||
expectedTerm: 'li', | ||
expectedOffsets: { start: 32, end: 37 }, | ||
}, | ||
|
||
// Including punctuation characters | ||
...[ | ||
',', | ||
'.', | ||
';', | ||
':', | ||
'|', | ||
'?', | ||
'!', | ||
"'", | ||
'"', | ||
'-', | ||
'(', | ||
')', | ||
'[', | ||
']', | ||
'{', | ||
'}', | ||
].flatMap(char => [ | ||
{ | ||
text: `Foo${char}$ bar`, | ||
expectedTerm: '', | ||
expectedOffsets: { start: 4, end: 4 }, | ||
}, | ||
{ | ||
text: `${char}Foo$ bar`, | ||
expectedTerm: 'Foo', | ||
expectedOffsets: { start: 1, end: 4 }, | ||
}, | ||
{ | ||
text: `hello ${char}fo$o${char} bar`, | ||
expectedTerm: 'fo', | ||
expectedOffsets: { start: 7, end: 10 }, | ||
}, | ||
]), | ||
].forEach(({ text, expectedTerm, expectedOffsets }) => { | ||
// Get the position of the `$` sign in the text, then remove it | ||
const position = text.indexOf('$'); | ||
const textWithoutDollarSign = text.replace('$', ''); | ||
|
||
describe('termBeforePosition', () => { | ||
it('returns the term right before provided position', () => { | ||
assert.equal( | ||
termBeforePosition(textWithoutDollarSign, position), | ||
expectedTerm, | ||
); | ||
}); | ||
}); | ||
|
||
describe('getContainingMentionOffsets', () => { | ||
it('returns expected offsets', () => { | ||
assert.deepEqual( | ||
getContainingMentionOffsets(textWithoutDollarSign, position), | ||
expectedOffsets, | ||
); | ||
}); | ||
}); | ||
}); |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.