Skip to content

Commit

Permalink
Merge pull request #54 from sorcio/fix-nested-html
Browse files: browse the repository at this point in the history
Improve parsing of HTML anchor content
  • Loading branch information
manuelpuyol authored May 16, 2022
2 parents eac0cd5 + fd12c0f commit feca08c
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
13 changes: 8 additions & 5 deletions src/paste-markdown-html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ function onPaste(event: ClipboardEvent) {
// Generate DOM tree from HTML string
const parser = new DOMParser()
const doc = parser.parseFromString(textHTMLClean, 'text/html')
const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT)
const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT, node =>
node.parentNode && isLink(node.parentNode) ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT
)

const markdown = convertToMarkdown(plaintext, walker)

Expand All @@ -56,7 +58,9 @@ function convertToMarkdown(plaintext: string, walker: TreeWalker): string {
// Walk through the DOM tree
while (currentNode && index < NODE_LIMIT) {
index++
const text = isLink(currentNode) ? currentNode.textContent || '' : (currentNode.firstChild as Text)?.wholeText || ''
const text = isLink(currentNode)
? (currentNode.textContent || '').replace(/[\t\n\r ]+/g, ' ')
: (currentNode.firstChild as Text)?.wholeText || ''

// No need to transform whitespace
if (isEmptyString(text)) {
Expand All @@ -69,7 +73,7 @@ function convertToMarkdown(plaintext: string, walker: TreeWalker): string {

if (markdownFoundIndex >= 0) {
if (isLink(currentNode)) {
const markdownLink = linkify(currentNode)
const markdownLink = linkify(currentNode, text)
// Transform 'example link plus more text' into 'example [link](example link) plus more text'
// Method: 'example [link](example link) plus more text' = 'example ' + '[link](example link)' + ' plus more text'
markdown =
Expand Down Expand Up @@ -100,8 +104,7 @@ function hasHTML(transfer: DataTransfer): boolean {
}

// Makes markdown link from a link element, avoiding special GitHub links
function linkify(element: HTMLAnchorElement): string {
const label = element.textContent || ''
function linkify(element: HTMLAnchorElement, label: string): string {
const url = element.href || ''
let markdown = ''

Expand Down
22 changes: 22 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,28 @@ describe('paste-markdown', function () {
assert.equal(textarea.value, markdownSentence)
})

it('deals with links with nested html', function () {
  // Clipboard HTML: the first anchor wraps its label in a <span>, the second
  // anchor holds bare text, and the trailing "foo bar" is not linked at all.
  // eslint-disable-next-line github/unescaped-html-literal
  const html = `<a href="https://example.com/"><span>foo</span></a>
<a href="https://example.com/">bar</a>
foo bar`
  // Plain-text flavour of the same clipboard entry.
  const plain = 'foo bar foo bar'
  const clipboard = {'text/html': html, 'text/plain': plain}

  paste(textarea, clipboard)

  // The two anchor labels become markdown links; the unlinked tail stays verbatim.
  assert.equal(textarea.value, '[foo](https://example.com/) [bar](https://example.com/) foo bar')
})

it('deals with link labels that contains line breaks in html', function () {
  // The anchor label contains a newline in the HTML flavour, while the
  // plain-text flavour of the same clipboard entry has a single space there.
  // eslint-disable-next-line github/unescaped-html-literal
  const html = '<a href="https://example.com/">foo\nbar</a>'
  const plain = 'foo bar'
  const clipboard = {'text/html': html, 'text/plain': plain}

  paste(textarea, clipboard)

  // The whole label is expected to end up as one markdown link with a space in place of the newline.
  assert.equal(textarea.value, '[foo bar](https://example.com/)')
})

it("doesn't render any markdown for html link without corresponding plaintext", function () {
// eslint-disable-next-line github/unescaped-html-literal
const link = `<meta charset='utf-8'><a href="https://github.com/monalisa/playground/issues/1">
Expand Down

0 comments on commit feca08c

Please sign in to comment.