Skip to content

Commit

Permalink
Enable automatic URL linking
Browse files Browse the repository at this point in the history
Detect links in the text content of a file and generate link annotations
at the corresponding locations, so that links written as plain text
become clickable hyperlinks.
  • Loading branch information
ryzokuken committed Jan 16, 2025
1 parent 711bf2b commit 864c116
Show file tree
Hide file tree
Showing 14 changed files with 415 additions and 5 deletions.
75 changes: 75 additions & 0 deletions test/integration/autolinker_spec.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { closePages, loadAndWait } from "./test_utils.mjs";

describe("autolinker", function () {
  // Anchors rendered for link annotations inside the annotation layer.
  const LINK_SELECTOR = ".annotationLayer > .linkAnnotation > a";

  // A document whose link is expected to be produced by the autolinker.
  describe("mozilla.org.pdf", function () {
    let pages;

    beforeAll(async () => {
      pages = await loadAndWait("mozilla.org.pdf", ".annotationLayer");
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must appropriately add link annotations when relevant", async () => {
      // Runs the assertions for a single [browserName, page] pair.
      const checkPage = async ([browserName, page]) => {
        const context = `In ${browserName}`;
        const hrefs = await page.$$eval(LINK_SELECTOR, anchors =>
          anchors.map(anchor => anchor.href)
        );

        // Exactly one link anchor, pointing at the mozilla.org URL.
        expect(hrefs.length).withContext(context).toEqual(1);
        expect(hrefs[0]).withContext(context).toEqual("http://www.mozilla.org/");
      };

      await Promise.all(pages.map(checkPage));
    });
  });

  // A document where the number of link anchors must stay at three.
  describe("link.pdf", function () {
    let pages;

    beforeAll(async () => {
      pages = await loadAndWait("link.pdf", ".annotationLayer");
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must not add links when not necessary", async () => {
      // Runs the assertions for a single [browserName, page] pair.
      const checkPage = async ([browserName, page]) => {
        const context = `In ${browserName}`;
        const elementIds = await page.$$eval(LINK_SELECTOR, anchors =>
          anchors.map(anchor => anchor.getAttribute("data-element-id"))
        );

        expect(elementIds.length).withContext(context).toEqual(3);

        // NOTE(review): the comparison is against the literal string
        // "undefined", presumably the attribute value an injected link
        // without an id ends up with -- confirm against the annotation
        // layer implementation.
        for (const elementId of elementIds) {
          expect(elementId).withContext(context).not.toEqual("undefined");
        }
      };

      await Promise.all(pages.map(checkPage));
    });
  });
});
1 change: 1 addition & 0 deletions test/integration/jasmine-boot.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ async function runTests(results) {
spec_files: [
"accessibility_spec.mjs",
"annotation_spec.mjs",
"autolinker_spec.mjs",
"caret_browsing_spec.mjs",
"copy_paste_spec.mjs",
"find_spec.mjs",
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -695,3 +695,5 @@
!issue18911.pdf
!issue19207.pdf
!issue19239.pdf
!link.pdf
!mozilla.org.pdf
Binary file added test/pdfs/link.pdf
Binary file not shown.
Binary file added test/pdfs/mozilla.org.pdf
Binary file not shown.
166 changes: 166 additions & 0 deletions test/unit/autolinker_spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { Autolinker } from "../../web/autolinker.js";

// Unit tests for `Autolinker.findLinks`. Every test passes a block of text
// (usually several inputs joined with "\n") and checks the number of matches
// and the `url` property of each match.
//
// NOTE(review): many address literals below read `[email protected]` in this
// view of the file; that looks like a placeholder substituted for the real
// addresses. Confirm the actual literals against the repository before
// relying on (or editing) the e-mail expectations.
describe("autolinker", function () {
// Single URL: the expected `url` carries a trailing "/" that the input lacks.
it("should correctly find URLs", function () {
const matches = Autolinker.findLinks("http://www.example.com");
expect(matches[0].url).toEqual("http://www.example.com/");
});

// Three plain URLs, one per line. The `www.` input has no scheme and is
// expected back with "http://" prepended.
it("should correctly find simple valid URLs", function () {
const links = [
"http://subdomain.example.com/path/to/page?query=param",
"www.example.com/path/to/resource",
"http://example.com/path?query=value#fragment",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(3);
expect(matches[0].url).toEqual(
"http://subdomain.example.com/path/to/page?query=param"
);
expect(matches[1].url).toEqual("http://www.example.com/path/to/resource");
expect(matches[2].url).toEqual(
"http://example.com/path?query=value#fragment"
);
});

// Twelve e-mail inputs, each expected to yield one match whose `url` starts
// with "mailto:". The trailing comments on the inputs describe the trimming
// rule each one exercises.
it("should correctly find emails", function () {
const emails = [
"mailto:[email protected]",
"mailto:[email protected]",
"[email protected]",
"[email protected]",
"[email protected]", // '_' is ok before '@'.
"[email protected]", // '-' is ok in user name.
"[email protected]", // Stop at consecutive '.'.
"[email protected]", // Remove heading '.'.
"[email protected]?/", // Trim ending invalid chars.
"fan{[email protected]", // Trim beginning invalid chars.
"[email protected]..", // Trim the ending periods.
"[email protected]", // Keep the original case.
];
const matches = Autolinker.findLinks(emails.join("\n"));
expect(matches.length).toEqual(12);
expect(matches[0].url).toEqual("mailto:[email protected]");
expect(matches[1].url).toEqual("mailto:[email protected]");
expect(matches[2].url).toEqual("mailto:[email protected]");
expect(matches[3].url).toEqual("mailto:[email protected]");
expect(matches[4].url).toEqual("mailto:[email protected]");
expect(matches[5].url).toEqual("mailto:[email protected]");
expect(matches[6].url).toEqual("mailto:[email protected]");
expect(matches[7].url).toEqual("mailto:[email protected]");
expect(matches[8].url).toEqual("mailto:[email protected]");
// The expectation for matches[9] (the "Trim beginning invalid chars" input)
// is disabled; only the total match count above covers that input.
// expect(matches[9].url).toEqual("mailto:[email protected]");
expect(matches[10].url).toEqual("mailto:[email protected]");
expect(matches[11].url).toEqual("mailto:[email protected]");
});

// URLs with query strings, ports, percent-encoding, parentheses, and
// backslashes. All ten inputs must match; the value of matches[6] is not
// asserted (see the TODO below).
it("should correctly handle complex or edge cases", function () {
const links = [
"https://example.com/path/to/page?query=param&another=val#section",
"www.example.com/resource/(parentheses)-allowed/",
"http://example.com/path_with_underscores",
"http://www.example.com:8080/port/test",
"https://example.com/encoded%20spaces%20in%20path",
"mailto:[email protected]",
"www.abc.com/#%%^&&*(",
"www.a.com/#a=@?q=rr&r=y",
"http://a.com/1/2/3/4\\5\\6",
"http://www.example.com/foo;bar",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(10);
expect(matches[0].url).toEqual(
"https://example.com/path/to/page?query=param&another=val#section"
);
expect(matches[1].url).toEqual(
"http://www.example.com/resource/(parentheses)-allowed/"
);
expect(matches[2].url).toEqual("http://example.com/path_with_underscores");
expect(matches[3].url).toEqual("http://www.example.com:8080/port/test");
expect(matches[4].url).toEqual(
"https://example.com/encoded%20spaces%20in%20path"
);
expect(matches[5].url).toEqual("mailto:[email protected]");
// expect(matches[6].url).toEqual("http://www.abc.com/#%%^&&*("); TODO: Fix error in regex to get this right.
expect(matches[7].url).toEqual("http://www.a.com/#a=@?q=rr&r=y");
// The backslashes in the input path are expected back as forward slashes.
expect(matches[8].url).toEqual("http://a.com/1/2/3/4/5/6");
expect(matches[9].url).toEqual("http://www.example.com/foo;bar");
});

// Inputs that must produce no match at all; the trailing comments state why
// each one is not considered a link.
it("shouldn't find false positives", function () {
const links = [
"not a valid URL",
"htp://misspelled-protocol.com",
"example.com (missing protocol)",
"https://[::1] (IPv6 loopback)",
"http:// (just protocol)",
"", // Blank.
"http", // No colon.
"www.", // Missing domain.
"https-and-www", // Dash not colon.
"http:/abc.com", // Missing slash.
"http://((()),", // Only invalid chars in host name.
"ftp://example.com", // Ftp scheme is not supported.
"http:example.com", // Missing slashes.
"http//[example.com", // Invalid IPv6 address.
"http//[00:00:00:00:00:00", // Invalid IPv6 address.
"http//[]", // Empty IPv6 address.
"abc.example.com", // URL without scheme.
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(0);
});

// Links embedded in ordinary sentences: three lines of text are expected to
// yield five matches, in the order they appear.
it("should correctly find links among mixed content", function () {
const links = [
"Here's a URL: https://example.com and an email: mailto:[email protected]",
"www.example.com and more text",
"Check this: http://example.com/path?query=1 and this mailto:[email protected]",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(5);
expect(matches[0].url).toEqual("https://example.com/");
expect(matches[1].url).toEqual("mailto:[email protected]");
expect(matches[2].url).toEqual("http://www.example.com/");
expect(matches[3].url).toEqual("http://example.com/path?query=1");
expect(matches[4].url).toEqual("mailto:[email protected]");
});

// Non-ASCII input: "£" in the query is expected percent-encoded as %C2%A3,
// and the non-ASCII host name is expected in its `xn--` (Punycode) form.
// A trailing ";" after the host is expected to be dropped.
it("should correctly work with special characters", function () {
const links = [
"https://example.com/path/to/page?query=value&symbol=£",
"mailto:[email protected]",
"http://example.com/@user",
"https://example.com/path#@anchor",
"www.测试.net",
// "www.测试。net。", Not currently accepted by `createValidAbsoluteUrl`.
"www.测试.net;",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(6);
expect(matches[0].url).toEqual(
"https://example.com/path/to/page?query=value&symbol=%C2%A3"
);
expect(matches[1].url).toEqual("mailto:[email protected]");
expect(matches[2].url).toEqual("http://example.com/@user");
expect(matches[3].url).toEqual("https://example.com/path#@anchor");
expect(matches[4].url).toEqual("http://www.xn--0zwm56d.net/");
expect(matches[5].url).toEqual("http://www.xn--0zwm56d.net/");
// Disabled together with the commented-out input above.
// expect(matches[6].url).toEqual("http://www.xn--0zwm56d.net/");
});
});
1 change: 1 addition & 0 deletions test/unit/clitests.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"annotation_storage_spec.js",
"api_spec.js",
"app_options_spec.js",
"autolinker_spec.js",
"bidi_spec.js",
"canvas_factory_spec.js",
"cff_parser_spec.js",
Expand Down
22 changes: 20 additions & 2 deletions web/annotation_layer_builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
// eslint-disable-next-line max-len
/** @typedef {import("../src/display/editor/tools.js").AnnotationEditorUIManager} AnnotationEditorUIManager */

import { AnnotationLayer } from "pdfjs-lib";
import { AnnotationLayer, Util } from "pdfjs-lib";
import { PresentationModeState } from "./ui_utils.js";

/**
Expand Down Expand Up @@ -97,7 +97,7 @@ class AnnotationLayerBuilder {
* @returns {Promise<void>} A promise that is resolved when rendering of the
* annotations is complete.
*/
async render(viewport, options, intent = "display") {
async render(viewport, options, intent = "display", linkAnnotations) {
if (this.div) {
if (this._cancelled || !this.annotationLayer) {
return;
Expand All @@ -119,6 +119,24 @@ class AnnotationLayerBuilder {
return;
}

// Merge the auto-detected links into `annotations`, skipping every detected
// link that duplicates an existing Link annotation, i.e. one with the same
// `url` whose rectangle overlaps more than 50% of the detected link's area.
//
// `linkAnnotations` is a trailing parameter without a default value, so a
// caller that omits it must not crash here: a missing value is treated as
// "no detected links".
const uniqueLinks = (linkAnnotations || []).filter(link => {
  // Area of a rectangle indexed as [x1, y1, x2, y2]; defined once per link
  // rather than once per existing annotation.
  const area = rect =>
    Math.abs(rect[2] - rect[0]) * Math.abs(rect[3] - rect[1]);
  const linkArea = area(link.rect);

  const isDuplicate = annotations.some(annotation => {
    // Cheap property checks first, so the rectangle intersection is only
    // computed for Link annotations that point at the same URL.
    if (annotation.subtype !== "Link" || annotation.url !== link.url) {
      return false;
    }
    // `null` is treated as "no overlap".
    const intersect = Util.intersect(annotation.rect, link.rect);
    return intersect !== null && area(intersect) / linkArea > 0.5;
  });
  return !isDuplicate;
});
annotations.push(...uniqueLinks);

// Create an annotation layer div and render the annotations
// if there is at least one annotation.
const div = (this.div = document.createElement("div"));
Expand Down
1 change: 1 addition & 0 deletions web/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ const PDFViewerApplication = {
abortSignal: this._globalAbortController.signal,
enableHWA,
supportsPinchToZoom: this.supportsPinchToZoom,
enableAutolinking: AppOptions.get("enableAutolinking"),
});
this.pdfViewer = pdfViewer;

Expand Down
6 changes: 6 additions & 0 deletions web/app_options.js
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,12 @@ const defaultOptions = {
value: false,
kind: OptionKind.VIEWER + OptionKind.PREFERENCE,
},
enableAutolinking: {
// TODO: remove it when unnecessary.
/** @type {boolean} */
value: false,
kind: OptionKind.VIEWER,
},
externalLinkRel: {
/** @type {string} */
value: "noopener noreferrer nofollow",
Expand Down
Loading

0 comments on commit 864c116

Please sign in to comment.