diff --git a/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/HTMLOutline.java b/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/HTMLOutline.java new file mode 100644 index 000000000..5f23f8c4c --- /dev/null +++ b/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/HTMLOutline.java @@ -0,0 +1,235 @@ +/* + * {{{ header & license + * Copyright (c) 2016 Stanimir Stamenkov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * }}} + */ +package org.xhtmlrenderer.pdf; + +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.traversal.DocumentTraversal; +import org.w3c.dom.traversal.NodeFilter; +import org.w3c.dom.traversal.NodeIterator; + +import org.xhtmlrenderer.pdf.ITextOutputDevice.Bookmark; +import org.xhtmlrenderer.render.Box; + +class HTMLOutline { + + private static final Pattern HEADING = + Pattern.compile("h([1-6])", Pattern.CASE_INSENSITIVE); + + /** sectioning roots */ + private static final Pattern ROOT = + Pattern.compile("blockquote|details|fieldset|figure|td", + Pattern.CASE_INSENSITIVE); + + private static final Pattern WS = Pattern.compile("\\s+"); + + private static final int MAX_NAME_LENGTH = 200; + + private final HTMLOutline parent; + + private final int level; + + private final Bookmark bookmark; + + private HTMLOutline() { + this(0, "root", null); + } + + private HTMLOutline(int level, String name, HTMLOutline parent) { + this.level = level; + this.bookmark = new Bookmark(name, ""); + this.parent = parent; + if (parent != null) { + parent.bookmark.addChild(bookmark); + } + } + + /** + * Creates a bookmark list of the document outline generated for the given + * element context (usually the root document element). + *

+ * The current algorithm is simpler than the one suggested in the HTML5 + * specification in that it is not affected by + * sectioning + * content but only by the heading level. For + * example:

+ *
+     * <body>
+     *   <h1>Foo</h1>
+     *   <h3>Bar</h3>
+     *   <blockquote>
+     *     <h5>Bla</h5>
+     *   </blockquote>
+     *   <p>Baz</p>
+     *   <h2>Quux</h2>
+     *   <section>
+     *     <h3>Thud</h3>
+     *   </section>
+     *   <h4>Grunt</h4>
+     * </body>
+ *

+ * This should generate the outline as:

+ *
    + *
  1. Foo + *
      + *
    1. Bar
    2. + *
    3. Quux
    4. + *
    5. Thud
    6. + *
    7. Grunt
    8. + *
  2. + *
+ *

+ * But the current algorithm generates the outline as:

+ *
    + *
  1. Foo + *
      + *
    1. Bar
    2. + *
    3. Quux + *
        + *
      1. Thud + *
          + *
        1. Grunt
        2. + *
      2. + *
    4. + *
  2. + *
+ * + *

Example document customizations

+ * + *
Include non-heading element as bookmark (level 4)
+ *
+     * <strong data-pdf-bookmark="4">Foo bar</strong>
+ * + *
Specify bookmark name
+ *
+     * <tr data-pdf-bookmark="5" data-pdf-bookmark-name="Bar baz">...</tr>
+ * + *
Exclude individual heading from bookmarks
+ *
+     * <h3 data-pdf-bookmark="none">Baz qux</h3>
+ * + *
Prevent automatic bookmarks for the whole of the document
+ *
+     * <html data-pdf-bookmark="exclude">...</html>
+ * + * @param context the top element a sectioning outline would be generated for; + * @param box box hierarchy the outline bookmarks would get mapped into. + * @return Bookmarks of the outline generated for the given element context. + * @see Creating an outline + */ + public static List generate(Element context, Box box) { + NodeIterator iterator = NestedSectioningFilter.iterator(context); + + HTMLOutline root = new HTMLOutline(); + HTMLOutline current = root; + Map map = new IdentityHashMap(); + + for (Element element = (Element) iterator.nextNode(); + element != null; element = (Element) iterator.nextNode()) { + int level; + try { + level = Integer.parseInt(getOutlineLevel(element)); + if (level < 1) { + continue; // Illegal value + } + } catch (NumberFormatException e) { + continue; // Invalid value + } + + String name = getBookmarkName(element); + + while (current.level >= level) { + current = current.parent; + } + current = new HTMLOutline(level, name, current); + map.put(element, current.bookmark); + } + initBoxRefs(map, box); + return root.bookmark.getChildren(); + } // generate(Element, Box) : List + + private static void initBoxRefs(Map map, Box box) { + Bookmark bookmark = map.get(box.getElement()); + if (bookmark != null) { + bookmark.setBox(box); + } + for (int i = 0, len = box.getChildCount(); i < len; i++) { + initBoxRefs(map, box.getChild(i)); + } + } + + private static String getBookmarkName(Element element) { + String name = element.getAttribute("data-pdf-bookmark-name").trim(); + if (name.isEmpty()) { + name = element.getTextContent(); + } + name = WS.matcher(name.trim()).replaceAll(" "); + if (name.length() > MAX_NAME_LENGTH) { + name = name.substring(0, MAX_NAME_LENGTH); + } + return name; + } + + static String getOutlineLevel(Element element) { + String bookmark = element.getAttribute("data-pdf-bookmark").trim(); + if (bookmark.isEmpty()) { + Matcher heading = HEADING.matcher(element.getTagName()); + if (heading.matches()) { + bookmark = 
heading.group(1); + } else if (ROOT.matcher(element.getTagName()).matches()) { + bookmark = "exclude"; + } else { + bookmark = "none"; + } + } + return bookmark; + } + + + private static class NestedSectioningFilter implements NodeFilter { + + static final NestedSectioningFilter INSTANCE = new NestedSectioningFilter(); + + static NodeIterator iterator(Element root) { + return ((DocumentTraversal) root.getOwnerDocument()) + .createNodeIterator(root, SHOW_ELEMENT, INSTANCE, true); + } + + @Override + public short acceptNode(Node n) { + String outlineLevel = getOutlineLevel((Element) n); + if (outlineLevel.equalsIgnoreCase("none")) { + return FILTER_SKIP; + } + return outlineLevel.equalsIgnoreCase("exclude") + ? FILTER_REJECT + : FILTER_ACCEPT; + } + + } // class NestedSectioningFilter + + +} // class HTMLOutline diff --git a/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/ITextOutputDevice.java b/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/ITextOutputDevice.java index 59f199192..77b02357b 100644 --- a/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/ITextOutputDevice.java +++ b/flying-saucer-pdf-itext5/src/main/java/org/xhtmlrenderer/pdf/ITextOutputDevice.java @@ -20,7 +20,6 @@ package org.xhtmlrenderer.pdf; import java.awt.BasicStroke; -import java.awt.Color; import java.awt.Point; import java.awt.Rectangle; import java.awt.Shape; @@ -35,8 +34,6 @@ import java.awt.geom.Point2D; import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -66,7 +63,6 @@ import org.xhtmlrenderer.pdf.ITextFontResolver.FontDescription; import org.xhtmlrenderer.render.AbstractOutputDevice; import org.xhtmlrenderer.render.BlockBox; -import org.xhtmlrenderer.render.BorderPainter; import org.xhtmlrenderer.render.Box; import org.xhtmlrenderer.render.FSFont; import org.xhtmlrenderer.render.InlineLayoutBox; @@ -906,6 
+902,9 @@ public void finish(RenderingContext c, Box root) { } private void writeOutline(RenderingContext c, Box root) { + if (_bookmarks.isEmpty()) { + _bookmarks = HTMLOutline.generate(root.getElement(), root); + } if (_bookmarks.size() > 0) { _writer.setViewerPreferences(PdfWriter.PageModeUseOutlines); writeBookmarks(c, root, _writer.getRootOutline(), _bookmarks); @@ -931,15 +930,16 @@ private int getPageRefY(Box box) { private void writeBookmark(RenderingContext c, Box root, PdfOutline parent, Bookmark bookmark) { String href = bookmark.getHRef(); PdfDestination target = null; + Box box = bookmark.getBox(); if (href.length() > 0 && href.charAt(0) == '#') { - Box box = _sharedContext.getBoxById(href.substring(1)); - if (box != null) { - PageBox page = root.getLayer().getPage(c, getPageRefY(box)); - int distanceFromTop = page.getMarginBorderPadding(c, CalculatedStyle.TOP); - distanceFromTop += box.getAbsY() - page.getTop(); - target = new PdfDestination(PdfDestination.XYZ, 0, normalizeY(distanceFromTop / _dotsPerPoint), 0); - target.addPage(_writer.getPageReference(_startPageNo + page.getPageNo() + 1)); - } + box = _sharedContext.getBoxById(href.substring(1)); + } + if (box != null) { + PageBox page = root.getLayer().getPage(c, getPageRefY(box)); + int distanceFromTop = page.getMarginBorderPadding(c, CalculatedStyle.TOP); + distanceFromTop += box.getAbsY() - page.getTop(); + target = new PdfDestination(PdfDestination.XYZ, 0, normalizeY(distanceFromTop / _dotsPerPoint), 0); + target.addPage(_writer.getPageReference(_startPageNo + page.getPageNo() + 1)); } if (target == null) { target = _defaultDestination; @@ -980,9 +980,10 @@ private void loadBookmark(Bookmark parent, Element bookmark) { } } - private static class Bookmark { + static class Bookmark { private String _name; private String _HRef; + private Box _box; private List _children; @@ -994,6 +995,14 @@ public Bookmark(String name, String href) { _HRef = href; } + public Box getBox() { + return _box; + } + + 
public void setBox(Box box) { + _box = box; + } + public String getHRef() { return _HRef; }