JabRef · InAnYan · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 4, 2025
diff --git a/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java b/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java
@@ -176,6 +176,18 @@ public List<ImportFilesResultItemViewModel> call() {
                                 message = bibtexParserResult.getErrorMessage();
                             }
                             addResultToList(file, success, message);
+                        } else if (FileUtil.isEpubFile(file)) {
+                            ParserResult result = contentImporter.importEpubContent(file);
+                            List<BibEntry> entries = result.getDatabase().getEntries();
+                            // Import failures are reported as warnings on the parser result.
+                            boolean success = !result.hasWarnings();
+                            String message;
+                            if (success) {
+                                message = Localization.lang("ePUB entry was successfully imported");
+                            } else {
+                                message = result.getErrorMessage();
+                            }
+                            entriesToAdd.addAll(entries);
+                            // Pass the real outcome: a hard-coded `true` would show a failed import as successful
+                            // although the message next to it is an error message.
+                            addResultToList(file, success, message);
                         } else {
                             BibEntry emptyEntryWithLink = createEmptyEntryWithLink(file);
                             entriesToAdd.add(emptyEntryWithLink);

diff --git a/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java b/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java
@@ -28,7 +28,7 @@ public enum StandardExternalFileType implements ExternalFileType {
     TIFF(Localization.lang("%0 image", "TIFF"), "tiff", "image/tiff", "gimp", "picture", IconTheme.JabRefIcons.PICTURE),
     URL("URL", "html", "text/html", "firefox", "www", IconTheme.JabRefIcons.WWW),
     MHT("MHT", "mht", "multipart/related", "firefox", "www", IconTheme.JabRefIcons.WWW),
-    ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.WWW),
+    ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.BOOK),
     MARKDOWN("Markdown", "md", "text/markdown", "emacs", "emacs", IconTheme.JabRefIcons.FILE_TEXT);
     private final String name;
     private final String extension;

diff --git a/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java b/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java
@@ -3,10 +3,14 @@
 import java.io.IOException;
 import java.nio.file.Path;
 
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+
 import org.jabref.logic.FilePreferences;
 import org.jabref.logic.importer.ImportFormatPreferences;
 import org.jabref.logic.importer.OpenDatabase;
 import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.importer.fileformat.EpubImporter;
 import org.jabref.logic.importer.fileformat.PdfMergeMetadataImporter;
 import org.jabref.model.database.BibDatabaseContext;
 import org.jabref.model.util.FileUpdateMonitor;
@@ -27,6 +31,14 @@ public ParserResult importPDFContent(Path file, BibDatabaseContext context, File
         }
     }
 
+    public ParserResult importEpubContent(Path file) {
+        try {
+            return new EpubImporter(importFormatPreferences).importDatabase(file);
+        } catch (IOException | XPathExpressionException | ParserConfigurationException e) {
+            return ParserResult.fromError(e);
+        }
+    }
+
     public ParserResult importFromBibFile(Path bibFile, FileUpdateMonitor fileUpdateMonitor) throws IOException {
         return OpenDatabase.loadDatabase(bibFile, importFormatPreferences, fileUpdateMonitor);
     }

diff --git a/src/main/java/org/jabref/logic/importer/ImportFormatReader.java b/src/main/java/org/jabref/logic/importer/ImportFormatReader.java
@@ -9,6 +9,9 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+
 import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
 import org.jabref.logic.importer.fileformat.BiblioscapeImporter;
 import org.jabref.logic.importer.fileformat.BibtexImporter;
@@ -17,6 +20,7 @@
 import org.jabref.logic.importer.fileformat.CopacImporter;
 import org.jabref.logic.importer.fileformat.EndnoteImporter;
 import org.jabref.logic.importer.fileformat.EndnoteXmlImporter;
+import org.jabref.logic.importer.fileformat.EpubImporter;
 import org.jabref.logic.importer.fileformat.InspecImporter;
 import org.jabref.logic.importer.fileformat.IsiImporter;
 import org.jabref.logic.importer.fileformat.MedlineImporter;
@@ -37,10 +41,15 @@
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.util.FileUpdateMonitor;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 public class ImportFormatReader {
 
     public static final String BIBTEX_FORMAT = "BibTeX";
 
+    private final static Logger LOGGER = LoggerFactory.getLogger(ImportFormatReader.class);
+
     /**
      * All import formats.
      * Sorted accordingly to {@link Importer#compareTo}, which defaults to alphabetically by the name
@@ -89,6 +98,15 @@ public void reset() {
         formats.add(new BibtexImporter(importFormatPreferences, fileUpdateMonitor));
         formats.add(new CitaviXmlImporter());
 
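+        // The constructor of `EpubImporter` compiles its `XPath` expressions, which may throw `XPathExpressionException`.
+        // It also creates a `DocumentBuilder` through its factory, which may throw `ParserConfigurationException`.
+        // Workaround: if construction fails, log the error and leave the ePUB importer out of the available formats.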
+        try {
+            formats.add(new EpubImporter(importFormatPreferences));
+        } catch (XPathExpressionException | ParserConfigurationException e) {
+            LOGGER.error("Unable to construct `EpubImporter`. `EpubImporter` will not be added to available importers", e);
+        }
+
         // Get custom import formats
         formats.addAll(importerPreferences.getCustomImporters());
     }

diff --git a/src/main/java/org/jabref/logic/importer/ParserResult.java b/src/main/java/org/jabref/logic/importer/ParserResult.java
@@ -45,6 +45,10 @@ public ParserResult(BibDatabase database, MetaData metaData, Set<BibEntryType> e
         this.entryTypes = Objects.requireNonNull(entryTypes);
     }
 
+    /**
+     * Creates a {@link ParserResult} from a single entry.
+     *
+     * @param entry the only entry of the result
+     * @return a parser result constructed from a singleton set holding {@code entry}
+     */
+    public static ParserResult fromEntry(BibEntry entry) {
+        Set<BibEntry> singleEntry = Collections.singleton(entry);
+        return new ParserResult(singleEntry);
+    }
+
     public static ParserResult fromErrorMessage(String message) {
         ParserResult parserResult = new ParserResult();
         parserResult.addWarning(message);

diff --git a/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java
@@ -0,0 +1,174 @@
+package org.jabref.logic.importer.fileformat;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+import org.jabref.gui.util.OptionalObjectProperty;
+import org.jabref.logic.importer.ImportFormatPreferences;
+import org.jabref.logic.importer.Importer;
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.util.FileType;
+import org.jabref.logic.util.StandardFileType;
+import org.jabref.logic.util.io.FileUtil;
+import org.jabref.logic.util.io.XMLUtil;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.LinkedFile;
+import org.jabref.model.entry.field.Field;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.StandardEntryType;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+/**
+ * Imports the metadata of an ePUB e-book as a single {@link BibEntry} of type {@link StandardEntryType#Book}.
+ * <p>
+ * The first file inside the archive whose name ends with <code>.opf</code> is parsed as XML. Title, creators,
+ * description, languages, subjects and source/identifier found below <code>/package/metadata</code> are mapped to
+ * fields of the entry, and the ePUB file itself is attached as a linked file.
+ * <p>
+ * NOTE(review): the {@link DocumentBuilder} is shared by all calls on one instance; JAXP does not promise that it is
+ * thread-safe, so concurrent imports through the same instance should be avoided.
+ */
+public class EpubImporter extends Importer {
+    private static final Logger LOGGER = LoggerFactory.getLogger(EpubImporter.class);
+
+    // Signature "PK\3\4" found at the start of a ZIP archive; ePUB files are ZIP archives.
+    private static final char[] EPUB_HEADER_MAGIC_NUMBER = {0x50, 0x4b, 0x03, 0x04};
+    private static final String METADATA_FILE_EXTENSION = ".opf";
+    private static final String VALUE_SEPARATOR = " and ";
+
+    // NOTE(review): the factory keeps its default configuration. ePUB files are untrusted input, so consider
+    // restricting external entity resolution here.
+    private final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+    private final DocumentBuilder builder = factory.newDocumentBuilder();
+
+    private final XPathFactory xPathFactory = XPathFactory.newInstance();
+    private final XPath xpath = xPathFactory.newXPath();
+
+    private final XPathExpression titlePath = xpath.compile("/package/metadata/title");
+    private final XPathExpression creatorPath = xpath.compile("/package/metadata/creator");
+    private final XPathExpression identifierPath = xpath.compile("/package/metadata/identifier");
+    private final XPathExpression languagePath = xpath.compile("/package/metadata/language");
+    private final XPathExpression sourcePath = xpath.compile("/package/metadata/source");
+    private final XPathExpression descriptionPath = xpath.compile("/package/metadata/description");
+    private final XPathExpression subjectPath = xpath.compile("/package/metadata/subject");
+
+    private final ImportFormatPreferences importFormatPreferences;
+
+    /**
+     * @param importFormatPreferences supplies the keyword separator used when the subjects are stored as keywords
+     * @throws XPathExpressionException     if one of the metadata XPath expressions cannot be compiled
+     * @throws ParserConfigurationException if no XML document builder can be created
+     */
+    public EpubImporter(ImportFormatPreferences importFormatPreferences) throws XPathExpressionException, ParserConfigurationException {
+        this.importFormatPreferences = importFormatPreferences;
+    }
+
+    /**
+     * Checks whether the input starts with the ZIP signature.
+     * <p>
+     * ePUB is a ZIP-based format, so this method will clash with other ZIP-based formats.
+     * Currently, only `.ctv6bak` is found.
+     */
+    @Override
+    public boolean isRecognizedFormat(BufferedReader input) throws IOException {
+        char[] header = new char[EPUB_HEADER_MAGIC_NUMBER.length];
+        int nRead = input.read(header);
+        return nRead == EPUB_HEADER_MAGIC_NUMBER.length && Arrays.equals(header, EPUB_HEADER_MAGIC_NUMBER);
+    }
+
+    /**
+     * Opens the ePUB file as a ZIP file system, locates its metadata file and converts the metadata to one entry.
+     *
+     * @param filePath the ePUB file
+     * @return a result with exactly one entry, or an error result if no metadata file is found or it cannot be
+     *         parsed or evaluated
+     * @throws IOException if the archive or the metadata file cannot be read
+     */
+    @Override
+    public ParserResult importDatabase(Path filePath) throws IOException {
+        try (FileSystem fileSystem = FileSystems.newFileSystem(filePath)) {
+            Optional<Path> metadataFilePath = findMetadataFile(fileSystem);
+            if (metadataFilePath.isEmpty()) {
+                return ParserResult.fromErrorMessage(Localization.lang("Could not find metadata file. Possibly corrupted ePUB file."));
+            }
+
+            // Must happen while the ZIP file system is still open.
+            Document document = parseMetadata(metadataFilePath.get());
+
+            Optional<String> title = XMLUtil.getNodeContentByXPath(document, titlePath);
+            Optional<String> identifier = XMLUtil.getNodeContentByXPath(document, identifierPath);
+            Optional<String> source = XMLUtil.getNodeContentByXPath(document, sourcePath);
+            Optional<String> description = XMLUtil.getNodeContentByXPath(document, descriptionPath);
+
+            List<String> authors = XMLUtil.getNodesContentByXPath(document, creatorPath);
+            List<String> subjects = XMLUtil.getNodesContentByXPath(document, subjectPath);
+            List<String> languages = XMLUtil.getNodesContentByXPath(document, languagePath);
+
+            // TODO: Extract editors.
+
+            // The entry is a local variable (not a field) so that calls on a shared importer instance cannot
+            // overwrite each other's entry.
+            BibEntry entry = new BibEntry(StandardEntryType.Book);
+
+            addField(entry, StandardField.TITLE, title);
+            addField(entry, StandardField.ABSTRACT, description);
+
+            // Prefer the source; fall back to the identifier.
+            addField(entry, StandardField.URL, source.or(() -> identifier));
+
+            addJoinedField(entry, StandardField.AUTHOR, authors);
+
+            // Might not be the right way. Leaving, as it still contains information.
+            addJoinedField(entry, StandardField.LANGUAGE, languages);
+
+            entry.addKeywords(subjects, importFormatPreferences.bibEntryPreferences().getKeywordSeparator());
+
+            entry.addFile(new LinkedFile("", filePath.toAbsolutePath(), StandardFileType.EPUB.getName()));
+
+            return ParserResult.fromEntry(entry);
+        } catch (SAXException | XPathExpressionException e) {
+            return ParserResult.fromError(e);
+        }
+    }
+
+    /**
+     * Walks the archive from its root and returns the first visited file whose path ends with ".opf".
+     */
+    private static Optional<Path> findMetadataFile(FileSystem fileSystem) throws IOException {
+        MetadataFileFinder finder = new MetadataFileFinder();
+        Files.walkFileTree(fileSystem.getPath("/"), finder);
+        return Optional.ofNullable(finder.found);
+    }
+
+    /**
+     * Parses the metadata file. The file is first copied out of the archive because a path inside a ZIP file system
+     * cannot be converted to a {@link File}; the copy is deleted again as soon as parsing is over.
+     */
+    private Document parseMetadata(Path metadataFilePath) throws IOException, SAXException {
+        File metadataFile = FileUtil.remapZipPath(metadataFilePath).toFile();
+        try {
+            return builder.parse(metadataFile);
+        } finally {
+            try {
+                Files.deleteIfExists(metadataFile.toPath());
+            } catch (IOException e) {
+                // Best effort only: a leftover temporary file must not turn a finished parse into a failure.
+                LOGGER.debug("Could not delete temporary copy {} of the ePUB metadata file", metadataFile, e);
+            }
+        }
+    }
+
+    // Accepts an Optional on purpose: the XML helpers return Optional values, and unwrapping them at every call
+    // site would be noisier than doing it once here.
+    private static void addField(BibEntry entry, Field field, Optional<String> value) {
+        value.ifPresent(content -> entry.setField(field, content));
+    }
+
+    // Stores all values in one field, joined by " and "; an empty list leaves the field unset.
+    private static void addJoinedField(BibEntry entry, Field field, List<String> values) {
+        if (!values.isEmpty()) {
+            entry.setField(field, String.join(VALUE_SEPARATOR, values));
+        }
+    }
+
+    /**
+     * Not supported: an ePUB file has to be opened as an archive, which requires a path.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public ParserResult importDatabase(BufferedReader input) throws IOException {
+        throw new UnsupportedOperationException("EpubImporter does not support importDatabase(BufferedReader reader). "
+                + "Instead use importDatabase(Path filePath).");
+    }
+
+    @Override
+    public String getId() {
+        return "epub";
+    }
+
+    @Override
+    public String getName() {
+        return "ePUB";
+    }
+
+    @Override
+    public String getDescription() {
+        return Localization.lang("Import the popular e-book file format ePUB");
+    }
+
+    @Override
+    public FileType getFileType() {
+        return StandardFileType.EPUB;
+    }
+
+    /**
+     * File visitor that remembers the first visited file whose path ends with ".opf" and then stops the walk.
+     */
+    private static final class MetadataFileFinder extends SimpleFileVisitor<Path> {
+        private Path found;
+
+        @Override
+        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
+            if (file.toString().endsWith(METADATA_FILE_EXTENSION)) {
+                found = file;
+                return FileVisitResult.TERMINATE;
+            }
+            return FileVisitResult.CONTINUE;
+        }
+    }
+}
diff --git a/src/main/java/org/jabref/logic/util/StandardFileType.java b/src/main/java/org/jabref/logic/util/StandardFileType.java
@@ -46,7 +46,8 @@ public enum StandardFileType implements FileType {
     XML("XML", "xml"),
     XMP("XMP", "xmp"),
     YAML("YAML Markup", "yaml"),
-    ZIP("Zip Archive", "zip");
+    ZIP("Zip Archive", "zip"),
+    EPUB("ePUB", "epub");
 
     private final List<String> extensions;
     private final String name;

diff --git a/src/main/java/org/jabref/logic/util/io/FileUtil.java b/src/main/java/org/jabref/logic/util/io/FileUtil.java
@@ -1,6 +1,7 @@
 package org.jabref.logic.util.io;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.nio.file.FileSystems;
@@ -18,6 +19,7 @@
 import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.UUID;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -490,6 +492,17 @@ public static boolean isPDFFile(Path file) {
         return extension.isPresent() && StandardFileType.PDF.getExtensions().contains(extension.get());
     }
 
+    /**
+     * Test if the file is an ePUB file by simply checking the extension to be ".epub"
+     *
+     * @param file The file to check
+     * @return True if file extension is ".epub", false otherwise
+     */
+    public static boolean isEpubFile(Path file) {
+        Optional<String> extension = FileUtil.getFileExtension(file);
+        return extension.isPresent() && StandardFileType.EPUB.getExtensions().contains(extension.get());
+    }
+
     /**
      * @return Path of current panel database directory or the standard working directory in case the database was not saved yet
      */
@@ -585,4 +598,19 @@ public static String shortenFileName(String fileName, Integer maxLength) {
     public static boolean isCharLegal(char c) {
         return Arrays.binarySearch(ILLEGAL_CHARS, c) < 0;
     }
+
+    /**
+     * Copies a file that lives inside a ZIP archive to a temporary file on the default file system.
+     * <p>
+     * When you need to read (extract) a file from a ZIP archive, you cannot convert `ZipPath` (`ZipPath` is a private class) to {@link File}.
+     * One way of solving this problem is to make a temporary file, and copy ZIP file contents to the temporary file.
+     * <p>
+     * The temporary file is registered for deletion when the JVM exits; callers that are done with it earlier
+     * should delete it themselves.
+     * <p>
+     * Adapted from <a href="https://stackoverflow.com/a/79077999/10037342">...</a>.
+     *
+     * @param zipPath path of the file inside the ZIP archive
+     * @return path of the temporary copy
+     * @throws IOException if the temporary file cannot be created or the content cannot be copied
+     */
+    public static Path remapZipPath(Path zipPath) throws IOException {
+        // Files.createTempFile already guarantees a unique name, so no extra random component is needed.
+        final Path tempFile = Files.createTempFile("jabref-zip-entry-", ".tmp");
+        tempFile.toFile().deleteOnExit();
+        try {
+            // The (empty) temporary file already exists, hence REPLACE_EXISTING.
+            Files.copy(zipPath, tempFile, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+        } catch (IOException e) {
+            // Do not leave a useless file behind until JVM exit when the copy fails.
+            Files.deleteIfExists(tempFile);
+            throw e;
+        }
+        return tempFile;
+    }
 }