diff --git a/NOTICE b/NOTICE
index 0591ebb..3c4fdad 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,4 +1,4 @@
- Copyright 2012-2019 OpenSextant.org
+ Copyright 2012-2021 MITRE Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
* Software and Noncommercial Computer Software Documentation Clause
* 252.227-7014 (JUN 1995)
*
- * (c) 2012-2014 The MITRE Corporation. All Rights Reserved.
+ * (c) 2012-2021 The MITRE Corporation. All Rights Reserved.
* **************************************************************************
diff --git a/pom.xml b/pom.xml
index 68886e2..e8eee3d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,598 +1,603 @@
- 4.0.0
- org.opensextant
- 3.4.0
- XText
- opensextant-xponents-xtext
- Content extraction simplified! Retrieve text, data and metadata from binary documents using Tika and
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ 4.0.0
+ org.opensextant
+ 3.5.0-SNAPSHOT
+ XText
+ opensextant-xponents-xtext
+ Content extraction simplified! Retrieve text, data and metadata from binary documents using Tika and
similar toolkits
- 2013
- https://opensextant.github.io/XText
-
- scm:git:https://github.com/OpenSextant/XText.git
- scm:git:https://github.com/OpenSextant/XText.git
- https://github.com/OpenSextant/XText.git
-
-
-
- Apache 2
- http://www.apache.org/licenses/LICENSE-2.0.txt
-
-
-
-
- Marc Ubaldino
- ubaldino@mitre.org
- MITRE
-
- Lead
-
-
-
-
- 1.7.30
-
-
-
- org.opensextant
- opensextant-xponents-core
- 3.4.0
-
-
- org.opensextant
- giscore
-
-
- org.opensextant
- geodesy
-
-
- com.norconex.language
- langdetect
-
-
- org.locationtech.spatial4j
- spatial4j
-
-
-
-
- gnu.getopt
- java-getopt
- 1.0.13
- test
-
-
- org.apache.commons
- commons-lang3
- 3.12.0
-
-
-
- commons-lang
- commons-lang
- 2.6
- runtime
-
-
- com.ibm.icu
- icu4j
- 65.1
-
-
- org.apache.commons
- commons-text
- 1.9
-
-
- commons-codec
- commons-codec
- 1.15
-
-
- commons-io
- commons-io
- 2.8.0
-
-
- commons-logging
- commons-logging
- 1.2
- runtime
-
-
- org.apache.commons
- commons-compress
- 1.20
-
-
-
- com.pff
- java-libpst
- 0.9.3
-
-
- org.apache.tika
- tika-core
- 1.24.1
-
-
- org.apache.tika
- tika-parsers
- 1.24.1
-
-
- org.apache.sis.storage
- sis-netcdf
-
-
- pdfbox
- org.apache.pdfbox
-
-
- commons-logging
- commons-logging
-
-
- httpservices
- edu.ucar
-
-
- junrar
- com.github.junrar
-
-
- netcdf4
- edu.ucar
-
-
- grib
- edu.ucar
-
-
- cdm
- edu.ucar
-
-
- cxf-rt-rs-client
- org.apache.cxf
-
-
- vorbis-java-core
- org.gagravarr
-
-
- vorbis-java-tika
- org.gagravarr
-
-
- sis-metadata
- org.apache.sis.core
-
-
- sis-utility
- org.apache.sis.core
-
-
- jmatio
- net.sourceforge.jmatio
-
-
- opennlp-tools
- org.apache.opennlp
-
-
- org.json
- json
-
-
- edu.usc.ir
- sentiment-analysis-parser
-
-
- org.tallison
- jmatio
-
-
- com.rometools
- rome
-
-
- org.apache.uima
- uimafit-core
-
-
- org.apache.uima
- uimaj-core
-
-
-
-
- net.htmlparser.jericho
- jericho-html
- 3.4
-
-
- xml-apis
- xml-apis
- 1.4.01
-
-
- org.slf4j
- slf4j-api
-
-
- junit
- junit
- test
-
-
- com.sun.mail
- javax.mail
- 1.5.1
-
-
- org.apache.pdfbox
- pdfbox
- 2.0.22
-
-
- commons-logging
- commons-logging
-
-
-
-
-
- ch.qos.logback
- logback-classic
-
-
-
- org.apache.httpcomponents
- fluent-hc
- 4.5.13
- runtime
-
-
- commons-logging
- commons-logging
-
-
-
-
- org.apache.httpcomponents
- httpclient-cache
- 4.5.13
- runtime
-
-
- commons-logging
- commons-logging
-
-
-
-
- org.apache.httpcomponents
- httpclient
- 4.5.13
-
-
- commons-logging
- commons-logging
-
-
-
-
- org.apache.httpcomponents
- httpcore
- 4.4.14
-
-
- org.apache.httpcomponents
- httpmime
- 4.5.13
-
-
- joda-time
- joda-time
- 2.10.6
-
-
- de.l3s.boilerpipe
- boilerpipe
- 1.1.0
-
-
- javax.activation
- activation
- 1.1
-
-
- org.jodd
- jodd-json
- 5.1.5
-
-
-
+ 2013
+ https://opensextant.github.io/XText
+
+ scm:git:https://github.com/OpenSextant/XText.git
+ scm:git:https://github.com/OpenSextant/XText.git
+ https://github.com/OpenSextant/XText.git
+
+
+
+ Apache 2
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+
+
+
+
+ Marc Ubaldino
+ ubaldino@mitre.org
+ MITRE
+
+ Lead
+
+
+
+
+ 1.7.30
+
-
-
- junit
- junit
- 4.13.1
- test
-
-
-
- org.slf4j
- slf4j-api
- ${slf4j.version}
-
-
- ch.qos.logback
- logback-classic
- 1.2.3
-
+
+ org.opensextant
+ opensextant-xponents-core
+ 3.5.0-SNAPSHOT
+
+
+ org.opensextant
+ giscore
+
+
+ org.opensextant
+ geodesy
+
+
+ com.norconex.language
+ langdetect
+
+
+ org.locationtech.spatial4j
+ spatial4j
+
+
+
+
+ gnu.getopt
+ java-getopt
+ 1.0.13
+ test
+
+
+ org.apache.commons
+ commons-lang3
+ 3.12.0
+
+
+
+ commons-lang
+ commons-lang
+ 2.6
+ runtime
+
+
+ com.ibm.icu
+ icu4j
+ 70.1
+
+
+ org.apache.commons
+ commons-text
+ 1.9
+
+
+ commons-codec
+ commons-codec
+ 1.15
+
+
+ commons-io
+ commons-io
+ 2.10.0
+
+
+ commons-logging
+ commons-logging
+ 1.2
+ runtime
+
+
+ org.apache.commons
+ commons-compress
+ 1.21
+
+
+
+ com.pff
+ java-libpst
+ 0.9.3
+
+
+ org.apache.tika
+ tika-core
+ 1.27
+
+
+ org.apache.tika
+ tika-parsers
+ 1.27
+
+
+ org.apache.sis.storage
+ sis-netcdf
+
+
+ pdfbox
+ org.apache.pdfbox
+
+
+ commons-logging
+ commons-logging
+
+
+ httpservices
+ edu.ucar
+
+
+ junrar
+ com.github.junrar
+
+
+ netcdf4
+ edu.ucar
+
+
+ grib
+ edu.ucar
+
+
+ cdm
+ edu.ucar
+
+
+ cxf-rt-rs-client
+ org.apache.cxf
+
+
+ vorbis-java-core
+ org.gagravarr
+
+
+ vorbis-java-tika
+ org.gagravarr
+
+
+ sis-metadata
+ org.apache.sis.core
+
+
+ sis-utility
+ org.apache.sis.core
+
+
+ jmatio
+ net.sourceforge.jmatio
+
+
+ opennlp-tools
+ org.apache.opennlp
+
+
+ org.json
+ json
+
+
+ edu.usc.ir
+ sentiment-analysis-parser
+
+
+ org.tallison
+ jmatio
+
+
+ com.rometools
+ rome
+
+
+ org.apache.uima
+ uimafit-core
+
+
+ org.apache.uima
+ uimaj-core
+
+
+
+
+ net.htmlparser.jericho
+ jericho-html
+ 3.4
+
+
+ xml-apis
+ xml-apis
+ 1.4.01
+
+
+ org.slf4j
+ slf4j-api
+
+
+ junit
+ junit
+ test
+
+
+ com.sun.mail
+ javax.mail
+ 1.5.1
+
+
+ org.apache.pdfbox
+ pdfbox
+ 2.0.24
+
+
+ commons-logging
+ commons-logging
+
+
+
+
+
+ ch.qos.logback
+ logback-classic
+
+
+
+ org.apache.httpcomponents
+ fluent-hc
+ 4.5.13
+ runtime
+
+
+ commons-logging
+ commons-logging
+
+
+
+
+ org.apache.httpcomponents
+ httpclient-cache
+ 4.5.13
+ runtime
+
+
+ commons-logging
+ commons-logging
+
+
+
+
+ org.apache.httpcomponents
+ httpclient
+ 4.5.13
+
+
+ commons-logging
+ commons-logging
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+ 4.4.14
+
+
+ org.apache.httpcomponents
+ httpmime
+ 4.5.13
+
+
+ joda-time
+ joda-time
+ 2.10.13
+
+
+ de.l3s.boilerpipe
+ boilerpipe
+ 1.1.0
+
+
+ javax.activation
+ activation
+ 1.1
+
+
+ org.jodd
+ jodd-json
+ 5.1.5
+
-
-
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
- 3.1.2
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.8.1
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
- 3.2.0
-
-
- org.apache.maven.plugins
- maven-source-plugin
- 3.2.1
-
-
- org.apache.maven.plugins
- maven-deploy-plugin
- 3.0.0-M1
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 3.2.0
-
-
- attach-tests
-
- test-jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
- 3.1.1
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 3.0.0-M4
-
-
- org.codehaus.mojo
- findbugs-maven-plugin
- 3.0.5
-
-
- org.sonarsource.scanner.maven
- sonar-maven-plugin
- 3.8.0.2131
-
-
-
-
-
- maven-javadoc-plugin
-
- <img alt='[OpenSextant Logo]' height='36'
- width='36'
- src='doc-files/opensextant-manual-logo.png'/><br>copyright
- OpenSextant.org, 2013-2020
-
- true
- XText - Content Extraction Simplified
- false
-
-
-
- attach-javadoc
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-release-plugin
-
- true
- true
- release
- deploy
-
-
-
- maven-compiler-plugin
-
- 1.8
- 1.8
- 1.8
- -Xlint:all,-path
- true
- true
-
-
-
- maven-source-plugin
-
-
- attach-sources
-
- jar
-
-
-
-
-
-
- maven-checkstyle-plugin
-
- checkstyle.xml
- checkstyle.indentChars=4
- checkstyle-suppressions.xml
- true
- false
-
-
-
-
- org.codehaus.mojo
- findbugs-maven-plugin
-
- true
-
-
-
-
- maven-dependency-plugin
-
-
- default-cli
-
- copy-dependencies
-
-
-
- lib
- runtime
- test
-
-
-
- dependency-analysis
-
- analyze-only
-
- verify
-
-
-
-
-
-
- maven-surefire-plugin
-
-
- ${basedir}/src/test/resources
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-project-info-reports-plugin
- 2.7
-
-
- false
-
-
-
- maven-javadoc-plugin
-
-
-
- javadoc
-
-
-
-
-
-
-
-
-
- release
-
+
+
+
+
+ junit
+ junit
+ 4.13.1
+ test
+
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+
+
+ ch.qos.logback
+ logback-classic
+ 1.2.3
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 3.1.2
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 3.2.0
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 3.2.1
+
+
+ org.apache.maven.plugins
+ maven-deploy-plugin
+ 3.0.0-M1
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 3.2.0
+
+
+ attach-tests
+
+ test-jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-checkstyle-plugin
+ 3.1.1
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 3.0.0-M4
+
+
+ org.codehaus.mojo
+ findbugs-maven-plugin
+ 3.0.5
+
+
+ org.sonarsource.scanner.maven
+ sonar-maven-plugin
+ 3.8.0.2131
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+ 3.0.0-M4
+
+
+
-
- maven-source-plugin
-
-
- attach-sources
-
- jar-no-fork
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-gpg-plugin
- 1.6
-
-
- sign-artifacts
- verify
-
- sign
-
-
-
-
-
- org.sonatype.plugins
- nexus-staging-maven-plugin
- 1.6.7
- true
-
- ossrh
- https://oss.sonatype.org/
- true
-
-
+
+ maven-javadoc-plugin
+
+ 1.8
+ <img alt='[OpenSextant Logo]' height='36'
+ width='36'
+ src='doc-files/opensextant-manual-logo.png'/><br>copyright
+ OpenSextant.org, 2013-2021
+
+ true
+ XText - Content Extraction Simplified
+ false
+
+
+
+ attach-javadoc
+
+ jar
+
+
+
+
+
+ maven-release-plugin
+
+ true
+ true
+ release
+ deploy
+
+
+
+ maven-compiler-plugin
+
+ 1.8
+ 1.8
+ 1.8
+ -Xlint:all,-path
+ true
+ true
+
+
+
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+
+ maven-checkstyle-plugin
+
+ checkstyle.xml
+ checkstyle.indentChars=4
+ checkstyle-suppressions.xml
+ true
+ false
+
+
+
+
+ org.codehaus.mojo
+ findbugs-maven-plugin
+
+ true
+
+
+
+
+ maven-dependency-plugin
+
+
+ default-cli
+
+ copy-dependencies
+
+
+
+ lib
+ runtime
+ test
+
+
+
+ dependency-analysis
+
+ analyze-only
+
+ verify
+
+
+
+
+
+
+ maven-surefire-plugin
+
+
+ ${basedir}/src/test/resources
+
+
+
-
-
-
-
-
-
- ossrh
- https://oss.sonatype.org/content/repositories/snapshots
-
-
- ossrh
- https://oss.sonatype.org/service/local/staging/deploy/maven2
-
-
+
+
+
+
+ org.apache.maven.plugins
+ maven-project-info-reports-plugin
+ 2.7
+
+
+ false
+
+
+
+ maven-javadoc-plugin
+
+
+
+ javadoc
+
+
+
+
+
+
+
+
+
+ release
+
+
+
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar-no-fork
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-gpg-plugin
+ 1.6
+
+
+ sign-artifacts
+ verify
+
+ sign
+
+
+
+
+
+ org.sonatype.plugins
+ nexus-staging-maven-plugin
+ 1.6.7
+ true
+
+ ossrh
+ https://oss.sonatype.org/
+ true
+
+
+
+
+
+
+
+
+
+ ossrh
+ https://oss.sonatype.org/content/repositories/snapshots
+
+
+ ossrh
+ https://oss.sonatype.org/service/local/staging/deploy/maven2
+
+
diff --git a/src/main/java/org/opensextant/xtext/XText.java b/src/main/java/org/opensextant/xtext/XText.java
index 4f04ecb..f0f29c8 100644
--- a/src/main/java/org/opensextant/xtext/XText.java
+++ b/src/main/java/org/opensextant/xtext/XText.java
@@ -1,20 +1,3 @@
-/*
- *
- * Copyright 2012-2013 The MITRE Corporation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
//
// _____ ____ __ __
@@ -28,6 +11,7 @@
// \/_/
//
// OpenSextant XText
+// Copyright 2012-2021 MITRE
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
//
package org.opensextant.xtext;
@@ -63,6 +47,7 @@
import org.opensextant.xtext.converters.TextTranscodingConverter;
import org.opensextant.xtext.converters.TikaHTMLConverter;
import org.opensextant.xtext.converters.WebArchiveConverter;
+import org.opensextant.xtext.converters.OfficeConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -126,16 +111,16 @@ public PathManager getPathManager() {
private final int maxHTMLBuffer = 5 * maxBuffer;
private long maxFileSize = FILE_SIZE_LIMIT;
- protected Set archiveFileTypes = new HashSet();
+ protected Set archiveFileTypes = new HashSet<>();
/**
*
*/
- public static Map converters = new HashMap();
+ public static Map converters = new HashMap<>();
private Converter defaultConversion;
private Converter embeddedConversion;
- private final Set requestedFileTypes = new HashSet();
- private final Set ignoreFileTypes = new HashSet();
+ private final Set requestedFileTypes = new HashSet<>();
+ private final Set ignoreFileTypes = new HashSet<>();
private boolean allowNoExtension = false;
/**
@@ -682,14 +667,8 @@ public ConvertedDocument convertFile(File input, ConvertedDocument parent) throw
long t2 = System.currentTimeMillis();
int duration = (int) (t2 - t1);
if (textDoc != null) {
- // Buffer can be null. If you got this far, you are interested
- // in the file, as it passed
- // all filters above. Return the document with whatever metadata
- // it found.
- // if (textDoc.buffer == null) {
- // throw new
- // IOException("Engineering error: Doc converted, but converter failed to setText()");
- // }
+ // Buffer can be null. If you got this far, you are interested in the file, as it passed
+ // all filters above. Return the document with whatever metadata it found.
if (paths.isSaving() && textDoc.is_converted) {
// Get Parent info in there.
if (parent != null) {
@@ -935,7 +914,9 @@ public void setup() throws IOException {
requestedFileTypes.add("xhtml");
}
- MessageConverter emailParser = new MessageConverter();
+ boolean useMSOffice = false;
+ Converter emailParser = useMSOffice ? new OfficeConverter() : new MessageConverter();
+
mimetype = "eml";
if (requestedFileTypes.contains(mimetype)) {
converters.put(mimetype, emailParser);
diff --git a/src/main/java/org/opensextant/xtext/converters/OfficeConverter.java b/src/main/java/org/opensextant/xtext/converters/OfficeConverter.java
new file mode 100644
index 0000000..5b5445e
--- /dev/null
+++ b/src/main/java/org/opensextant/xtext/converters/OfficeConverter.java
@@ -0,0 +1,64 @@
+package org.opensextant.xtext.converters;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.opensextant.xtext.ConvertedDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class OfficeConverter extends ConverterAdapter {
+ // Converter backed by Apache Tika's OfficeParser: yields body text plus title, encoding, creation date and author.
+
+ protected Logger logger = LoggerFactory.getLogger(getClass());
+ private final OfficeParser parser = new OfficeParser();
+
+ public OfficeConverter() { }
+
+
+ /**
+ * Parses the stream with Tika's OfficeParser and returns a document holding the body text and core metadata.
+ * @param input input stream to parse; it is always closed by this method, on success or failure
+ * @param doc File handed to the ConvertedDocument constructor for the result
+ * @return ConvertedDocument with title, encoding, create date, author and text set, flagged as converted
+ * @throws IOException on IO failure with stream or conversion of content
+ */
+ @Override
+ protected ConvertedDocument conversionImplementation(InputStream input, java.io.File doc)
+ throws IOException {
+ Metadata metadata = new Metadata();
+ ParseContext ctx = new ParseContext();
+ BodyContentHandler handler = new BodyContentHandler();
+
+ try {
+ parser.parse(input, handler, metadata, ctx);
+ } catch (NoClassDefFoundError classErr){
+ throw new IOException("Unable to parse content due to Tika misconfiguration", classErr);
+ } catch (Exception xerr) {
+ throw new IOException("Unable to parse content", xerr);
+ } finally {
+ input.close();
+ }
+
+ /* Construct a response */
+ ConvertedDocument textdoc = new ConvertedDocument(doc);
+
+ /* Add essential metadata */
+ textdoc.addTitle(metadata.get(TikaCoreProperties.TITLE));
+ textdoc.setEncoding(metadata.get(Metadata.CONTENT_ENCODING));
+ textdoc.addCreateDate(metadata.getDate(TikaCoreProperties.CREATED));
+ textdoc.addAuthor(metadata.get(TikaCoreProperties.CREATOR));
+ /* Keep the body text collected by the handler (it was previously parsed and dropped), then mark as converted */
+ textdoc.setText(handler.toString());
+ textdoc.is_converted = true;
+ return textdoc;
+ }
+
+
+}
diff --git a/src/test/java/OfficeParserTool.java b/src/test/java/OfficeParserTool.java
new file mode 100644
index 0000000..34d4721
--- /dev/null
+++ b/src/test/java/OfficeParserTool.java
@@ -0,0 +1,44 @@
+
+import org.opensextant.xtext.ConvertedDocument;
+import org.opensextant.xtext.converters.MessageConverter;
+import org.opensextant.xtext.converters.OfficeConverter;
+
+public class OfficeParserTool {
+
+ /*
+ * TODO: Both MessageConverter and OfficeConverter currently fail on basic .EML and .MSG files;
+ * the formats appear to follow no consistent standard.
+ */
+
+ /** Converts the file named by args[0] with OfficeConverter and then MessageConverter, printing each outcome. */
+ public static void main(String[] args) {
+ OfficeConverter converter = new OfficeConverter();
+
+ String msMsg, mimeMsg = null;
+ ConvertedDocument msdoc = null, mimedoc = null;
+ try {
+ msdoc = converter.convert(args[0]);
+ msMsg = "success - " + msdoc.getProperty("title");
+ } catch (Exception err) {
+ // Stack trace is deliberately not printed; only the exception message is reported below.
+ msMsg = err.getMessage();
+ }
+
+ try {
+ mimedoc = new MessageConverter().convert(args[0]);
+ mimeMsg = "success - " + mimedoc.getProperty("title");
+ } catch (Exception err) {
+ mimeMsg = err.getMessage();
+ }
+
+ System.out.println("MS OfficeConverter\n\tResult:" + msMsg);
+ if (msdoc != null) {
+ System.out.println("\tDoc " + msdoc.toString());
+ }
+
+ System.out.println("MIME MessageConverter\n\tResult:" + mimeMsg);
+ if (mimedoc != null) {
+ System.out.println("\tDoc " + mimedoc.toString());
+ }
+ }
+}
diff --git a/src/test/java/org/opensextant/xtext/converters/test/TestOfficeMail.java b/src/test/java/org/opensextant/xtext/converters/test/TestOfficeMail.java
new file mode 100644
index 0000000..f091991
--- /dev/null
+++ b/src/test/java/org/opensextant/xtext/converters/test/TestOfficeMail.java
@@ -0,0 +1,40 @@
+package org.opensextant.xtext.converters.test;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.opensextant.xtext.ConvertedDocument;
+import org.opensextant.xtext.converters.OfficeConverter;
+
+public class TestOfficeMail {
+ // Class-level fixture: a JUnit temporary folder and the .eml file copied into it before the tests run.
+ @ClassRule
+ public static final TemporaryFolder TEMP_DIR = new TemporaryFolder();
+ private static File TEST_FILE = null;
+ // Copies the mimeEmailWithAttachmentsTest.eml resource (looked up via MessageConverterTest) into TEMP_DIR.
+ @BeforeClass
+ public static void setupTemporaryFolder() throws IOException {
+ TEST_FILE = TEMP_DIR.newFile("mimeEmailWithAttachmentsTest.eml");
+ FileUtils.copyInputStreamToFile(
+ MessageConverterTest.class.getResourceAsStream("mimeEmailWithAttachmentsTest.eml"), TEST_FILE);
+ }
+
+ // @Test -- annotation is commented out, so this method is not run. NOTE(review): reason is not recorded here; confirm before re-enabling.
+ public void testMailMessageParser() {
+ OfficeConverter converter = new OfficeConverter();
+
+ try {
+ ConvertedDocument doc = converter.convert(TEST_FILE);
+ } catch (Exception err) {
+ err.printStackTrace();
+ fail("EML conversion failed");
+ }
+ }
+}