From 48adcf9f32fe5b208fce60287d797f7d75d482ec Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Wed, 8 Nov 2023 21:08:17 +0100 Subject: [PATCH] FEAT: Spreadsheet validation and dev docs * implemented spreadsheet only validation; * tests and convenience functions for spreadsheet validation; * updated version to 0.9.0; and * added developer documentation. --- README.md | 2 + docs/DEVELOPER.md | 155 ++++++++++++++++++ docs/VALIDATION.md | 10 +- odf-apps/pom.xml | 2 +- odf-core/pom.xml | 2 +- .../odf/document/Documents.java | 43 +++++ .../odf/document/OdfDocumentImpl.java | 10 ++ .../odf/document/OpenDocument.java | 3 + .../odf/document/OpenDocumentImpl.java | 6 + .../odf/validation/ValidatingParserImpl.java | 3 +- .../odf/validation/Validator.java | 42 ++++- .../odf/messages/Messages.properties | 2 + .../odf/validation/ValidatorsTest.java | 16 ++ pom.xml | 2 +- 14 files changed, 284 insertions(+), 14 deletions(-) create mode 100644 docs/DEVELOPER.md create mode 100644 odf-core/src/main/java/org/openpreservation/odf/document/Documents.java diff --git a/README.md b/README.md index 7446a026..ec929ca4 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ You may read more about the technical details of the validation checks [here](do ## Quick Start +For developer instructions with Maven locations and examples please see [DEVELOPER.md](docs/DEVELOPER.md). + ### Prerequisites To run the software you'll need a [Java 8](https://www.java.com/en/download/manual.jsp) JRE or newer. diff --git a/docs/DEVELOPER.md b/docs/DEVELOPER.md new file mode 100644 index 00000000..f8c5822c --- /dev/null +++ b/docs/DEVELOPER.md @@ -0,0 +1,155 @@ +# ODF Validation developer documentation + +For developers wishing to integrate the ODF Validator into their own applications, the following information may be useful. You'll need to use the odf-core package which is currently in the OPF's Maven repository. 
+ +## Setting up the Maven repository + +For now the Maven artefacts are hosted on the OPF's artifactory server. To use them you'll need to add the following to your Maven settings file (usually ~/.m2/settings.xml): + +```xml +<settings> + <profiles> + <profile> + <repositories> + <repository> + <snapshots> + <enabled>false</enabled> + </snapshots> + <id>central</id> + <name>opf-dev</name> + <url>https://artifactory.openpreservation.org/artifactory/opf-dev</url> + </repository> + <repository> + <snapshots /> + <id>snapshots</id> + <name>opf-dev</name> + <url>https://artifactory.openpreservation.org/artifactory/opf-dev</url> + </repository> + </repositories> + <id>artifactory</id> + </profile> + </profiles> + <activeProfiles> + <activeProfile>artifactory</activeProfile> + </activeProfiles> +</settings> +``` + +## Including the core validation library + +To include the core validation library in your project, add the following dependency to your pom.xml: + +```xml +<dependency> + <groupId>org.openpreservation.odf</groupId> + <artifactId>odf-core</artifactId> + <version>0.9.0</version> +</dependency> +``` + +## Parsing an ODF package + +The library allows a non-validating parse of an ODF package; indeed, this is a prerequisite to validation, which is performed against a package instance. The following code snippet shows how to parse an ODF package: + +```java +import org.openpreservation.odf.pkg.FileEntry; +import org.openpreservation.odf.pkg.Manifest; +import org.openpreservation.odf.pkg.OdfPackage; +import org.openpreservation.odf.pkg.OdfPackages; +import org.openpreservation.odf.pkg.PackageParser; + +// Get a package parser instance +PackageParser packageParser = OdfPackages.getPackageParser(); + +File packageFile = new File("path/to/package.ods"); +OdfPackage odfPackage = packageParser.parsePackage(packageFile); + +// Get the package manifest +Manifest manifest = odfPackage.getManifest(); + +// Get the file entries from the manifest +for (FileEntry entry : manifest.getEntries()) { + // Get the entry declared MIME type + String mediaType = entry.getMediaType(); + // Get the entry declared full path + String fullPath = entry.getFullPath(); + // Get the entry Input Stream + try (InputStream is = odfPackage.getEntryStream(entry)) { + // Do something with the entry + } +} +``` + +## Validating an ODF package + +```java +import org.openpreservation.messages.Message; +import 
org.openpreservation.odf.pkg.OdfPackage; +import org.openpreservation.odf.validation.ValidatingParser; +import org.openpreservation.odf.validation.ValidationReport; +import org.openpreservation.odf.validation.Validators; + +ValidatingParser packageParser = Validators.getValidatingParser(); + +File packageFile = new File("path/to/package.ods"); + +// Get the OdfPackage instance from the parser +OdfPackage odfPackage = packageParser.parsePackage(packageFile.toPath()); + +// Now validate the package and get the validation report +ValidationReport report = packageParser.validatePackage(odfPackage); + +// Is the package valid? +if (report.isValid()) { + System.out.println("Package is valid"); + // Get any warnings or info messages (no errors as the package is valid) + List<Message> messages = report.getMessages(); + // Loop through the messages + for (Message message : messages) { + // Get the message id + System.out.println(message.getId()); + // Get the message severity (INFO, WARNING, ERROR) + System.out.println(message.getSeverity()); + // Print out the message text + System.out.println(message.getMessage()); + } +} else { + System.out.println("Package is not valid"); + // Get the error messages + List<Message> messages = report.getErrors(); + for (Message message : messages) { + // Get the message id + System.out.println(message.getId()); + // Print out the message text + System.out.println(message.getMessage()); + } +} +``` + +## Validation of Spreadsheets Only + +The ODF Validator can be used to validate spreadsheets only. The document is validated in the usual way, and an additional error is reported if its detected format is not an OpenDocument spreadsheet. 
The following code snippet shows how to validate a spreadsheet: + +```java +import org.openpreservation.messages.Message; +import org.openpreservation.odf.validation.ValidationReport; +import org.openpreservation.odf.validation.Validator; + +Validator validator = new Validator(); +ValidationReport report = validator.validateSpreadsheet(new File("path/to/package.ods")); +if (!report.isValid()) { + List<Message> messages = report.getMessages(); + // Loop through the messages + for (Message message : messages) { + // Get the message id + System.out.println(message.getId()); + // Get the message severity (INFO, WARNING, ERROR) + System.out.println(message.getSeverity()); + // Print out the message text + System.out.println(message.getMessage()); + } +} else { + System.out.println("The document is valid"); +} +``` \ No newline at end of file diff --git a/docs/VALIDATION.md b/docs/VALIDATION.md index ed9ca778..744d222e 100644 --- a/docs/VALIDATION.md +++ b/docs/VALIDATION.md @@ -25,6 +25,8 @@ All files contained in the Zip file shall be non compressed (STORED) or compress ### PKG-3 +An OpenDocument package SHALL only contain the "META-INF/manifest.xml" and files containing the term "signatures" in their name in the "META-INF" folder. File %s does not meet these criteria. + It (an OpenDocument package) may contain files whose relative paths begin with “META-INF/” and whose names contain the string “signatures”. These file shall meet the following requirements: * D.1: The files shall be well-formed XML files in accordance with [XML1.0]. @@ -33,14 +35,6 @@ It (an OpenDocument package) may contain files whose relative paths begin with * D.3: The files shall be valid with respect to the digital signature schema defined in appendix A.2 OpenDocument Digital Signature Schema. -TODO: This needs expanding to cover digital signature file validation. It appears that sub-directories are valid if they contain digital signature files. 
- -Should the presence of empty directories below META-INF be considered an error? - -ALL files that don't contain the string "signatures" should be a validation error. - -Any files that contain the string "signatures" should be checked against D1-D3. This is implemented but the reporting logic needs to be improved. - ### PKG-9 (Error) An OpenDocument package SHALL be a well formed Zip Archive. diff --git a/odf-apps/pom.xml b/odf-apps/pom.xml index 86db9b5e..dcf6a81b 100644 --- a/odf-apps/pom.xml +++ b/odf-apps/pom.xml @@ -5,7 +5,7 @@ org.openpreservation.odf odf-validator - 0.1.0-SNAPSHOT + 0.9.0 odf-apps diff --git a/odf-core/pom.xml b/odf-core/pom.xml index 576364f9..71603664 100644 --- a/odf-core/pom.xml +++ b/odf-core/pom.xml @@ -5,7 +5,7 @@ org.openpreservation.odf odf-validator - 0.1.0-SNAPSHOT + 0.9.0 org.openpreservation.odf diff --git a/odf-core/src/main/java/org/openpreservation/odf/document/Documents.java b/odf-core/src/main/java/org/openpreservation/odf/document/Documents.java new file mode 100644 index 00000000..f593eeaf --- /dev/null +++ b/odf-core/src/main/java/org/openpreservation/odf/document/Documents.java @@ -0,0 +1,43 @@ +package org.openpreservation.odf.document; + +import java.util.Objects; + +import org.openpreservation.format.xml.ParseResult; +import org.openpreservation.odf.pkg.OdfPackage; +import org.openpreservation.odf.xml.Metadata; +import org.openpreservation.odf.xml.OdfXmlDocument; +import org.openpreservation.odf.xml.OdfXmlDocuments; + +public class Documents { + private Documents() { + throw new AssertionError("Utility class 'Documents' should not be instantiated"); + } + + public static final OpenDocument openDocumentOf(OdfDocument document) { + Objects.requireNonNull(document, "OdfDocument parameter document cannot be null"); + return OpenDocumentImpl.of(document); + } + + public static final OpenDocument openDocumentOf(OdfPackage pkg) { + Objects.requireNonNull(pkg, "OdfPackage pkg document cannot be null"); + return 
OpenDocumentImpl.of(pkg); + } + + public static final OdfDocument odfDocumentOf(final OdfXmlDocument xmlDocument, final Metadata metadata) { + Objects.requireNonNull(xmlDocument, "OdfXmlDocument parameter xmlDocument cannot be null"); + Objects.requireNonNull(metadata, "Metadata parameter metadata cannot be null"); + return OdfDocumentImpl.of(xmlDocument, metadata); + } + + public static final OdfDocument odfDocumentOf(final ParseResult parseResult, final Metadata metadata) { + Objects.requireNonNull(parseResult, "ParseResult parameter parseResult cannot be null"); + Objects.requireNonNull(metadata, "Metadata parameter metadata cannot be null"); + return OdfDocumentImpl.of(OdfXmlDocuments.odfXmlDocumentOf(parseResult), metadata); + } + + public static final OdfDocument odfDocumentOf(final ParseResult parseResult) { + Objects.requireNonNull(parseResult, "ParseResult parameter parseResult cannot be null"); + return OdfDocumentImpl.of(parseResult); + } + +} diff --git a/odf-core/src/main/java/org/openpreservation/odf/document/OdfDocumentImpl.java b/odf-core/src/main/java/org/openpreservation/odf/document/OdfDocumentImpl.java index 68ce0069..592f5ade 100644 --- a/odf-core/src/main/java/org/openpreservation/odf/document/OdfDocumentImpl.java +++ b/odf-core/src/main/java/org/openpreservation/odf/document/OdfDocumentImpl.java @@ -22,12 +22,18 @@ static final OdfDocument of(final OdfXmlDocument xmlDocument, final Metadata met Objects.requireNonNull(metadata, "Metadata parameter metadata cannot be null"); return new OdfDocumentImpl(xmlDocument, metadata); } + static final OdfDocument of(final ParseResult parseResult, final Metadata metadata) { Objects.requireNonNull(parseResult, "ParseResult parameter parseResult cannot be null"); Objects.requireNonNull(metadata, "Metadata parameter metadata cannot be null"); return new OdfDocumentImpl(OdfXmlDocuments.odfXmlDocumentOf(parseResult), metadata); } + static final OdfDocument of(final ParseResult parseResult) { + 
Objects.requireNonNull(parseResult, "ParseResult parameter parseResult cannot be null"); + return new OdfDocumentImpl(OdfXmlDocuments.odfXmlDocumentOf(parseResult)); + } + static final OdfDocument from(final InputStream docStream) throws IOException, ParserConfigurationException, SAXException { Objects.requireNonNull(docStream, "InputStream parameter docStream cannot be null"); @@ -50,6 +56,10 @@ static final OdfDocument from(final InputStream docStream) private final OdfXmlDocument xmlDocument; private final Metadata metadata; + private OdfDocumentImpl(final OdfXmlDocument xmlDocument) { + this(xmlDocument, null); + } + private OdfDocumentImpl(final OdfXmlDocument xmlDocument, final Metadata metadata) { super(); this.xmlDocument = xmlDocument; diff --git a/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocument.java b/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocument.java index 15b13c78..983b007f 100644 --- a/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocument.java +++ b/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocument.java @@ -2,6 +2,7 @@ import java.util.Collection; +import org.openpreservation.odf.fmt.Formats; import org.openpreservation.odf.pkg.OdfPackage; public interface OpenDocument { @@ -43,4 +44,6 @@ public interface OpenDocument { * @return the ODF Package for the OpenDocument */ public OdfPackage getPackage(); + + public Formats getFormat(); } diff --git a/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocumentImpl.java b/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocumentImpl.java index ff0cb45a..d512f7cb 100644 --- a/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocumentImpl.java +++ b/odf-core/src/main/java/org/openpreservation/odf/document/OpenDocumentImpl.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Objects; +import org.openpreservation.odf.fmt.Formats; import org.openpreservation.odf.pkg.OdfPackage; import 
org.openpreservation.odf.pkg.OdfPackageDocument; @@ -59,6 +60,11 @@ public OdfPackage getPackage() { return this.pkg; } + @Override + public Formats getFormat() { + return (this.isPackage()) ? this.pkg.getDetectedFormat() : Formats.fromMime(this.document.getXmlDocument().getMimeType()); + } + @Override public int hashCode() { return Objects.hash(document, pkg); diff --git a/odf-core/src/main/java/org/openpreservation/odf/validation/ValidatingParserImpl.java b/odf-core/src/main/java/org/openpreservation/odf/validation/ValidatingParserImpl.java index a084e3ae..71de7e54 100644 --- a/odf-core/src/main/java/org/openpreservation/odf/validation/ValidatingParserImpl.java +++ b/odf-core/src/main/java/org/openpreservation/odf/validation/ValidatingParserImpl.java @@ -21,6 +21,7 @@ import org.openpreservation.messages.Message; import org.openpreservation.messages.MessageFactory; import org.openpreservation.messages.Messages; +import org.openpreservation.odf.document.Documents; import org.openpreservation.odf.fmt.OdfFormats; import org.openpreservation.odf.pkg.FileEntry; import org.openpreservation.odf.pkg.Manifest; @@ -84,7 +85,7 @@ public OdfPackage parsePackage(final InputStream toParse, final String name) thr } private ValidationReport validate(final OdfPackage odfPackage) { - final ValidationReport report = ValidationReport.of(odfPackage.getName()); + final ValidationReport report = ValidationReport.of(odfPackage.getName(), Documents.openDocumentOf(odfPackage)); report.add(OdfFormats.MIMETYPE, checkMimeEntry(odfPackage)); if (!odfPackage.hasManifest()) { report.add(OdfPackages.PATH_MANIFEST, FACTORY.getError("PKG-4")); diff --git a/odf-core/src/main/java/org/openpreservation/odf/validation/Validator.java b/odf-core/src/main/java/org/openpreservation/odf/validation/Validator.java index 54210d63..d99e1be2 100644 --- a/odf-core/src/main/java/org/openpreservation/odf/validation/Validator.java +++ b/odf-core/src/main/java/org/openpreservation/odf/validation/Validator.java @@ 
-1,5 +1,6 @@ package org.openpreservation.odf.validation; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.Files; @@ -14,6 +15,7 @@ import org.openpreservation.format.xml.XmlValidator; import org.openpreservation.messages.MessageFactory; import org.openpreservation.messages.Messages; +import org.openpreservation.odf.document.Documents; import org.openpreservation.odf.fmt.Formats; import org.openpreservation.odf.pkg.OdfPackage; import org.openpreservation.odf.pkg.OdfPackages; @@ -33,9 +35,45 @@ public Validator() { super(); } + public ValidationReport validateSpreadsheet(final Path toValidate) throws ParserConfigurationException, IOException, SAXException { + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "Path", "toValidate")); + return validateSingleFormat(toValidate, Formats.ODS); + } + + public ValidationReport validateSpreadsheet(final File toValidate) throws ParserConfigurationException, IOException, SAXException { + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "File", "toValidate")); + return validateSingleFormat(toValidate, Formats.ODS); + } + + public ValidationReport validateSingleFormat(final File toValidate, final Formats legal) throws ParserConfigurationException, IOException, SAXException { + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "File", "toValidate")); + Objects.requireNonNull(legal, String.format(Checks.NOT_NULL, "Formats", "legal")); + return validateSingleFormat(toValidate.toPath(), legal); + } + + public ValidationReport validateSingleFormat(final Path toValidate, final Formats legal) throws ParserConfigurationException, IOException, SAXException { + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "Path", "toValidate")); + Objects.requireNonNull(legal, String.format(Checks.NOT_NULL, "Formats", "legal")); + ValidationReport report = validate(toValidate); + if (report.document == null || 
report.document.getFormat() == null) { + report.add(toValidate.toString(), FACTORY.getError("DOC-6", legal.mime)); + } else { + Formats detectedFmt = report.document.getFormat(); + if (detectedFmt != legal) { + report.add(toValidate.toString(), FACTORY.getError("DOC-7", legal.mime, detectedFmt.mime)); + } + } + return report; + } + + public ValidationReport validate(final File toValidate) throws ParserConfigurationException, IOException, SAXException { + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "File", "toValidate")); + return validate(toValidate.toPath()); + } + public ValidationReport validate(final Path toValidate) throws ParserConfigurationException, IOException, SAXException { - Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "String", "toValidate")); + Objects.requireNonNull(toValidate, String.format(Checks.NOT_NULL, "Path", "toValidate")); // Check if the path exists and is not a directory existingFileCheck(toValidate); @@ -69,9 +107,9 @@ private ValidationReport validatePackage(final Path toValidate) throws ParserCon return parser.validatePackage(pckg); } private ValidationReport validateOpenDocumentXml(final Path toValidate) throws ParserConfigurationException, SAXException, IOException { - final ValidationReport report = ValidationReport.of(toValidate.toString()); final XmlParser checker = new XmlParser(); ParseResult parseResult = checker.parse(toValidate); + final ValidationReport report = (parseResult.isWellFormed()) ? 
ValidationReport.of(toValidate.toString(), Documents.openDocumentOf(Documents.odfDocumentOf(parseResult))) : ValidationReport.of(toValidate.toString()); if (parseResult.isWellFormed()) { Version version = Version.ODF_13; final XmlValidator validator = new XmlValidator(); diff --git a/odf-core/src/main/resources/org/openpreservation/odf/messages/Messages.properties b/odf-core/src/main/resources/org/openpreservation/odf/messages/Messages.properties index 89f9a7c1..ace7ab41 100644 --- a/odf-core/src/main/resources/org/openpreservation/odf/messages/Messages.properties +++ b/odf-core/src/main/resources/org/openpreservation/odf/messages/Messages.properties @@ -6,6 +6,8 @@ DOC-2 = OpenDocument version %s detected. DOC-3 = OpenDocument MIMETYPE %s detected DOC-4 = Invalid MIMETYPE declaration %s detected. DOC-5 = No MIMETYPE declaration detected. +DOC-6 = OpenDocument document SHALL be format %s, no format was detected. +DOC-7 = OpenDocument document SHALL be format %s, but format %s was detected. PKG-1 = All files contained in the Zip file shall be non compressed (STORED) or compressed using the “deflate” (DEFLATED) algorithm. Zip entry %s is compressed with an unknown algorithm. PKG-2 = An OpenDocument package SHOULD contain a file "mimetype". PKG-3 = An OpenDocument package SHALL only contain the "META-INF/manifest.xml" and files containg the term "signatures" in their name in the "META-INF" folder. File %s does not meet this criteria. 
diff --git a/odf-core/src/test/java/org/openpreservation/odf/validation/ValidatorsTest.java b/odf-core/src/test/java/org/openpreservation/odf/validation/ValidatorsTest.java index 07effa1c..3869651c 100644 --- a/odf-core/src/test/java/org/openpreservation/odf/validation/ValidatorsTest.java +++ b/odf-core/src/test/java/org/openpreservation/odf/validation/ValidatorsTest.java @@ -1,14 +1,20 @@ package org.openpreservation.odf.validation; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; + import javax.xml.parsers.ParserConfigurationException; import org.junit.Test; import org.openpreservation.format.zip.ZipEntry; import org.openpreservation.format.zip.Zips; +import org.openpreservation.odf.fmt.TestFiles; import org.xml.sax.SAXException; public class ValidatorsTest { @@ -35,4 +41,14 @@ public void testInValidCompression() { ZipEntry entry = Zips.zipEntryInstance("name", 0, 0, 0, java.util.zip.ZipEntry.CENATT, false, null); assertFalse("CENATT should NOT be valid", Validators.isCompressionValid(entry)); } + + @Test + public void validateSpecificFormat() throws ParserConfigurationException, IOException, SAXException, URISyntaxException { + Validator validator = new Validator(); + ValidationReport report = validator.validateSpreadsheet(new File(TestFiles.EMPTY_ODS.toURI()).toPath()); + assertTrue("Package should be valid." , report.isValid()); + report = validator.validateSpreadsheet(new File(TestFiles.DSIG_INVALID.toURI()).toPath()); + assertFalse("Package should NOT be valid, spreadsheets only." 
, report.isValid()); + assertEquals(1, report.getMessages().stream().filter(m -> m.getId().equals("DOC-7")).count()); + } } diff --git a/pom.xml b/pom.xml index 847b9eed..d68ae3b6 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.openpreservation.odf odf-validator - 0.1.0-SNAPSHOT + 0.9.0 pom