From 15636e2afe295fb3520e12c786332c789a277734 Mon Sep 17 00:00:00 2001 From: Menzo Date: Wed, 14 Dec 2016 05:54:51 +0100 Subject: [PATCH 1/5] Upgraded Saxon to 9.7.0-14 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 071e8a4..e904b0c 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ net.sf.saxon Saxon-HE - 9.6.0-5 + 9.7.0-14 jar @@ -100,4 +100,4 @@ file://${project.basedir}/lib --> - \ No newline at end of file + From 550cd28270c9b27fdd3c04891260eee82b0e9f03 Mon Sep 17 00:00:00 2001 From: Menzo Date: Wed, 12 Apr 2017 09:35:27 +0200 Subject: [PATCH 2/5] Added the CLARIN Nexus repository. --- pom.xml | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index e904b0c..cb937c6 100644 --- a/pom.xml +++ b/pom.xml @@ -62,8 +62,10 @@ maven-assembly-plugin - make-assembly SchemAnon - package + make-assembly SchemAnon + + package + single @@ -79,25 +81,29 @@ SchemAnon false - + - + - - - CLARIN-Snapshot - https://nexus.clarin.eu/content/repositories/clarin-snapshot - - - CLARIN - https://nexus.clarin.eu/content/repositories/Clarin - - - + + + + CLARIN + CLARIN Repository + https://nexus.clarin.eu/content/repositories/Clarin + + false + + + From bcda64a834cb8feb3a10439affe28bb5e3833559 Mon Sep 17 00:00:00 2001 From: Menzo Date: Sat, 9 Sep 2017 09:41:51 +0200 Subject: [PATCH 3/5] Now supports Schematron only validation, i.e., not only embedded in a XML Schema. --- src/main/java/nl/mpi/tla/schemanon/Main.java | 10 +-- .../java/nl/mpi/tla/schemanon/SchemAnon.java | 77 +++++++++++++++---- 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/main/java/nl/mpi/tla/schemanon/Main.java b/src/main/java/nl/mpi/tla/schemanon/Main.java index 00a9039..71ac511 100644 --- a/src/main/java/nl/mpi/tla/schemanon/Main.java +++ b/src/main/java/nl/mpi/tla/schemanon/Main.java @@ -25,10 +25,6 @@ import java.util.Collection; import java.util.ArrayList; import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.xml.transform.stream.StreamSource; -import nl.mpi.tla.schemanon.Message; import org.apache.commons.io.FileUtils; import joptsimple.OptionParser; import joptsimple.OptionSet; @@ -62,8 +58,8 @@ public static int validate(SchemAnon tron, File input) { } private static void showHelp() { - System.err.println("INF: SchemAnon -- ? *"); - System.err.println("INF: URL to the XSD Schema"); + System.err.println("INF: SchemAnon -- ? *"); + System.err.println("INF: URL to the XSD Schema or Schematron rules"); System.err.println("INF: input directory or file (default: STDIN)"); System.err.println("INF: file extension to filter on in the input directory (optional)"); System.err.println("INF: SchemAnon options:"); @@ -84,7 +80,7 @@ public static void main(String[] args) { List arg = options.nonOptionArguments(); if (arg.size()<1) { - System.err.println("FTL: no XSD Schema specified!"); + System.err.println("FTL: no XSD Schema or Schematron rules specified!"); showHelp(); System.exit(1); } diff --git a/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java b/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java index 532bd5f..5c1702a 100644 --- a/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java +++ b/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java @@ -18,7 +18,6 @@ import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.net.URL; import java.util.List; import javax.xml.transform.Source; @@ -72,6 +71,14 @@ public class SchemAnon { */ private XdmNode validationReport = null; private LSResourceResolver resourceResolver = null; + + /** + * The type of schema document + */ + + private XdmNode schema = null; + public enum Type { XSD, SCH }; + private Type type = null; public SchemAnon(Source srcSchema,String phase) { this.srcSchema = srcSchema; @@ -90,6 +97,34 @@ public SchemAnon(URL schemaURL) { this(schemaURL,null); } + /** + * Return the type of the schema: + * - XSD: XML Schema (might have embedded Schematron rules) + * - SCH: Schematron rules + * @return Type + * @throws SchemAnonException + */ + public Type getType() throws SchemAnonException { + if (type == null) { + try { + // Load the schema + this.schema = SaxonUtils.buildDocument(this.srcSchema); + // XSD or Schematron? + SaxonUtils.declareXPathNamespace("sch", "http://purl.oclc.org/dsdl/schematron"); + SaxonUtils.declareXPathNamespace("xs", "http://www.w3.org/2001/XMLSchema"); + if (SaxonUtils.evaluateXPath(schema, "exists(/sch:schema)").effectiveBooleanValue()) + this.type = Type.SCH; + else if (SaxonUtils.evaluateXPath(schema, "exists(/xs:schema)").effectiveBooleanValue()) + this.type = Type.XSD; + else + throw new SchemAnonException("Unknown schema type! Only XSD or Schematron are supported!"); + } catch (SaxonApiException ex) { + throw new SchemAnonException(ex); + } + } + return this.type; + } + /** * Returns the Schematron XSLT, and loads it just-in-time. * @@ -99,8 +134,6 @@ public SchemAnon(URL schemaURL) { private synchronized XsltExecutable getSchematron() throws SchemAnonException, IOException { if (schemaTron == null) { try { - // Load the schema - XdmNode schema = SaxonUtils.buildDocument(srcSchema); // Load the Schematron XSL to extract the Schematron rules; XsltTransformer extractSchXsl = SaxonUtils.buildTransformer(SchemAnon.class.getResource("/schematron/ExtractSchFromXSD-2.xsl")).load(); // Load the Schematron XSLs to 'compile' Schematron rules; @@ -110,18 +143,30 @@ private synchronized XsltExecutable getSchematron() throws SchemAnonException, I if (this.phase!=null) compileSchXsl.setParameter(new QName("phase"), new XdmAtomicValue(this.phase)); - // Setup the pipeline + // Setup the pipeline (going backwards) XdmDestination destination = new XdmDestination(); - extractSchXsl.setSource(schema.asSource()); - extractSchXsl.setDestination(includeSchXsl); - includeSchXsl.setDestination(expandSchXsl); - expandSchXsl.setDestination(compileSchXsl); compileSchXsl.setDestination(destination); - // Extract the Schematron rules from the schema - extractSchXsl.transform(); + expandSchXsl.setDestination(compileSchXsl); + includeSchXsl.setDestination(expandSchXsl); + XsltTransformer start = null; + if (this.getType()==Type.XSD) { + // Extract the Schematron rules from the schema + extractSchXsl.setDestination(includeSchXsl); + extractSchXsl.setSource(schema.asSource()); + start = extractSchXsl; + } else if (this.getType()==Type.SCH) { + includeSchXsl.setSource(schema.asSource()); + start = includeSchXsl; + } else + throw new SchemAnonException("Unknown schema type! Only XSD or Schematron are supported!"); + // start the pipeline + start.transform(); + //System.err.println("DBG: SCH[\n"+destination.getXdmNode().toString()+"\n]SCH"); // Compile the Schematron rules XSL schemaTron = SaxonUtils.buildTransformer(destination.getXdmNode()); } catch (SaxonApiException ex) { + System.err.println("!ERR: unexpected exception while compiling Schematron validation for source["+this.srcSchema.getSystemId()+"]: "+ex); + ex.printStackTrace(System.err); throw new SchemAnonException(ex); } } @@ -146,12 +191,15 @@ public boolean validateSchematron(Source src) throws SchemAnonException, IOExcep XdmDestination destination = new XdmDestination(); schematronXsl.setDestination(destination); schematronXsl.transform(); + //System.err.println("DBG: SVRL[\n"+destination.getXdmNode().toString()+"\n]SVRL"); validationReport = destination.getXdmNode(); - + SaxonUtils.declareXPathNamespace("svrl", "http://purl.oclc.org/dsdl/svrl"); return ((net.sf.saxon.value.BooleanValue) SaxonUtils.evaluateXPath(validationReport, "empty(//svrl:failed-assert[(preceding-sibling::svrl:fired-rule)[last()][empty(@role) or @role!='warning']])").evaluateSingle().getUnderlyingValue()).getBooleanValue(); } catch (SaxonApiException ex) { + System.err.println("!ERR: unexpected exception while doing Schematron validation for source["+src.getSystemId()+"]: "+ex); + ex.printStackTrace(System.err); throw new SchemAnonException(ex); } } @@ -182,6 +230,9 @@ private synchronized Schema getXSD() throws SchemAnonException, IOException { } public boolean validateXSD(Source src) throws SchemAnonException, IOException { + if (this.getType() != Type.XSD) + return true; + if (msgList == null) msgList = new java.util.ArrayList(); try { @@ -202,7 +253,7 @@ public boolean validateXSD(Source src) throws SchemAnonException, IOException { msgList.add(msg); return false; } catch (Exception ex) { - System.err.println("!ERR: unexpected exception while processing source["+src.getSystemId()+"]: "+ex); + System.err.println("!ERR: unexpected exception while doing XSD validation for source["+src.getSystemId()+"]: "+ex); ex.printStackTrace(System.err); throw new SchemAnonException(ex); } @@ -240,7 +291,7 @@ public boolean validate(Source src) throws SchemAnonException, IOException { msg.test = null; msg.location = null; msg.error = true; - msg.text = ex.getMessage(); + msg.text = (ex instanceof SchemAnonException?ex.getCause().getMessage():ex.getMessage()); msgList.add(msg); return false; } From f55df0a589acb77b25846e3fbd4e9f4dc2e39b57 Mon Sep 17 00:00:00 2001 From: Menzo Date: Mon, 11 Sep 2017 13:36:29 +0200 Subject: [PATCH 4/5] Added command line options (and handling) to: - save the SVRL report (-s), will save a .svrl next to the validates - quiet the output (-q) --- src/main/java/nl/mpi/tla/schemanon/Main.java | 38 ++++++++++++------- .../java/nl/mpi/tla/schemanon/Message.java | 2 +- .../java/nl/mpi/tla/schemanon/SaxonUtils.java | 19 +++++++++- .../java/nl/mpi/tla/schemanon/SchemAnon.java | 9 ++++- .../mpi/tla/schemanon/SchemAnonException.java | 2 +- .../mpi/tla/schemanon/SimpleErrorHandler.java | 17 +++++++++ src/main/resources/identity.xsl | 14 +++++++ 7 files changed, 83 insertions(+), 18 deletions(-) create mode 100644 src/main/resources/identity.xsl diff --git a/src/main/java/nl/mpi/tla/schemanon/Main.java b/src/main/java/nl/mpi/tla/schemanon/Main.java index 71ac511..131b5a7 100644 --- a/src/main/java/nl/mpi/tla/schemanon/Main.java +++ b/src/main/java/nl/mpi/tla/schemanon/Main.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ import org.apache.commons.io.FileUtils; import joptsimple.OptionParser; import joptsimple.OptionSet; +import net.sf.saxon.s9api.SaxonApiException; /** @@ -35,21 +36,24 @@ */ public class Main { - public static int validate(SchemAnon tron, File input) { + public static int validate(SchemAnon tron, File input, boolean svrl, boolean quiet) { int code = 0; try { - if (tron.validate(input)) { - System.out.println("SchemAnon["+input+"]: VALID"); - } else { - System.out.println("SchemAnon["+input+"]: INVALID!"); + if (!tron.validate(input)) code = 1; + if (!quiet) { + System.out.println("SchemAnon["+input+"]: "+(code==0?"VALID":"INVALID!")); + for (Message msg : tron.getMessages()) { + System.out.println("" + (msg.isError() ? "ERROR" : "WARNING") + (msg.getLocation() != null ? " at " + msg.getLocation() : "")); + System.out.println(" " + msg.getText()); + } + System.out.println(); } - for (Message msg : tron.getMessages()) { - System.out.println("" + (msg.isError() ? "ERROR" : "WARNING") + (msg.getLocation() != null ? " at " + msg.getLocation() : "")); - System.out.println(" " + msg.getText()); + if (svrl) { + File output = new File(input.getPath()+".svrl"); + SaxonUtils.save(tron.getReport().asSource(),output); } - System.out.println(); - } catch (SchemAnonException | IOException ex) { + } catch (SaxonApiException | SchemAnonException | IOException ex) { System.err.println("FATAL: validating file["+input+"]: "+ex); ex.printStackTrace(System.err); System.exit(4); @@ -64,15 +68,21 @@ private static void showHelp() { System.err.println("INF: file extension to filter on in the input directory (optional)"); System.err.println("INF: SchemAnon options:"); System.err.println("INF: -p= Schematron phase to use (optional)"); + System.err.println("INF: -s Save the Schematron SVRL report (default: don't save)"); + System.err.println("INF: -q Be quiet: no messages to the terminal"); } public static void main(String[] args) { + boolean quiet = false; + boolean svrl = false; String phase = null; // check command line - OptionParser parser = new OptionParser( "p:?*" ); + OptionParser parser = new OptionParser( "p:sq?*" ); OptionSet options = parser.parse(args); if (options.has("p")) phase = (String)options.valueOf("p"); + svrl = options.has("s"); + quiet = options.has("q"); if (options.has("?")) { showHelp(); System.exit(0); @@ -108,7 +118,7 @@ public static void main(String[] args) { inputs.add(location); } for (File input:inputs) - code = validate(tron,input)>0?1:code; + code = validate(tron,input,svrl,quiet)>0?1:code; } else { try { BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); @@ -117,7 +127,7 @@ public static void main(String[] args) { line = line.trim(); if (!line.startsWith("#")) { File input = new File(line); - code = validate(tron,input)>0?1:code; + code = validate(tron,input,svrl,quiet)>0?1:code; } } } catch(IOException ex) { diff --git a/src/main/java/nl/mpi/tla/schemanon/Message.java b/src/main/java/nl/mpi/tla/schemanon/Message.java index 6d4c7c4..a9a6485 100644 --- a/src/main/java/nl/mpi/tla/schemanon/Message.java +++ b/src/main/java/nl/mpi/tla/schemanon/Message.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 menzowindhouwer + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/main/java/nl/mpi/tla/schemanon/SaxonUtils.java b/src/main/java/nl/mpi/tla/schemanon/SaxonUtils.java index 6d740b8..f962033 100644 --- a/src/main/java/nl/mpi/tla/schemanon/SaxonUtils.java +++ b/src/main/java/nl/mpi/tla/schemanon/SaxonUtils.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ import net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XsltCompiler; import net.sf.saxon.s9api.XsltExecutable; +import net.sf.saxon.s9api.XsltTransformer; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -218,4 +219,20 @@ static public XPathSelector evaluateXPath(XdmItem ctxt, String xp) throws SaxonA static public XdmNode wrapNode(Node node) { return getDocumentBuilder().wrap(node); } + + /** + * Save a XML Source to a file. + */ + static public void save(Source source,File result) throws SaxonApiException { + try { + XsltTransformer transformer = buildTransformer(SaxonUtils.class.getResource("/identity.xsl")).load(); + transformer.setSource(source); + transformer.setDestination(getProcessor().newSerializer(result)); + transformer.transform(); + transformer.close(); + } catch (Exception ex) { + throw new SaxonApiException(ex); + } + } + } diff --git a/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java b/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java index 5c1702a..e616010 100644 --- a/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java +++ b/src/main/java/nl/mpi/tla/schemanon/SchemAnon.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -327,4 +327,11 @@ public List getMessages() throws SchemAnonException { } return msgList; } + + /** + * Get the SVRL validation report + */ + public XdmNode getReport() { + return validationReport; + } } diff --git a/src/main/java/nl/mpi/tla/schemanon/SchemAnonException.java b/src/main/java/nl/mpi/tla/schemanon/SchemAnonException.java index e057fe4..6180c2f 100644 --- a/src/main/java/nl/mpi/tla/schemanon/SchemAnonException.java +++ b/src/main/java/nl/mpi/tla/schemanon/SchemAnonException.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/main/java/nl/mpi/tla/schemanon/SimpleErrorHandler.java b/src/main/java/nl/mpi/tla/schemanon/SimpleErrorHandler.java index 34d1df7..51db689 100644 --- a/src/main/java/nl/mpi/tla/schemanon/SimpleErrorHandler.java +++ b/src/main/java/nl/mpi/tla/schemanon/SimpleErrorHandler.java @@ -1,3 +1,20 @@ +/* + * Copyright (C) 2014 - 2017 The Language Archive - Max Planck Institute for Psycholinguistics, Meertens Institute + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + package nl.mpi.tla.schemanon; import java.util.List; diff --git a/src/main/resources/identity.xsl b/src/main/resources/identity.xsl new file mode 100644 index 0000000..69ec394 --- /dev/null +++ b/src/main/resources/identity.xsl @@ -0,0 +1,14 @@ + + + + + + + + + + + From 917a15c167a0139fd3d92f9977642e5e7e04610a Mon Sep 17 00:00:00 2001 From: Menzo Date: Fri, 6 Oct 2017 15:58:51 +0200 Subject: [PATCH 5/5] Cleanup, getting ready for the 1.1 release. --- pom.xml | 2 +- src/main/java/nl/mpi/tla/schemanon/Main.java | 33 +++++++++++++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index cb937c6..1150a9b 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 nl.mpi.tla SchemAnon - 1.0-SNAPSHOT + 1.1 jar diff --git a/src/main/java/nl/mpi/tla/schemanon/Main.java b/src/main/java/nl/mpi/tla/schemanon/Main.java index 131b5a7..32b8a06 100644 --- a/src/main/java/nl/mpi/tla/schemanon/Main.java +++ b/src/main/java/nl/mpi/tla/schemanon/Main.java @@ -24,11 +24,13 @@ import java.net.URL; import java.util.Collection; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.commons.io.FileUtils; import joptsimple.OptionParser; import joptsimple.OptionSet; import net.sf.saxon.s9api.SaxonApiException; +import org.apache.commons.io.comparator.SizeFileComparator; /** @@ -42,8 +44,8 @@ public static int validate(SchemAnon tron, File input, boolean svrl, boolean qui if (!tron.validate(input)) code = 1; if (!quiet) { - System.out.println("SchemAnon["+input+"]: "+(code==0?"VALID":"INVALID!")); for (Message msg : tron.getMessages()) { + System.out.println("SchemAnon["+input+"]: "+(code==0?"VALID":"INVALID!")); System.out.println("" + (msg.isError() ? "ERROR" : "WARNING") + (msg.getLocation() != null ? " at " + msg.getLocation() : "")); System.out.println(" " + msg.getText()); } @@ -56,33 +58,35 @@ public static int validate(SchemAnon tron, File input, boolean svrl, boolean qui } catch (SaxonApiException | SchemAnonException | IOException ex) { System.err.println("FATAL: validating file["+input+"]: "+ex); ex.printStackTrace(System.err); - System.exit(4); } return code; } private static void showHelp() { System.err.println("INF: SchemAnon -- ? *"); - System.err.println("INF: URL to the XSD Schema or Schematron rules"); + System.err.println("INF: URL to the XSD Schema and/or Schematron rules"); System.err.println("INF: input directory or file (default: STDIN)"); - System.err.println("INF: file extension to filter on in the input directory (optional)"); + System.err.println("INF: file extension to filter on in the input directory (default: xml)"); System.err.println("INF: SchemAnon options:"); System.err.println("INF: -p= Schematron phase to use (optional)"); System.err.println("INF: -s Save the Schematron SVRL report (default: don't save)"); - System.err.println("INF: -q Be quiet: no messages to the terminal"); + System.err.println("INF: -i Print progress info (default: on progress info)"); + System.err.println("INF: -q Be quiet (default: print validation info)"); } public static void main(String[] args) { boolean quiet = false; boolean svrl = false; + boolean iter = false; String phase = null; // check command line - OptionParser parser = new OptionParser( "p:sq?*" ); + OptionParser parser = new OptionParser( "p:sqi?*" ); OptionSet options = parser.parse(args); if (options.has("p")) phase = (String)options.valueOf("p"); svrl = options.has("s"); quiet = options.has("q"); + iter = options.has("i"); if (options.has("?")) { showHelp(); System.exit(0); @@ -113,12 +117,27 @@ public static void main(String[] args) { ArrayList extensions = new ArrayList(); for (int e=2;e0?1:code; + if (iter && quiet) + System.err.println(">> "+(code>0?"INVALID":"VALID")); + } } else { try { BufferedReader in = new BufferedReader(new InputStreamReader(System.in));