Skip to content

Commit

Permalink
Merge pull request #690 from Mailaender/xml-perf
Browse files Browse the repository at this point in the history
Fixed XML-based formats parsing the whole document during content matching
  • Loading branch information
eselmeister authored Aug 3, 2021
2 parents 8d640c8 + 59ae122 commit 96beeca
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,15 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.support.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.support.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.v105.model.MzData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class ChromatogramMagicNumberMatcher extends AbstractMagicNumberMatcher implements IMagicNumberMatcher {
Expand All @@ -45,13 +42,17 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MZ_DATA);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_105);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MzData mzData = (MzData)unmarshaller.unmarshal(nodeList.item(0));
if(mzData.getSpectrumList().getCount() > 1) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZ_DATA);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList spectrumList = document.getElementsByTagName(IConstants.NODE_SPECTRUM_LIST);
if(spectrumList.getLength() > 0) {
Element element = (Element)spectrumList.item(0);
int spectrumCount = Integer.parseInt(element.getAttribute("count"));
if(spectrumCount > 1) {
isValidFormat = true;
}
}
} catch(Exception e) {
// Print no exception.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,15 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.support.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.support.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzdata.internal.v105.model.MzData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class MassSpectrumMagicNumberMatcher extends AbstractMagicNumberMatcher implements IMagicNumberMatcher {
Expand All @@ -45,13 +42,17 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MZ_DATA);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_105);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MzData mzData = (MzData)unmarshaller.unmarshal(nodeList.item(0));
if(mzData.getSpectrumList().getCount() == 1) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZ_DATA);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList spectrumList = document.getElementsByTagName(IConstants.NODE_SPECTRUM_LIST);
if(spectrumList.getLength() > 0) {
Element element = (Element)spectrumList.item(0);
int spectrumCount = Integer.parseInt(element.getAttribute("count"));
if(spectrumCount == 1) {
isValidFormat = true;
}
}
} catch(Exception e) {
// Print no exception.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ public void writeChromatogram(File file, IChromatogramMSD chromatogram, IProgres
marshaller.marshal(mzData, file);
} catch(JAXBException e) {
logger.warn(e);
} catch(Exception e) {
e.printStackTrace();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ public interface IConstants {
String EXPORT_SCANS = "Export Scans";
String SCAN = "Scan";
String NODE_MZ_DATA = "mzData";
String NODE_SPECTRUM_LIST = "spectrumList";
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ Bundle-RequiredExecutionEnvironment: JavaSE-1.8
Bundle-ActivationPolicy: lazy
Import-Package: org.apache.commons.lang3.tuple;version="3.1.0",
org.eclipse.chemclipse.xxd.converter.supplier.io.exception
Export-Package: org.eclipse.chemclipse.msd.converter.supplier.mzml.preferences
Export-Package: org.eclipse.chemclipse.msd.converter.supplier.mzml.converter.io,
org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.converter,
org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.v110.model,
org.eclipse.chemclipse.msd.converter.supplier.mzml.preferences
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,15 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.converter.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.converter.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.converter.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.v110.model.MzML;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class ChromatogramMagicNumberMatcher extends AbstractMagicNumberMatcher implements IMagicNumberMatcher {
Expand All @@ -45,13 +42,17 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MZML);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_110);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MzML mzML = (MzML)unmarshaller.unmarshal(nodeList.item(0));
if(mzML.getRun().getChromatogramList() != null) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZML);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList chromatogramList = document.getElementsByTagName(IConstants.NODE_CHROMATOGRAM_LIST);
if(chromatogramList.getLength() > 0) {
Element element = (Element)chromatogramList.item(0);
int chromatogramCount = Integer.parseInt(element.getAttribute("count"));
if(chromatogramCount > 0) {
isValidFormat = true;
}
}
} catch(Exception e) {
// fail silently
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,15 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.converter.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.converter.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.converter.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.v110.model.MzML;
import org.eclipse.chemclipse.msd.converter.supplier.mzml.internal.v110.model.SpectrumListType;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class MassSpectrumMagicNumberMatcher extends AbstractMagicNumberMatcher implements IMagicNumberMatcher {
Expand All @@ -46,14 +42,17 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MZML);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_110);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MzML mzML = (MzML)unmarshaller.unmarshal(nodeList.item(0));
SpectrumListType spectrumList = mzML.getRun().getSpectrumList();
if(spectrumList != null && spectrumList.getCount().intValue() == 1) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZML);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList spectrumList = document.getElementsByTagName(IConstants.NODE_SPECTRUM_LIST);
if(spectrumList.getLength() > 0) {
Element element = (Element)spectrumList.item(0);
int spectrumCount = Integer.parseInt(element.getAttribute("count"));
if(spectrumCount == 1) {
isValidFormat = true;
}
}
} catch(Exception e) {
// fail silently
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ public interface IConstants {
//
String NODE_RUN = "run";
String NODE_MZML = "mzML";
String NODE_CHROMATOGRAM_LIST = "chromatogramList";
String NODE_SPECTRUM_LIST = "spectrumList";
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,16 @@ public IChromatogramOverview readOverview(File file, IProgressMonitor monitor) t
//
RunType run = XmlReader.getMzML(file, contextPath).getRun();
for(ChromatogramType chromatogramType : run.getChromatogramList().getChromatogram()) {
for(BinaryDataArrayType binaryDataArrayType : chromatogramType.getBinaryDataArrayList().getBinaryDataArray()) {
Pair<String, double[]> binaryData = BinaryReader.parseBinaryData(binaryDataArrayType);
if(binaryData.getKey().equals("time")) {
retentionTimes = binaryData.getValue();
} else if(binaryData.getKey().equals("intensity")) {
intensities = binaryData.getValue();
if(chromatogramType.getId().equals("TIC")) {
if(chromatogramType.getCvParam().stream().anyMatch(n -> n.getAccession().equals("MS:1000235") && n.getName().equals("total ion current chromatogram"))) {
for(BinaryDataArrayType binaryDataArrayType : chromatogramType.getBinaryDataArrayList().getBinaryDataArray()) {
Pair<String, double[]> binaryData = BinaryReader.parseBinaryData(binaryDataArrayType);
if(binaryData.getKey().equals("time")) {
retentionTimes = binaryData.getValue();
} else if(binaryData.getKey().equals("intensity")) {
intensities = binaryData.getValue();
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;

import org.eclipse.chemclipse.converter.exceptions.FileIsNotWriteableException;
Expand Down Expand Up @@ -169,8 +170,8 @@ public void writeChromatogram(File file, IChromatogramMSD chromatogram, IProgres
marshaller.marshal(mzML, file);
} catch(JAXBException e) {
logger.warn(e);
} catch(Exception e) {
e.printStackTrace();
} catch(DatatypeConfigurationException e) {
logger.warn(e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,13 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.v32.model.MsRun;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

Expand All @@ -45,16 +41,15 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MS_RUN);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_320);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MsRun msRun = (MsRun)unmarshaller.unmarshal(nodeList.item(0));
if(msRun.getScan().size() > 1) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZXML);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList scanList = document.getElementsByTagName(IConstants.NODE_SCAN);
if(scanList.getLength() > 1)
isValidFormat = true;
} catch(Exception e) {
e.printStackTrace();
// fail silently
}
return isValidFormat;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,13 @@

import java.io.File;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.chemclipse.converter.core.AbstractMagicNumberMatcher;
import org.eclipse.chemclipse.converter.core.IMagicNumberMatcher;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.IConstants;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.IFormat;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.io.SpecificationValidator;
import org.eclipse.chemclipse.msd.converter.supplier.mzxml.internal.v32.model.MsRun;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

Expand All @@ -45,16 +41,15 @@ public boolean checkFileFormat(File file) {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(file);
NodeList nodeList = document.getElementsByTagName(IConstants.NODE_MS_RUN);
//
JAXBContext jaxbContext = JAXBContext.newInstance(IFormat.CONTEXT_PATH_V_320);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
MsRun msRun = (MsRun)unmarshaller.unmarshal(nodeList.item(0));
if(msRun.getScan().size() == 1) {
isValidFormat = true;
NodeList root = document.getElementsByTagName(IConstants.NODE_MZXML);
if(root.getLength() != 1) {
return isValidFormat;
}
NodeList scanList = document.getElementsByTagName(IConstants.NODE_SCAN);
if(scanList.getLength() == 1)
isValidFormat = true;
} catch(Exception e) {
e.printStackTrace();
// fail silently
}
return isValidFormat;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2008, 2018 Lablicate GmbH.
* Copyright (c) 2008, 2021 Lablicate GmbH.
*
* All rights reserved.
* This program and the accompanying materials are made available under the
Expand All @@ -20,5 +20,7 @@ public interface IConstants {
String EXPORT_SCANS = "Export Scans";
String SCAN = "Scan";
//
String NODE_MZXML = "mzXML";
String NODE_MS_RUN = "msRun";
String NODE_SCAN = "scan";
}

0 comments on commit 96beeca

Please sign in to comment.