From 2b1754886dfdb917ca6caaab82fca518eda7a13a Mon Sep 17 00:00:00 2001
From: yokochi47
Date: Thu, 5 Dec 2024 20:57:28 +0900
Subject: [PATCH] Add generic XML lexer, parser, and TopSpin peak list parser listener and reader

---
 wwpdb/utils/nmr/pk/TopSpinPKParserListener.py |  413 ++++++++++
 wwpdb/utils/nmr/pk/TopSpinPKReader.py         |  216 ++++++
 wwpdb/utils/nmr/pk/XMLLexer.py                |  153 ++++
 wwpdb/utils/nmr/pk/XMLParser.py               |  716 ++++++++++++++++++
 wwpdb/utils/nmr/pk/XwinNmrPKReader.py         |    2 +-
 .../antlr-grammars-v4.10/XMLLexer.g4          |   93 +++
 .../antlr-grammars-v4.10/XMLParser.g4         |   78 ++
 7 files changed, 1670 insertions(+), 1 deletion(-)
 create mode 100644 wwpdb/utils/nmr/pk/TopSpinPKParserListener.py
 create mode 100644 wwpdb/utils/nmr/pk/TopSpinPKReader.py
 create mode 100644 wwpdb/utils/nmr/pk/XMLLexer.py
 create mode 100644 wwpdb/utils/nmr/pk/XMLParser.py
 create mode 100644 wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLLexer.g4
 create mode 100644 wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLParser.g4

diff --git a/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py b/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py
new file mode 100644
index 00000000..81daa857
--- /dev/null
+++ b/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py
@@ -0,0 +1,413 @@
+##
+# File: TopSpinPKParserListener.py
+# Date: 05-Dec-2024
+#
+# Updates:
+""" ParserListener class for TOPSPIN PK files.
+    @author: Masashi Yokochi
+"""
+import sys
+import numpy as np
+
+from antlr4 import ParseTreeListener
+
+try:
+    from wwpdb.utils.nmr.pk.XMLParser import XMLParser
+    from wwpdb.utils.nmr.pk.BasePKParserListener import BasePKParserListener
+    from wwpdb.utils.nmr.mr.ParserListenerUtil import (REPRESENTATIVE_MODEL_ID,
+                                                       REPRESENTATIVE_ALT_ID,
+                                                       getPkRow)
+
+except ImportError:
+    from nmr.pk.XMLParser import XMLParser
+    from nmr.pk.BasePKParserListener import BasePKParserListener
+    from nmr.mr.ParserListenerUtil import (REPRESENTATIVE_MODEL_ID,
+                                           REPRESENTATIVE_ALT_ID,
+                                           getPkRow)
+
+
+# This class defines a complete listener for a parse tree produced by XMLParser.
+class TopSpinPKParserListener(ParseTreeListener, BasePKParserListener):
+
+    __cur_path = None
+
+    __f1_ppm = None
+    __f2_ppm = None
+    __f3_ppm = None
+    __f4_ppm = None
+    __intensity = None
+    __volume = None
+    __annotation = None
+
+    def __init__(self, verbose=True, log=sys.stdout,
+                 representativeModelId=REPRESENTATIVE_MODEL_ID,
+                 representativeAltId=REPRESENTATIVE_ALT_ID,
+                 mrAtomNameMapping=None,
+                 cR=None, caC=None, ccU=None, csStat=None, nefT=None,
+                 reasons=None):
+        super().__init__(verbose, log, representativeModelId, representativeAltId,
+                         mrAtomNameMapping, cR, caC, ccU, csStat, nefT, reasons)
+
+        self.file_type = 'nm-pea-top'
+        self.software_name = 'TOPSPIN'
+
+    # Enter a parse tree produced by XMLParser#document.
+    def enterDocument(self, ctx: XMLParser.DocumentContext):  # pylint: disable=unused-argument
+        self.__cur_path = ''
+
+        self.enter()
+
+    # Exit a parse tree produced by XMLParser#document.
+ def exitDocument(self, ctx: XMLParser.DocumentContext): # pylint: disable=unused-argument + + if len(self.spectral_dim) > 0: + for d, v in self.spectral_dim.items(): + for _id, _v in v.items(): + self.acq_dim_id = 1 + for __d, __v in _v.items(): + if 'freq_hint' in __v: + if len(__v['freq_hint']) > 0: + center = np.mean(np.array(__v['freq_hint'])) + + if __v['atom_isotope_number'] is None: + if 125 < center < 130: + __v['atom_type'] = 'C' + __v['atom_isotope_number'] = 13 + __v['axis_code'] = 'C_aro' + elif 115 < center < 125: + __v['atom_type'] = 'N' + __v['atom_isotope_number'] = 15 + __v['axis_code'] = 'N_ami' + elif 170 < center < 180: + __v['atom_type'] = 'C' + __v['atom_isotope_number'] = 13 + __v['axis_code'] = 'CO' + elif 6 < center < 9: + __v['atom_type'] = 'H' + __v['atom_isotope_number'] = 1 + __v['axis_code'] = 'H_ami_or_aro' + elif 4 < center < 6: + __v['atom_type'] = 'H' + __v['atom_isotope_number'] = 1 + __v['axis_code'] = 'H' + elif 60 < center < 90: + __v['atom_type'] = 'C' + __v['atom_isotope_number'] = 13 + __v['axis_code'] = 'C' + elif 30 < center < 50: + __v['atom_type'] = 'C' + __v['atom_isotope_number'] = 13 + __v['axis_code'] = 'C_ali' + + isotope_number = __v['atom_isotope_number'] + + if isotope_number is not None: + __v['acquisition'] = 'yes' if __d == self.acq_dim_id\ + and (isotope_number == 1 or (isotope_number == 13 and self.exptlMethod == 'SOLID-STATE NMR')) else 'no' + + if __d == 1 and __v['acquisition'] == 'no': + self.acq_dim_id = self.num_of_dim + + __v['under_sampling_type'] = 'not observed' if __v['acquisition'] == 'yes' else 'aliased' + + if __v['spectral_region'] is None and len(__v['freq_hint']) > 0: + atom_type = __v['atom_type'] + if 125 < center < 130 and atom_type == 'C': + __v['spectral_region'] = 'C_aro' + elif 115 < center < 125 and atom_type == 'N': + __v['spectral_region'] = 'N_ami' + elif 170 < center < 180 and atom_type == 'C': + __v['spectral_region'] = 'CO' + elif 6 < center < 9 and atom_type == 'H': + __v['spectral_region'] = 'H_ami_or_aro' + elif 4 < center < 6 and atom_type == 'H': + __v['spectral_region'] = 'H_all' + elif 60 < center < 90 and atom_type == 'C': + __v['spectral_region'] = 'C_all' + elif 30 < center < 50 and atom_type == 'C': + __v['spectral_region'] = 'C_ali' + + if len(__v['freq_hint']) > 0 and d > 2 and __d >= 2\ + and self.exptlMethod != 'SOLID-STATE NMR' and __v['atom_isotope_number'] == 13: + max_ppm = max(__v['freq_hint']) + min_ppm = min(__v['freq_hint']) + width = max_ppm - min_ppm + if center < 100.0 and width < 50.0: + __v['under_sampling_type'] = 'fold' + + del __v['freq_hint'] + + for __v in _v.values(): + if __v['axis_code'] == 'H_ami_or_aro': + has_a = any(___v['spectral_region'] == 'C_aro' for ___v in _v.values()) + __v['axis_code'] = 'H_aro' if has_a else 'H_ami' + if __v['spectral_region'] == 'H_ami_or_aro': + has_a = any(___v['spectral_region'] == 'C_aro' for ___v in _v.values()) + __v['spectral_region'] = 'H_aro' if has_a else 'H_ami' + + if self.debug: + print(f'num_of_dim: {d}, list_id: {_id}') + for __d, __v in _v.items(): + print(f'{__d} {__v}') + + self.exit() + + self.__cur_path = None + + # Enter a parse tree produced by XMLParser#prolog. + def enterProlog(self, ctx: XMLParser.PrologContext): # pylint: disable=unused-argument + pass + + # Exit a parse tree produced by XMLParser#prolog. + def exitProlog(self, ctx: XMLParser.PrologContext): # pylint: disable=unused-argument + pass + + # Enter a parse tree produced by XMLParser#content. 
+ def enterContent(self, ctx: XMLParser.ContentContext): # pylint: disable=unused-argument + pass + + # Exit a parse tree produced by XMLParser#content. + def exitContent(self, ctx: XMLParser.ContentContext): # pylint: disable=unused-argument + pass + + # Enter a parse tree produced by XMLParser#element. + def enterElement(self, ctx: XMLParser.ElementContext): + self.__cur_path += '/' + str(ctx.Name(0)) + + if self.__cur_path == '/PeakList/PeakList2D': + self.num_of_dim = 2 + self.fillCurrentSpectralDim() + + elif self.__cur_path == '/PeakList/PeakList3D': + self.num_of_dim = 3 + self.fillCurrentSpectralDim() + + elif self.__cur_path == '/PeakList/PeakList4D': + self.num_of_dim = 4 + self.fillCurrentSpectralDim() + + elif self.__cur_path == '/PeakList/PeakList2D/Peak2D': + self.peaks2D += 1 + + self.__f1_ppm = None + self.__f2_ppm = None + self.__intensity = None + self.__volume = None + self.__annotation = None + + elif self.__cur_path == '/PeakList/PeakList3D/Peak3D': + self.peaks3D += 1 + + self.__f1_ppm = None + self.__f2_ppm = None + self.__f3_ppm = None + self.__intensity = None + self.__volume = None + self.__annotation = None + + elif self.__cur_path == '/PeakList/PeakList4D/Peak4D': + self.peaks4D += 1 + + self.__f1_ppm = None + self.__f2_ppm = None + self.__f3_ppm = None + self.__f4_ppm = None + self.__intensity = None + self.__volume = None + self.__annotation = None + + # Exit a parse tree produced by XMLParser#element. + def exitElement(self, ctx: XMLParser.ElementContext): # pylint: disable=unused-argument + + if self.__cur_path == '/PeakList/PeakList2D/Peak2D': + + if None in (self.__f1_ppm, self.__f2_ppm)\ + or (self.__intensity is None and self.__volume is None): + self.peaks2D -= 1 + return + + index = self.peaks2D + + dstFunc = self.validatePeak2D(index, self.__f1_ppm, self.__f2_ppm, None, None, None, None, + None, None, None, None, self.__intensity, None, self.__volume, None) + + if dstFunc is None: + self.peaks2D -= 1 + return + + cur_spectral_dim = self.spectral_dim[self.num_of_dim][self.cur_list_id] + + cur_spectral_dim[1]['freq_hint'].append(self.__f1_ppm) + cur_spectral_dim[2]['freq_hint'].append(self.__f2_ppm) + + if self.createSfDict__: + sf = self.getSf() + + if self.debug: + print(f"subtype={self.cur_subtype} id={self.peaks2D} (index={index}) {dstFunc}") + + if self.createSfDict__ and sf is not None: + sf['index_id'] += 1 + + row = getPkRow(self.cur_subtype, sf['id'], sf['index_id'], + sf['list_id'], self.entryId, dstFunc, + self.authToStarSeq, self.authToOrigSeq, self.offsetHolder, + details=self.__annotation) + sf['loop'].add_data(row) + + elif self.__cur_path == '/PeakList/PeakList3D/Peak3D': + + if None in (self.__f1_ppm, self.__f2_ppm, self.__f3_ppm)\ + or (self.__intensity is None and self.__volume is None): + self.peaks3D -= 1 + return + + index = self.peaks3D + + dstFunc = self.validatePeak3D(index, self.__f1_ppm, self.__f2_ppm, self.__f3_ppm, None, None, None, None, None, None, + None, None, None, None, None, None, self.__intensity, None, self.__volume, None) + + if dstFunc is None: + self.peaks3D -= 1 + return + + cur_spectral_dim = self.spectral_dim[self.num_of_dim][self.cur_list_id] + + cur_spectral_dim[1]['freq_hint'].append(self.__f1_ppm) + cur_spectral_dim[2]['freq_hint'].append(self.__f2_ppm) + cur_spectral_dim[3]['freq_hint'].append(self.__f3_ppm) + + if self.createSfDict__: + sf = self.getSf() + + if self.debug: + print(f"subtype={self.cur_subtype} id={self.peaks3D} (index={index}) {dstFunc}") + + if self.createSfDict__ and sf is not None: + 
sf['index_id'] += 1 + + row = getPkRow(self.cur_subtype, sf['id'], sf['index_id'], + sf['list_id'], self.entryId, dstFunc, + self.authToStarSeq, self.authToOrigSeq, self.offsetHolder, + details=self.__annotation) + sf['loop'].add_data(row) + + elif self.__cur_path == '/PeakList/PeakList4D/Peak4D': + + if None in (self.__f1_ppm, self.__f2_ppm, self.__f3_ppm, self.__f4_ppm)\ + or (self.__intensity is None and self.__volume is None): + self.peaks4D -= 1 + return + + index = self.peaks4D + + dstFunc = self.validatePeak4D(index, self.__f1_ppm, self.__f2_ppm, self.__f3_ppm, self.__f4_ppm, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, self.__intensity, None, self.__volume, None) + + if dstFunc is None: + self.peaks4D -= 1 + return + + cur_spectral_dim = self.spectral_dim[self.num_of_dim][self.cur_list_id] + + cur_spectral_dim[1]['freq_hint'].append(self.__f1_ppm) + cur_spectral_dim[2]['freq_hint'].append(self.__f2_ppm) + cur_spectral_dim[3]['freq_hint'].append(self.__f3_ppm) + cur_spectral_dim[4]['freq_hint'].append(self.__f4_ppm) + + if self.createSfDict__: + sf = self.getSf() + + if self.debug: + print(f"subtype={self.cur_subtype} id={self.peaks4D} (index={index}) {dstFunc}") + + if self.createSfDict__ and sf is not None: + sf['index_id'] += 1 + + row = getPkRow(self.cur_subtype, sf['id'], sf['index_id'], + sf['list_id'], self.entryId, dstFunc, + self.authToStarSeq, self.authToOrigSeq, self.offsetHolder, + details=self.__annotation) + sf['loop'].add_data(row) + + self.__cur_path = self.__cur_path[:-(1 + len(str(ctx.Name(0))))] + + # Enter a parse tree produced by XMLParser#reference. + def enterReference(self, ctx: XMLParser.ReferenceContext): # pylint: disable=unused-argument + pass + + # Exit a parse tree produced by XMLParser#reference. + def exitReference(self, ctx: XMLParser.ReferenceContext): # pylint: disable=unused-argument + pass + + # Enter a parse tree produced by XMLParser#attribute. + def enterAttribute(self, ctx: XMLParser.AttributeContext): + + if ctx.Name() and ctx.STRING(): + name = str(ctx.Name()) + string = str(ctx.STRING())[1:-1] + + if self.__cur_path == '/PeakList/PeakList2D/Peak2D': + + if name == 'F1': + self.__f1_ppm = float(string) + elif name == 'F2': + self.__f2_ppm = float(string) + elif name == 'intensity': + self.__intensity = string + elif name == 'volume': + self.__volume = string + elif name == 'annotation': + self.__annotation = string + + elif self.__cur_path == '/PeakList/PeakList3D/Peak3D': + + if name == 'F1': + self.__f1_ppm = float(string) + elif name == 'F2': + self.__f2_ppm = float(string) + elif name == 'F3': + self.__f3_ppm = float(string) + elif name == 'intensity': + self.__intensity = string + elif name == 'volume': + self.__volume = string + elif name == 'annotation': + self.__annotation = string + + elif self.__cur_path == '/PeakList/PeakList4D/Peak4D': + + if name == 'F1': + self.__f1_ppm = float(string) + elif name == 'F2': + self.__f2_ppm = float(string) + elif name == 'F3': + self.__f3_ppm = float(string) + elif name == 'F4': + self.__f4_ppm = float(string) + elif name == 'intensity': + self.__intensity = string + elif name == 'volume': + self.__volume = string + elif name == 'annotation': + self.__annotation = string + + # Exit a parse tree produced by XMLParser#attribute. + def exitAttribute(self, ctx: XMLParser.AttributeContext): + pass + + # Enter a parse tree produced by XMLParser#chardata. 
+ def enterChardata(self, ctx: XMLParser.ChardataContext): + pass + + # Exit a parse tree produced by XMLParser#chardata. + def exitChardata(self, ctx: XMLParser.ChardataContext): + pass + + # Enter a parse tree produced by XMLParser#misc. + def enterMisc(self, ctx: XMLParser.MiscContext): # pylint: disable=unused-argument + pass + + # Exit a parse tree produced by XMLParser#misc. + def exitMisc(self, ctx: XMLParser.MiscContext): # pylint: disable=unused-argument + pass diff --git a/wwpdb/utils/nmr/pk/TopSpinPKReader.py b/wwpdb/utils/nmr/pk/TopSpinPKReader.py new file mode 100644 index 00000000..defe3535 --- /dev/null +++ b/wwpdb/utils/nmr/pk/TopSpinPKReader.py @@ -0,0 +1,216 @@ +## +# TopSpinPKReader.py +# +# Update: +## +""" A collection of classes for parsing TOPSPIN PK files. +""" +import sys +import os + +from antlr4 import InputStream, CommonTokenStream, ParseTreeWalker + +try: + from wwpdb.utils.nmr.mr.LexerErrorListener import LexerErrorListener + from wwpdb.utils.nmr.mr.ParserErrorListener import ParserErrorListener + from wwpdb.utils.nmr.pk.XMLLexer import XMLLexer + from wwpdb.utils.nmr.pk.XMLParser import XMLParser + from wwpdb.utils.nmr.pk.TopSpinPKParserListener import TopSpinPKParserListener + from wwpdb.utils.nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + MAX_ERROR_REPORT, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from wwpdb.utils.nmr.io.CifReader import CifReader + from wwpdb.utils.nmr.ChemCompUtil import ChemCompUtil + from wwpdb.utils.nmr.BMRBChemShiftStat import BMRBChemShiftStat + from wwpdb.utils.nmr.NEFTranslator.NEFTranslator import NEFTranslator +except ImportError: + from nmr.mr.LexerErrorListener import LexerErrorListener + from nmr.mr.ParserErrorListener import ParserErrorListener + from nmr.pk.XMLLexer import XMLLexer + from nmr.pk.XMLParser import XMLParser + from nmr.pk.TopSpinPKParserListener import TopSpinPKParserListener + from nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + MAX_ERROR_REPORT, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from nmr.io.CifReader import CifReader + from nmr.ChemCompUtil import ChemCompUtil + from nmr.BMRBChemShiftStat import BMRBChemShiftStat + from nmr.NEFTranslator.NEFTranslator import NEFTranslator + + +class TopSpinPKReader: + """ Accessor methods for parsing TOPSPIN PK files. 
+ """ + + def __init__(self, verbose=True, log=sys.stdout, + representativeModelId=REPRESENTATIVE_MODEL_ID, + representativeAltId=REPRESENTATIVE_ALT_ID, + mrAtomNameMapping=None, + cR=None, caC=None, ccU=None, csStat=None, nefT=None, + reasons=None): + self.__verbose = verbose + self.__lfh = log + self.__debug = False + + self.__maxLexerErrorReport = MAX_ERROR_REPORT + self.__maxParserErrorReport = MAX_ERROR_REPORT + + self.__representativeModelId = representativeModelId + self.__representativeAltId = representativeAltId + self.__mrAtomNameMapping = mrAtomNameMapping + + # CCD accessing utility + self.__ccU = ChemCompUtil(verbose, log) if ccU is None else ccU + + if cR is not None and caC is None: + caC = coordAssemblyChecker(verbose, log, representativeModelId, representativeAltId, + cR, self.__ccU, None, None, fullCheck=False) + + self.__cR = cR + self.__caC = caC + + # BMRB chemical shift statistics + self.__csStat = BMRBChemShiftStat(verbose, log, self.__ccU) if csStat is None else csStat + + # NEFTranslator + self.__nefT = NEFTranslator(verbose, log, self.__ccU, self.__csStat) if nefT is None else nefT + if nefT is None: + self.__nefT.set_remediation_mode(True) + + # reasons for re-parsing request from the previous trial + self.__reasons = reasons + + def setDebugMode(self, debug): + self.__debug = debug + + def setLexerMaxErrorReport(self, maxErrReport): + self.__maxLexerErrorReport = maxErrReport + + def setParserMaxErrorReport(self, maxErrReport): + self.__maxParserErrorReport = maxErrReport + + def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, + createSfDict=False, originalFileName=None, listIdCounter=None, entryId=None): + """ Parse TOPSPIN PK file. + @return: TopSpinPKParserListener for success or None otherwise, ParserErrorListener, LexerErrorListener. 
+ """ + + ifh = None + + try: + + if isFilePath: + pkString = None + + if not os.access(pkFilePath, os.R_OK): + if self.__verbose: + self.__lfh.write(f"TopSpinPKReader.parse() {pkFilePath} is not accessible.\n") + return None, None, None + + ifh = open(pkFilePath, 'r') # pylint: disable=consider-using-with + input = InputStream(ifh.read()) + + else: + pkFilePath, pkString = None, pkFilePath + + if pkString is None or len(pkString) == 0: + if self.__verbose: + self.__lfh.write("TopSpinPKReader.parse() Empty string.\n") + return None, None, None + + input = InputStream(pkString) + + if cifFilePath is not None: + if not os.access(cifFilePath, os.R_OK): + if self.__verbose: + self.__lfh.write(f"TopSpinPKReader.parse() {cifFilePath} is not accessible.\n") + return None, None, None + + if self.__cR is None: + self.__cR = CifReader(self.__verbose, self.__lfh) + if not self.__cR.parse(cifFilePath): + return None, None, None + + lexer = XMLLexer(input) + lexer.removeErrorListeners() + + lexer_error_listener = LexerErrorListener(pkFilePath, maxErrorReport=self.__maxLexerErrorReport) + lexer.addErrorListener(lexer_error_listener) + + messageList = lexer_error_listener.getMessageList() + + if messageList is not None and self.__verbose: + for description in messageList: + self.__lfh.write(f"[Syntax error] line {description['line_number']}:{description['column_position']} {description['message']}\n") + if 'input' in description: + self.__lfh.write(f"{description['input']}\n") + self.__lfh.write(f"{description['marker']}\n") + + stream = CommonTokenStream(lexer) + parser = XMLParser(stream) + # try with simpler/faster SLL prediction mode + # parser._interp.predictionMode = PredictionMode.SLL # pylint: disable=protected-access + parser.removeErrorListeners() + parser_error_listener = ParserErrorListener(pkFilePath, maxErrorReport=self.__maxParserErrorReport) + parser.addErrorListener(parser_error_listener) + tree = parser.document() + + walker = ParseTreeWalker() + listener = TopSpinPKParserListener(self.__verbose, self.__lfh, + self.__representativeModelId, + self.__representativeAltId, + self.__mrAtomNameMapping, + self.__cR, self.__caC, + self.__ccU, self.__csStat, self.__nefT, + self.__reasons) + listener.setDebugMode(self.__debug) + listener.createSfDict(createSfDict) + if createSfDict: + if originalFileName is not None: + listener.setOriginaFileName(originalFileName) + if listIdCounter is not None: + listener.setListIdCounter(listIdCounter) + if entryId is not None: + listener.setEntryId(entryId) + walker.walk(listener, tree) + + messageList = parser_error_listener.getMessageList() + + if messageList is not None and self.__verbose: + for description in messageList: + self.__lfh.write(f"[Syntax error] line {description['line_number']}:{description['column_position']} {description['message']}\n") + if 'input' in description: + self.__lfh.write(f"{description['input']}\n") + self.__lfh.write(f"{description['marker']}\n") + + if self.__verbose: + if listener.warningMessage is not None and len(listener.warningMessage) > 0: + print('\n'.join(listener.warningMessage)) + if isFilePath: + print(listener.getContentSubtype()) + + return listener, parser_error_listener, lexer_error_listener + + except IOError as e: + if self.__verbose: + self.__lfh.write(f"+TopSpinPKReader.parse() ++ Error - {str(e)}\n") + return None, None, None + # pylint: disable=unreachable + """ debug code + except Exception as e: + if self.__verbose and isFilePath: + self.__lfh.write(f"+TopSpinPKReader.parse() ++ Error - {pkFilePath!r} - 
{str(e)}\n") + return None, None, None + """ + finally: + if isFilePath and ifh is not None: + ifh.close() + + +if __name__ == "__main__": + reader = TopSpinPKReader(True) + reader.setDebugMode(True) + reader.parse('../../tests-nmr/mock-data-bruker-peak-list/tspp.xml', + '../../tests-nmr/mock-data-remediation/2js7/2js7.cif') # dummy diff --git a/wwpdb/utils/nmr/pk/XMLLexer.py b/wwpdb/utils/nmr/pk/XMLLexer.py new file mode 100644 index 00000000..8e02f205 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XMLLexer.py @@ -0,0 +1,153 @@ +# Generated from XMLLexer.g4 by ANTLR 4.13.0 +from antlr4 import * +from io import StringIO +import sys +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + + +def serializedATN(): + return [ + 4,0,18,231,6,-1,6,-1,6,-1,2,0,7,0,2,1,7,1,2,2,7,2,2,3,7,3,2,4,7, + 4,2,5,7,5,2,6,7,6,2,7,7,7,2,8,7,8,2,9,7,9,2,10,7,10,2,11,7,11,2, + 12,7,12,2,13,7,13,2,14,7,14,2,15,7,15,2,16,7,16,2,17,7,17,2,18,7, + 18,2,19,7,19,2,20,7,20,2,21,7,21,2,22,7,22,2,23,7,23,1,0,1,0,1,0, + 1,0,1,0,1,0,5,0,58,8,0,10,0,12,0,61,9,0,1,0,1,0,1,0,1,0,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,1,78,8,1,10,1,12,1,81,9,1, + 1,1,1,1,1,1,1,1,1,2,1,2,1,2,1,2,5,2,91,8,2,10,2,12,2,94,9,2,1,2, + 1,2,1,2,1,2,1,3,1,3,1,3,1,3,1,4,1,4,1,4,1,4,4,4,108,8,4,11,4,12, + 4,109,1,4,1,4,1,4,1,4,1,4,1,4,1,4,4,4,119,8,4,11,4,12,4,120,1,4, + 1,4,3,4,125,8,4,1,5,1,5,3,5,129,8,5,1,5,4,5,132,8,5,11,5,12,5,133, + 1,6,1,6,1,6,1,6,1,7,1,7,1,7,1,7,1,7,1,7,1,7,1,7,1,7,1,7,1,8,1,8, + 1,8,1,8,1,8,1,8,1,8,1,8,1,9,4,9,159,8,9,11,9,12,9,160,1,10,1,10, + 1,10,1,10,1,11,1,11,1,11,1,11,1,11,1,12,1,12,1,12,1,12,1,12,1,13, + 1,13,1,14,1,14,1,15,1,15,5,15,183,8,15,10,15,12,15,186,9,15,1,15, + 1,15,1,15,5,15,191,8,15,10,15,12,15,194,9,15,1,15,3,15,197,8,15, + 1,16,1,16,5,16,201,8,16,10,16,12,16,204,9,16,1,17,1,17,1,17,1,17, + 1,18,1,18,1,19,1,19,1,20,1,20,1,20,1,20,3,20,218,8,20,1,21,3,21, + 221,8,21,1,22,1,22,1,22,1,22,1,22,1,23,1,23,1,23,1,23,3,59,79,92, + 0,24,3,1,5,2,7,3,9,4,11,5,13,6,15,7,17,8,19,0,21,9,23,10,25,11,27, + 12,29,13,31,14,33,15,35,16,37,17,39,0,41,0,43,0,45,0,47,18,49,0, + 3,0,1,2,9,2,0,9,9,32,32,2,0,38,38,60,60,2,0,34,34,60,60,2,0,39,39, + 60,60,3,0,9,10,13,13,32,32,3,0,48,57,65,70,97,102,1,0,48,57,3,0, + 183,183,768,879,8255,8256,9,0,58,58,65,90,95,95,97,122,8304,8591, + 11264,12271,12289,55295,63744,64975,65008,65533,241,0,3,1,0,0,0, + 0,5,1,0,0,0,0,7,1,0,0,0,0,9,1,0,0,0,0,11,1,0,0,0,0,13,1,0,0,0,0, + 15,1,0,0,0,0,17,1,0,0,0,0,19,1,0,0,0,0,21,1,0,0,0,1,23,1,0,0,0,1, + 25,1,0,0,0,1,27,1,0,0,0,1,29,1,0,0,0,1,31,1,0,0,0,1,33,1,0,0,0,1, + 35,1,0,0,0,1,37,1,0,0,0,2,47,1,0,0,0,2,49,1,0,0,0,3,51,1,0,0,0,5, + 66,1,0,0,0,7,86,1,0,0,0,9,99,1,0,0,0,11,124,1,0,0,0,13,131,1,0,0, + 0,15,135,1,0,0,0,17,139,1,0,0,0,19,149,1,0,0,0,21,158,1,0,0,0,23, + 162,1,0,0,0,25,166,1,0,0,0,27,171,1,0,0,0,29,176,1,0,0,0,31,178, + 1,0,0,0,33,196,1,0,0,0,35,198,1,0,0,0,37,205,1,0,0,0,39,209,1,0, + 0,0,41,211,1,0,0,0,43,217,1,0,0,0,45,220,1,0,0,0,47,222,1,0,0,0, + 49,227,1,0,0,0,51,52,5,60,0,0,52,53,5,33,0,0,53,54,5,45,0,0,54,55, + 5,45,0,0,55,59,1,0,0,0,56,58,9,0,0,0,57,56,1,0,0,0,58,61,1,0,0,0, + 59,60,1,0,0,0,59,57,1,0,0,0,60,62,1,0,0,0,61,59,1,0,0,0,62,63,5, + 45,0,0,63,64,5,45,0,0,64,65,5,62,0,0,65,4,1,0,0,0,66,67,5,60,0,0, + 67,68,5,33,0,0,68,69,5,91,0,0,69,70,5,67,0,0,70,71,5,68,0,0,71,72, + 5,65,0,0,72,73,5,84,0,0,73,74,5,65,0,0,74,75,5,91,0,0,75,79,1,0, + 0,0,76,78,9,0,0,0,77,76,1,0,0,0,78,81,1,0,0,0,79,80,1,0,0,0,79,77, + 1,0,0,0,80,82,1,0,0,0,81,79,1,0,0,0,82,83,5,93,0,0,83,84,5,93,0, + 
0,84,85,5,62,0,0,85,6,1,0,0,0,86,87,5,60,0,0,87,88,5,33,0,0,88,92, + 1,0,0,0,89,91,9,0,0,0,90,89,1,0,0,0,91,94,1,0,0,0,92,93,1,0,0,0, + 92,90,1,0,0,0,93,95,1,0,0,0,94,92,1,0,0,0,95,96,5,62,0,0,96,97,1, + 0,0,0,97,98,6,2,0,0,98,8,1,0,0,0,99,100,5,38,0,0,100,101,3,35,16, + 0,101,102,5,59,0,0,102,10,1,0,0,0,103,104,5,38,0,0,104,105,5,35, + 0,0,105,107,1,0,0,0,106,108,3,41,19,0,107,106,1,0,0,0,108,109,1, + 0,0,0,109,107,1,0,0,0,109,110,1,0,0,0,110,111,1,0,0,0,111,112,5, + 59,0,0,112,125,1,0,0,0,113,114,5,38,0,0,114,115,5,35,0,0,115,116, + 5,120,0,0,116,118,1,0,0,0,117,119,3,39,18,0,118,117,1,0,0,0,119, + 120,1,0,0,0,120,118,1,0,0,0,120,121,1,0,0,0,121,122,1,0,0,0,122, + 123,5,59,0,0,123,125,1,0,0,0,124,103,1,0,0,0,124,113,1,0,0,0,125, + 12,1,0,0,0,126,132,7,0,0,0,127,129,5,13,0,0,128,127,1,0,0,0,128, + 129,1,0,0,0,129,130,1,0,0,0,130,132,5,10,0,0,131,126,1,0,0,0,131, + 128,1,0,0,0,132,133,1,0,0,0,133,131,1,0,0,0,133,134,1,0,0,0,134, + 14,1,0,0,0,135,136,5,60,0,0,136,137,1,0,0,0,137,138,6,6,1,0,138, + 16,1,0,0,0,139,140,5,60,0,0,140,141,5,63,0,0,141,142,5,120,0,0,142, + 143,5,109,0,0,143,144,5,108,0,0,144,145,1,0,0,0,145,146,3,37,17, + 0,146,147,1,0,0,0,147,148,6,7,1,0,148,18,1,0,0,0,149,150,5,60,0, + 0,150,151,5,63,0,0,151,152,1,0,0,0,152,153,3,35,16,0,153,154,1,0, + 0,0,154,155,6,8,2,0,155,156,6,8,3,0,156,20,1,0,0,0,157,159,8,1,0, + 0,158,157,1,0,0,0,159,160,1,0,0,0,160,158,1,0,0,0,160,161,1,0,0, + 0,161,22,1,0,0,0,162,163,5,62,0,0,163,164,1,0,0,0,164,165,6,10,4, + 0,165,24,1,0,0,0,166,167,5,63,0,0,167,168,5,62,0,0,168,169,1,0,0, + 0,169,170,6,11,4,0,170,26,1,0,0,0,171,172,5,47,0,0,172,173,5,62, + 0,0,173,174,1,0,0,0,174,175,6,12,4,0,175,28,1,0,0,0,176,177,5,47, + 0,0,177,30,1,0,0,0,178,179,5,61,0,0,179,32,1,0,0,0,180,184,5,34, + 0,0,181,183,8,2,0,0,182,181,1,0,0,0,183,186,1,0,0,0,184,182,1,0, + 0,0,184,185,1,0,0,0,185,187,1,0,0,0,186,184,1,0,0,0,187,197,5,34, + 0,0,188,192,5,39,0,0,189,191,8,3,0,0,190,189,1,0,0,0,191,194,1,0, + 0,0,192,190,1,0,0,0,192,193,1,0,0,0,193,195,1,0,0,0,194,192,1,0, + 0,0,195,197,5,39,0,0,196,180,1,0,0,0,196,188,1,0,0,0,197,34,1,0, + 0,0,198,202,3,45,21,0,199,201,3,43,20,0,200,199,1,0,0,0,201,204, + 1,0,0,0,202,200,1,0,0,0,202,203,1,0,0,0,203,36,1,0,0,0,204,202,1, + 0,0,0,205,206,7,4,0,0,206,207,1,0,0,0,207,208,6,17,0,0,208,38,1, + 0,0,0,209,210,7,5,0,0,210,40,1,0,0,0,211,212,7,6,0,0,212,42,1,0, + 0,0,213,218,3,45,21,0,214,218,2,45,46,0,215,218,3,41,19,0,216,218, + 7,7,0,0,217,213,1,0,0,0,217,214,1,0,0,0,217,215,1,0,0,0,217,216, + 1,0,0,0,218,44,1,0,0,0,219,221,7,8,0,0,220,219,1,0,0,0,221,46,1, + 0,0,0,222,223,5,63,0,0,223,224,5,62,0,0,224,225,1,0,0,0,225,226, + 6,22,4,0,226,48,1,0,0,0,227,228,9,0,0,0,228,229,1,0,0,0,229,230, + 6,23,2,0,230,50,1,0,0,0,19,0,1,2,59,79,92,109,120,124,128,131,133, + 160,184,192,196,202,217,220,5,6,0,0,5,1,0,3,0,0,5,2,0,4,0,0 + ] + +class XMLLexer(Lexer): + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + INSIDE = 1 + PROC_INSTR = 2 + + COMMENT = 1 + CDATA = 2 + DTD = 3 + EntityRef = 4 + CharRef = 5 + SEA_WS = 6 + OPEN = 7 + XMLDeclOpen = 8 + TEXT = 9 + CLOSE = 10 + SPECIAL_CLOSE = 11 + SLASH_CLOSE = 12 + SLASH = 13 + EQUALS = 14 + STRING = 15 + Name = 16 + S = 17 + PI = 18 + + channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] + + modeNames = [ "DEFAULT_MODE", "INSIDE", "PROC_INSTR" ] + + literalNames = [ "", + "'<'", "'>'", "'/>'", "'/'", "'='" ] + + symbolicNames = [ "", + "COMMENT", "CDATA", "DTD", "EntityRef", "CharRef", "SEA_WS", + "OPEN", 
"XMLDeclOpen", "TEXT", "CLOSE", "SPECIAL_CLOSE", "SLASH_CLOSE", + "SLASH", "EQUALS", "STRING", "Name", "S", "PI" ] + + ruleNames = [ "COMMENT", "CDATA", "DTD", "EntityRef", "CharRef", "SEA_WS", + "OPEN", "XMLDeclOpen", "SPECIAL_OPEN", "TEXT", "CLOSE", + "SPECIAL_CLOSE", "SLASH_CLOSE", "SLASH", "EQUALS", "STRING", + "Name", "S", "HEXDIGIT", "DIGIT", "NameChar", "NameStartChar", + "PI", "IGNORE" ] + + grammarFileName = "XMLLexer.g4" + + def __init__(self, input=None, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.0") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None + + diff --git a/wwpdb/utils/nmr/pk/XMLParser.py b/wwpdb/utils/nmr/pk/XMLParser.py new file mode 100644 index 00000000..d5177a40 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XMLParser.py @@ -0,0 +1,716 @@ +# Generated from XMLParser.g4 by ANTLR 4.13.0 +# encoding: utf-8 +from antlr4 import * +from io import StringIO +import sys +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + +def serializedATN(): + return [ + 4,1,18,98,2,0,7,0,2,1,7,1,2,2,7,2,2,3,7,3,2,4,7,4,2,5,7,5,2,6,7, + 6,2,7,7,7,1,0,3,0,18,8,0,1,0,5,0,21,8,0,10,0,12,0,24,9,0,1,0,1,0, + 5,0,28,8,0,10,0,12,0,31,9,0,1,0,1,0,1,1,1,1,5,1,37,8,1,10,1,12,1, + 40,9,1,1,1,1,1,1,2,3,2,45,8,2,1,2,1,2,1,2,1,2,1,2,3,2,52,8,2,1,2, + 3,2,55,8,2,5,2,57,8,2,10,2,12,2,60,9,2,1,3,1,3,1,3,5,3,65,8,3,10, + 3,12,3,68,9,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,5,3,80,8,3, + 10,3,12,3,83,9,3,1,3,3,3,86,8,3,1,4,1,4,1,5,1,5,1,5,1,5,1,6,1,6, + 1,7,1,7,1,7,0,0,8,0,2,4,6,8,10,12,14,0,3,1,0,4,5,2,0,6,6,9,9,3,0, + 1,1,6,6,18,18,103,0,17,1,0,0,0,2,34,1,0,0,0,4,44,1,0,0,0,6,85,1, + 0,0,0,8,87,1,0,0,0,10,89,1,0,0,0,12,93,1,0,0,0,14,95,1,0,0,0,16, + 18,3,2,1,0,17,16,1,0,0,0,17,18,1,0,0,0,18,22,1,0,0,0,19,21,3,14, + 7,0,20,19,1,0,0,0,21,24,1,0,0,0,22,20,1,0,0,0,22,23,1,0,0,0,23,25, + 1,0,0,0,24,22,1,0,0,0,25,29,3,6,3,0,26,28,3,14,7,0,27,26,1,0,0,0, + 28,31,1,0,0,0,29,27,1,0,0,0,29,30,1,0,0,0,30,32,1,0,0,0,31,29,1, + 0,0,0,32,33,5,0,0,1,33,1,1,0,0,0,34,38,5,8,0,0,35,37,3,10,5,0,36, + 35,1,0,0,0,37,40,1,0,0,0,38,36,1,0,0,0,38,39,1,0,0,0,39,41,1,0,0, + 0,40,38,1,0,0,0,41,42,5,11,0,0,42,3,1,0,0,0,43,45,3,12,6,0,44,43, + 1,0,0,0,44,45,1,0,0,0,45,58,1,0,0,0,46,52,3,6,3,0,47,52,3,8,4,0, + 48,52,5,2,0,0,49,52,5,18,0,0,50,52,5,1,0,0,51,46,1,0,0,0,51,47,1, + 0,0,0,51,48,1,0,0,0,51,49,1,0,0,0,51,50,1,0,0,0,52,54,1,0,0,0,53, + 55,3,12,6,0,54,53,1,0,0,0,54,55,1,0,0,0,55,57,1,0,0,0,56,51,1,0, + 0,0,57,60,1,0,0,0,58,56,1,0,0,0,58,59,1,0,0,0,59,5,1,0,0,0,60,58, + 1,0,0,0,61,62,5,7,0,0,62,66,5,16,0,0,63,65,3,10,5,0,64,63,1,0,0, + 0,65,68,1,0,0,0,66,64,1,0,0,0,66,67,1,0,0,0,67,69,1,0,0,0,68,66, + 1,0,0,0,69,70,5,10,0,0,70,71,3,4,2,0,71,72,5,7,0,0,72,73,5,13,0, + 0,73,74,5,16,0,0,74,75,5,10,0,0,75,86,1,0,0,0,76,77,5,7,0,0,77,81, + 5,16,0,0,78,80,3,10,5,0,79,78,1,0,0,0,80,83,1,0,0,0,81,79,1,0,0, + 0,81,82,1,0,0,0,82,84,1,0,0,0,83,81,1,0,0,0,84,86,5,12,0,0,85,61, + 1,0,0,0,85,76,1,0,0,0,86,7,1,0,0,0,87,88,7,0,0,0,88,9,1,0,0,0,89, + 90,5,16,0,0,90,91,5,14,0,0,91,92,5,15,0,0,92,11,1,0,0,0,93,94,7, + 1,0,0,94,13,1,0,0,0,95,96,7,2,0,0,96,15,1,0,0,0,11,17,22,29,38,44, + 51,54,58,66,81,85 + ] + +class XMLParser ( Parser ): + + grammarFileName = "XMLParser.g4" + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + sharedContextCache = PredictionContextCache() + + 
literalNames = [ "", "", "", "", + "", "", "", "'<'", "", + "", "'>'", "", "'/>'", "'/'", "'='" ] + + symbolicNames = [ "", "COMMENT", "CDATA", "DTD", "EntityRef", + "CharRef", "SEA_WS", "OPEN", "XMLDeclOpen", "TEXT", + "CLOSE", "SPECIAL_CLOSE", "SLASH_CLOSE", "SLASH", + "EQUALS", "STRING", "Name", "S", "PI" ] + + RULE_document = 0 + RULE_prolog = 1 + RULE_content = 2 + RULE_element = 3 + RULE_reference = 4 + RULE_attribute = 5 + RULE_chardata = 6 + RULE_misc = 7 + + ruleNames = [ "document", "prolog", "content", "element", "reference", + "attribute", "chardata", "misc" ] + + EOF = Token.EOF + COMMENT=1 + CDATA=2 + DTD=3 + EntityRef=4 + CharRef=5 + SEA_WS=6 + OPEN=7 + XMLDeclOpen=8 + TEXT=9 + CLOSE=10 + SPECIAL_CLOSE=11 + SLASH_CLOSE=12 + SLASH=13 + EQUALS=14 + STRING=15 + Name=16 + S=17 + PI=18 + + def __init__(self, input:TokenStream, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.0") + self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) + self._predicates = None + + + + + class DocumentContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def element(self): + return self.getTypedRuleContext(XMLParser.ElementContext,0) + + + def EOF(self): + return self.getToken(XMLParser.EOF, 0) + + def prolog(self): + return self.getTypedRuleContext(XMLParser.PrologContext,0) + + + def misc(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XMLParser.MiscContext) + else: + return self.getTypedRuleContext(XMLParser.MiscContext,i) + + + def getRuleIndex(self): + return XMLParser.RULE_document + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterDocument" ): + listener.enterDocument(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitDocument" ): + listener.exitDocument(self) + + + + + def document(self): + + localctx = XMLParser.DocumentContext(self, self._ctx, self.state) + self.enterRule(localctx, 0, self.RULE_document) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 17 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==8: + self.state = 16 + self.prolog() + + + self.state = 22 + self._errHandler.sync(self) + _la = self._input.LA(1) + while (((_la) & ~0x3f) == 0 and ((1 << _la) & 262210) != 0): + self.state = 19 + self.misc() + self.state = 24 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 25 + self.element() + self.state = 29 + self._errHandler.sync(self) + _la = self._input.LA(1) + while (((_la) & ~0x3f) == 0 and ((1 << _la) & 262210) != 0): + self.state = 26 + self.misc() + self.state = 31 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 32 + self.match(XMLParser.EOF) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class PrologContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def XMLDeclOpen(self): + return self.getToken(XMLParser.XMLDeclOpen, 0) + + def SPECIAL_CLOSE(self): + return self.getToken(XMLParser.SPECIAL_CLOSE, 0) + + def attribute(self, i:int=None): + if i is None: + return 
self.getTypedRuleContexts(XMLParser.AttributeContext) + else: + return self.getTypedRuleContext(XMLParser.AttributeContext,i) + + + def getRuleIndex(self): + return XMLParser.RULE_prolog + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterProlog" ): + listener.enterProlog(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitProlog" ): + listener.exitProlog(self) + + + + + def prolog(self): + + localctx = XMLParser.PrologContext(self, self._ctx, self.state) + self.enterRule(localctx, 2, self.RULE_prolog) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 34 + self.match(XMLParser.XMLDeclOpen) + self.state = 38 + self._errHandler.sync(self) + _la = self._input.LA(1) + while _la==16: + self.state = 35 + self.attribute() + self.state = 40 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 41 + self.match(XMLParser.SPECIAL_CLOSE) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ContentContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def chardata(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XMLParser.ChardataContext) + else: + return self.getTypedRuleContext(XMLParser.ChardataContext,i) + + + def element(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XMLParser.ElementContext) + else: + return self.getTypedRuleContext(XMLParser.ElementContext,i) + + + def reference(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XMLParser.ReferenceContext) + else: + return self.getTypedRuleContext(XMLParser.ReferenceContext,i) + + + def CDATA(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.CDATA) + else: + return self.getToken(XMLParser.CDATA, i) + + def PI(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.PI) + else: + return self.getToken(XMLParser.PI, i) + + def COMMENT(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.COMMENT) + else: + return self.getToken(XMLParser.COMMENT, i) + + def getRuleIndex(self): + return XMLParser.RULE_content + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterContent" ): + listener.enterContent(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitContent" ): + listener.exitContent(self) + + + + + def content(self): + + localctx = XMLParser.ContentContext(self, self._ctx, self.state) + self.enterRule(localctx, 4, self.RULE_content) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 44 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==6 or _la==9: + self.state = 43 + self.chardata() + + + self.state = 58 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,7,self._ctx) + while _alt!=2 and _alt!=ATN.INVALID_ALT_NUMBER: + if _alt==1: + self.state = 51 + self._errHandler.sync(self) + token = self._input.LA(1) + if token in [7]: + self.state = 46 + self.element() + pass + elif token in [4, 5]: + self.state = 47 + self.reference() + pass + elif token in [2]: + self.state = 48 + self.match(XMLParser.CDATA) + pass + elif token in [18]: + self.state = 49 + self.match(XMLParser.PI) + pass + elif token in 
[1]: + self.state = 50 + self.match(XMLParser.COMMENT) + pass + else: + raise NoViableAltException(self) + + self.state = 54 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==6 or _la==9: + self.state = 53 + self.chardata() + + + self.state = 60 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,7,self._ctx) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ElementContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def OPEN(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.OPEN) + else: + return self.getToken(XMLParser.OPEN, i) + + def Name(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.Name) + else: + return self.getToken(XMLParser.Name, i) + + def CLOSE(self, i:int=None): + if i is None: + return self.getTokens(XMLParser.CLOSE) + else: + return self.getToken(XMLParser.CLOSE, i) + + def content(self): + return self.getTypedRuleContext(XMLParser.ContentContext,0) + + + def SLASH(self): + return self.getToken(XMLParser.SLASH, 0) + + def attribute(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XMLParser.AttributeContext) + else: + return self.getTypedRuleContext(XMLParser.AttributeContext,i) + + + def SLASH_CLOSE(self): + return self.getToken(XMLParser.SLASH_CLOSE, 0) + + def getRuleIndex(self): + return XMLParser.RULE_element + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterElement" ): + listener.enterElement(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitElement" ): + listener.exitElement(self) + + + + + def element(self): + + localctx = XMLParser.ElementContext(self, self._ctx, self.state) + self.enterRule(localctx, 6, self.RULE_element) + self._la = 0 # Token type + try: + self.state = 85 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,10,self._ctx) + if la_ == 1: + self.enterOuterAlt(localctx, 1) + self.state = 61 + self.match(XMLParser.OPEN) + self.state = 62 + self.match(XMLParser.Name) + self.state = 66 + self._errHandler.sync(self) + _la = self._input.LA(1) + while _la==16: + self.state = 63 + self.attribute() + self.state = 68 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 69 + self.match(XMLParser.CLOSE) + self.state = 70 + self.content() + self.state = 71 + self.match(XMLParser.OPEN) + self.state = 72 + self.match(XMLParser.SLASH) + self.state = 73 + self.match(XMLParser.Name) + self.state = 74 + self.match(XMLParser.CLOSE) + pass + + elif la_ == 2: + self.enterOuterAlt(localctx, 2) + self.state = 76 + self.match(XMLParser.OPEN) + self.state = 77 + self.match(XMLParser.Name) + self.state = 81 + self._errHandler.sync(self) + _la = self._input.LA(1) + while _la==16: + self.state = 78 + self.attribute() + self.state = 83 + self._errHandler.sync(self) + _la = self._input.LA(1) + + self.state = 84 + self.match(XMLParser.SLASH_CLOSE) + pass + + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ReferenceContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, 
parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def EntityRef(self): + return self.getToken(XMLParser.EntityRef, 0) + + def CharRef(self): + return self.getToken(XMLParser.CharRef, 0) + + def getRuleIndex(self): + return XMLParser.RULE_reference + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterReference" ): + listener.enterReference(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitReference" ): + listener.exitReference(self) + + + + + def reference(self): + + localctx = XMLParser.ReferenceContext(self, self._ctx, self.state) + self.enterRule(localctx, 8, self.RULE_reference) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 87 + _la = self._input.LA(1) + if not(_la==4 or _la==5): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class AttributeContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def Name(self): + return self.getToken(XMLParser.Name, 0) + + def EQUALS(self): + return self.getToken(XMLParser.EQUALS, 0) + + def STRING(self): + return self.getToken(XMLParser.STRING, 0) + + def getRuleIndex(self): + return XMLParser.RULE_attribute + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterAttribute" ): + listener.enterAttribute(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitAttribute" ): + listener.exitAttribute(self) + + + + + def attribute(self): + + localctx = XMLParser.AttributeContext(self, self._ctx, self.state) + self.enterRule(localctx, 10, self.RULE_attribute) + try: + self.enterOuterAlt(localctx, 1) + self.state = 89 + self.match(XMLParser.Name) + self.state = 90 + self.match(XMLParser.EQUALS) + self.state = 91 + self.match(XMLParser.STRING) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ChardataContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def TEXT(self): + return self.getToken(XMLParser.TEXT, 0) + + def SEA_WS(self): + return self.getToken(XMLParser.SEA_WS, 0) + + def getRuleIndex(self): + return XMLParser.RULE_chardata + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterChardata" ): + listener.enterChardata(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitChardata" ): + listener.exitChardata(self) + + + + + def chardata(self): + + localctx = XMLParser.ChardataContext(self, self._ctx, self.state) + self.enterRule(localctx, 12, self.RULE_chardata) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 93 + _la = self._input.LA(1) + if not(_la==6 or _la==9): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + except RecognitionException as re: + localctx.exception = re + 
self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class MiscContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def COMMENT(self): + return self.getToken(XMLParser.COMMENT, 0) + + def PI(self): + return self.getToken(XMLParser.PI, 0) + + def SEA_WS(self): + return self.getToken(XMLParser.SEA_WS, 0) + + def getRuleIndex(self): + return XMLParser.RULE_misc + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterMisc" ): + listener.enterMisc(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitMisc" ): + listener.exitMisc(self) + + + + + def misc(self): + + localctx = XMLParser.MiscContext(self, self._ctx, self.state) + self.enterRule(localctx, 14, self.RULE_misc) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 95 + _la = self._input.LA(1) + if not((((_la) & ~0x3f) == 0 and ((1 << _la) & 262210) != 0)): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + + + diff --git a/wwpdb/utils/nmr/pk/XwinNmrPKReader.py b/wwpdb/utils/nmr/pk/XwinNmrPKReader.py index 535c99be..4d1c5074 100644 --- a/wwpdb/utils/nmr/pk/XwinNmrPKReader.py +++ b/wwpdb/utils/nmr/pk/XwinNmrPKReader.py @@ -213,4 +213,4 @@ def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, reader = XwinNmrPKReader(True) reader.setDebugMode(True) reader.parse('../../tests-nmr/mock-data-bruker-peak-list/tspp-xwin.txt', - '../../tests-nmr/mock-data-remediation/2js7/2js7.cif') + '../../tests-nmr/mock-data-remediation/2js7/2js7.cif') # dummy diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLLexer.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLLexer.g4 new file mode 100644 index 00000000..d9b4555f --- /dev/null +++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLLexer.g4 @@ -0,0 +1,93 @@ +/* + [The "BSD licence"] + Copyright (c) 2013 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** XML lexer derived from ANTLR v4 ref guide book example */
+
+// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
+// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
+
+lexer grammar XMLLexer;
+
+// Default "mode": Everything OUTSIDE of a tag
+COMMENT : '<!--' .*? '-->';
+CDATA : '<![CDATA[' .*? ']]>';
+/** Scarf all DTD stuff, Entity Declarations like <!ENTITY ...>,
+ * and Notation Declarations <!NOTATION ...>
+ */
+DTD : '<!' .*? '>' -> skip;
+EntityRef : '&' Name ';';
+CharRef : '&#' DIGIT+ ';' | '&#x' HEXDIGIT+ ';';
+SEA_WS : (' ' | '\t' | '\r'? '\n')+;
+
+OPEN : '<' -> pushMode(INSIDE);
+XMLDeclOpen : '<?xml' S -> pushMode(INSIDE);
+SPECIAL_OPEN : '<?' Name -> more, pushMode(PROC_INSTR);
+
+TEXT: ~[<&]+; // match any 16 bit char other than < and &
+
+// ----------------- Everything INSIDE of a tag ---------------------
+mode INSIDE;
+
+CLOSE : '>' -> popMode;
+SPECIAL_CLOSE : '?>' -> popMode; // close <?xml...?>
+SLASH_CLOSE : '/>' -> popMode;
+SLASH : '/';
+EQUALS : '=';
+STRING : '"' ~[<"]* '"' | '\'' ~[<']* '\'';
+Name : NameStartChar NameChar*;
+S : [ \t\r\n] -> skip;
+
+fragment HEXDIGIT: [a-fA-F0-9];
+
+fragment DIGIT: [0-9];
+
+fragment NameChar:
+    NameStartChar
+    | '-'
+    | '.'
+    | DIGIT
+    | '\u00B7'
+    | '\u0300' ..'\u036F'
+    | '\u203F' ..'\u2040'
+;
+
+fragment NameStartChar:
+    [_:a-zA-Z]
+    | '\u2070' ..'\u218F'
+    | '\u2C00' ..'\u2FEF'
+    | '\u3001' ..'\uD7FF'
+    | '\uF900' ..'\uFDCF'
+    | '\uFDF0' ..'\uFFFD'
+;
+
+// ----------------- Handle <? ... ?> ---------------------
+mode PROC_INSTR;
+
+PI : '?>' -> popMode; // close <?...?>
+IGNORE : . -> more;
\ No newline at end of file
diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLParser.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLParser.g4
new file mode 100644
index 00000000..92527995
--- /dev/null
+++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XMLParser.g4
@@ -0,0 +1,78 @@
+/*
+ [The "BSD licence"]
+ Copyright (c) 2013 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** XML parser derived from ANTLR v4 ref guide book example */ + +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + +parser grammar XMLParser; + +options { + tokenVocab = XMLLexer; +} + +document + : prolog? misc* element misc* EOF + ; + +prolog + : XMLDeclOpen attribute* SPECIAL_CLOSE + ; + +content + : chardata? ((element | reference | CDATA | PI | COMMENT) chardata?)* + ; + +element + : '<' Name attribute* '>' content '<' '/' Name '>' + | '<' Name attribute* '/>' + ; + +reference + : EntityRef + | CharRef + ; + +attribute + : Name '=' STRING + ; // Our STRING is AttValue in spec + +/** ``All text that is not markup constitutes the character data of + * the document.'' + */ +chardata + : TEXT + | SEA_WS + ; + +misc + : COMMENT + | PI + | SEA_WS + ; \ No newline at end of file
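
Reviewer note (not part of the patch): a minimal usage sketch for the new reader, assembled from the __main__ stub in TopSpinPKReader.py and the element paths handled by TopSpinPKParserListener ('/PeakList/PeakList2D/Peak2D' with F1, F2, intensity, volume and annotation attributes). The inline XML sample is an illustrative assumption about the TOPSPIN export rather than a file shipped with this patch, and the dry run below has not been verified end-to-end; constructing ChemCompUtil, BMRBChemShiftStat and NEFTranslator with their defaults may require the full wwpdb runtime environment.

# topspin_pk_sketch.py -- hypothetical file name, not included in this patch
import sys

from wwpdb.utils.nmr.pk.TopSpinPKReader import TopSpinPKReader

# Illustrative 2D peak list in the layout the listener walks:
# /PeakList/PeakList2D/Peak2D with F1/F2 (ppm), intensity and annotation attributes.
PEAK_LIST_XML = """<?xml version="1.0" encoding="UTF-8"?>
<PeakList>
  <PeakList2D>
    <Peak2D F1="120.5" F2="8.21" intensity="1.25E6" annotation="A10.N-A10.H"/>
    <Peak2D F1="118.3" F2="7.94" intensity="9.80E5" annotation="G11.N-G11.H"/>
  </PeakList2D>
</PeakList>
"""

if __name__ == "__main__":
    reader = TopSpinPKReader(verbose=True, log=sys.stdout)
    reader.setDebugMode(True)
    # Parse from an in-memory string instead of a file path (isFilePath=False);
    # no coordinate (CIF) file is supplied for this dry run.
    listener, parser_err_listener, lexer_err_listener = \
        reader.parse(PEAK_LIST_XML, None, isFilePath=False)
    if listener is not None:
        print(listener.getContentSubtype())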