From 598240a9b1719a853ed70a6f5d88038b17bd352e Mon Sep 17 00:00:00 2001 From: yokochi47 Date: Thu, 5 Dec 2024 16:30:46 +0900 Subject: [PATCH] Add XEASY PROT file lexer, parser, parserlistener, and reader --- wwpdb/utils/nmr/NmrDpReport.py | 3 +- wwpdb/utils/nmr/README.md | 8 + wwpdb/utils/nmr/mr/AmberPTParserListener.py | 5 +- wwpdb/utils/nmr/mr/AriaMRParserListener.py | 1 + wwpdb/utils/nmr/mr/CharmmCRDParserListener.py | 3 +- wwpdb/utils/nmr/mr/CharmmCRDReader.py | 24 +- wwpdb/utils/nmr/mr/GromacsPTParserListener.py | 2 +- wwpdb/utils/nmr/mr/ParserListenerUtil.py | 296 +---- wwpdb/utils/nmr/pk/BasePKParserListener.py | 358 ++++++ wwpdb/utils/nmr/pk/NmrPipePKParserListener.py | 26 +- wwpdb/utils/nmr/pk/NmrViewPKParserListener.py | 128 +- wwpdb/utils/nmr/pk/SparkyPKParserListener.py | 26 +- wwpdb/utils/nmr/pk/XeasyPKParserListener.py | 86 +- wwpdb/utils/nmr/pk/XeasyPKReader.py | 25 +- wwpdb/utils/nmr/pk/XeasyPROTLexer.py | 160 +++ wwpdb/utils/nmr/pk/XeasyPROTParser.py | 246 ++++ wwpdb/utils/nmr/pk/XeasyPROTParserListener.py | 1025 +++++++++++++++++ wwpdb/utils/nmr/pk/XeasyPROTReader.py | 196 ++++ .../antlr-grammars-v4.10/XeasyPROTLexer.g4 | 48 + .../antlr-grammars-v4.10/XeasyPROTParser.g4 | 30 + .../antlr-grammars-v4.9/XeasyPROTLexer.g4 | 1 + .../antlr-grammars-v4.9/XeasyPROTParser.g4 | 1 + .../nmr-data-procesing-report-schema-v4.json | 1 + 23 files changed, 2232 insertions(+), 467 deletions(-) create mode 100644 wwpdb/utils/nmr/pk/XeasyPROTLexer.py create mode 100644 wwpdb/utils/nmr/pk/XeasyPROTParser.py create mode 100644 wwpdb/utils/nmr/pk/XeasyPROTParserListener.py create mode 100644 wwpdb/utils/nmr/pk/XeasyPROTReader.py create mode 100644 wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTLexer.g4 create mode 100644 wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTParser.g4 create mode 120000 wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTLexer.g4 create mode 120000 wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTParser.g4 diff --git 
a/wwpdb/utils/nmr/NmrDpReport.py b/wwpdb/utils/nmr/NmrDpReport.py index b1f0b90e..18d105dd 100644 --- a/wwpdb/utils/nmr/NmrDpReport.py +++ b/wwpdb/utils/nmr/NmrDpReport.py @@ -88,6 +88,7 @@ # 14-Nov-2024 M. Yokochi - add 'nm-aux-cha' file type for CHARMM extended CRD (CARD) file acting as CHARMM topology definition # 19-Nov-2024 M. Yokochi - add support for pH titration data (NMR restraint remediation) # 22-Nov-2024 M. Yokochi - add 'nm-res-noa' file type for CYANA NOA (NOE Assignment) file +# 05-Dec-2024 M. Yokochi - add 'nm-aux-xea' file type for XEASY PROT (Assignment) file (NMR restraint remediation) ## """ Wrapper class for NMR data processing report. @author: Masashi Yokochi @@ -1752,7 +1753,7 @@ def __init__(self, verbose=True, log=sys.stdout): 'stats_of_exptl_data') self.file_types = ('pdbx', 'nef', 'nmr-star', - 'nm-aux-amb', 'nm-aux-cha', 'nm-aux-gro', + 'nm-aux-amb', 'nm-aux-cha', 'nm-aux-gro', 'nm-aux-xea', 'nm-res-amb', 'nm-res-ari', 'nm-res-bio', 'nm-res-cha', 'nm-res-cns', 'nm-res-cya', 'nm-res-dyn', 'nm-res-gro', 'nm-res-isd', 'nm-res-mr', 'nm-res-noa', 'nm-res-oth', 'nm-res-ros', 'nm-res-sax', 'nm-res-syb', diff --git a/wwpdb/utils/nmr/README.md b/wwpdb/utils/nmr/README.md index 6df335f7..1e9f2d3f 100644 --- a/wwpdb/utils/nmr/README.md +++ b/wwpdb/utils/nmr/README.md @@ -295,4 +295,12 @@ nm-res-xpl|nm-res-xpl|nmr-restraints/xplor-nih|Restraint file in XPLOR-NIH forma nm-res-oth|nm-res-oth|nmr-restraints/any|Restraint file in other format nm-res-mr|**not applicable**|nmr-restraints/pdb-mr|Restraint file in PDB-MR format nm-res-sax|**not applicable**|nmr-restraints/any|SAX CSV file +nm-pea-ari|**not applicable**|nmr-peaks/any|Spectral peak list file in ARIA format +nm-pea-pip|**not applicable**|nmr-peaks/any|Spectral peak list file in NMRPIPE format +nm-pea-spa|**not applicable**|nmr-peaks/any|Spectral peak list file in SPARKY format +nm-pea-top|**not applicable**|nmr-peaks/any|Spectral peak list file in TOPSPIN format +nm-pea-vie|**not 
applicable**|nmr-peaks/any|Spectral peak list file in NMRVIEW format +nm-aux-xea|**not applicable**|nmr-peaks/any|Assignment file in XEASY format +nm-pea-xea|**not applicable**|nmr-peaks/any|Spectral peak list file in XEASY format +nm-pea-xwi|**not applicable**|nmr-peaks/any|Spectral peak list file in XWINNMR format nm-pea-any|nm-pea-any|nmr-peaks/any|Any spectral peak list file diff --git a/wwpdb/utils/nmr/mr/AmberPTParserListener.py b/wwpdb/utils/nmr/mr/AmberPTParserListener.py index 95252d77..b8382682 100644 --- a/wwpdb/utils/nmr/mr/AmberPTParserListener.py +++ b/wwpdb/utils/nmr/mr/AmberPTParserListener.py @@ -345,7 +345,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): atomName = _atomName retrievedAtomNumList.append(atomNum) - if (terminus[atomNum - 1] and ancAtomName.endswith('T'))\ + if (0 < atomNum < len(terminus) + 1 and terminus[atomNum - 1] and ancAtomName.endswith('T'))\ or is_segment(prevCompId, prevAtomName, compId, atomName)\ or is_ligand(prevCompId, compId)\ or is_metal_ion(compId, atomName)\ @@ -1753,7 +1753,9 @@ def exitTree_chain_classification_statement(self, ctx: AmberPTParser.Tree_chain_ # Enter a parse tree produced by AmberPTParser#format_function. 
def enterFormat_function(self, ctx: AmberPTParser.Format_functionContext): + try: + if ctx.Fortran_format_A(): g = self.__a_format_pat.search(str(ctx.Fortran_format_A())).groups() # self.__cur_column_len = int(g[0]) @@ -1766,6 +1768,7 @@ def enterFormat_function(self, ctx: AmberPTParser.Format_functionContext): g = self.__e_format_pat.search(str(ctx.Fortran_format_E())).groups() # self.__cur_column_len = int(g[0]) self.__cur_word_len = int(g[1]) + except AttributeError: # self.__cur_column_len = None self.__cur_word_len = None diff --git a/wwpdb/utils/nmr/mr/AriaMRParserListener.py b/wwpdb/utils/nmr/mr/AriaMRParserListener.py index 0232d25a..d2ef178c 100644 --- a/wwpdb/utils/nmr/mr/AriaMRParserListener.py +++ b/wwpdb/utils/nmr/mr/AriaMRParserListener.py @@ -2933,6 +2933,7 @@ def exitAtom_pair(self, ctx: AriaMRParser.Atom_pairContext): # pylint: disable= # Enter a parse tree produced by AriaMRParser#atom_selection. def enterAtom_selection(self, ctx: AriaMRParser.Atom_selectionContext): + try: atom_sel = {'atom_id': str(ctx.Simple_name(1)).upper()} diff --git a/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py b/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py index 04b784d5..e28ae86f 100644 --- a/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py +++ b/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py @@ -241,7 +241,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): atomName = _atomName retrievedAtomNumList.append(atomNum) - if (terminus[atomNum - 1] and ancAtomName.endswith('T'))\ + if (0 < atomNum < len(terminus) + 1 and terminus[atomNum - 1] and ancAtomName.endswith('T'))\ or is_segment(prevAsymId, prevCompId, prevAtomName, asymId, compId, atomName)\ or is_ligand(prevCompId, compId)\ or is_metal_ion(compId, atomName)\ @@ -956,6 +956,7 @@ def enterAtom_coordinate(self, ctx: CharmmCRDParser.Atom_coordinateContext): # def exitAtom_coordinate(self, ctx: CharmmCRDParser.Atom_coordinateContext): try: + nr = int(str(ctx.Integer(0))) seqId = int(str(ctx.Integer(1))) diff --git 
a/wwpdb/utils/nmr/mr/CharmmCRDReader.py b/wwpdb/utils/nmr/mr/CharmmCRDReader.py index d8448281..3535d8de 100644 --- a/wwpdb/utils/nmr/mr/CharmmCRDReader.py +++ b/wwpdb/utils/nmr/mr/CharmmCRDReader.py @@ -76,7 +76,7 @@ def setLexerMaxErrorReport(self, maxErrReport): def setParserMaxErrorReport(self, maxErrReport): self.__maxParserErrorReport = maxErrReport - def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): + def parse(self, crdFilePath, cifFilePath=None, isFilePath=True): """ Parse CHARMM CRD file. @return: CharmmCRDParserListener for success or None otherwise, ParserErrorListener, LexerErrorListener. """ @@ -86,25 +86,25 @@ def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): try: if isFilePath: - ptString = None + crdString = None - if not os.access(ptFilePath, os.R_OK): + if not os.access(crdFilePath, os.R_OK): if self.__verbose: - self.__lfh.write(f"CharmmCRDReader.parse() {ptFilePath} is not accessible.\n") + self.__lfh.write(f"CharmmCRDReader.parse() {crdFilePath} is not accessible.\n") return None, None, None - ifh = open(ptFilePath, 'r') # pylint: disable=consider-using-with + ifh = open(crdFilePath, 'r') # pylint: disable=consider-using-with input = InputStream(ifh.read()) else: - ptFilePath, ptString = None, ptFilePath + crdFilePath, crdString = None, crdFilePath - if ptString is None or len(ptString) == 0: + if crdString is None or len(crdString) == 0: if self.__verbose: self.__lfh.write("CharmmCRDReader.parse() Empty string.\n") return None, None, None - input = InputStream(ptString) + input = InputStream(crdString) if cifFilePath is not None: if not os.access(cifFilePath, os.R_OK): @@ -120,7 +120,7 @@ def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): lexer = CharmmCRDLexer(input) lexer.removeErrorListeners() - lexer_error_listener = LexerErrorListener(ptFilePath, maxErrorReport=self.__maxLexerErrorReport) + lexer_error_listener = LexerErrorListener(crdFilePath, maxErrorReport=self.__maxLexerErrorReport) 
lexer.addErrorListener(lexer_error_listener) messageList = lexer_error_listener.getMessageList() @@ -137,7 +137,7 @@ def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): # try with simpler/faster SLL prediction mode parser._interp.predictionMode = PredictionMode.SLL # pylint: disable=protected-access parser.removeErrorListeners() - parser_error_listener = ParserErrorListener(ptFilePath, maxErrorReport=self.__maxParserErrorReport) + parser_error_listener = ParserErrorListener(crdFilePath, maxErrorReport=self.__maxParserErrorReport) parser.addErrorListener(parser_error_listener) tree = parser.charmm_crd() @@ -159,7 +159,7 @@ def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): self.__lfh.write(f"{description['input']}\n") self.__lfh.write(f"{description['marker']}\n") elif messageList is None and cifFilePath is None: - parser_error_listener = ParserErrorListener(ptFilePath, maxErrorReport=self.__maxParserErrorReport) + parser_error_listener = ParserErrorListener(crdFilePath, maxErrorReport=self.__maxParserErrorReport) if self.__verbose: if listener.warningMessage is not None and len(listener.warningMessage) > 0: @@ -177,7 +177,7 @@ def parse(self, ptFilePath, cifFilePath=None, isFilePath=True): """ debug code except Exception as e: if self.__verbose and isFilePath: - self.__lfh.write(f"+CharmmCRDReader.parse() ++ Error - {ptFilePath!r} - {str(e)}\n") + self.__lfh.write(f"+CharmmCRDReader.parse() ++ Error - {crdFilePath!r} - {str(e)}\n") return None, None, None """ finally: diff --git a/wwpdb/utils/nmr/mr/GromacsPTParserListener.py b/wwpdb/utils/nmr/mr/GromacsPTParserListener.py index 3c9b3c25..6f33d3de 100644 --- a/wwpdb/utils/nmr/mr/GromacsPTParserListener.py +++ b/wwpdb/utils/nmr/mr/GromacsPTParserListener.py @@ -285,7 +285,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): atomName = _atomName retrievedAtomNumList.append(atomNum) - if (terminus[atomNum - 1] and ancAtomName.endswith('T'))\ + if (0 < atomNum < len(terminus) + 1 and 
terminus[atomNum - 1] and ancAtomName.endswith('T'))\ or is_segment(prevCompId, prevAtomName, compId, atomName)\ or is_ligand(prevCompId, compId)\ or is_metal_ion(compId, atomName)\ diff --git a/wwpdb/utils/nmr/mr/ParserListenerUtil.py b/wwpdb/utils/nmr/mr/ParserListenerUtil.py index e656e0b4..e399bf33 100644 --- a/wwpdb/utils/nmr/mr/ParserListenerUtil.py +++ b/wwpdb/utils/nmr/mr/ParserListenerUtil.py @@ -22,7 +22,7 @@ import pynmrstar from operator import itemgetter -from typing import Any, List, IO, Set, Tuple, Optional +from typing import Any, List, IO, Tuple, Optional from wwpdb.utils.align.alignlib import PairwiseAlign # pylint: disable=no-name-in-module @@ -39,8 +39,7 @@ isReservedLigCode, alignPolymerSequence, assignPolymerSequence, - getScoreOfSeqAlign, - getOneLetterCode) + getScoreOfSeqAlign) except ImportError: from nmr.io.CifReader import SYMBOLS_ELEMENT from nmr.AlignUtil import (monDict3, @@ -54,8 +53,7 @@ isReservedLigCode, alignPolymerSequence, assignPolymerSequence, - getScoreOfSeqAlign, - getOneLetterCode) + getScoreOfSeqAlign) MAX_ERROR_REPORT = 1 MAX_ERR_LINENUM_REPORT = 20 @@ -2091,10 +2089,6 @@ SPECTRAL_DIM_TRANSFER_TEMPLATE = {'indirect': None, 'type': None} -PEAK_ASSIGNMENT_SEPARATOR_PAT = re.compile('[^0-9A-Za-z]+') -PEAK_ASSIGNMENT_RESID_PAT = re.compile('[0-9]+') -PEAK_HALF_SPIN_NUCLEUS = ('H', 'Q', 'M', 'C', 'N', 'P', 'F') - def toRegEx(string: str) -> str: """ Return regular expression for a given string including XPLOR-NIH wildcard format. @@ -9971,287 +9965,3 @@ def hasKeyValue(d: Optional[dict] = None, key: Any = None) -> bool: return d[key] is not None return False - - -def extractPeakAssignment(numOfDim: int, string: str, segIdSet: Set[str], compIdSet: Set[str], altCompIdSet: Set[str], - aa: bool, dna: bool, rna: bool, nefT) -> Optional[List[dict]]: - """ Extract peak assignment from a given string. 
- """ - - if numOfDim not in (1, 2, 3, 4): - return None - - _str = PEAK_ASSIGNMENT_SEPARATOR_PAT.sub(' ', string.upper()).split() - lenStr = len(_str) - - segIdLike, resIdLike, resNameLike, atomNameLike, _atomNameLike, __atomNameLike, ___atomNameLike =\ - [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr - - segIdSpan, resIdSpan, resNameSpan, atomNameSpan, _atomNameSpan, __atomNameSpan, ___atomNameSpan =\ - [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr - - aaOnly = aa and not dna and not rna - if aaOnly: - oneLetterCodeSet = [getOneLetterCode(compId) for compId in compIdSet] - - for idx, term in enumerate(_str): - for segId in segIdSet: - if term.startswith(segId): - segIdLike[idx] = True - segIdSpan[idx] = (0, len(segId)) - break - - resIdTest = PEAK_ASSIGNMENT_RESID_PAT.search(term) - if resIdTest: - resIdLike[idx] = True - resIdSpan[idx] = resIdTest.span() - - minIndex = len(term) - - for compId in compIdSet: - if compId in term: - resNameLike[idx] = True - index = term.index(compId) - if index < minIndex: - resNameSpan[idx] = (index, index + len(compId)) - minIndex = index - - if not resNameLike[idx]: - for compId in altCompIdSet: - if compId in term: - resNameLike[idx] = True - index = term.index(compId) - if index < minIndex: - resNameSpan[idx] = (index, index + len(compId)) - minIndex = index - - if not resNameLike[idx] and aaOnly: - for compId in oneLetterCodeSet: - if compId in term: - resNameLike[idx] = True - index = term.index(compId) - if index < minIndex: - resNameSpan[idx] = (index, index + len(compId)) - minIndex = index - - for elem in PEAK_HALF_SPIN_NUCLEUS: - if len(elem) == 1: - if elem in term: - index = term.rindex(elem) - atomId = term[index:len(term)] - if resNameLike[idx]: - compId = term[resNameSpan[idx][0]:resNameSpan[idx][1]] - if len(compId) == 1 and aaOnly: - compId = next(k for k, v in 
monDict3.items() if v == compId) - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - atomNameLike[idx] = True - atomNameSpan[idx] = (index, len(term)) - break - for compId in compIdSet: - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - atomNameLike[idx] = True - atomNameSpan[idx] = (index, len(term)) - break - if atomNameLike[idx]: - break - - if atomNameLike[idx]: - _term = term[0:atomNameSpan[idx][0]] - for elem in PEAK_HALF_SPIN_NUCLEUS: - if len(elem) == 1: - if elem in _term: - index = _term.rindex(elem) - atomId = _term[index:len(_term)] - if resNameLike[idx]: - compId = _term[resNameSpan[idx][0]:resNameSpan[idx][1]] - if len(compId) == 1 and aaOnly: - compId = next(k for k, v in monDict3.items() if v == compId) - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - _atomNameLike[idx] = True - _atomNameSpan[idx] = (index, len(_term)) - break - for compId in compIdSet: - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - _atomNameLike[idx] = True - _atomNameSpan[idx] = (index, len(_term)) - break - if _atomNameLike[idx]: - break - - if numOfDim >= 3 and _atomNameLike[idx]: - __term = term[0:_atomNameSpan[idx][0]] - for elem in PEAK_HALF_SPIN_NUCLEUS: - if len(elem) == 1: - if elem in __term: - index = __term.rindex(elem) - atomId = __term[index:len(__term)] - if resNameLike[idx]: - compId = __term[resNameSpan[idx][0]:resNameSpan[idx][1]] - if len(compId) == 1 and aaOnly: - compId = next(k for k, v in monDict3.items() if v == compId) - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - __atomNameLike[idx] = True - __atomNameSpan[idx] = (index, len(__term)) - break - for compId in compIdSet: - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, 
leave_unmatched=True) - if details is None: - __atomNameLike[idx] = True - __atomNameSpan[idx] = (index, len(__term)) - break - if __atomNameLike[idx]: - break - - if numOfDim >= 4 and __atomNameLike[idx]: - ___term = term[0:__atomNameSpan[idx][0]] - for elem in PEAK_HALF_SPIN_NUCLEUS: - if len(elem) == 1: - if elem in ___term: - index = ___term.rindex(elem) - atomId = ___term[index:len(___term)] - if resNameLike[idx]: - compId = ___term[resNameSpan[idx][0]:resNameSpan[idx][1]] - if len(compId) == 1 and aaOnly: - compId = next(k for k, v in monDict3.items() if v == compId) - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - ___atomNameLike[idx] = True - ___atomNameSpan[idx] = (index, len(___term)) - break - for compId in compIdSet: - _, _, details = nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) - if details is None: - ___atomNameLike[idx] = True - ___atomNameSpan[idx] = (index, len(___term)) - break - if ___atomNameLike[idx]: - break - - atomNameCount = 0 - for idx in range(lenStr): - if atomNameLike[idx]: - atomNameCount += 1 - if _atomNameLike[idx]: - atomNameCount += 1 - if __atomNameLike[idx]: - atomNameCount += 1 - if ___atomNameLike[idx]: - atomNameCount += 1 - - if atomNameCount < numOfDim: - return None - - if atomNameCount > numOfDim: - atomNameCount = 0 - ignoreBefore = False - for idx in range(lenStr - 1, 0, -1): - if ignoreBefore: - atomNameLike[idx] = _atomNameLike[idx] = __atomNameLike[idx] = ___atomNameLike[idx] = False - else: - if atomNameLike[idx]: - atomNameCount += 1 - if _atomNameLike[idx]: - atomNameCount += 1 - if __atomNameLike[idx]: - atomNameCount += 1 - if ___atomNameLike[idx]: - atomNameCount += 1 - if atomNameCount >= numOfDim: - ignoreBefore = True - - for idx in range(lenStr): - if ___atomNameLike[idx]: - if resNameLike[idx]: - if resNameSpan[idx][1] > ___atomNameSpan[idx][0]: - resNameLike[idx] = False - if resIdLike[idx]: - if resNameSpan[idx][1] 
> ___atomNameSpan[idx][0]: - resIdLike[idx] = False - if segIdLike[idx]: - if segIdSpan[idx][1] > ___atomNameSpan[idx][0]: - segIdLike[idx] = False - - elif __atomNameLike[idx]: - if resNameLike[idx]: - if resNameSpan[idx][1] > __atomNameSpan[idx][0]: - resNameLike[idx] = False - if resIdLike[idx]: - if resNameSpan[idx][1] > __atomNameSpan[idx][0]: - resIdLike[idx] = False - if segIdLike[idx]: - if segIdSpan[idx][1] > __atomNameSpan[idx][0]: - segIdLike[idx] = False - - elif _atomNameLike[idx]: - if resNameLike[idx]: - if resNameSpan[idx][1] > _atomNameSpan[idx][0]: - resNameLike[idx] = False - if resIdLike[idx]: - if resNameSpan[idx][1] > _atomNameSpan[idx][0]: - resIdLike[idx] = False - if segIdLike[idx]: - if segIdSpan[idx][1] > _atomNameSpan[idx][0]: - segIdLike[idx] = False - - elif atomNameLike[idx]: - if resNameLike[idx]: - if resNameSpan[idx][1] > atomNameSpan[idx][0]: - resNameLike[idx] = False - if resIdLike[idx]: - if resNameSpan[idx][1] > atomNameSpan[idx][0]: - resIdLike[idx] = False - if segIdLike[idx]: - if segIdSpan[idx][1] > atomNameSpan[idx][0]: - segIdLike[idx] = False - - if resNameLike[idx]: - if segIdLike[idx]: - if segIdSpan[idx][1] > resNameSpan[idx][0]: - segIdLike[idx] = False - - resIdCount = 0 - for idx in range(lenStr): - if resIdLike[idx]: - resIdCount += 1 - - if resIdCount == 0: - return None - - ret = [] - - segId = resId = resName = atomName = None - dimId = 0 - for idx, term in enumerate(_str): - if segIdLike[idx]: - segId = term[segIdSpan[idx][0]:segIdSpan[idx][1]] - if resIdLike[idx]: - resId = int(term[resIdSpan[idx][0]:resIdSpan[idx][1]]) - if resNameLike[idx]: - resName = term[resNameSpan[idx][0]:resNameSpan[idx][1]] - if len(resName) == 1 and aaOnly: - resName = next(k for k, v in monDict3.items() if v == resName) - if resId is None: - return None - if ___atomNameLike[idx]: - atomName = term[___atomNameSpan[idx][0]:___atomNameSpan[idx][1]] - dimId += 1 - ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 
'comp_id': resName, 'atom_id': atomName}) - if __atomNameLike[idx]: - atomName = term[__atomNameSpan[idx][0]:__atomNameSpan[idx][1]] - dimId += 1 - ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) - if _atomNameLike[idx]: - atomName = term[_atomNameSpan[idx][0]:_atomNameSpan[idx][1]] - dimId += 1 - ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) - if atomNameLike[idx]: - atomName = term[atomNameSpan[idx][0]:atomNameSpan[idx][1]] - dimId += 1 - ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) - - return ret if len(ret) == numOfDim else None # ignore multiple assignments for a peak diff --git a/wwpdb/utils/nmr/pk/BasePKParserListener.py b/wwpdb/utils/nmr/pk/BasePKParserListener.py index 637de37f..fa949a9b 100644 --- a/wwpdb/utils/nmr/pk/BasePKParserListener.py +++ b/wwpdb/utils/nmr/pk/BasePKParserListener.py @@ -52,6 +52,7 @@ zincIonCode, calciumIonCode, isReservedLigCode, + getOneLetterCode, updatePolySeqRst, revertPolySeqRst, sortPolySeqRst, @@ -108,6 +109,7 @@ zincIonCode, calciumIonCode, isReservedLigCode, + getOneLetterCode, updatePolySeqRst, revertPolySeqRst, sortPolySeqRst, @@ -129,6 +131,11 @@ retrieveOriginalSeqIdFromMRMap) +PEAK_ASSIGNMENT_SEPARATOR_PAT = re.compile('[^0-9A-Za-z]+') +PEAK_ASSIGNMENT_RESID_PAT = re.compile('[0-9]+') +PEAK_HALF_SPIN_NUCLEUS = ('H', 'Q', 'M', 'C', 'N', 'P', 'F') + + class BasePKParserListener(): file_type = None @@ -394,6 +401,19 @@ def setListIdCounter(self, listIdCounter: dict): def setEntryId(self, entryId: str): self.entryId = entryId + def enter(self): + self.num_of_dim = -1 + self.acq_dim_id = 1 + self.spectral_dim = {} + self.listIdInternal = {} + self.chainNumberDict = {} + self.extResKey = [] + self.polySeqRst = [] + self.polySeqRstFailed = [] + self.polySeqRstFailedAmbig = [] + self.compIdMap = {} + self.f = [] + def exit(self): try: @@ -897,6 
+917,344 @@ def validatePeak4D(self, index: int, pos_1: float, pos_2: float, pos_3: float, p return dstFunc + def extractPeakAssignment(self, numOfDim: int, string: str, src_index: int) -> Optional[List[dict]]: + """ Extract peak assignment from a given string. + """ + + if numOfDim not in (1, 2, 3, 4): + return None + + _str = PEAK_ASSIGNMENT_SEPARATOR_PAT.sub(' ', string.upper()).split() + lenStr = len(_str) + + segIdLike, resIdLike, resNameLike, atomNameLike, _atomNameLike, __atomNameLike, ___atomNameLike =\ + [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr, [False] * lenStr + + segIdSpan, resIdSpan, resNameSpan, atomNameSpan, _atomNameSpan, __atomNameSpan, ___atomNameSpan =\ + [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr, [None] * lenStr + + aaOnly = self.polyPeptide and not self.polyDeoxyribonucleotide and not self.polyRibonucleotide + oneLetterCodeSet = [getOneLetterCode(compId) for compId in self.compIdSet] if aaOnly else [] + + for idx, term in enumerate(_str): + for segId in self.authAsymIdSet: + if term.startswith(segId): + segIdLike[idx] = True + segIdSpan[idx] = (0, len(segId)) + break + + resIdTest = PEAK_ASSIGNMENT_RESID_PAT.search(term) + if resIdTest: + resIdLike[idx] = True + resIdSpan[idx] = resIdTest.span() + + minIndex = len(term) + + for compId in self.compIdSet: + if compId in term: + resNameLike[idx] = True + index = term.index(compId) + if index < minIndex: + resNameSpan[idx] = (index, index + len(compId)) + minIndex = index + + if not resNameLike[idx]: + for compId in self.altCompIdSet: + if compId in term: + resNameLike[idx] = True + index = term.index(compId) + if index < minIndex: + resNameSpan[idx] = (index, index + len(compId)) + minIndex = index + + if not resNameLike[idx] and aaOnly: + for compId in oneLetterCodeSet: + if compId in term: + resNameLike[idx] = True + index = term.index(compId) + if index < minIndex: + 
resNameSpan[idx] = (index, index + len(compId)) + minIndex = index + + for elem in PEAK_HALF_SPIN_NUCLEUS: + if len(elem) == 1: + if elem in term: + index = term.rindex(elem) + atomId = term[index:len(term)] + if resNameLike[idx]: + compId = term[resNameSpan[idx][0]:resNameSpan[idx][1]] + if len(compId) == 1 and aaOnly: + compId = next(k for k, v in monDict3.items() if v == compId) + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + atomNameLike[idx] = True + atomNameSpan[idx] = (index, len(term)) + break + for compId in self.compIdSet: + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + atomNameLike[idx] = True + atomNameSpan[idx] = (index, len(term)) + break + if atomNameLike[idx]: + break + + if atomNameLike[idx]: + _term = term[0:atomNameSpan[idx][0]] + for elem in PEAK_HALF_SPIN_NUCLEUS: + if len(elem) == 1: + if elem in _term: + index = _term.rindex(elem) + atomId = _term[index:len(_term)] + if resNameLike[idx]: + compId = _term[resNameSpan[idx][0]:resNameSpan[idx][1]] + if len(compId) == 1 and aaOnly: + compId = next(k for k, v in monDict3.items() if v == compId) + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + _atomNameLike[idx] = True + _atomNameSpan[idx] = (index, len(_term)) + break + for compId in self.compIdSet: + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + _atomNameLike[idx] = True + _atomNameSpan[idx] = (index, len(_term)) + break + if _atomNameLike[idx]: + break + + if numOfDim >= 3 and _atomNameLike[idx]: + __term = term[0:_atomNameSpan[idx][0]] + for elem in PEAK_HALF_SPIN_NUCLEUS: + if len(elem) == 1: + if elem in __term: + index = __term.rindex(elem) + atomId = __term[index:len(__term)] + if resNameLike[idx]: + compId = __term[resNameSpan[idx][0]:resNameSpan[idx][1]] + if 
len(compId) == 1 and aaOnly: + compId = next(k for k, v in monDict3.items() if v == compId) + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + __atomNameLike[idx] = True + __atomNameSpan[idx] = (index, len(__term)) + break + for compId in self.compIdSet: + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + __atomNameLike[idx] = True + __atomNameSpan[idx] = (index, len(__term)) + break + if __atomNameLike[idx]: + break + + if numOfDim >= 4 and __atomNameLike[idx]: + ___term = term[0:__atomNameSpan[idx][0]] + for elem in PEAK_HALF_SPIN_NUCLEUS: + if len(elem) == 1: + if elem in ___term: + index = ___term.rindex(elem) + atomId = ___term[index:len(___term)] + if resNameLike[idx]: + compId = ___term[resNameSpan[idx][0]:resNameSpan[idx][1]] + if len(compId) == 1 and aaOnly: + compId = next(k for k, v in monDict3.items() if v == compId) + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + ___atomNameLike[idx] = True + ___atomNameSpan[idx] = (index, len(___term)) + break + for compId in self.compIdSet: + _, _, details = self.nefT.get_valid_star_atom_in_xplor(compId, atomId, leave_unmatched=True) + if details is None: + ___atomNameLike[idx] = True + ___atomNameSpan[idx] = (index, len(___term)) + break + if ___atomNameLike[idx]: + break + + atomNameCount = 0 + for idx in range(lenStr): + if atomNameLike[idx]: + atomNameCount += 1 + if _atomNameLike[idx]: + atomNameCount += 1 + if __atomNameLike[idx]: + atomNameCount += 1 + if ___atomNameLike[idx]: + atomNameCount += 1 + + if atomNameCount < numOfDim: + return None + + if atomNameCount > numOfDim: + atomNameCount = 0 + ignoreBefore = False + for idx in range(lenStr - 1, 0, -1): + if ignoreBefore: + atomNameLike[idx] = _atomNameLike[idx] = __atomNameLike[idx] = ___atomNameLike[idx] = False + else: + if atomNameLike[idx]: + 
atomNameCount += 1 + if _atomNameLike[idx]: + atomNameCount += 1 + if __atomNameLike[idx]: + atomNameCount += 1 + if ___atomNameLike[idx]: + atomNameCount += 1 + if atomNameCount >= numOfDim: + ignoreBefore = True + + for idx in range(lenStr): + if ___atomNameLike[idx]: + if resNameLike[idx]: + if resNameSpan[idx][1] > ___atomNameSpan[idx][0]: + resNameLike[idx] = False + if resIdLike[idx]: + if resIdSpan[idx][1] > ___atomNameSpan[idx][0]: + resIdLike[idx] = False + if segIdLike[idx]: + if segIdSpan[idx][1] > ___atomNameSpan[idx][0]: + segIdLike[idx] = False + + elif __atomNameLike[idx]: + if resNameLike[idx]: + if resNameSpan[idx][1] > __atomNameSpan[idx][0]: + resNameLike[idx] = False + if resIdLike[idx]: + if resIdSpan[idx][1] > __atomNameSpan[idx][0]: + resIdLike[idx] = False + if segIdLike[idx]: + if segIdSpan[idx][1] > __atomNameSpan[idx][0]: + segIdLike[idx] = False + + elif _atomNameLike[idx]: + if resNameLike[idx]: + if resNameSpan[idx][1] > _atomNameSpan[idx][0]: + resNameLike[idx] = False + if resIdLike[idx]: + if resIdSpan[idx][1] > _atomNameSpan[idx][0]: + resIdLike[idx] = False + if segIdLike[idx]: + if segIdSpan[idx][1] > _atomNameSpan[idx][0]: + segIdLike[idx] = False + + elif atomNameLike[idx]: + if resNameLike[idx]: + if resNameSpan[idx][1] > atomNameSpan[idx][0]: + resNameLike[idx] = False + if resIdLike[idx]: + if resIdSpan[idx][1] > atomNameSpan[idx][0]: + resIdLike[idx] = False + if segIdLike[idx]: + if segIdSpan[idx][1] > atomNameSpan[idx][0]: + segIdLike[idx] = False + + if resNameLike[idx]: + if segIdLike[idx]: + if segIdSpan[idx][1] > resNameSpan[idx][0]: + if numOfDim > 1 or not any(resNameLike[_idx] for _idx in range(idx + 1, lenStr)): + segIdLike[idx] = False + else: + resNameLike[idx] = False + + resIdCount = 0 + for idx in range(lenStr): + if resIdLike[idx]: + resIdCount += 1 + + if resIdCount == 0: + return None + + ret = [] + + segId = resId = resName = atomName = None + dimId = 0 + for idx, term in enumerate(_str): + if 
segIdLike[idx]: + segId = term[segIdSpan[idx][0]:segIdSpan[idx][1]] + if resIdLike[idx]: + resId = int(term[resIdSpan[idx][0]:resIdSpan[idx][1]]) + if resNameLike[idx]: + resName = term[resNameSpan[idx][0]:resNameSpan[idx][1]] + if len(resName) == 1 and aaOnly: + resName = next(k for k, v in monDict3.items() if v == resName) + if ___atomNameLike[idx]: + if resId is None: + return None + atomName = term[___atomNameSpan[idx][0]:___atomNameSpan[idx][1]] + if segId is None and resName is None: + chainAssign = self.assignCoordPolymerSequenceWithoutCompId(resId, atomName, src_index) + if len(chainAssign) > 0: + segId, _, resName, _ = chainAssign[0] + elif segId is None: + chainAssign, _ = self.assignCoordPolymerSequence(segId, resId, resName, atomName, src_index) + if len(chainAssign) > 0: + segId = chainAssign[0][0] + elif resName is None: + chainAssign = self.assignCoordPolymerSequenceWithChainIdWithoutCompId(segId, resId, atomName, src_index) + if len(chainAssign) > 0: + resName = chainAssign[0][2] + dimId += 1 + ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) + if __atomNameLike[idx]: + if resId is None: + return None + atomName = term[__atomNameSpan[idx][0]:__atomNameSpan[idx][1]] + if segId is None and resName is None: + chainAssign = self.assignCoordPolymerSequenceWithoutCompId(resId, atomName, src_index) + if len(chainAssign) > 0: + segId, _, resName, _ = chainAssign[0] + elif segId is None: + chainAssign, _ = self.assignCoordPolymerSequence(segId, resId, resName, atomName, src_index) + if len(chainAssign) > 0: + segId = chainAssign[0][0] + elif resName is None: + chainAssign = self.assignCoordPolymerSequenceWithChainIdWithoutCompId(segId, resId, atomName, src_index) + if len(chainAssign) > 0: + resName = chainAssign[0][2] + dimId += 1 + ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) + if _atomNameLike[idx]: + if resId is None: + return None + 
atomName = term[_atomNameSpan[idx][0]:_atomNameSpan[idx][1]] + if segId is None and resName is None: + chainAssign = self.assignCoordPolymerSequenceWithoutCompId(resId, atomName, src_index) + if len(chainAssign) > 0: + segId, _, resName, _ = chainAssign[0] + elif segId is None: + chainAssign, _ = self.assignCoordPolymerSequence(segId, resId, resName, atomName, src_index) + if len(chainAssign) > 0: + segId = chainAssign[0][0] + elif resName is None: + chainAssign = self.assignCoordPolymerSequenceWithChainIdWithoutCompId(segId, resId, atomName, src_index) + if len(chainAssign) > 0: + resName = chainAssign[0][2] + dimId += 1 + ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) + if atomNameLike[idx]: + if resId is None: + return None + atomName = term[atomNameSpan[idx][0]:atomNameSpan[idx][1]] + if segId is None and resName is None: + chainAssign = self.assignCoordPolymerSequenceWithoutCompId(resId, atomName, src_index) + if len(chainAssign) > 0: + segId, _, resName, _ = chainAssign[0] + elif segId is None: + chainAssign, _ = self.assignCoordPolymerSequence(segId, resId, resName, atomName, src_index) + if len(chainAssign) > 0: + segId = chainAssign[0][0] + elif resName is None: + chainAssign = self.assignCoordPolymerSequenceWithChainIdWithoutCompId(segId, resId, atomName, src_index) + if len(chainAssign) > 0: + resName = chainAssign[0][2] + dimId += 1 + ret.append({'dim_id': dimId, 'chain_id': segId, 'seq_id': resId, 'comp_id': resName, 'atom_id': atomName}) + + return ret if len(ret) == numOfDim else None # ignore multiple assignments for a peak + def getRealChainSeqId(self, ps: dict, seqId: int, compId: Optional[str], isPolySeq=True) -> Tuple[str, int, Optional[str]]: if compId is not None: compId = _compId = translateToStdResName(compId, ccU=self.ccU) diff --git a/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py b/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py index 71bbe399..16c0d336 100644 --- 
a/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py +++ b/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py @@ -24,7 +24,6 @@ SPECTRAL_DIM_TEMPLATE, getMaxEffDigits, roundString, - extractPeakAssignment, getPkRow) from wwpdb.utils.nmr.AlignUtil import emptyValue @@ -37,7 +36,6 @@ SPECTRAL_DIM_TEMPLATE, getMaxEffDigits, roundString, - extractPeakAssignment, getPkRow) from nmr.AlignUtil import emptyValue @@ -59,17 +57,7 @@ def __init__(self, verbose=True, log=sys.stdout, # Enter a parse tree produced by NmrPipePKParser#nmrpipe_pk. def enterNmrpipe_pk(self, ctx: NmrPipePKParser.Nmrpipe_pkContext): # pylint: disable=unused-argument - self.num_of_dim = -1 - self.acq_dim_id = 1 - self.spectral_dim = {} - self.listIdInternal = {} - self.chainNumberDict = {} - self.extResKey = [] - self.polySeqRst = [] - self.polySeqRstFailed = [] - self.polySeqRstFailedAmbig = [] - self.compIdMap = {} - self.f = [] + self.enter() # Exit a parse tree produced by DynamoMRParser#dynamo_mr. def exitNmrpipe_pk(self, ctx: NmrPipePKParser.Nmrpipe_pkContext): # pylint: disable=unused-argument @@ -320,9 +308,7 @@ def exitPeak_2d(self, ctx: NmrPipePKParser.Peak_2dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: @@ -515,9 +501,7 @@ def exitPeak_3d(self, ctx: NmrPipePKParser.Peak_3dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: @@ -735,9 +719,7 @@ def exitPeak_4d(self, ctx: 
NmrPipePKParser.Peak_4dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: diff --git a/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py b/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py index b22d999d..48f3bafb 100644 --- a/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py +++ b/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py @@ -20,7 +20,6 @@ REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) from wwpdb.utils.nmr.AlignUtil import emptyValue @@ -31,7 +30,6 @@ REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) from nmr.AlignUtil import emptyValue @@ -57,17 +55,7 @@ def __init__(self, verbose=True, log=sys.stdout, # Enter a parse tree produced by NmrViewPKParser#nmrview_pk. def enterNmrview_pk(self, ctx: NmrViewPKParser.Nmrview_pkContext): # pylint: disable=unused-argument - self.num_of_dim = -1 - self.acq_dim_id = 1 - self.spectral_dim = {} - self.listIdInternal = {} - self.chainNumberDict = {} - self.extResKey = [] - self.polySeqRst = [] - self.polySeqRstFailed = [] - self.polySeqRstFailedAmbig = [] - self.compIdMap = {} - self.f = [] + self.enter() # Exit a parse tree produced by NmrViewPKParser#nmrview_pk. 
def exitNmrview_pk(self, ctx: NmrViewPKParser.Nmrview_pkContext): # pylint: disable=unused-argument @@ -286,13 +274,13 @@ def exitPeak_2d(self, ctx: NmrViewPKParser.Peak_2dContext): if L1 is not None and L2 is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] if all(len(a) > 0 for a in assignments): @@ -464,15 +452,16 @@ def exitPeak_3d(self, ctx: NmrViewPKParser.Peak_3dContext): if L1 is not None and L2 is not None and L3 is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, L3, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] + 
assignment2 = self.extractPeakAssignment(1, L3, index) + if assignment2 is not None: + assignments[2] = assignment2[0] if all(len(a) > 0 for a in assignments): @@ -667,17 +656,19 @@ def exitPeak_4d(self, ctx: NmrViewPKParser.Peak_4dContext): if L1 is not None and L2 is not None and L3 is not None and L4 is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, L3, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[3] = extractPeakAssignment(1, L4, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] + assignment2 = self.extractPeakAssignment(1, L3, index) + if assignment2 is not None: + assignments[2] = assignment2[0] + assignment3 = self.extractPeakAssignment(1, L4, index) + if assignment3 is not None: + assignments[3] = assignment3[0] if all(len(a) > 0 for a in assignments): @@ -835,13 +826,13 @@ def exitPeak_wo_eju_2d(self, ctx: NmrViewPKParser.Peak_wo_eju_2dContext): if L1 is not None and L2 is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, 
self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] if all(len(a) > 0 for a in assignments): @@ -984,15 +975,16 @@ def exitPeak_wo_eju_3d(self, ctx: NmrViewPKParser.Peak_wo_eju_3dContext): if L1 is not None and L2 is not None and L3 is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, L3, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] + assignment2 = self.extractPeakAssignment(1, L3, index) + if assignment2 is not None: + assignments[2] = assignment2[0] if all(len(a) > 0 for a in assignments): @@ -1155,17 +1147,19 @@ def exitPeak_wo_eju_4d(self, ctx: NmrViewPKParser.Peak_wo_eju_4dContext): if L1 is not None and L2 is not None and L3 is not None and L4 is not None: assignments = [{}] * self.num_of_dim - try: - 
assignments[0] = extractPeakAssignment(1, L1, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, L2, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, L3, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[3] = extractPeakAssignment(1, L4, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, L1, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, L2, index) + if assignment1 is not None: + assignments[1] = assignment1[0] + assignment2 = self.extractPeakAssignment(1, L3, index) + if assignment2 is not None: + assignments[2] = assignment2[0] + assignment3 = self.extractPeakAssignment(1, L4, index) + if assignment3 is not None: + assignments[3] = assignment3[0] if all(len(a) > 0 for a in assignments): diff --git a/wwpdb/utils/nmr/pk/SparkyPKParserListener.py b/wwpdb/utils/nmr/pk/SparkyPKParserListener.py index 8f556374..c76cf305 100644 --- a/wwpdb/utils/nmr/pk/SparkyPKParserListener.py +++ b/wwpdb/utils/nmr/pk/SparkyPKParserListener.py @@ -18,7 +18,6 @@ from wwpdb.utils.nmr.mr.ParserListenerUtil import (REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) except ImportError: @@ -27,7 +26,6 @@ from nmr.mr.ParserListenerUtil import (REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) @@ -50,16 +48,7 @@ def __init__(self, verbose=True, log=sys.stdout, # Enter a 
parse tree produced by SparkyPKParser#sparky_pk. def enterSparky_pk(self, ctx: SparkyPKParser.Sparky_pkContext): # pylint: disable=unused-argument - self.num_of_dim = -1 - self.spectral_dim = {} - self.listIdInternal = {} - self.chainNumberDict = {} - self.extResKey = [] - self.polySeqRst = [] - self.polySeqRstFailed = [] - self.polySeqRstFailedAmbig = [] - self.compIdMap = {} - self.f = [] + self.enter() # Exit a parse tree produced by SparkyPKParser#sparky_pk. def exitSparky_pk(self, ctx: SparkyPKParser.Sparky_pkContext): # pylint: disable=unused-argument @@ -250,9 +239,7 @@ def exitPeak_2d(self, ctx: SparkyPKParser.Peak_2dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: @@ -376,10 +363,7 @@ def exitPeak_3d(self, ctx: SparkyPKParser.Peak_3dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) - print(f'{ass=} {assignments=}') + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: @@ -518,9 +502,7 @@ def exitPeak_4d(self, ctx: SparkyPKParser.Peak_4dContext): has_assignments = False if ass is not None: - assignments =\ - extractPeakAssignment(self.num_of_dim, ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT) + assignments = self.extractPeakAssignment(self.num_of_dim, ass, index) if assignments is not None: diff --git a/wwpdb/utils/nmr/pk/XeasyPKParserListener.py b/wwpdb/utils/nmr/pk/XeasyPKParserListener.py index 
2eb697ec..57c4b601 100644 --- a/wwpdb/utils/nmr/pk/XeasyPKParserListener.py +++ b/wwpdb/utils/nmr/pk/XeasyPKParserListener.py @@ -20,7 +20,6 @@ REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) from wwpdb.utils.nmr.AlignUtil import emptyValue @@ -31,7 +30,6 @@ REPRESENTATIVE_MODEL_ID, REPRESENTATIVE_ALT_ID, SPECTRAL_DIM_TEMPLATE, - extractPeakAssignment, getPkRow) from nmr.AlignUtil import emptyValue @@ -40,32 +38,25 @@ class XeasyPKParserListener(ParseTreeListener, BasePKParserListener): __labels = None + __atomNumberDict = None def __init__(self, verbose=True, log=sys.stdout, representativeModelId=REPRESENTATIVE_MODEL_ID, representativeAltId=REPRESENTATIVE_ALT_ID, mrAtomNameMapping=None, cR=None, caC=None, ccU=None, csStat=None, nefT=None, - reasons=None): + atomNumberDict=None, reasons=None): super().__init__(verbose, log, representativeModelId, representativeAltId, mrAtomNameMapping, cR, caC, ccU, csStat, nefT, reasons) self.file_type = 'nm-pea-xea' self.software_name = 'XEASY' + self.__atomNumberDict = atomNumberDict + # Enter a parse tree produced by XeasyPKParser#xeasy_pk. def enterXeasy_pk(self, ctx: XeasyPKParser.Xeasy_pkContext): # pylint: disable=unused-argument - self.num_of_dim = -1 - self.acq_dim_id = 1 - self.spectral_dim = {} - self.listIdInternal = {} - self.chainNumberDict = {} - self.extResKey = [] - self.polySeqRst = [] - self.polySeqRstFailed = [] - self.polySeqRstFailedAmbig = [] - self.compIdMap = {} - self.f = [] + self.enter() # Exit a parse tree produced by XeasyPKParser#xeasy_pk. 
def exitXeasy_pk(self, ctx: XeasyPKParser.Xeasy_pkContext): # pylint: disable=unused-argument @@ -325,13 +316,13 @@ def exitPeak_2d(self, ctx: XeasyPKParser.Peak_2dContext): if x_ass is not None and y_ass is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, x_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, y_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, x_ass, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, y_ass, index) + if assignment1 is not None: + assignments[1] = assignment1[0] if all(len(a) > 0 for a in assignments): @@ -488,15 +479,16 @@ def exitPeak_3d(self, ctx: XeasyPKParser.Peak_3dContext): if x_ass is not None and y_ass is not None and z_ass is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, x_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, y_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, z_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, x_ass, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, y_ass, index) + if assignment1 is not None: + 
assignments[1] = assignment1[0] + assignment2 = self.extractPeakAssignment(1, z_ass, index) + if assignment2 is not None: + assignments[2] = assignment2[0] if all(len(a) > 0 for a in assignments): @@ -669,17 +661,19 @@ def exitPeak_4d(self, ctx: XeasyPKParser.Peak_4dContext): if x_ass is not None and y_ass is not None and z_ass is not None and a_ass is not None: assignments = [{}] * self.num_of_dim - try: - assignments[0] = extractPeakAssignment(1, x_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[1] = extractPeakAssignment(1, y_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[2] = extractPeakAssignment(1, z_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - assignments[3] = extractPeakAssignment(1, a_ass, self.authAsymIdSet, self.compIdSet, self.altCompIdSet, - self.polyPeptide, self.polyDeoxyribonucleotide, self.polyRibonucleotide, self.nefT)[0] - except Exception: - pass + + assignment0 = self.extractPeakAssignment(1, x_ass, index) + if assignment0 is not None: + assignments[0] = assignment0[0] + assignment1 = self.extractPeakAssignment(1, y_ass, index) + if assignment1 is not None: + assignments[1] = assignment1[0] + assignment2 = self.extractPeakAssignment(1, z_ass, index) + if assignment2 is not None: + assignments[2] = assignment2[0] + assignment3 = self.extractPeakAssignment(1, a_ass, index) + if assignment3 is not None: + assignments[3] = assignment3[0] if all(len(a) > 0 for a in assignments): @@ -808,9 +802,15 @@ def enterAssign(self, ctx: XeasyPKParser.AssignContext): # pylint: disable=unus # Exit a parse tree produced by XeasyPKParser#assign. 
def exitAssign(self, ctx: XeasyPKParser.AssignContext): if ctx.Simple_name() and ctx.Integer(): - self.assignmentSelection.append(str(ctx.Integer()) + str(ctx.Simple_name())) + self.assignmentSelection.append(f'{str(ctx.Integer())} {str(ctx.Simple_name())}') else: - self.assignmentSelection.append(None) + ai = int(str(ctx.Integer())) + if ai == 0 or self.__atomNumberDict is None or ai not in self.__atomNumberDict: + self.assignmentSelection.append(None) + else: + factor = self.__atomNumberDict[ai] + self.assignmentSelection.append(f"{factor['chain_id']} {factor['seq_id']} " + f"{factor['comp_id']} {factor['auth_atom_id']}") def fillCurrentSpectralDim(self): for _dim_id in range(1, self.num_of_dim + 1): diff --git a/wwpdb/utils/nmr/pk/XeasyPKReader.py b/wwpdb/utils/nmr/pk/XeasyPKReader.py index 78503ee9..07626863 100644 --- a/wwpdb/utils/nmr/pk/XeasyPKReader.py +++ b/wwpdb/utils/nmr/pk/XeasyPKReader.py @@ -16,6 +16,7 @@ from wwpdb.utils.nmr.pk.XeasyPKLexer import XeasyPKLexer from wwpdb.utils.nmr.pk.XeasyPKParser import XeasyPKParser from wwpdb.utils.nmr.pk.XeasyPKParserListener import XeasyPKParserListener + from wwpdb.utils.nmr.pk.XeasyPROTReader import XeasyPROTReader from wwpdb.utils.nmr.mr.ParserListenerUtil import (coordAssemblyChecker, MAX_ERROR_REPORT, REPRESENTATIVE_MODEL_ID, @@ -30,6 +31,7 @@ from nmr.pk.XeasyPKLexer import XeasyPKLexer from nmr.pk.XeasyPKParser import XeasyPKParser from nmr.pk.XeasyPKParserListener import XeasyPKParserListener + from nmr.pk.XeasyPROTReader import XeasyPROTReader from nmr.mr.ParserListenerUtil import (coordAssemblyChecker, MAX_ERROR_REPORT, REPRESENTATIVE_MODEL_ID, @@ -49,7 +51,7 @@ def __init__(self, verbose=True, log=sys.stdout, representativeAltId=REPRESENTATIVE_ALT_ID, mrAtomNameMapping=None, cR=None, caC=None, ccU=None, csStat=None, nefT=None, - reasons=None): + atomNumberDict=None, reasons=None): self.__verbose = verbose self.__lfh = log self.__debug = False @@ -79,6 +81,9 @@ def __init__(self, verbose=True, 
log=sys.stdout, if nefT is None: self.__nefT.set_remediation_mode(True) + # XeasyPROTParserListener.getAtomNumberDict() + self.__atomNumberDict = atomNumberDict + # reasons for re-parsing request from the previous trial self.__reasons = reasons @@ -91,7 +96,7 @@ def setLexerMaxErrorReport(self, maxErrReport): def setParserMaxErrorReport(self, maxErrReport): self.__maxParserErrorReport = maxErrReport - def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, + def parse(self, pkFilePath, cifFilePath=None, protFilePath=None, isFilePath=True, createSfDict=False, originalFileName=None, listIdCounter=None, entryId=None): """ Parse XEASY PK file. @return: XeasyPKParserListener for success or None otherwise, ParserErrorListener, LexerErrorListener. @@ -133,6 +138,17 @@ def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, if not self.__cR.parse(cifFilePath): return None, None, None + if protFilePath is not None and self.__atomNumberDict is None: + ptR = XeasyPROTReader(self.__verbose, self.__lfh, + self.__representativeModelId, + self.__representativeAltId, + self.__mrAtomNameMapping, + self.__cR, self.__caC, + self.__ccU, self.__csStat, self.__nefT) + protPL, _, _ = ptR.parse(protFilePath, cifFilePath) + if protPL is not None: + self.__atomNumberDict = protPL.getAtomNumberDict() + lexer = XeasyPKLexer(input) lexer.removeErrorListeners() @@ -164,7 +180,7 @@ def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, self.__mrAtomNameMapping, self.__cR, self.__caC, self.__ccU, self.__csStat, self.__nefT, - self.__reasons) + self.__atomNumberDict, self.__reasons) listener.setDebugMode(self.__debug) listener.createSfDict(createSfDict) if createSfDict: @@ -213,4 +229,5 @@ def parse(self, pkFilePath, cifFilePath=None, isFilePath=True, reader = XeasyPKReader(True) reader.setDebugMode(True) reader.parse('../../tests-nmr/mock-data-remediation/7a2d/aro.peaks', # -ignored-as-pea-any', - '../../tests-nmr/mock-data-remediation/7a2d/7a2d.cif') + 
'../../tests-nmr/mock-data-remediation/7a2d/7a2d.cif', + '../../tests-nmr/mock-data-remediation/7a2d/all.prot') diff --git a/wwpdb/utils/nmr/pk/XeasyPROTLexer.py b/wwpdb/utils/nmr/pk/XeasyPROTLexer.py new file mode 100644 index 00000000..1ae8efa4 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XeasyPROTLexer.py @@ -0,0 +1,160 @@ +# Generated from XeasyPROTLexer.g4 by ANTLR 4.13.0 +from antlr4 import * +from io import StringIO +import sys +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + + +def serializedATN(): + return [ + 4,0,11,262,6,-1,2,0,7,0,2,1,7,1,2,2,7,2,2,3,7,3,2,4,7,4,2,5,7,5, + 2,6,7,6,2,7,7,7,2,8,7,8,2,9,7,9,2,10,7,10,2,11,7,11,2,12,7,12,2, + 13,7,13,2,14,7,14,2,15,7,15,2,16,7,16,2,17,7,17,2,18,7,18,1,0,3, + 0,41,8,0,1,0,1,0,1,1,3,1,46,8,1,1,1,1,1,3,1,50,8,1,1,2,1,2,1,2,1, + 2,1,2,1,2,3,2,58,8,2,1,3,1,3,1,4,4,4,63,8,4,11,4,12,4,64,1,5,4,5, + 68,8,5,11,5,12,5,69,1,5,5,5,73,8,5,10,5,12,5,76,9,5,1,5,5,5,79,8, + 5,10,5,12,5,82,9,5,1,5,5,5,85,8,5,10,5,12,5,88,9,5,1,5,1,5,1,6,4, + 6,93,8,6,11,6,12,6,94,1,6,5,6,98,8,6,10,6,12,6,101,9,6,1,6,5,6,104, + 8,6,10,6,12,6,107,9,6,1,6,5,6,110,8,6,10,6,12,6,113,9,6,1,6,1,6, + 1,7,4,7,118,8,7,11,7,12,7,119,1,7,5,7,123,8,7,10,7,12,7,126,9,7, + 1,7,5,7,129,8,7,10,7,12,7,132,9,7,1,7,5,7,135,8,7,10,7,12,7,138, + 9,7,1,7,1,7,1,8,1,8,1,9,1,9,1,10,1,10,3,10,148,8,10,1,11,1,11,3, + 11,152,8,11,1,12,1,12,3,12,156,8,12,1,13,1,13,5,13,160,8,13,10,13, + 12,13,163,9,13,1,14,4,14,166,8,14,11,14,12,14,167,1,14,1,14,1,15, + 4,15,173,8,15,11,15,12,15,174,1,16,1,16,1,16,5,16,180,8,16,10,16, + 12,16,183,9,16,1,16,1,16,1,16,1,16,1,17,1,17,1,17,4,17,192,8,17, + 11,17,12,17,193,1,17,1,17,4,17,198,8,17,11,17,12,17,199,1,17,1,17, + 4,17,204,8,17,11,17,12,17,205,1,17,1,17,1,17,1,17,1,17,1,17,3,17, + 214,8,17,1,17,5,17,217,8,17,10,17,12,17,220,9,17,1,17,1,17,1,17, + 1,17,1,18,1,18,1,18,4,18,229,8,18,11,18,12,18,230,1,18,1,18,4,18, + 235,8,18,11,18,12,18,236,1,18,1,18,4,18,241,8,18,11,18,12,18,242, + 
1,18,1,18,1,18,1,18,1,18,1,18,3,18,251,8,18,1,18,5,18,254,8,18,10, + 18,12,18,257,9,18,1,18,1,18,1,18,1,18,1,181,0,19,1,1,3,2,5,0,7,0, + 9,0,11,3,13,4,15,5,17,6,19,0,21,0,23,0,25,0,27,0,29,7,31,8,33,9, + 35,10,37,11,1,0,8,2,0,43,43,45,45,1,0,48,57,2,0,10,10,13,13,2,0, + 65,90,97,122,5,0,35,35,42,43,45,46,63,63,95,95,2,0,34,34,39,39,2, + 0,9,9,32,32,5,0,33,33,35,35,38,38,59,59,92,92,294,0,1,1,0,0,0,0, + 3,1,0,0,0,0,11,1,0,0,0,0,13,1,0,0,0,0,15,1,0,0,0,0,17,1,0,0,0,0, + 29,1,0,0,0,0,31,1,0,0,0,0,33,1,0,0,0,0,35,1,0,0,0,0,37,1,0,0,0,1, + 40,1,0,0,0,3,45,1,0,0,0,5,57,1,0,0,0,7,59,1,0,0,0,9,62,1,0,0,0,11, + 67,1,0,0,0,13,92,1,0,0,0,15,117,1,0,0,0,17,141,1,0,0,0,19,143,1, + 0,0,0,21,147,1,0,0,0,23,151,1,0,0,0,25,155,1,0,0,0,27,157,1,0,0, + 0,29,165,1,0,0,0,31,172,1,0,0,0,33,176,1,0,0,0,35,213,1,0,0,0,37, + 250,1,0,0,0,39,41,7,0,0,0,40,39,1,0,0,0,40,41,1,0,0,0,41,42,1,0, + 0,0,42,43,3,9,4,0,43,2,1,0,0,0,44,46,7,0,0,0,45,44,1,0,0,0,45,46, + 1,0,0,0,46,49,1,0,0,0,47,50,3,9,4,0,48,50,3,5,2,0,49,47,1,0,0,0, + 49,48,1,0,0,0,50,4,1,0,0,0,51,52,3,9,4,0,52,53,5,46,0,0,53,54,3, + 9,4,0,54,58,1,0,0,0,55,56,5,46,0,0,56,58,3,9,4,0,57,51,1,0,0,0,57, + 55,1,0,0,0,58,6,1,0,0,0,59,60,7,1,0,0,60,8,1,0,0,0,61,63,3,7,3,0, + 62,61,1,0,0,0,63,64,1,0,0,0,64,62,1,0,0,0,64,65,1,0,0,0,65,10,1, + 0,0,0,66,68,5,35,0,0,67,66,1,0,0,0,68,69,1,0,0,0,69,67,1,0,0,0,69, + 70,1,0,0,0,70,74,1,0,0,0,71,73,8,2,0,0,72,71,1,0,0,0,73,76,1,0,0, + 0,74,72,1,0,0,0,74,75,1,0,0,0,75,80,1,0,0,0,76,74,1,0,0,0,77,79, + 5,35,0,0,78,77,1,0,0,0,79,82,1,0,0,0,80,78,1,0,0,0,80,81,1,0,0,0, + 81,86,1,0,0,0,82,80,1,0,0,0,83,85,8,2,0,0,84,83,1,0,0,0,85,88,1, + 0,0,0,86,84,1,0,0,0,86,87,1,0,0,0,87,89,1,0,0,0,88,86,1,0,0,0,89, + 90,6,5,0,0,90,12,1,0,0,0,91,93,5,33,0,0,92,91,1,0,0,0,93,94,1,0, + 0,0,94,92,1,0,0,0,94,95,1,0,0,0,95,99,1,0,0,0,96,98,8,2,0,0,97,96, + 1,0,0,0,98,101,1,0,0,0,99,97,1,0,0,0,99,100,1,0,0,0,100,105,1,0, + 0,0,101,99,1,0,0,0,102,104,5,33,0,0,103,102,1,0,0,0,104,107,1,0, + 
0,0,105,103,1,0,0,0,105,106,1,0,0,0,106,111,1,0,0,0,107,105,1,0, + 0,0,108,110,8,2,0,0,109,108,1,0,0,0,110,113,1,0,0,0,111,109,1,0, + 0,0,111,112,1,0,0,0,112,114,1,0,0,0,113,111,1,0,0,0,114,115,6,6, + 0,0,115,14,1,0,0,0,116,118,5,59,0,0,117,116,1,0,0,0,118,119,1,0, + 0,0,119,117,1,0,0,0,119,120,1,0,0,0,120,124,1,0,0,0,121,123,8,2, + 0,0,122,121,1,0,0,0,123,126,1,0,0,0,124,122,1,0,0,0,124,125,1,0, + 0,0,125,130,1,0,0,0,126,124,1,0,0,0,127,129,5,59,0,0,128,127,1,0, + 0,0,129,132,1,0,0,0,130,128,1,0,0,0,130,131,1,0,0,0,131,136,1,0, + 0,0,132,130,1,0,0,0,133,135,8,2,0,0,134,133,1,0,0,0,135,138,1,0, + 0,0,136,134,1,0,0,0,136,137,1,0,0,0,137,139,1,0,0,0,138,136,1,0, + 0,0,139,140,6,7,0,0,140,16,1,0,0,0,141,142,3,27,13,0,142,18,1,0, + 0,0,143,144,7,3,0,0,144,20,1,0,0,0,145,148,3,19,9,0,146,148,3,7, + 3,0,147,145,1,0,0,0,147,146,1,0,0,0,148,22,1,0,0,0,149,152,3,21, + 10,0,150,152,7,4,0,0,151,149,1,0,0,0,151,150,1,0,0,0,152,24,1,0, + 0,0,153,156,3,23,11,0,154,156,7,5,0,0,155,153,1,0,0,0,155,154,1, + 0,0,0,156,26,1,0,0,0,157,161,3,23,11,0,158,160,3,25,12,0,159,158, + 1,0,0,0,160,163,1,0,0,0,161,159,1,0,0,0,161,162,1,0,0,0,162,28,1, + 0,0,0,163,161,1,0,0,0,164,166,7,6,0,0,165,164,1,0,0,0,166,167,1, + 0,0,0,167,165,1,0,0,0,167,168,1,0,0,0,168,169,1,0,0,0,169,170,6, + 14,1,0,170,30,1,0,0,0,171,173,7,2,0,0,172,171,1,0,0,0,173,174,1, + 0,0,0,174,172,1,0,0,0,174,175,1,0,0,0,175,32,1,0,0,0,176,181,5,123, + 0,0,177,180,3,33,16,0,178,180,9,0,0,0,179,177,1,0,0,0,179,178,1, + 0,0,0,180,183,1,0,0,0,181,182,1,0,0,0,181,179,1,0,0,0,182,184,1, + 0,0,0,183,181,1,0,0,0,184,185,5,125,0,0,185,186,1,0,0,0,186,187, + 6,16,0,0,187,34,1,0,0,0,188,214,7,7,0,0,189,191,5,47,0,0,190,192, + 5,47,0,0,191,190,1,0,0,0,192,193,1,0,0,0,193,191,1,0,0,0,193,194, + 1,0,0,0,194,214,1,0,0,0,195,197,5,42,0,0,196,198,5,42,0,0,197,196, + 1,0,0,0,198,199,1,0,0,0,199,197,1,0,0,0,199,200,1,0,0,0,200,214, + 1,0,0,0,201,203,5,61,0,0,202,204,5,61,0,0,203,202,1,0,0,0,204,205, + 
1,0,0,0,205,203,1,0,0,0,205,206,1,0,0,0,206,214,1,0,0,0,207,208, + 5,82,0,0,208,209,5,69,0,0,209,210,5,77,0,0,210,211,5,65,0,0,211, + 212,5,82,0,0,212,214,5,75,0,0,213,188,1,0,0,0,213,189,1,0,0,0,213, + 195,1,0,0,0,213,201,1,0,0,0,213,207,1,0,0,0,214,218,1,0,0,0,215, + 217,5,32,0,0,216,215,1,0,0,0,217,220,1,0,0,0,218,216,1,0,0,0,218, + 219,1,0,0,0,219,221,1,0,0,0,220,218,1,0,0,0,221,222,3,31,15,0,222, + 223,1,0,0,0,223,224,6,17,0,0,224,36,1,0,0,0,225,251,7,7,0,0,226, + 228,5,47,0,0,227,229,5,47,0,0,228,227,1,0,0,0,229,230,1,0,0,0,230, + 228,1,0,0,0,230,231,1,0,0,0,231,251,1,0,0,0,232,234,5,42,0,0,233, + 235,5,42,0,0,234,233,1,0,0,0,235,236,1,0,0,0,236,234,1,0,0,0,236, + 237,1,0,0,0,237,251,1,0,0,0,238,240,5,61,0,0,239,241,5,61,0,0,240, + 239,1,0,0,0,241,242,1,0,0,0,242,240,1,0,0,0,242,243,1,0,0,0,243, + 251,1,0,0,0,244,245,5,82,0,0,245,246,5,69,0,0,246,247,5,77,0,0,247, + 248,5,65,0,0,248,249,5,82,0,0,249,251,5,75,0,0,250,225,1,0,0,0,250, + 226,1,0,0,0,250,232,1,0,0,0,250,238,1,0,0,0,250,244,1,0,0,0,251, + 255,1,0,0,0,252,254,8,2,0,0,253,252,1,0,0,0,254,257,1,0,0,0,255, + 253,1,0,0,0,255,256,1,0,0,0,256,258,1,0,0,0,257,255,1,0,0,0,258, + 259,3,31,15,0,259,260,1,0,0,0,260,261,6,18,0,0,261,38,1,0,0,0,36, + 0,40,45,49,57,64,69,74,80,86,94,99,105,111,119,124,130,136,147,151, + 155,161,167,174,179,181,193,199,205,213,218,230,236,242,250,255, + 2,0,1,0,6,0,0 + ] + +class XeasyPROTLexer(Lexer): + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + Integer = 1 + Float = 2 + SHARP_COMMENT = 3 + EXCLM_COMMENT = 4 + SMCLN_COMMENT = 5 + Simple_name = 6 + SPACE = 7 + RETURN = 8 + ENCLOSE_COMMENT = 9 + SECTION_COMMENT = 10 + LINE_COMMENT = 11 + + channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] + + modeNames = [ "DEFAULT_MODE" ] + + literalNames = [ "", + ] + + symbolicNames = [ "", + "Integer", "Float", "SHARP_COMMENT", "EXCLM_COMMENT", "SMCLN_COMMENT", + "Simple_name", "SPACE", 
"RETURN", "ENCLOSE_COMMENT", "SECTION_COMMENT", + "LINE_COMMENT" ] + + ruleNames = [ "Integer", "Float", "DEC_DOT_DEC", "DEC_DIGIT", "DECIMAL", + "SHARP_COMMENT", "EXCLM_COMMENT", "SMCLN_COMMENT", "Simple_name", + "ALPHA", "ALPHA_NUM", "START_CHAR", "NAME_CHAR", "SIMPLE_NAME", + "SPACE", "RETURN", "ENCLOSE_COMMENT", "SECTION_COMMENT", + "LINE_COMMENT" ] + + grammarFileName = "XeasyPROTLexer.g4" + + def __init__(self, input=None, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.0") + self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) + self._actions = None + self._predicates = None + + diff --git a/wwpdb/utils/nmr/pk/XeasyPROTParser.py b/wwpdb/utils/nmr/pk/XeasyPROTParser.py new file mode 100644 index 00000000..49d2b1c7 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XeasyPROTParser.py @@ -0,0 +1,246 @@ +# Generated from XeasyPROTParser.g4 by ANTLR 4.13.0 +# encoding: utf-8 +from antlr4 import * +from io import StringIO +import sys +if sys.version_info[1] > 5: + from typing import TextIO +else: + from typing.io import TextIO + +def serializedATN(): + return [ + 4,1,11,23,2,0,7,0,2,1,7,1,2,2,7,2,1,0,4,0,8,8,0,11,0,12,0,9,1,0, + 1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,1,2,0,0,3,0,2,4,0,1,2,0, + 1,1,6,6,20,0,7,1,0,0,0,2,13,1,0,0,0,4,20,1,0,0,0,6,8,3,2,1,0,7,6, + 1,0,0,0,8,9,1,0,0,0,9,7,1,0,0,0,9,10,1,0,0,0,10,11,1,0,0,0,11,12, + 5,0,0,1,12,1,1,0,0,0,13,14,5,1,0,0,14,15,5,2,0,0,15,16,5,2,0,0,16, + 17,5,6,0,0,17,18,3,4,2,0,18,19,5,8,0,0,19,3,1,0,0,0,20,21,7,0,0, + 0,21,5,1,0,0,0,1,9 + ] + +class XeasyPROTParser ( Parser ): + + grammarFileName = "XeasyPROTParser.g4" + + atn = ATNDeserializer().deserialize(serializedATN()) + + decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] + + sharedContextCache = PredictionContextCache() + + literalNames = [ ] + + symbolicNames = [ "", "Integer", "Float", "SHARP_COMMENT", + "EXCLM_COMMENT", "SMCLN_COMMENT", "Simple_name", "SPACE", + 
"RETURN", "ENCLOSE_COMMENT", "SECTION_COMMENT", "LINE_COMMENT" ] + + RULE_xeasy_prot = 0 + RULE_prot = 1 + RULE_residue = 2 + + ruleNames = [ "xeasy_prot", "prot", "residue" ] + + EOF = Token.EOF + Integer=1 + Float=2 + SHARP_COMMENT=3 + EXCLM_COMMENT=4 + SMCLN_COMMENT=5 + Simple_name=6 + SPACE=7 + RETURN=8 + ENCLOSE_COMMENT=9 + SECTION_COMMENT=10 + LINE_COMMENT=11 + + def __init__(self, input:TokenStream, output:TextIO = sys.stdout): + super().__init__(input, output) + self.checkVersion("4.13.0") + self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) + self._predicates = None + + + + + class Xeasy_protContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def EOF(self): + return self.getToken(XeasyPROTParser.EOF, 0) + + def prot(self, i:int=None): + if i is None: + return self.getTypedRuleContexts(XeasyPROTParser.ProtContext) + else: + return self.getTypedRuleContext(XeasyPROTParser.ProtContext,i) + + + def getRuleIndex(self): + return XeasyPROTParser.RULE_xeasy_prot + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterXeasy_prot" ): + listener.enterXeasy_prot(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitXeasy_prot" ): + listener.exitXeasy_prot(self) + + + + + def xeasy_prot(self): + + localctx = XeasyPROTParser.Xeasy_protContext(self, self._ctx, self.state) + self.enterRule(localctx, 0, self.RULE_xeasy_prot) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 7 + self._errHandler.sync(self) + _la = self._input.LA(1) + while True: + self.state = 6 + self.prot() + self.state = 9 + self._errHandler.sync(self) + _la = self._input.LA(1) + if not (_la==1): + break + + self.state = 11 + self.match(XeasyPROTParser.EOF) + except RecognitionException as re: + localctx.exception = 
re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ProtContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def Integer(self): + return self.getToken(XeasyPROTParser.Integer, 0) + + def Float(self, i:int=None): + if i is None: + return self.getTokens(XeasyPROTParser.Float) + else: + return self.getToken(XeasyPROTParser.Float, i) + + def Simple_name(self): + return self.getToken(XeasyPROTParser.Simple_name, 0) + + def residue(self): + return self.getTypedRuleContext(XeasyPROTParser.ResidueContext,0) + + + def RETURN(self): + return self.getToken(XeasyPROTParser.RETURN, 0) + + def getRuleIndex(self): + return XeasyPROTParser.RULE_prot + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterProt" ): + listener.enterProt(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitProt" ): + listener.exitProt(self) + + + + + def prot(self): + + localctx = XeasyPROTParser.ProtContext(self, self._ctx, self.state) + self.enterRule(localctx, 2, self.RULE_prot) + try: + self.enterOuterAlt(localctx, 1) + self.state = 13 + self.match(XeasyPROTParser.Integer) + self.state = 14 + self.match(XeasyPROTParser.Float) + self.state = 15 + self.match(XeasyPROTParser.Float) + self.state = 16 + self.match(XeasyPROTParser.Simple_name) + self.state = 17 + self.residue() + self.state = 18 + self.match(XeasyPROTParser.RETURN) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class ResidueContext(ParserRuleContext): + __slots__ = 'parser' + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, 
invokingState) + self.parser = parser + + def Integer(self): + return self.getToken(XeasyPROTParser.Integer, 0) + + def Simple_name(self): + return self.getToken(XeasyPROTParser.Simple_name, 0) + + def getRuleIndex(self): + return XeasyPROTParser.RULE_residue + + def enterRule(self, listener:ParseTreeListener): + if hasattr( listener, "enterResidue" ): + listener.enterResidue(self) + + def exitRule(self, listener:ParseTreeListener): + if hasattr( listener, "exitResidue" ): + listener.exitResidue(self) + + + + + def residue(self): + + localctx = XeasyPROTParser.ResidueContext(self, self._ctx, self.state) + self.enterRule(localctx, 4, self.RULE_residue) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 20 + _la = self._input.LA(1) + if not(_la==1 or _la==6): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + + + diff --git a/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py b/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py new file mode 100644 index 00000000..d25a6b92 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py @@ -0,0 +1,1025 @@ +## +# File: XeasyPROTParserListener.py +# Date: 05-Dec-2024 +# +# Updates: +""" ParserListener class for XEASY PROT files. 
+ @author: Masashi Yokochi +""" +import sys +import collections +import copy + +from antlr4 import ParseTreeListener +from rmsd.calculate_rmsd import NAMES_ELEMENT # noqa: F401 pylint: disable=no-name-in-module, import-error, unused-import + +from wwpdb.utils.align.alignlib import PairwiseAlign # pylint: disable=no-name-in-module + +try: + from wwpdb.utils.nmr.pk.XeasyPROTParser import XeasyPROTParser + from wwpdb.utils.nmr.pk.BasePKParserListener import BasePKParserListener + from wwpdb.utils.nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + translateToStdAtomName, + translateToStdAtomNameOfDmpc, + translateToStdResName, + translateToLigandName, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from wwpdb.utils.nmr.ChemCompUtil import ChemCompUtil + from wwpdb.utils.nmr.BMRBChemShiftStat import BMRBChemShiftStat + from wwpdb.utils.nmr.NEFTranslator.NEFTranslator import NEFTranslator + from wwpdb.utils.nmr.AlignUtil import (monDict3, + protonBeginCode, + aminoProtonCode, + isReservedLigCode, + letterToDigit, indexToLetter, + alignPolymerSequence, + assignPolymerSequence, + trimSequenceAlignment, + retrieveAtomIdentFromMRMap, + alignPolymerSequenceWithConflicts, + getRestraintFormatName, + getOneLetterCodeCanSequence) +except ImportError: + from nmr.pk.XeasyPROTParser import XeasyPROTParser + from nmr.pk.BasePKParserListener import BasePKParserListener + from nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + translateToStdAtomName, + translateToStdAtomNameOfDmpc, + translateToStdResName, + translateToLigandName, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from nmr.ChemCompUtil import ChemCompUtil + from nmr.BMRBChemShiftStat import BMRBChemShiftStat + from nmr.NEFTranslator.NEFTranslator import NEFTranslator + from nmr.AlignUtil import (monDict3, + protonBeginCode, + aminoProtonCode, + isReservedLigCode, + letterToDigit, indexToLetter, + alignPolymerSequence, + assignPolymerSequence, + trimSequenceAlignment, + 
retrieveAtomIdentFromMRMap, + alignPolymerSequenceWithConflicts, + getRestraintFormatName, + getOneLetterCodeCanSequence) + + +# This class defines a complete listener for a parse tree produced by XeasyPROTParser. +class XeasyPROTParserListener(ParseTreeListener): + + __file_type = 'nm-aux-xea' + + # atom name mapping of public MR file between the archive coordinates and submitted ones + __mrAtomNameMapping = None + + # CCD accessing utility + __ccU = None + + # BMRB chemical shift statistics + __csStat = None + + # NEFTranslator + __nefT = None + + # Pairwise align + __pA = None + + # coordinates information generated by ParserListenerUtil.coordAssemblyChecker() + __polySeqModel = None + __nonPolyModel = None + __branchedModel = None + __nonPolySeqModel = None + __coordAtomSite = None + __coordUnobsRes = None + + __hasPolySeqModel = False + __hasNonPolyModel = False + __hasBranchedModel = False + __noWaterMol = True + + # polymer sequence of XEASY PROT file + __polySeqPrmTop = None + + __seqAlign = None + __chainAssign = None + + # residue + __cur_residue = None + + # atoms + __atoms = [] + + # XEASY atom number dictionary + __atomNumberDict = None + + __f = None + warningMessage = None + + __base_parser_listener = None + + def __init__(self, verbose=True, log=sys.stdout, + representativeModelId=REPRESENTATIVE_MODEL_ID, + representativeAltId=REPRESENTATIVE_ALT_ID, + mrAtomNameMapping=None, + cR=None, caC=None, ccU=None, csStat=None, nefT=None): + + self.__mrAtomNameMapping = None if mrAtomNameMapping is None or len(mrAtomNameMapping) == 0 else mrAtomNameMapping + + # CCD accessing utility + self.__ccU = ChemCompUtil(verbose, log) if ccU is None else ccU + + if cR is not None: + ret = coordAssemblyChecker(verbose, log, representativeModelId, representativeAltId, + cR, self.__ccU, caC, None, fullCheck=True) + self.__polySeqModel = ret['polymer_sequence'] + self.__nonPolyModel = ret['non_polymer'] + self.__branchedModel = ret['branched'] + self.__coordAtomSite = 
ret['coord_atom_site'] + self.__coordUnobsRes = ret['coord_unobs_res'] + + self.__hasPolySeqModel = self.__polySeqModel is not None and len(self.__polySeqModel) > 0 + self.__hasNonPolyModel = self.__nonPolyModel is not None and len(self.__nonPolyModel) > 0 + self.__hasBranchedModel = self.__branchedModel is not None and len(self.__branchedModel) > 0 + self.__noWaterMol = not self.__hasNonPolyModel or not any(np['comp_id'][0] == 'HOH' for np in self.__nonPolyModel) + + # BMRB chemical shift statistics + self.__csStat = BMRBChemShiftStat(verbose, log, self.__ccU) if csStat is None else csStat + + # NEFTranslator + self.__nefT = NEFTranslator(verbose, log, self.__ccU, self.__csStat) if nefT is None else nefT + + # Pairwise align + if self.__hasPolySeqModel: + self.__pA = PairwiseAlign() + self.__pA.setVerbose(verbose) + + self.__base_parser_listener = BasePKParserListener(verbose, log, representativeModelId, representativeAltId, + mrAtomNameMapping, cR, caC, self.__ccU, self.__csStat, self.__nefT) + self.__base_parser_listener.enter() + + self.protStatements = 0 + + # Enter a parse tree produced by XeasyPROTParser#xeasy_prot. + def enterXeasy_prot(self, ctx: XeasyPROTParser.Xeasy_protContext): # pylint: disable=unused-argument + self.__atomNumberDict = {} + self.__polySeqPrmTop = [] + self.__f = [] + + # Exit a parse tree produced by XeasyPROTParser#xeasy_prot. 
+ def exitXeasy_prot(self, ctx: XeasyPROTParser.Xeasy_protContext): # pylint: disable=unused-argument + + try: + + if not self.__hasPolySeqModel: + return + + if len(self.__atoms) == 0: + return + + chainIndex = letterToDigit(self.__polySeqModel[0]['chain_id']) - 1 # set tentative chain_id from label_asym_id, which will be assigned to coordinate auth_asym_id + chainId = indexToLetter(chainIndex) + + terminus = [atom['auth_atom_id'].endswith('T') for atom in self.__atoms] + + atomTotal = len(self.__atoms) + if terminus[0]: + terminus[0] = False + for i in range(0, atomTotal - 1): + j = i + 1 + if terminus[i] and terminus[j]: + terminus[i] = False + if terminus[-1]: + terminus[-1] = False + + seqIdList, compIdList, retrievedAtomNumList = [], [], [] + + NON_METAL_ELEMENTS = ('H', 'C', 'N', 'O', 'P', 'S') + + def is_segment(prev_asym_id, prev_comp_id, prev_atom_name, asym_id, comp_id, atom_name): + if prev_asym_id is None or prev_comp_id is None: + return False + if prev_asym_id != asym_id: + return True + is_prev_term_atom = prev_atom_name.endswith('T') + if is_prev_term_atom and atom_name.endswith('T'): + return True + is_prev_3_prime_comp = prev_comp_id.endswith('3') + if is_prev_3_prime_comp and (is_prev_term_atom + or self.__csStat.peptideLike(translateToStdResName(comp_id, ccU=self.__ccU))): + return True + return comp_id.endswith('5')\ + and (is_prev_3_prime_comp + or self.__csStat.peptideLike(translateToStdResName(prev_comp_id, ccU=self.__ccU))) + + def is_ligand(prev_comp_id, comp_id): + if prev_comp_id is None or not self.__hasNonPolyModel: + return False + if not prev_comp_id.endswith('3') or prev_comp_id == comp_id: + return False + for np in self.__nonPolyModel: + if comp_id in np['comp_id']: + return True + for np in self.__nonPolyModel: + if 'alt_comp_id' in np: + if comp_id in np['alt_comp_id']: + return True + return False + + def is_metal_ion(comp_id, atom_name): + if comp_id is None: + return False + if comp_id != atom_name: + return False + return 
comp_id.split('+')[0].title() in NAMES_ELEMENT\ + or comp_id.split('-')[0].title() in NAMES_ELEMENT + + def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): + if len(prev_atom_name) == 0: + return False + return prev_seq_id != seq_id and prev_atom_name[0] not in NON_METAL_ELEMENTS + + hasSegCompId = False + ancAtomName = prevAtomName = '' + prevAsymId = prevSeqId = prevCompId = None + offset = 0 + for atom in self.__atoms: + atomNum = atom['atom_number'] + atomName = atom['auth_atom_id'] + asymId = atom['auth_chain_id'] + _seqId = atom['auth_seq_id'] + compId = atom['auth_comp_id'] + if self.__noWaterMol and (compId in ('HOH', 'H2O', 'WAT') or (len(compId) > 3 and compId[:3] in ('HOH', 'H2O', 'WAT'))): + break + if not hasSegCompId and (compId.endswith('5') or compId.endswith('3')): + hasSegCompId = True + if not hasSegCompId and compId not in monDict3 and self.__mrAtomNameMapping is not None and atomName[0] in protonBeginCode: + _, compId, _atomName = retrieveAtomIdentFromMRMap(self.__ccU, self.__mrAtomNameMapping, _seqId, compId, atomName) + if _atomName != atomName: + atomName = _atomName + retrievedAtomNumList.append(atomNum) + if (0 < atomNum < len(terminus) + 1 and terminus[atomNum - 1] and ancAtomName.endswith('T'))\ + or is_segment(prevAsymId, prevCompId, prevAtomName, asymId, compId, atomName)\ + or is_ligand(prevCompId, compId)\ + or is_metal_ion(compId, atomName)\ + or is_metal_ion(prevCompId, prevAtomName)\ + or is_metal_elem(prevAtomName, prevSeqId, _seqId): + + self.__polySeqPrmTop.append({'chain_id': chainId, + 'seq_id': seqIdList, + 'auth_comp_id': compIdList}) + seqIdList, compIdList = [], [] + chainIndex += 1 + chainId = indexToLetter(chainIndex) + offset = 1 - _seqId + + seqId = _seqId + offset + if seqId not in seqIdList: + seqIdList.append(seqId) + compIdList.append(compId) + self.__atomNumberDict[atomNum] = {'chain_id': chainId, + 'seq_id': seqId, + 'auth_comp_id': compId, + 'auth_atom_id': atomName} + ancAtomName = prevAtomName + 
prevAtomName = atomName + prevAsymId = asymId + prevSeqId = _seqId + prevCompId = compId + + self.__polySeqPrmTop.append({'chain_id': chainId, + 'seq_id': seqIdList, + 'auth_comp_id': compIdList}) + + nonPolyCompIdList = [] + if self.__hasNonPolyModel: + for np in self.__nonPolyModel: + compId = np['comp_id'][0] + if compId not in nonPolyCompIdList: + nonPolyCompIdList.append(compId) + + for ps in self.__polySeqPrmTop: + chainId = ps['chain_id'] + compIdList = [] + for seqId, authCompId in zip(ps['seq_id'], ps['auth_comp_id']): + authAtomIds = [translateToStdAtomName(atomNum['auth_atom_id'], atomNum['auth_comp_id'], + ccU=self.__ccU, unambig=True) + for atomNum in self.__atomNumberDict.values() + if atomNum['chain_id'] == chainId + and atomNum['seq_id'] == seqId + and atomNum['auth_atom_id'][0] not in protonBeginCode] + authCompId = translateToStdResName(authCompId, ccU=self.__ccU) + if self.__ccU.updateChemCompDict(authCompId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + valid = True + for _atomId in authAtomIds: + if _atomId not in chemCompAtomIds: + valid = False + break + if not valid: + break + if valid: + compIdList.append(authCompId) + for k, atomNum in self.__atomNumberDict.items(): + if atomNum['chain_id'] == chainId and atomNum['seq_id'] == seqId: + atomNum['comp_id'] = authCompId + + if authCompId in nonPolyCompIdList and self.__mrAtomNameMapping is not None\ + and atomNum['auth_atom_id'][0] in protonBeginCode and k not in retrievedAtomNumList: + _, _, atomId = retrieveAtomIdentFromMRMap(self.__ccU, self.__mrAtomNameMapping, None, authCompId, atomNum['auth_atom_id'], None, None, True) + else: + atomId = atomNum['auth_atom_id'] + + atomId = translateToStdAtomName(atomId, authCompId, chemCompAtomIds, ccU=self.__ccU, unambig=True) + + if atomId[0] not in protonBeginCode or atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + else: + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + + else: + compId 
= self.__csStat.getSimilarCompIdFromAtomIds([translateToStdAtomName(atomNum['auth_atom_id'], + atomNum['auth_comp_id'], + ccU=self.__ccU, + unambig=True) + for atomNum in self.__atomNumberDict.values() + if atomNum['chain_id'] == chainId + and atomNum['seq_id'] == seqId]) + + if self.__hasNonPolyModel and compId != authCompId: + ligands = 0 + for np in self.__nonPolyModel: + if 'alt_comp_id' in np: + ligands += np['alt_comp_id'].count(authCompId) + if ligands > 0: + for np in self.__nonPolyModel: + if authCompId in np['alt_comp_id']: + compId = np['comp_id'][0] + break + if ligands == 0: + __compId = None + for np in self.__nonPolyModel: + for ligand in np['comp_id']: + __compId = translateToLigandName(authCompId, ligand, self.__ccU) + if __compId == ligand: + ligands += 1 + if ligands == 1: + compId = __compId + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' or isReservedLigCode(authCompId): + compId = self.__nonPolyModel[0]['comp_id'][0] + + if compId is not None: + compIdList.append(compId + '?') # decide when coordinate is available + chemCompAtomIds = None + if self.__ccU.updateChemCompDict(compId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + for k, atomNum in self.__atomNumberDict.items(): + if atomNum['chain_id'] == chainId and atomNum['seq_id'] == seqId: + atomNum['comp_id'] = compId + + if compId in nonPolyCompIdList and self.__mrAtomNameMapping is not None\ + and atomNum['auth_atom_id'][0] in protonBeginCode and k not in retrievedAtomNumList: + _, _, atomId = retrieveAtomIdentFromMRMap(self.__ccU, self.__mrAtomNameMapping, None, compId, atomNum['auth_atom_id'], None, None, True) + else: + atomId = atomNum['auth_atom_id'] + + atomId = translateToStdAtomName(atomId, compId, chemCompAtomIds, ccU=self.__ccU, unambig=True) + + if chemCompAtomIds is not None and atomId in chemCompAtomIds: + atomNum['atom_id'] = 
atomId + elif chemCompAtomIds is not None: + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + else: + compIdList.append('.') + unknownAtomIds = [_atomId for _atomId in authAtomIds if _atomId not in chemCompAtomIds] + self.__f.append(f"[Unknown atom name] " + f"{unknownAtomIds} are unknown atom names for {authCompId} residue.") + compIdList.append(f"? {authCompId} {unknownAtomIds}") + else: + compId = self.__csStat.getSimilarCompIdFromAtomIds([atomNum['auth_atom_id'] + for atomNum in self.__atomNumberDict.values() + if atomNum['chain_id'] == chainId + and atomNum['seq_id'] == seqId]) + + if self.__hasNonPolyModel and compId != authCompId: + ligands = 0 + for np in self.__nonPolyModel: + if 'alt_comp_id' in np: + ligands += np['alt_comp_id'].count(authCompId) + if ligands == 1: + for np in self.__nonPolyModel: + if authCompId in np['alt_comp_id']: + compId = np['comp_id'][0] + if ligands == 0: + __compId = None + for np in self.__nonPolyModel: + for ligand in np['comp_id']: + __compId = translateToLigandName(authCompId, ligand, self.__ccU) + if __compId == ligand: + ligands += 1 + if ligands == 1: + compId = __compId + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' or isReservedLigCode(authCompId): + compId = self.__nonPolyModel[0]['comp_id'][0] + + if compId is not None: + compIdList.append(compId + '?') # decide when coordinate is available + chemCompAtomIds = None + if self.__ccU.updateChemCompDict(compId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + for k, atomNum in self.__atomNumberDict.items(): + if atomNum['chain_id'] == chainId and atomNum['seq_id'] == seqId: + atomNum['comp_id'] = compId + + if compId in nonPolyCompIdList and self.__mrAtomNameMapping is not None\ + and atomNum['auth_atom_id'][0] in protonBeginCode and k not in retrievedAtomNumList: + _, _, atomId = 
retrieveAtomIdentFromMRMap(self.__ccU, self.__mrAtomNameMapping, None, compId, atomNum['auth_atom_id'], None, None, True) + else: + atomId = atomNum['auth_atom_id'] + + atomId = translateToStdAtomName(atomId, compId, chemCompAtomIds, ccU=self.__ccU, unambig=True) + + if chemCompAtomIds is not None and atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + elif chemCompAtomIds is not None: + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + else: + compIdList.append('.') + """ deferred to assignNonPolymer() + self.__f.append(f"[Unknown residue name] " + f"{authCompId!r} is unknown residue name.") + """ + + ps['comp_id'] = compIdList + + polySeqModel = copy.copy(self.__polySeqModel) + if self.__hasBranchedModel: + polySeqModel.extend(self.__branchedModel) + + self.__seqAlign, compIdMapping = alignPolymerSequence(self.__pA, polySeqModel, self.__polySeqPrmTop) + + if len(self.__seqAlign) == 0: + for c in range(1, 5): + self.__seqAlign, compIdMapping = alignPolymerSequenceWithConflicts(self.__pA, polySeqModel, self.__polySeqPrmTop, c) + if len(self.__seqAlign) > 0: + break + + if len(self.__seqAlign) == 0: + len_cif_na = sum(len(ps_cif['seq_id']) for ps_cif in polySeqModel if 'identical_chain_id' in ps_cif and len(ps_cif['seq_id']) > 3) + len_top_na = sum(len(ps_top['seq_id']) for ps_top in self.__polySeqPrmTop + if len(ps_top['seq_id']) > 3 and any(compId in ('DA?', 'DT?', 'DG?', 'DC?', 'A?', 'U?', 'G?', 'C?') for compId in ps_top['comp_id'])) + if len_cif_na == len_top_na: + chainIdList, seqIdList, authCompIdList = [], [], [] + for ps_top in self.__polySeqPrmTop: + len_ps_cif_seq = len(ps_top['seq_id']) + if len_ps_cif_seq > 3 and any(compId in ('DA?', 'DT?', 'DG?', 'DC?', 'A?', 'U?', 'G?', 'C?') for compId in ps_top['comp_id']): + chainId = ps_top['chain_id'] + for seqId, compId in zip(ps_top['seq_id'], ps_top['auth_comp_id']): + chainIdList.append(chainId) + seqIdList.append(seqId) + authCompIdList.append(compId) + + chainIndex = 
letterToDigit(self.__polySeqModel[0]['chain_id']) - 1 + idOffset = 0 + + touched = [] + + polySeqPrmTop = [] + for ps_cif in polySeqModel: + len_ps_cif_seq = len(ps_cif['seq_id']) + if 'identical_chain_id' in ps_cif and len_ps_cif_seq > 3: + chainId = indexToLetter(chainIndex) + polySeqPrmTop.append({'chain_id': chainId, + 'seq_id': seqIdList[idOffset:idOffset + len_ps_cif_seq], + 'comp_id': ps_cif['comp_id'], + 'auth_comp_id': authCompIdList[idOffset:idOffset + len_ps_cif_seq]}) + + for idx, (_chainId, _seqId) in enumerate(zip(chainIdList[idOffset:idOffset + len_ps_cif_seq], + seqIdList[idOffset:idOffset + len_ps_cif_seq])): + for k, atomNum in self.__atomNumberDict.items(): + if atomNum['chain_id'] == _chainId and atomNum['seq_id'] == _seqId: + atomNum['chain_id'] = chainId + atomNum['cif_comp_id'] = ps_cif['comp_id'][idx] + touched.append(k) + + idOffset += len_ps_cif_seq + chainIndex += 1 + + for ps_top in self.__polySeqPrmTop: + if len(ps_top['seq_id']) > 3 and any(compId in ('DA?', 'DT?', 'DG?', 'DC?', 'A?', 'U?', 'G?', 'C?') for compId in ps_top['comp_id']): + continue + _chainId = copy.copy(ps_top['chain_id']) + chainId = indexToLetter(chainIndex) + ps_top['chain_id'] = chainId + polySeqPrmTop.append(ps_top) + + for k, atomNum in self.__atomNumberDict.items(): + if k in touched: + continue + if atomNum['chain_id'] == _chainId: + atomNum['chain_id'] = chainId + touched.append(k) + + chainIndex += 1 + + self.__polySeqPrmTop = polySeqPrmTop + + self.__seqAlign, compIdMapping = alignPolymerSequence(self.__pA, polySeqModel, self.__polySeqPrmTop) + + _seqAlign = copy.copy(self.__seqAlign) + for sa in _seqAlign: + if sa['ref_chain_id'] != sa['test_chain_id']: + self.__seqAlign.remove(sa) + + # test chain assignment before applying comp_id mapping + self.__chainAssign, message = assignPolymerSequence(self.__pA, self.__ccU, self.__file_type, self.__polySeqModel, self.__polySeqPrmTop, self.__seqAlign) + + for cmap in compIdMapping: + if any(ca for ca in 
self.__chainAssign if ca['test_chain_id'] == cmap['chain_id']): + for k, atomNum in self.__atomNumberDict.items(): + if atomNum['chain_id'] == cmap['chain_id'] and atomNum['seq_id'] == cmap['seq_id']: + atomNum['comp_id'] = cmap['comp_id'] + atomNum['auth_comp_id'] = cmap['auth_comp_id'] + + self.__chainAssign, message = assignPolymerSequence(self.__pA, self.__ccU, self.__file_type, self.__polySeqModel, self.__polySeqPrmTop, self.__seqAlign) + + if len(message) > 0: + self.__f.extend(message) + + if len(self.__seqAlign) == 0: + mrFormatName = getRestraintFormatName(self.__file_type) + _a_mr_format_name = 'the ' + mrFormatName + + ref_code = getOneLetterCodeCanSequence(self.__polySeqModel[0]['comp_id']) + test_code = getOneLetterCodeCanSequence(self.__polySeqPrmTop[0]['comp_id']) + + hint = '' + if abs(len(ref_code) - len(test_code)) < 20 and len(ref_code) > 40: + hint = f"For example, coordinates ({self.__polySeqModel[0]['auth_chain_id']}): {ref_code} vs topology: {test_code}. " + + self.__f.append(f"[Sequence mismatch] Polymer sequence between the coordinate and {_a_mr_format_name} data does not match. 
{hint}" + "Please verify the two sequences and re-upload the correct file(s) if required.") + + assi_ref_chain_ids = {} + proc_test_chain_ids, atom_nums, delete_atom_nums = [], [], [] + + def update_atom_num(seq_align, orphan): + ref_chain_id = seq_align['ref_chain_id'] + test_chain_id = seq_align['test_chain_id'] + + if ref_chain_id in assi_ref_chain_ids or test_chain_id in proc_test_chain_ids: + return + + ps_cif = next(ps for ps in self.__polySeqModel if ps['auth_chain_id'] == ref_chain_id) + + if ref_chain_id not in assi_ref_chain_ids: + assi_ref_chain_ids[ref_chain_id] = seq_align['length'] - seq_align['matched'] - seq_align['conflict'] + else: + assi_ref_chain_ids[ref_chain_id] -= seq_align['matched'] + seq_align['conflict'] + proc_test_chain_ids.append(test_chain_id) + + offset = first_seq_id = None + + for atom_num, atomNum in self.__atomNumberDict.items(): + if atom_num in atom_nums: + continue + if atomNum['chain_id'] == test_chain_id: + atom_nums.append(atom_num) + + test_seq_id = atomNum['seq_id'] + + if first_seq_id is None: + first_seq_id = test_seq_id + + if test_seq_id in seq_align['test_seq_id']: + idx = seq_align['test_seq_id'].index(test_seq_id) + if 'ref_auth_seq_id' in seq_align and idx < len(seq_align['ref_auth_seq_id']): + ref_seq_id = seq_align['ref_auth_seq_id'][idx] + elif offset is not None: + ref_seq_id = test_seq_id + offset + else: + continue + elif offset is not None: + ref_seq_id = test_seq_id + offset + else: + continue + + if offset is None: + offset = ref_seq_id - test_seq_id + + atomNum['chain_id'] = ref_chain_id + atomNum['seq_id'] = ref_seq_id + + if ref_seq_id in ps_cif['auth_seq_id']: + idx = ps_cif['auth_seq_id'].index(ref_seq_id) + atomNum['comp_id'] = ps_cif['comp_id'][idx] + if 'atom_id' not in atomNum: + compId = atomNum['comp_id'] + atomId = atomNum['auth_atom_id'] + if self.__ccU.updateChemCompDict(compId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + atomId = 
translateToStdAtomName(atomId, compId, chemCompAtomIds, ccU=self.__ccU, unambig=True) + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + continue + + if orphan and test_seq_id == first_seq_id\ + and self.__csStat.peptideLike(translateToStdResName(atomNum['comp_id'], ccU=self.__ccU)): + if self.__ccU.updateChemCompDict(atomNum['comp_id']): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + leavingAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList + if cca[self.__ccU.ccaLeavingAtomFlag] == 'Y'] + if atomNum['atom_id'] not in chemCompAtomIds or atomNum['atom_id'] in leavingAtomIds: + delete_atom_nums.append(atom_num) + + while True: + + orphanPolySeqPrmTop = [] + + for ps in self.__polySeqPrmTop: + test_chain_id = ps['chain_id'] + if test_chain_id in proc_test_chain_ids: + continue + try: + ca = next(ca for ca in self.__chainAssign if ca['test_chain_id'] == test_chain_id) + + ref_chain_id = ca['ref_chain_id'] + sa = next((sa for sa in self.__seqAlign + if sa['ref_chain_id'] == ref_chain_id and sa['test_chain_id'] == test_chain_id), None) + + if sa is not None: # and sa['conflict'] == 0: + update_atom_num(sa, False) + + except StopIteration: + orphanPolySeqPrmTop.append(ps) + + resolved = False + + if len(orphanPolySeqPrmTop) > 0: + max_length = max(len(ps['seq_id']) for ps in orphanPolySeqPrmTop) + __polySeqModel__ = [ps for ps in self.__polySeqModel + if ps['auth_chain_id'] not in assi_ref_chain_ids + or assi_ref_chain_ids[ps['auth_chain_id']] >= max_length] + __seqAlign__, _ = alignPolymerSequence(self.__pA, __polySeqModel__, orphanPolySeqPrmTop) + if len(__seqAlign__) > 0: + for sa in __seqAlign__: + if sa['conflict'] == 0: + update_atom_num(sa, True) + + resolved = True + + if not resolved: + for c in range(1, 5): + __seqAlign__, _ = alignPolymerSequenceWithConflicts(self.__pA, __polySeqModel__, orphanPolySeqPrmTop, c) + if len(__seqAlign__) > 0: + for sa in __seqAlign__: + if sa['conflict'] <= c: 
+ update_atom_num(sa, True) + + resolved = True + if resolved: + break + + if not resolved: + break + + for ps in self.__polySeqPrmTop: + test_chain_id = ps['chain_id'] + + if test_chain_id in proc_test_chain_ids: + continue + + for cif_ps in self.__polySeqModel: + ref_chain_id = cif_ps['auth_chain_id'] + + if ref_chain_id in assi_ref_chain_ids: + continue + + len_gap = abs(len(ps['seq_id']) - len(cif_ps['auth_seq_id'])) + + if len_gap > 20: + continue + + if len_gap == 0: + offset = cif_ps['auth_seq_id'][0] - ps['seq_id'][0] + + for atomNum in self.__atomNumberDict.values(): + if atomNum['chain_id'] == test_chain_id: + atomNum['chain_id'] = ref_chain_id + if len_gap == 0: + atomNum['seq_id'] += offset + + proc_test_chain_ids.append(test_chain_id) + assi_ref_chain_ids[ref_chain_id] = len_gap + + if len(delete_atom_nums) > 0: + for atom_num in sorted(delete_atom_nums, reverse=True): + del self.__atomNumberDict[atom_num] + + for atomNum in self.__atomNumberDict.values(): + if 'atom_id' in atomNum and atomNum['atom_id'] in aminoProtonCode: + _seqKey = (atomNum['chain_id'], atomNum['seq_id'] - 1) + seqKey = (atomNum['chain_id'], atomNum['seq_id']) + if _seqKey in self.__coordUnobsRes and seqKey in self.__coordAtomSite: + coordAtomSite = self.__coordAtomSite[seqKey] + if atomNum['atom_id'] not in coordAtomSite['atom_id']: + for atomId in aminoProtonCode: + if atomId in coordAtomSite['atom_id']: + atomNum['atom_id'] = atomId + break + + if self.__chainAssign is not None: + trimSequenceAlignment(self.__seqAlign, self.__chainAssign) + + if self.__hasNonPolyModel: + + # metal ion + if any(ps for ps in self.__polySeqPrmTop + if len(ps['seq_id']) == 1 and ps['comp_id'][0].title() in NAMES_ELEMENT): + self.assignMetalIon() + + # other non-polymer + nonPolyIndices = [idx for idx, ps in enumerate(self.__polySeqPrmTop) + if not any(ca for ca in self.__chainAssign + if ca['test_chain_id'] == ps['chain_id']) + and len(set(ps['comp_id'])) > 0 and ps['comp_id'][0] == '.'] + + if 
len(nonPolyIndices) > 0: + self.assignNonPolymer(nonPolyIndices) + + for idx in sorted(nonPolyIndices, reverse=True): + del self.__polySeqPrmTop[idx] + + if self.__hasNonPolyModel: + compIdMapping = {} + mappedSeqVal, mappedAtomNum = [], [] + + for np in self.__nonPolyModel: + authChainId = np['auth_chain_id'] + authSeqId = np['auth_seq_id'][0] + compId = np['comp_id'][0] + + for k, v in self.__atomNumberDict.items(): + if k in mappedAtomNum: + continue + if 'comp_id' in v and v['comp_id'] == compId: + seqKey = (v['comp_id'], v['chain_id'], v['seq_id']) + seqVal = (authChainId, authSeqId) + if seqKey not in compIdMapping: + if seqVal not in mappedSeqVal: + compIdMapping[seqKey] = seqVal + if seqKey in compIdMapping: + v['chain_id'], v['seq_id'] = compIdMapping[seqKey] + mappedSeqVal.append(seqVal) + mappedAtomNum.append(k) + + if any(f for f in message if '[Concatenated sequence]' in f): # DAOTHER-9511: resolve concatenated sequence + test_chain_id_map = {} + for ca in self.__chainAssign: + ref_chain_id = ca['ref_chain_id'] + test_chain_id = ca['test_chain_id'] + if test_chain_id not in test_chain_id_map: + test_chain_id_map[test_chain_id] = [] + test_chain_id_map[test_chain_id].append(ref_chain_id) + + _test_chain_id_map = copy.copy(test_chain_id_map) + for test_chain_id, cmap in _test_chain_id_map.items(): + if len(cmap) < 2: + del test_chain_id_map[test_chain_id] + + if len(test_chain_id_map) > 0: + cmap = {} + for test_chain_id, ref_chain_ids in test_chain_id_map.items(): + ref_chain_id0 = ref_chain_ids[0] + for ref_chain_id in ref_chain_ids[1:]: + sa = next((sa for sa in self.__seqAlign if sa['ref_chain_id'] == ref_chain_id and sa['test_chain_id'] == test_chain_id), None) + if sa is None: + continue + ps = next((ps for ps in self.__polySeqModel if ps['auth_chain_id'] == ref_chain_id), None) + if ps is None: + continue + for auth_seq_id, comp_id in zip(ps['auth_seq_id'], ps['comp_id']): + seq_key = (ref_chain_id0, auth_seq_id) + cmap[seq_key] = (ref_chain_id, 
comp_id) + if len(cmap) > 0: + for atomNum in self.__atomNumberDict.values(): + seq_key = (atomNum['chain_id'], atomNum['seq_id']) + if seq_key in cmap: + atomNum['chain_id'], atomNum['comp_id'] = cmap[seq_key] + if 'atom_id' not in atomNum: + compId = atomNum['comp_id'] + atomId = atomNum['auth_atom_id'] + if self.__ccU.updateChemCompDict(compId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + atomId = translateToStdAtomName(atomId, compId, chemCompAtomIds, ccU=self.__ccU, unambig=True) + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + + finally: + self.warningMessage = sorted(list(set(self.__f)), key=self.__f.index) + + def assignMetalIon(self): + if not self.__hasNonPolyModel: + return + + metals = collections.Counter(s2['comp_id'][0] for s2 in self.__polySeqPrmTop + if len(s2['seq_id']) == 1 and s2['comp_id'][0].title() in NAMES_ELEMENT).most_common() + + for metal in metals: + compId = metal[0] + + atomNums = [atomNum for atomNum in self.__atomNumberDict.values() + if atomNum['auth_comp_id'] == compId and atomNum['auth_atom_id'] == compId] + + nonPolys = [nonPoly for nonPoly in self.__nonPolyModel + if nonPoly['comp_id'][0] == compId] + + for atomNum, nonPoly in zip(atomNums, nonPolys): + atomNum['chain_id'] = nonPoly['auth_chain_id'] + atomNum['seq_id'] = nonPoly['auth_seq_id'][0] + + def assignNonPolymer(self, nonPolyIndices): + if not self.__hasNonPolyModel: + return + + authCompIds = [] + + for idx, ps in enumerate(self.__polySeqPrmTop): + if idx not in nonPolyIndices: + continue + for authCompId, compId in zip(ps['auth_comp_id'], ps['comp_id']): + if compId != '.': + continue + authCompIds.append(authCompId) + + nonPolyCompIds = collections.Counter(authCompIds).most_common() + + compIds = [] + for nonPoly in self.__nonPolyModel: + compId = nonPoly['comp_id'][0] + if compId.title() in NAMES_ELEMENT: + continue + compIds.append(compId) + + refCompIds = collections.Counter(compIds).most_common() + + 
comp_id_mapping = {} + + for authCompId in nonPolyCompIds: + refCompId = next((compId[0] for compId in refCompIds if compId[1] == authCompId[1] and compId[1] not in comp_id_mapping.values()), None) + if refCompId is None: + self.__f.append(f"[Unknown residue name] " + f"{authCompId[0]!r} is unknown residue name.") + continue + comp_id_mapping[authCompId[0]] = refCompId + + for authCompId, compId in comp_id_mapping.items(): + chemCompAtomIds = None + if self.__ccU.updateChemCompDict(compId): + chemCompAtomIds = [cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList] + + authSeqKeys = [] + + for idx, ps in enumerate(self.__polySeqPrmTop): + if idx not in nonPolyIndices: + continue + _chainId = ps['chain_id'] + for _authCompId, _compId, _seqId in zip(ps['auth_comp_id'], ps['comp_id'], ps['seq_id']): + if _authCompId != authCompId or _compId != '.': + continue + authSeqKeys.append((_chainId, _seqId)) + + nonPolys = [nonPoly for nonPoly in self.__nonPolyModel + if nonPoly['comp_id'][0] == compId] + + reported_auth_atom_id = [] + + for authSeqKey, nonPoly in zip(authSeqKeys, nonPolys): + atomNums = [atomNum for atomNum in self.__atomNumberDict.values() + if atomNum['chain_id'] == authSeqKey[0] and atomNum['seq_id'] == authSeqKey[1]] + authAtomNames = [atomNum['auth_atom_id'] for atomNum in self.__atomNumberDict.values() + if atomNum['chain_id'] == authSeqKey[0] and atomNum['seq_id'] == authSeqKey[1]] + + for atomNum in atomNums: + atomNum['chain_id'] = nonPoly['auth_chain_id'] + atomNum['seq_id'] = nonPoly['auth_seq_id'][0] + atomNum['comp_id'] = compId + authAtomId = atomNum['auth_atom_id'] + if chemCompAtomIds is not None and authAtomId in chemCompAtomIds: + atomNum['atom_id'] = authAtomId + else: + dmpcNameSystemId = -1 + if compId == 'PX4': + if 'OE' in authAtomNames: + dmpcNameSystemId = 1 + elif 'OS31' in authAtomNames: + dmpcNameSystemId = 2 + elif 'O21' in authAtomNames: + if 'C314' in authAtomNames: + dmpcNameSystemId = 3 + elif 'C114' in authAtomNames: + 
dmpcNameSystemId = 4 + + if dmpcNameSystemId != -1: + atomId = translateToStdAtomNameOfDmpc(authAtomId, dmpcNameSystemId) + else: + atomId = translateToStdAtomName(authAtomId, compId, chemCompAtomIds, ccU=self.__ccU) + + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + else: + _, _, atomId = retrieveAtomIdentFromMRMap(self.__ccU, self.__mrAtomNameMapping, None, compId, authAtomId, None, None, True) + + if atomId in chemCompAtomIds: + atomNum['atom_id'] = atomId + continue + if authAtomId not in reported_auth_atom_id: + atomNum['atom_id'] = atomNum['auth_atom_id'] + if authAtomId == "HO5'" and atomNum['seq_id'] == 1 and self.__csStat.getTypeOfCompId(compId)[1]: + continue + self.__f.append(f"[Unknown atom name] " + f"{authAtomId!r} is not recognized as the atom name of {compId!r} residue " + f"(the original residue label is {authCompId!r}).") + reported_auth_atom_id.append(authAtomId) + + # Enter a parse tree produced by XeasyPROTParser#prot. + def enterProt(self, ctx: XeasyPROTParser.ProtContext): # pylint: disable=unused-argument + self.protStatements += 1 + + # Exit a parse tree produced by XeasyPROTParser#prot. + def exitProt(self, ctx: XeasyPROTParser.ProtContext): + + try: + + nr = int(str(ctx.Integer())) + # shift = float(str(ctx.Float(0))) + # shift_error = float(str(ctx.Float(1))) + atomId = str(ctx.Simple_name()) + ass = f'{self.__cur_residue} {atomId}' + + assignment = self.__base_parser_listener.extractPeakAssignment(1, ass, nr)[0] + + atom = {'atom_number': nr, + 'auth_chain_id': assignment['chain_id'], + 'auth_seq_id': assignment['seq_id'], + 'auth_comp_id': assignment['comp_id'], + 'auth_atom_id': atomId} + + if any(v is None for v in atom.values()): + self.protStatements -= 1 + return + + if atom not in self.__atoms: + self.__atoms.append(atom) + + except (ValueError, TypeError): + self.protStatements -= 1 + + # Enter a parse tree produced by XeasyPROTParser#residue. 
+ def enterResidue(self, ctx: XeasyPROTParser.ResidueContext): + if ctx.Integer(): + self.__cur_residue = str(ctx.Integer()) + else: + self.__cur_residue = str(ctx.Simple_name()) + + # Exit a parse tree produced by XeasyPROTParser#residue. + def exitResidue(self, ctx: XeasyPROTParser.ResidueContext): # pylint: disable=unused-argument + pass + + def getContentSubtype(self): + """ Return content subtype of XEASY PROT file. + """ + + contentSubtype = {'prot': self.protStatements} + + return {k: 1 for k, v in contentSubtype.items() if v > 0} + + def getAtomNumberDict(self): + """ Return XEASY atomic number dictionary. + """ + return self.__atomNumberDict + + def getPolymerSequence(self): + """ Return polymer sequence of XEASY PROT file. + """ + return None if self.__polySeqPrmTop is None or len(self.__polySeqPrmTop) == 0 else self.__polySeqPrmTop + + def getSequenceAlignment(self): + """ Return sequence alignment between coordinates and XEASY PROT. + """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign + + def getChainAssignment(self): + """ Return chain assignment between coordinates and XEASY PROT. + """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign + + +# del XeasyPROTParser diff --git a/wwpdb/utils/nmr/pk/XeasyPROTReader.py b/wwpdb/utils/nmr/pk/XeasyPROTReader.py new file mode 100644 index 00000000..6ff25934 --- /dev/null +++ b/wwpdb/utils/nmr/pk/XeasyPROTReader.py @@ -0,0 +1,196 @@ +## +# XeasyPROTReader.py +# +# Update: +## +""" A collection of classes for parsing XEASY PROT files. 
+""" +import sys +import os + +from antlr4 import InputStream, CommonTokenStream, ParseTreeWalker + +try: + from wwpdb.utils.nmr.mr.LexerErrorListener import LexerErrorListener + from wwpdb.utils.nmr.mr.ParserErrorListener import ParserErrorListener + from wwpdb.utils.nmr.pk.XeasyPROTLexer import XeasyPROTLexer + from wwpdb.utils.nmr.pk.XeasyPROTParser import XeasyPROTParser + from wwpdb.utils.nmr.pk.XeasyPROTParserListener import XeasyPROTParserListener + from wwpdb.utils.nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + MAX_ERROR_REPORT, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from wwpdb.utils.nmr.io.CifReader import CifReader + from wwpdb.utils.nmr.ChemCompUtil import ChemCompUtil + from wwpdb.utils.nmr.BMRBChemShiftStat import BMRBChemShiftStat + from wwpdb.utils.nmr.NEFTranslator.NEFTranslator import NEFTranslator +except ImportError: + from nmr.mr.LexerErrorListener import LexerErrorListener + from nmr.mr.ParserErrorListener import ParserErrorListener + from nmr.pk.XeasyPROTLexer import XeasyPROTLexer + from nmr.pk.XeasyPROTParser import XeasyPROTParser + from nmr.pk.XeasyPROTParserListener import XeasyPROTParserListener + from nmr.mr.ParserListenerUtil import (coordAssemblyChecker, + MAX_ERROR_REPORT, + REPRESENTATIVE_MODEL_ID, + REPRESENTATIVE_ALT_ID) + from nmr.io.CifReader import CifReader + from nmr.ChemCompUtil import ChemCompUtil + from nmr.BMRBChemShiftStat import BMRBChemShiftStat + from nmr.NEFTranslator.NEFTranslator import NEFTranslator + + +class XeasyPROTReader: + """ Accessor methods for parsing XEASY PROT files. 
+ """ + + def __init__(self, verbose=True, log=sys.stdout, + representativeModelId=REPRESENTATIVE_MODEL_ID, + representativeAltId=REPRESENTATIVE_ALT_ID, + mrAtomNameMapping=None, + cR=None, caC=None, ccU=None, csStat=None, nefT=None): + self.__verbose = verbose + self.__lfh = log + + self.__maxLexerErrorReport = MAX_ERROR_REPORT + self.__maxParserErrorReport = MAX_ERROR_REPORT + + self.__representativeModelId = representativeModelId + self.__representativeAltId = representativeAltId + self.__mrAtomNameMapping = mrAtomNameMapping + + # CCD accessing utility + self.__ccU = ChemCompUtil(verbose, log) if ccU is None else ccU + + if cR is not None and caC is None: + caC = coordAssemblyChecker(verbose, log, representativeModelId, representativeAltId, + cR, self.__ccU, None, None, fullCheck=False) + + self.__cR = cR + self.__caC = caC + + # BMRB chemical shift statistics + self.__csStat = BMRBChemShiftStat(verbose, log, self.__ccU) if csStat is None else csStat + + # NEFTranslator + self.__nefT = NEFTranslator(verbose, log, self.__ccU, self.__csStat) if nefT is None else nefT + + def setLexerMaxErrorReport(self, maxErrReport): + self.__maxLexerErrorReport = maxErrReport + + def setParserMaxErrorReport(self, maxErrReport): + self.__maxParserErrorReport = maxErrReport + + def parse(self, protFilePath, cifFilePath=None, isFilePath=True): + """ Parse XEASY PROT file. + @return: XeasyPROTParserListener for success or None otherwise, ParserErrorListener, LexerErrorListener. 
+ """ + + ifh = None + + try: + + if isFilePath: + protString = None + + if not os.access(protFilePath, os.R_OK): + if self.__verbose: + self.__lfh.write(f"XeasyPROTReader.parse() {protFilePath} is not accessible.\n") + return None, None, None + + ifh = open(protFilePath, 'r') # pylint: disable=consider-using-with + input = InputStream(ifh.read()) + + else: + protFilePath, protString = None, protFilePath + + if protString is None or len(protString) == 0: + if self.__verbose: + self.__lfh.write("XeasyPROTReader.parse() Empty string.\n") + return None, None, None + + input = InputStream(protString) + + if cifFilePath is not None: + if not os.access(cifFilePath, os.R_OK): + if self.__verbose: + self.__lfh.write(f"XeasyPROTReader.parse() {cifFilePath} is not accessible.\n") + return None, None, None + + if self.__cR is None: + self.__cR = CifReader(self.__verbose, self.__lfh) + if not self.__cR.parse(cifFilePath): + return None, None, None + + lexer = XeasyPROTLexer(input) + lexer.removeErrorListeners() + + lexer_error_listener = LexerErrorListener(protFilePath, maxErrorReport=self.__maxLexerErrorReport) + lexer.addErrorListener(lexer_error_listener) + + messageList = lexer_error_listener.getMessageList() + + if messageList is not None and self.__verbose: + for description in messageList: + self.__lfh.write(f"[Syntax error] line {description['line_number']}:{description['column_position']} {description['message']}\n") + if 'input' in description: + self.__lfh.write(f"{description['input']}\n") + self.__lfh.write(f"{description['marker']}\n") + + stream = CommonTokenStream(lexer) + parser = XeasyPROTParser(stream) + # try with simpler/faster SLL prediction mode + # parser._interp.predictionMode = PredictionMode.SLL # pylint: disable=protected-access + parser.removeErrorListeners() + parser_error_listener = ParserErrorListener(protFilePath, maxErrorReport=self.__maxParserErrorReport) + parser.addErrorListener(parser_error_listener) + tree = parser.xeasy_prot() + + walker 
= ParseTreeWalker() + listener = XeasyPROTParserListener(self.__verbose, self.__lfh, + self.__representativeModelId, + self.__representativeAltId, + self.__mrAtomNameMapping, + self.__cR, self.__caC, + self.__ccU, self.__csStat, self.__nefT) + walker.walk(listener, tree) + + messageList = parser_error_listener.getMessageList() + + if messageList is not None and self.__verbose: + for description in messageList: + self.__lfh.write(f"[Syntax error] line {description['line_number']}:{description['column_position']} {description['message']}\n") + if 'input' in description: + self.__lfh.write(f"{description['input']}\n") + self.__lfh.write(f"{description['marker']}\n") + elif messageList is None and cifFilePath is None: + parser_error_listener = ParserErrorListener(protFilePath, maxErrorReport=self.__maxParserErrorReport) + + if self.__verbose: + if listener.warningMessage is not None and len(listener.warningMessage) > 0: + print('\n'.join(listener.warningMessage)) + if isFilePath: + print(listener.getContentSubtype()) + + return listener, parser_error_listener, lexer_error_listener + + except IOError as e: + if self.__verbose: + self.__lfh.write(f"+XeasyPROTReader.parse() ++ Error - {str(e)}\n") + return None, None, None + # pylint: disable=unreachable + """ debug code + except Exception as e: + if self.__verbose and isFilePath: + self.__lfh.write(f"+XeasyPROTReader.parse() ++ Error - {protFilePath!r} - {str(e)}\n") + return None, None, None + """ + finally: + if isFilePath and ifh is not None: + ifh.close() + + +if __name__ == "__main__": + reader = XeasyPROTReader(True) + reader.parse('../../tests-nmr/mock-data-remediation/7a2d/all.prot', + '../../tests-nmr/mock-data-remediation/7a2d/7a2d.cif') diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTLexer.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTLexer.g4 new file mode 100644 index 00000000..decaf74e --- /dev/null +++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTLexer.g4 @@ -0,0 +1,48 
@@ +/* + Xeasy PROT lexer grammar for ANTLR v4. + Copyright 2024 Masashi Yokochi + +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +lexer grammar XeasyPROTLexer; + +Integer: ('+' | '-')? DECIMAL; +Float: ('+' | '-')? (DECIMAL | DEC_DOT_DEC); +//Real: ('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?; +fragment DEC_DOT_DEC: (DECIMAL '.' DECIMAL) | ('.' DECIMAL); +fragment DEC_DIGIT: [0-9]; +fragment DECIMAL: DEC_DIGIT+; + +SHARP_COMMENT: '#'+ ~[\r\n]* '#'* ~[\r\n]* -> channel(HIDDEN); +EXCLM_COMMENT: '!'+ ~[\r\n]* '!'* ~[\r\n]* -> channel(HIDDEN); +SMCLN_COMMENT: ';'+ ~[\r\n]* ';'* ~[\r\n]* -> channel(HIDDEN); + +Simple_name: SIMPLE_NAME; +//Residue_number: Integer; +//Residue_name: SIMPLE_NAME; +//Atom_name: ALPHA_NUM ATM_NAME_CHAR*; + +fragment ALPHA: [A-Za-z]; +fragment ALPHA_NUM: ALPHA | DEC_DIGIT; +fragment START_CHAR: ALPHA_NUM | '_' | '-' | '+' | '.' | '*' | '#' | '?'; +fragment NAME_CHAR: START_CHAR | '\'' | '"'; +//fragment ATM_NAME_CHAR: ALPHA_NUM | '\''; +fragment SIMPLE_NAME: START_CHAR NAME_CHAR*; + +SPACE: [ \t]+ -> skip; +RETURN: [\r\n]+; + +ENCLOSE_COMMENT: '{' (ENCLOSE_COMMENT | .)*? '}' -> channel(HIDDEN); +SECTION_COMMENT: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '=' '='+ | 'REMARK') ' '* RETURN -> channel(HIDDEN); +LINE_COMMENT: ('#' | '!' 
| ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '=' '='+ | 'REMARK') ~[\r\n]* RETURN -> channel(HIDDEN); + diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTParser.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTParser.g4 new file mode 100644 index 00000000..b4e830af --- /dev/null +++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/XeasyPROTParser.g4 @@ -0,0 +1,30 @@ +/* + Xeasy PROT parser grammar for ANTLR v4. + Copyright 2024 Masashi Yokochi + +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +parser grammar XeasyPROTParser; + +options { tokenVocab=XeasyPROTLexer; } + +xeasy_prot: + prot+ + EOF; + +prot: + Integer Float Float Simple_name residue RETURN; + +residue: + Integer | Simple_name; + diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTLexer.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTLexer.g4 new file mode 120000 index 00000000..d2a4d908 --- /dev/null +++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTLexer.g4 @@ -0,0 +1 @@ +../antlr-grammars-v4.10/XeasyPROTLexer.g4 \ No newline at end of file diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTParser.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTParser.g4 new file mode 120000 index 00000000..1dbf391a --- /dev/null +++ b/wwpdb/utils/tests-nmr/antlr-grammars-v4.9/XeasyPROTParser.g4 @@ -0,0 +1 @@ +../antlr-grammars-v4.10/XeasyPROTParser.g4 \ No newline at end of file diff --git a/wwpdb/utils/tests-nmr/json-schema/nmr-data-procesing-report-schema-v4.json 
b/wwpdb/utils/tests-nmr/json-schema/nmr-data-procesing-report-schema-v4.json index 74330927..45cad1bf 100644 --- a/wwpdb/utils/tests-nmr/json-schema/nmr-data-procesing-report-schema-v4.json +++ b/wwpdb/utils/tests-nmr/json-schema/nmr-data-procesing-report-schema-v4.json @@ -22,6 +22,7 @@ "nm-aux-amb", "nm-aux-cha", "nm-aux-gro", + "nm-aux-xea", "nm-res-amb", "nm-res-ari", "nm-res-bio",