From 7b70c197138ecd21fa9523e834cba6c66b6c699e Mon Sep 17 00:00:00 2001 From: yokochi47 Date: Fri, 17 Jan 2025 23:54:52 +0900 Subject: [PATCH] 1. Reconciled fix for 7x8m and recent fix, 2. Code refactoring, 3. Add version and license information --- wwpdb/utils/nmr/AlignUtil.py | 41 +- wwpdb/utils/nmr/BMRBChemShiftStat.py | 7 + wwpdb/utils/nmr/ChemCompUtil.py | 16 +- wwpdb/utils/nmr/CifToNmrStar.py | 77 +- wwpdb/utils/nmr/NmrDpReport.py | 6 + wwpdb/utils/nmr/NmrDpUtility.py | 167 +- wwpdb/utils/nmr/NmrVrptUtility.py | 18 +- wwpdb/utils/nmr/ann/BMRBAnnTasks.py | 8 +- wwpdb/utils/nmr/ann/OneDepAnnTasks.py | 8 +- wwpdb/utils/nmr/io/ChemCompReader.py | 209 ++- wwpdb/utils/nmr/io/CifReader.py | 1408 ++++++++--------- wwpdb/utils/nmr/io/mmCIFUtil.py | 160 +- wwpdb/utils/nmr/mr/AmberMRParserListener.py | 38 +- wwpdb/utils/nmr/mr/AmberMRReader.py | 6 + wwpdb/utils/nmr/mr/AmberPTParserListener.py | 20 +- wwpdb/utils/nmr/mr/AmberPTReader.py | 6 + wwpdb/utils/nmr/mr/AriaMRParserListener.py | 18 +- wwpdb/utils/nmr/mr/AriaMRReader.py | 6 + wwpdb/utils/nmr/mr/BiosymMRParserListener.py | 19 +- wwpdb/utils/nmr/mr/BiosymMRReader.py | 6 + wwpdb/utils/nmr/mr/CharmmCRDParserListener.py | 14 +- wwpdb/utils/nmr/mr/CharmmCRDReader.py | 6 + wwpdb/utils/nmr/mr/CharmmMRParserListener.py | 24 +- wwpdb/utils/nmr/mr/CharmmMRReader.py | 6 + wwpdb/utils/nmr/mr/CnsMRParserListener.py | 25 +- wwpdb/utils/nmr/mr/CnsMRReader.py | 6 + wwpdb/utils/nmr/mr/CyanaMRParserListener.py | 44 +- wwpdb/utils/nmr/mr/CyanaMRReader.py | 6 + wwpdb/utils/nmr/mr/CyanaNOAParserListener.py | 16 +- wwpdb/utils/nmr/mr/CyanaNOAReader.py | 6 + wwpdb/utils/nmr/mr/DynamoMRParserListener.py | 23 +- wwpdb/utils/nmr/mr/DynamoMRReader.py | 6 + wwpdb/utils/nmr/mr/GromacsMRParserListener.py | 14 +- wwpdb/utils/nmr/mr/GromacsMRReader.py | 6 + wwpdb/utils/nmr/mr/GromacsPTParserListener.py | 16 +- wwpdb/utils/nmr/mr/GromacsPTReader.py | 6 + wwpdb/utils/nmr/mr/IsdMRParserListener.py | 16 +- wwpdb/utils/nmr/mr/IsdMRReader.py | 6 + wwpdb/utils/nmr/mr/LexerErrorListener.py | 6 + wwpdb/utils/nmr/mr/ParserErrorListener.py | 6 + wwpdb/utils/nmr/mr/ParserListenerUtil.py | 30 +- wwpdb/utils/nmr/mr/RosettaMRParserListener.py | 20 +- wwpdb/utils/nmr/mr/RosettaMRReader.py | 6 + wwpdb/utils/nmr/mr/SybylMRParserListener.py | 16 +- wwpdb/utils/nmr/mr/SybylMRReader.py | 6 + wwpdb/utils/nmr/mr/XplorMRParserListener.py | 27 +- wwpdb/utils/nmr/mr/XplorMRReader.py | 6 + wwpdb/utils/nmr/nef/NEFTranslator.py | 10 +- wwpdb/utils/nmr/pk/AriaPKParserListener.py | 6 + wwpdb/utils/nmr/pk/AriaPKReader.py | 6 + wwpdb/utils/nmr/pk/BasePKParserListener.py | 15 +- wwpdb/utils/nmr/pk/NmrPipePKParserListener.py | 12 +- wwpdb/utils/nmr/pk/NmrPipePKReader.py | 6 + wwpdb/utils/nmr/pk/NmrViewPKParserListener.py | 18 +- wwpdb/utils/nmr/pk/NmrViewPKReader.py | 6 + wwpdb/utils/nmr/pk/SparkyPKParserListener.py | 18 +- wwpdb/utils/nmr/pk/SparkyPKReader.py | 6 + wwpdb/utils/nmr/pk/TopSpinPKParserListener.py | 6 + wwpdb/utils/nmr/pk/TopSpinPKReader.py | 6 + wwpdb/utils/nmr/pk/VnmrPKParserListener.py | 18 +- wwpdb/utils/nmr/pk/VnmrPKReader.py | 6 + wwpdb/utils/nmr/pk/XeasyPKParserListener.py | 12 +- wwpdb/utils/nmr/pk/XeasyPKReader.py | 6 + wwpdb/utils/nmr/pk/XeasyPROTParserListener.py | 16 +- wwpdb/utils/nmr/pk/XeasyPROTReader.py | 6 + wwpdb/utils/nmr/pk/XwinNmrPKParserListener.py | 6 + wwpdb/utils/nmr/pk/XwinNmrPKReader.py | 6 + wwpdb/utils/nmr/rci/RCI.py | 16 +- 68 files changed, 1682 insertions(+), 1138 deletions(-) diff --git a/wwpdb/utils/nmr/AlignUtil.py b/wwpdb/utils/nmr/AlignUtil.py index 
5b6ad3f4..b6854c9f 100644 --- a/wwpdb/utils/nmr/AlignUtil.py +++ b/wwpdb/utils/nmr/AlignUtil.py @@ -6,6 +6,12 @@ """ Utilities for pairwise alignment. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import copy import json import re @@ -444,7 +450,7 @@ def getGaugeCode(seqIdList: List[int], offset: int = 0) -> str: break _offset += 1 - if lastSeqId is None or nextSeqId is None or nextSeqId > lastSeqId + 1: + if None in (lastSeqId, nextSeqId) or nextSeqId > lastSeqId + 1: p = idx + offset gaugeCode = gaugeCode[0:p] + ' ' + gaugeCode[p:] offset += 1 @@ -917,7 +923,7 @@ def syncCompIdOfPolySeqRst(polySeqRst: List[dict], compIdMap: dict): """ Synchronize residue names of polymer sequence of the current MR file. """ - if polySeqRst is None or compIdMap is None or len(polySeqRst) == 0 or len(compIdMap) == 0: + if None in (polySeqRst, compIdMap) or len(polySeqRst) == 0 or len(compIdMap) == 0: return for ps in polySeqRst: @@ -1007,7 +1013,7 @@ def alignPolymerSequence(pA, polySeqModel: List[dict], polySeqRst: List[dict], seqAlign, compIdMapping = [], [] - if pA is None or polySeqModel is None or polySeqRst is None: + if None in (pA, polySeqModel, polySeqRst): return seqAlign, compIdMapping tabooList, inhibitList = [], [] @@ -1130,7 +1136,7 @@ def alignPolymerSequence(pA, polySeqModel: List[dict], polySeqRst: List[dict], for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: + if None in (s_p, s_q) or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: continue if s_p + 1 != s_q: beg = s2['seq_id'].index(s_p) @@ -1160,7 +1166,7 @@ def alignPolymerSequence(pA, polySeqModel: List[dict], polySeqRst: List[dict], for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in __s2['seq_id'] or s_q not in __s2['seq_id']: + if None in (s_p, s_q) or s_p not in __s2['seq_id'] or s_q not in __s2['seq_id']: continue if s_p + 1 != s_q: beg = __s2['seq_id'].index(s_p) @@ -1197,7 +1203,7 @@ def alignPolymerSequence(pA, polySeqModel: List[dict], polySeqRst: List[dict], for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: + if None in (s_p, s_q) or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: continue if s_p + 1 != s_q: idx1 = idx2 = 0 @@ -1435,7 +1441,7 @@ def alignPolymerSequenceWithConflicts(pA, polySeqModel: List[dict], polySeqRst: seqAlign, compIdMapping = [], [] - if pA is None or polySeqModel is None or polySeqRst is None: + if None in (pA, polySeqModel, polySeqRst): return seqAlign, compIdMapping truncated = None @@ -1513,7 +1519,7 @@ def alignPolymerSequenceWithConflicts(pA, polySeqModel: List[dict], polySeqRst: for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: + if None in (s_p, s_q) or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: continue if s_p + 1 != s_q: beg = s2['seq_id'].index(s_p) @@ -1543,7 +1549,7 @@ def alignPolymerSequenceWithConflicts(pA, polySeqModel: List[dict], polySeqRst: for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = 
s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in __s2['seq_id'] or s_q not in __s2['seq_id']: + if None in (s_p, s_q) or s_p not in __s2['seq_id'] or s_q not in __s2['seq_id']: continue if s_p + 1 != s_q: beg = __s2['seq_id'].index(s_p) @@ -1580,7 +1586,7 @@ def alignPolymerSequenceWithConflicts(pA, polySeqModel: List[dict], polySeqRst: for p in range(len(s1[seq_id_name]) - 1): s_p = s1[seq_id_name][p] s_q = s1[seq_id_name][p + 1] - if s_p is None or s_q is None or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: + if None in (s_p, s_q) or s_p not in s2['seq_id'] or s_q not in s2['seq_id']: continue if s_p + 1 != s_q: idx1 = idx2 = 0 @@ -1749,7 +1755,7 @@ def assignPolymerSequence(pA, ccU, fileType: str, polySeqModel: List[dict], poly """ Assign polymer sequences of restraints. """ - if pA is None or polySeqModel is None or polySeqRst is None or seqAlign is None: + if None in (pA, polySeqModel, polySeqRst, seqAlign): return None, [] warnings = [] @@ -2041,7 +2047,7 @@ def trimSequenceAlignment(seqAlign: List[dict], chainAssign: List[dict]): """ Trim ineffective sequence alignments. """ - if seqAlign is None or chainAssign is None: + if None in (seqAlign, chainAssign): return ineffSeqAlignIdx = list(range(len(seqAlign) - 1, -1, -1)) @@ -2693,7 +2699,7 @@ def splitPolySeqRstForMultimers(pA, polySeqModel: List[dict], polySeqRst: List[d """ Split polymer sequence of the current MR file for multimers. """ - if polySeqModel is None or polySeqRst is None or chainAssign is None: + if None in (polySeqModel, polySeqRst, chainAssign): return None, None target_chain_ids = {} @@ -2858,7 +2864,7 @@ def splitPolySeqRstForExactNoes(pA, polySeqModel: List[dict], polySeqRst: List[d """ Split polymer sequence of the current MR file for eNOEs-guided multiple conformers. """ - if polySeqModel is None or polySeqRst is None or chainAssign is None: + if None in (polySeqModel, polySeqRst, chainAssign): return None, None, None target_chain_ids = {} @@ -3088,6 +3094,7 @@ def retrieveRemappedChainId(chainIdRemap: dict, seqId: int) -> Tuple[Optional[st def retrieveOriginalSeqIdFromMRMap(chainIdRemap: dict, chainId: str, seqId: int) -> int: """ Retrieve the original seq_id from mapping dictionary based on sequence alignments. """ + return next((_seqId for _seqId, remap in chainIdRemap.items() if remap['chain_id'] == chainId and remap['seq_id'] == seqId), seqId) @@ -3097,7 +3104,7 @@ def splitPolySeqRstForNonPoly(ccU, nonPolyModel: List[dict], polySeqRst: List[di """ Split polymer sequence of the current MR file for non-polymer. """ - if polySeqRst is None or nonPolyModel is None or seqAlign is None or chainAssign is None: + if None in (polySeqRst, nonPolyModel, seqAlign, chainAssign): return None, None comp_ids = set() @@ -3180,7 +3187,7 @@ def splitPolySeqRstForNonPoly(ccU, nonPolyModel: List[dict], polySeqRst: List[di for target, candidate in zip_longest(targets, candidates): - if target is None or candidate is None: + if None in (target, candidate): break test_chain_id = target['chain_id'] @@ -3229,7 +3236,7 @@ def splitPolySeqRstForBranched(pA, polySeqModel: List[dict], branchedModel: List """ Split polymer sequence of the current MR file for branched polymer. 
""" - if polySeqRst is None or polySeqModel is None or branchedModel is None or chainAssign is None: + if None in (polySeqRst, polySeqModel, branchedModel, chainAssign): return None, None target_chain_ids = {} diff --git a/wwpdb/utils/nmr/BMRBChemShiftStat.py b/wwpdb/utils/nmr/BMRBChemShiftStat.py index 9bfa8f98..87e80c89 100644 --- a/wwpdb/utils/nmr/BMRBChemShiftStat.py +++ b/wwpdb/utils/nmr/BMRBChemShiftStat.py @@ -21,6 +21,12 @@ """ Wrapper class for retrieving BMRB chemical shift statistics. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os import csv @@ -2318,6 +2324,7 @@ def __updateCompIdSet(self): def updateStatCsvFiles(self) -> bool: """ Update BMRB chemical shift statistics. """ + import requests # pylint: disable=import-outside-toplevel import datetime # pylint: disable=import-outside-toplevel from dateutil.parser import parse as parsedate # pylint: disable=import-outside-toplevel diff --git a/wwpdb/utils/nmr/ChemCompUtil.py b/wwpdb/utils/nmr/ChemCompUtil.py index 880f273f..96150ad7 100644 --- a/wwpdb/utils/nmr/ChemCompUtil.py +++ b/wwpdb/utils/nmr/ChemCompUtil.py @@ -13,6 +13,12 @@ """ Wrapper class for retrieving chemical component dictionary. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import os import sys import pickle @@ -153,21 +159,24 @@ def load_dict_from_pickle(file_name): self.__cachedDict = load_dict_from_pickle(self.__cacheFile) self.__failedCompId = [] - def updateChemCompDict(self, compId: str) -> bool: + def updateChemCompDict(self, compId: str, ligand: bool = True) -> bool: """ Update CCD information for a given comp_id. @return: True for successfully update CCD information or False for the case a given comp_id does not exist in CCD """ - if compId in emptyValue or not ccd_id_pattern.match(compId) or is_reserved_lig_code(compId): + if compId in emptyValue: return False compId = compId.upper() + if not ccd_id_pattern.match(compId) or (not ligand and is_reserved_lig_code(compId)): + return False + if compId in self.__failedCompId: return False if compId != self.lastCompId: - self.lastStatus = False if '_' in compId else self.__ccR.setCompId(compId) + self.lastStatus = False if '_' in compId else self.__ccR.setCompId(compId, ligand) self.lastCompId = compId if self.lastStatus: @@ -468,6 +477,7 @@ def getAtomsBasedOnGreekLetterSystem(self, compId: str, atomId: str) -> List[str def hasBond(self, compId: str, atomId1: str, atomId2: str) -> bool: """ Return whether given two atoms are connected by a covalent bond. """ + return atomId2 in self.getBondedAtoms(compId, atomId1) def peptideLike(self, compId: Optional[str] = None) -> bool: diff --git a/wwpdb/utils/nmr/CifToNmrStar.py b/wwpdb/utils/nmr/CifToNmrStar.py index d490987b..b85f0d75 100644 --- a/wwpdb/utils/nmr/CifToNmrStar.py +++ b/wwpdb/utils/nmr/CifToNmrStar.py @@ -15,6 +15,12 @@ """ Wrapper class for CIF to NMR-STAR converter. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os import re @@ -67,7 +73,7 @@ def get_value_safe(d: Optional[Union[dict, list, tuple]] = None, key: Optional = @return: value for a key, None (by default) otherwise """ - if d is None or key is None: + if None in (d, key): return default return d.get(key, default) @@ -270,7 +276,7 @@ def convert(self, cifPath: Optional[str] = None, strPath: Optional[str] = None, """ Convert CIF formatted NMR data file to normalized NMR-STAR file. """ - if cifPath is None or strPath is None: + if None in (cifPath, strPath): return False try: @@ -639,7 +645,7 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm try: if self.schema[fqtn]['entryIdFlg'] == 'Y': - sf.add_tag(entry_id_tag, entryId) + set_sf_tag(sf, entry_id_tag, entryId) modified = True except KeyError: pass @@ -659,12 +665,19 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - entry_id_tag = lp.category + '.Entry_ID' + if 'Entry_ID' not in lp.tags: + entry_id_tag = lp.category + '.Entry_ID' - lp.add_tag(entry_id_tag) + lp.add_tag(entry_id_tag) - for row in lp: - row.append(entryId) + for row in lp: + row.append(entryId) + + else: + col = lp.tags.index('Entry_ID') + + for row in lp: + row[col] = entryId modified = True @@ -692,7 +705,7 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm try: if self.schema[fqtn]['entryIdFlg'] == 'Y': - sf.add_tag(entry_id_tag, entryId) + set_sf_tag(sf, entry_id_tag, entryId) modified = True except KeyError: pass @@ -712,12 +725,19 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - entry_id_tag = lp.category + '.Entry_ID' + if 'Entry_ID' not in lp.tags: + entry_id_tag = lp.category + '.Entry_ID' - lp.add_tag(entry_id_tag) + lp.add_tag(entry_id_tag) - for row in lp: - row.append(entryId) + for row in lp: + row.append(entryId) + + else: + col = lp.tags.index('Entry_ID') + + for row in lp: + row[col] = entryId modified = True @@ -739,12 +759,19 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - entry_id_tag = lp.category + '.Entry_ID' + if 'Entry_ID' not in lp.tags: + entry_id_tag = lp.category + '.Entry_ID' + + lp.add_tag(entry_id_tag) + + for row in lp: + row.append(entryId) - lp.add_tag(entry_id_tag) + else: + col = lp.tags.index('Entry_ID') - for row in lp: - row.append(entryId) + for row in lp: + row[col] = entryId modified = True @@ -801,6 +828,9 @@ def normalize(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynmrst """ Wrapper function of normalize_str() and normalize_nef(). """ + if strData is None: + return strData + try: sf = strData.frame_list[0] if sf.category.startswith('nef'): @@ -814,9 +844,13 @@ def normalize_str(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pyn @see: pynmrstar.entry.normalize """ + if strData is None: + return strData + def sf_key(sf): """ Helper function to sort the saveframes. - Returns (category order, saveframe order) """ + Returns (category order, saveframe order) + """ # If not a real category, generate an artificial but stable order > the real saveframes try: @@ -841,7 +875,8 @@ def sf_key(sf): def lp_key(lp): """ Helper function to sort the loops. 
- Returns (category order) """ + Returns (category order) + """ try: category_order = self.category_order.index(lp.category) @@ -870,9 +905,13 @@ def normalize_nef(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pyn """ Sort saveframes of NEF. """ + if strData is None: + return strData + def sf_key(sf): """ Helper function to sort the saveframes. - Returns (saveframe order) """ + Returns (saveframe order) + """ try: category_order = self.category_order_nef.index(sf.tag_prefix) diff --git a/wwpdb/utils/nmr/NmrDpReport.py b/wwpdb/utils/nmr/NmrDpReport.py index 9f0b9e0d..36806916 100644 --- a/wwpdb/utils/nmr/NmrDpReport.py +++ b/wwpdb/utils/nmr/NmrDpReport.py @@ -94,6 +94,12 @@ """ Wrapper class for NMR data processing report. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "4.1.1" + import sys import json import copy diff --git a/wwpdb/utils/nmr/NmrDpUtility.py b/wwpdb/utils/nmr/NmrDpUtility.py index 8d761aa9..7fa7f92f 100644 --- a/wwpdb/utils/nmr/NmrDpUtility.py +++ b/wwpdb/utils/nmr/NmrDpUtility.py @@ -208,6 +208,12 @@ """ Wrapper class for NMR data processing. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "4.1.1" + import sys import os import itertools @@ -1794,7 +1800,7 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr): self.__csStat = BMRBChemShiftStat(self.__verbose, self.__lfh, self.__ccU) # CifToNmrStar - self.__c2S = CifToNmrStar(self.__verbose) + self.__c2S = CifToNmrStar(self.__lfh) # NEFTranslator self.__nefT = NEFTranslator(self.__verbose, self.__lfh, self.__ccU, self.__csStat, self.__c2S) @@ -8449,13 +8455,13 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ j -= 1 j += 1 - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: ofh.write('data_' + os.path.basename(srcPath) + '\n\n') for line in ifh: - if i <= j: + if i < j: ofh.write(line) i += 1 @@ -8515,7 +8521,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ line_num = int(g[0]) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: @@ -8557,7 +8563,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ line_num = int(g[0]) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: @@ -8708,7 +8714,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ line_num = int(g[0]) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: @@ -8862,7 +8868,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ pass_loop = False lp_loc = -1 - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh: for line in ifh: @@ -8900,7 +8906,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ if 'sf_category' not in target: ignored_loop_locations.extend(list(range(target['loop_location'], target['stop_location'] + 1))) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: @@ -8962,7 +8968,7 @@ def 
__fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ pass_sf_framecode = pass_category_1 = pass_category_2 = pass_sf_loop = False - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh: for line in ifh: @@ -9040,7 +9046,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ if target['category_type_2'] == 'loop': loop_category_locations.extend(_range) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: @@ -9108,10 +9114,10 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ self.__lfh.write(f"+{self.__class_name__}.__validateInputSource() ++ Warning - {warn}\n") if __pynmrstar_v3_3__: - msg_pattern = re.compile(r'^.*' + msg_template + r" Error occurred in tag _\S+ with value (\S+) which conflicts with the saveframe name (\S+)\. " + msg_pattern = re.compile(r'^.*' + msg_template + r" Error occurred in tag _\S+ with value ([\S ]+) which conflicts with the saveframe name (\S+)\. " r"Error detected on line (\d+).*$") else: - msg_pattern = re.compile(r'^.*' + msg_template + r" Error occurred in tag _\S+ with value (\S+) which conflicts with.* the saveframe name (\S+)\. " + msg_pattern = re.compile(r'^.*' + msg_template + r" Error occurred in tag _\S+ with value ([\S ]+) which conflicts with.* the saveframe name (\S+)\. " r"Error detected on line (\d+).*$") try: @@ -9122,13 +9128,16 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ saveframe_name = g[1] line_num = int(g[2]) - i = 1 + i = 0 with open(_srcPath, 'r', encoding='utf-8') as ifh, \ open(_srcPath + '~', 'w', encoding='utf-8') as ofh: for line in ifh: if i == line_num: - ofh.write(re.sub(sf_framecode + r'\s$', saveframe_name + r'\n', line)) + if sf_framecode not in emptyValue: + ofh.write(re.sub(r'["\']?' 
+ sf_framecode + r'["\']?\s*$', saveframe_name + r'\n', line)) + else: + ofh.write(re.sub(rf'\{sf_framecode}\s*$', saveframe_name + r'\n', line)) else: ofh.write(line) i += 1 @@ -9148,7 +9157,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ if not is_valid: - retry = len(message['error']) != len(_message['error']) + retry = len(message['error']) != len(_message['error']) or message['error'] != _message['error'] if not retry: @@ -9157,7 +9166,7 @@ def __fixFormatIssueOfInputSource(self, file_list_id: int, file_name: str, file_ retry = True break - if retry and len_tmp_paths < 10: + if retry and len_tmp_paths < 40: return self.__fixFormatIssueOfInputSource(file_list_id, file_name, file_type, _srcPath, fileSubType, _message, tmpPaths, allowEmpty) @@ -9371,7 +9380,7 @@ def __rescueFormerNef(self, file_list_id: int) -> bool: sf_category = self.sf_categories[file_type][content_subtype] lp_category = self.lp_categories[file_type][content_subtype] - if sf_category is None or lp_category is None: + if None in (sf_category, lp_category): continue for sf in self.__star_data[file_list_id].get_saveframes_by_category(sf_category): @@ -9401,7 +9410,7 @@ def __rescueFormerNef(self, file_list_id: int) -> bool: sf_category = self.sf_categories[file_type][content_subtype] lp_category = self.lp_categories[file_type][content_subtype] - if sf_category is None or lp_category is None: + if None in (sf_category, lp_category): continue if self.__star_data_type[file_list_id] == 'Loop': @@ -9744,7 +9753,7 @@ def __rescueImmatureStr(self, file_list_id: int) -> bool: if content_subtype.startswith('spectral_peak'): lp_category = self.aux_lp_categories[file_type][content_subtype][0] # _Spectral_dim - if sf_category is None or lp_category is None: + if None in (sf_category, lp_category): continue if self.__star_data_type[file_list_id] == 'Loop': @@ -11957,7 +11966,7 @@ def __detectContentSubTypeOfLegacyMr(self) -> bool: for (i, j) in itertools.combinations(range(0, ar_path_len), 2): - if md5_list[i] is None or md5_list[j] is None: + if None in (md5_list[i], md5_list[j]): continue if md5_list[i] == md5_list[j]: @@ -17605,7 +17614,7 @@ def __isConsistentSequence(self) -> bool: subtype1 = subtype_pair[0] # poly_seq will appear only on subtype1 subtype2 = subtype_pair[1] - if subtype1 is None or subtype2 is None: + if None in (subtype1, subtype2): continue # reference polymer sequence exists @@ -17752,7 +17761,7 @@ def __testSequenceConsistency(self) -> bool: subtype1 = subtype_pair[0] # poly_seq will appear only on subtype1 subtype2 = subtype_pair[1] - if subtype1 is None or subtype2 is None: + if None in (subtype1, subtype2): continue lp_category2 = self.lp_categories[file_type][subtype2] @@ -21833,7 +21842,7 @@ def ext_atoms(row): if val_1 is None and val_2 is None: continue - if val_1 is None or val_2 is None: + if None in (val_1, val_2): redundant = False continue @@ -22123,7 +22132,7 @@ def ext_atom_names(row): elif atom_id == 'CB': cb_chem_shift_1 = _row[value_name] - if ca_chem_shift_1 is None or cb_chem_shift_1 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1): if _row[chain_id_name] == chain_id_1 and _row[seq_id_name] > seq_id_1: break else: @@ -22144,7 +22153,7 @@ def ext_atom_names(row): elif atom_id == 'CB': cb_chem_shift_2 = _row[value_name] - if ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_2, cb_chem_shift_2): if _row[chain_id_name] == chain_id_2 and _row[seq_id_name] > seq_id_2: break else: @@ -22294,7 +22303,7 @@ def 
ext_atom_names(row): elif atom_id == 'CB': cb_chem_shift_1 = _row[value_name] - if ca_chem_shift_1 is None or cb_chem_shift_1 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1): if _row[chain_id_name] == chain_id_1 and _row[seq_id_name] > seq_id_1: break else: @@ -22315,7 +22324,7 @@ def ext_atom_names(row): elif atom_id == 'CB': cb_chem_shift_2 = _row[value_name] - if ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_2, cb_chem_shift_2): if _row[chain_id_name] == chain_id_2 and _row[seq_id_name] > seq_id_2: break else: @@ -22847,7 +22856,7 @@ def __testDataConsistencyInAuxLoopOfSpectralPeak(self, file_name: str, file_type for row in lp_data: for j in range(num_dim): - if min_points[j] is None or max_points[j] is None: + if None in (min_points[j], max_points[j]): continue position = row[position_names[j]] @@ -22866,7 +22875,7 @@ def __testDataConsistencyInAuxLoopOfSpectralPeak(self, file_name: str, file_type if self.__verbose: self.__lfh.write(f"+{self.__class_name__}.__testDataConsistencyInAuxLoopOfSpectralPeak() ++ Warning - {err}\n") - if min_limits[j] is None or max_limits[j] is None: + if None in (min_limits[j], max_limits[j]): continue if position < min_limits[j] or position > max_limits[j]: @@ -23013,7 +23022,7 @@ def __testDataConsistencyInAuxLoopOfSpectralPeakAlt(self, file_name: str, file_t j = row[dim_id_name] - 1 - if j >= num_dim or min_points[j] is None or max_points[j] is None: + if j >= num_dim or None in (min_points[j], max_points[j]): continue position = row[position_name] @@ -23032,7 +23041,7 @@ def __testDataConsistencyInAuxLoopOfSpectralPeakAlt(self, file_name: str, file_t if self.__verbose: self.__lfh.write(f"+{self.__class_name__}.__testDataConsistencyInAuxLoopOfSpectralPeakAlt() ++ Warning - {warn}\n") - if min_limits[j] is None or max_limits[j] is None: + if None in (min_limits[j], max_limits[j]): continue if position < min_limits[j] or position > max_limits[j]: @@ -25307,7 +25316,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and br_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and br_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in br_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in br_seq_align @@ -25318,7 +25327,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and np_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and np_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in np_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in np_seq_align @@ -25350,7 +25359,7 @@ def get_label_seq_scheme(chain_id, seq_id): in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if ref_seq_id == seq_id), (None, None)) - if (auth_asym_id is None or auth_seq_id is None) and br_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and br_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in br_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in br_seq_align @@ -25364,7 +25373,7 @@ def 
get_label_seq_scheme(chain_id, seq_id): in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if ref_seq_id == seq_id), (None, None)) - if (auth_asym_id is None or auth_seq_id is None) and np_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and np_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in np_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in np_seq_align @@ -30127,7 +30136,7 @@ def __getCoordBondLength(self, cif_chain_id_1: str, cif_seq_id_1: int, cif_atom_ a_1 = next((a for a in atom_site_1 if a['model_id'] == model_id), None) a_2 = next((a for a in atom_site_2 if a['model_id'] == model_id), None) - if a_1 is None or a_2 is None: + if None in (a_1, a_2): continue bond.append({'model_id': model_id, 'distance': float(f"{distance(to_np_array(a_1), to_np_array(a_2)):.3f}")}) @@ -30876,7 +30885,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and br_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and br_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in br_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in br_seq_align @@ -30887,7 +30896,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and np_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and np_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in np_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in np_seq_align @@ -31552,7 +31561,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id += offset break - if (auth_asym_id is None or auth_seq_id is None) and br_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and br_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in br_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in br_seq_align @@ -31562,7 +31571,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and np_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and np_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in np_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in np_seq_align @@ -31572,7 +31581,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if auth_asym_id is None or auth_seq_id is None: + if None in (auth_asym_id, auth_seq_id): entity_id_name = key_entity_id_names[d] if entity_id_name not in loop.tags: continue @@ -36366,7 +36375,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and 
br_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and br_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in br_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in br_seq_align @@ -36377,7 +36386,7 @@ def get_auth_seq_scheme(chain_id, seq_id): auth_seq_id = next((ref_seq_id for ref_seq_id, test_seq_id in zip(sa[_ref_seq_id_name], sa['test_seq_id']) if test_seq_id == seq_id), None) - if (auth_asym_id is None or auth_seq_id is None) and np_seq_align is not None: + if None in (auth_asym_id, auth_seq_id) and np_seq_align is not None: auth_asym_id = next((ca['ref_chain_id'] for ca in np_chain_assign if ca['test_chain_id'] == chain_id), None) if auth_asym_id is not None: sa = next((sa for sa in np_seq_align @@ -38427,7 +38436,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s break - if (not has_cs_stat) or std_value is None or std_value <= 0.0 or avg_value is None: + if (not has_cs_stat) or None in (std_value, avg_value) or std_value <= 0.0: continue z_score = (value - avg_value) / std_value @@ -38520,7 +38529,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif atom_id == 'CB': cb_chem_shift = row[value_name] - if ca_chem_shift is None or cb_chem_shift is None: + if None in (ca_chem_shift, cb_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -38603,7 +38612,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif atom_id == 'CG': cg_chem_shift = row[value_name] - if cb_chem_shift is None or cg_chem_shift is None: + if None in (cb_chem_shift, cg_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -38715,7 +38724,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif atom_id == 'NE2': ne2_chem_shift = row[value_name] - if cg_chem_shift is None or cd2_chem_shift is None or nd1_chem_shift is None or ne2_chem_shift is None: + if None in (cg_chem_shift, cd2_chem_shift, nd1_chem_shift, ne2_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -38827,7 +38836,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif _atom_id == 'CG2': cg2_chem_shift = row[value_name] - if cg1_chem_shift is None or cg2_chem_shift is None: + if None in (cg1_chem_shift, cg2_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -38920,7 +38929,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif _atom_id == 'CD2': cd2_chem_shift = row[value_name] - if cd1_chem_shift is None or cd2_chem_shift is None: + if None in (cd1_chem_shift, cd2_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -39165,7 +39174,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id: int, sf_framecode: s elif atom_id == 'CB': cb_chem_shift = row[value_name] - if ca_chem_shift is None or cb_chem_shift is None: + if None in (ca_chem_shift, cb_chem_shift): if row[chain_id_name] == _chain_id and row[seq_id_name] > seq_id: break else: @@ -39773,7 +39782,7 @@ def get_est_target_value(row): target_value_1 = get_est_target_value(row_1) target_value_2 = get_est_target_value(row_2) - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): continue if target_value_1 == target_value_2: @@ -39853,7 
+39862,7 @@ def get_est_target_value(row): if target_value_1 is None and target_value_2 is None: continue - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): redundant = False continue @@ -41046,7 +41055,7 @@ def ext_atoms(row): phi['seq_id'] = seq_id_common[0][0] phi['comp_id'] = comp_id_common[0][0] phi['value'] = target_value - phi['error'] = None if lower_limit is None or upper_limit is None else [lower_limit, upper_limit] + phi['error'] = None if None in (lower_limit, upper_limit) else [lower_limit, upper_limit] phi_list.append(phi) elif data_type.startswith('psi_'): @@ -41055,7 +41064,7 @@ def ext_atoms(row): psi['seq_id'] = seq_id_common[0][0] psi['comp_id'] = comp_id_common[0][0] psi['value'] = target_value - psi['error'] = None if lower_limit is None or upper_limit is None else [lower_limit, upper_limit] + psi['error'] = None if None in (lower_limit, upper_limit) else [lower_limit, upper_limit] psi_list.append(psi) elif data_type.startswith('chi1_'): @@ -41064,7 +41073,7 @@ def ext_atoms(row): chi1['seq_id'] = seq_ids[0] chi1['comp_id'] = comp_ids[0] chi1['value'] = target_value - chi1['error'] = None if lower_limit is None or upper_limit is None else [lower_limit, upper_limit] + chi1['error'] = None if None in (lower_limit, upper_limit) else [lower_limit, upper_limit] chi1_list.append(chi1) elif data_type.startswith('chi2_'): @@ -41073,7 +41082,7 @@ def ext_atoms(row): chi2['seq_id'] = seq_ids[0] chi2['comp_id'] = comp_ids[0] chi2['value'] = target_value - chi2['error'] = None if lower_limit is None or upper_limit is None else [lower_limit, upper_limit] + chi2['error'] = None if None in (lower_limit, upper_limit) else [lower_limit, upper_limit] chi2_list.append(chi2) # detect weight @@ -41261,7 +41270,7 @@ def ext_atoms(row): target_value_1 = get_est_target_value(row_1) target_value_2 = get_est_target_value(row_2) - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): continue while target_value_1 > 180.0: @@ -41360,7 +41369,7 @@ def ext_atoms(row): if target_value_1 is None and target_value_2 is None: continue - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): redundant = False continue @@ -41777,7 +41786,7 @@ def ext_atom_names(row): target_value_1 = get_est_target_value(row_1) target_value_2 = get_est_target_value(row_2) - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): continue if target_value_1 == target_value_2: @@ -41846,7 +41855,7 @@ def ext_atom_names(row): if target_value_1 is None and target_value_2 is None: continue - if target_value_1 is None or target_value_2 is None: + if None in (target_value_1, target_value_2): redundant = False continue @@ -42042,7 +42051,7 @@ def __calculateStatsOfSpectralPeak(self, file_list_id: int, sf_framecode: str, n sp_freq = None if center_point is None: - center_point = None if first_point is None or sp_width is None else (first_point - sp_width / 2.0) + center_point = None if None in (first_point, sp_width) else (first_point - sp_width / 2.0) if under_sampling_type is not None and under_sampling_type in emptyValue: under_sampling_type = None @@ -42073,9 +42082,9 @@ def __calculateStatsOfSpectralPeak(self, file_list_id: int, sf_framecode: str, n center_point /= sp_freq sp_width /= sp_freq - last_point = None if first_point is None or sp_width is None else (first_point - sp_width) + last_point = None if None in (first_point, sp_width) else 
(first_point - sp_width) - if center_point is None or last_point is None: + if None in (center_point, last_point): spectral_region = atom_type elif atom_type == 'H': if mag_link_id is None: @@ -42121,7 +42130,7 @@ def __calculateStatsOfSpectralPeak(self, file_list_id: int, sf_framecode: str, n _sp_freq = None if _center_point is None: - _center_point = None if _first_point is None or _sp_width is None else (_first_point - _sp_width / 2.0) + _center_point = None if None in (_first_point, _sp_width) else (_first_point - _sp_width / 2.0) if _axis_unit == 'Hz' and _sp_freq is not None and _first_point is not None\ and _center_point is not None and _sp_width is not None: @@ -42129,9 +42138,9 @@ def __calculateStatsOfSpectralPeak(self, file_list_id: int, sf_framecode: str, n _center_point /= _sp_freq _sp_width /= _sp_freq - _last_point = None if _first_point is None or _sp_width is None else (_first_point - _sp_width) + _last_point = None if None in (_first_point, _sp_width) else (_first_point - _sp_width) - if _center_point is None or _last_point is None: + if None in (_center_point, _last_point): spectral_region = 'H' elif _center_point > 100.0 and _sp_width < 60.0: spectral_region = 'H-aromatic' @@ -42301,7 +42310,7 @@ def __calculateStatsOfSpectralPeakAlt(self, file_list_id: int, sf_framecode: str sp_freq = None if center_point is None: - center_point = None if first_point is None or sp_width is None else (first_point - sp_width / 2.0) + center_point = None if None in (first_point, sp_width) else (first_point - sp_width / 2.0) if under_sampling_type is not None and under_sampling_type in emptyValue: under_sampling_type = None @@ -42331,9 +42340,9 @@ def __calculateStatsOfSpectralPeakAlt(self, file_list_id: int, sf_framecode: str center_point /= sp_freq sp_width /= sp_freq - last_point = None if first_point is None or sp_width is None else (first_point - sp_width) + last_point = None if None in (first_point, sp_width) else (first_point - sp_width) - if center_point is None or last_point is None: + if None in (center_point, last_point): spectral_region = atom_type elif atom_type == 'H': if mag_link_id is None: @@ -42366,16 +42375,16 @@ def __calculateStatsOfSpectralPeakAlt(self, file_list_id: int, sf_framecode: str _sp_freq = None if _center_point is None: - _center_point = None if _first_point is None or _sp_width is None else (_first_point - _sp_width / 2.0) + _center_point = None if None in (_first_point, _sp_width) else (_first_point - _sp_width / 2.0) if _axis_unit == 'Hz' and _sp_freq is not None and _first_point is not None and _center_point is not None and _sp_width is not None: _first_point /= _sp_freq _center_point /= _sp_freq _sp_width /= _sp_freq - _last_point = None if _first_point is None or _sp_width is None else (_first_point - _sp_width) + _last_point = None if None in (_first_point, _sp_width) else (_first_point - _sp_width) - if _center_point is None or _last_point is None: + if None in (_center_point, _last_point): spectral_region = 'H' elif _center_point > 100.0 and _sp_width < 60.0: spectral_region = 'H-aromatic' @@ -45094,7 +45103,7 @@ def __assignCoordPolymerSequence(self) -> bool: for p in range(len(_s2['auth_seq_id']) - 1): s_p = _s2['auth_seq_id'][p] s_q = _s2['auth_seq_id'][p + 1] - if s_p is None or s_q is None or s_p + 1 == s_q: + if None in (s_p, s_q) or s_p + 1 == s_q: continue for s_o in range(s_p + 1, s_q): if s_o in __s1['seq_id']: @@ -45622,7 +45631,7 @@ def __assignCoordPolymerSequence(self) -> bool: for p in range(len(_s1['auth_seq_id']) - 1): s_p = 
_s1['auth_seq_id'][p] s_q = _s1['auth_seq_id'][p + 1] - if s_p is None or s_q is None or s_p + 1 == s_q: + if None in (s_p, s_q) or s_p + 1 == s_q: continue for s_o in range(s_p + 1, s_q): if s_o in __s2['seq_id']: @@ -46696,7 +46705,7 @@ def get_coord_atom_site_of(chain_id, seq_id, comp_id): in zip(cif_ps['seq_id'], cif_ps['comp_id']) if _seq_id == seq_id), (None, None)) - if cif_seq_id is None or cif_comp_id is None: + if None in (cif_seq_id, cif_comp_id): continue else: @@ -51906,7 +51915,7 @@ def __mapCoordDisulfideBond2Nmr(self, bond_list) -> bool: nmr_chain_id_1, nmr_seq_id_1, nmr_comp_id_1, nmr_chain_id_2, nmr_seq_id_2, nmr_comp_id_2) - if ca_chem_shift_1 is None or cb_chem_shift_1 is None or ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1, ca_chem_shift_2, cb_chem_shift_2): pass else: break @@ -52063,7 +52072,7 @@ def __mapCoordDisulfideBond2Nmr__(self, file_name: str, file_type: str, content_ elif atom_id == 'CB' and cb_chem_shift_2 is None: cb_chem_shift_2 = row[value_name] - if ca_chem_shift_1 is None or cb_chem_shift_1 is None or ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1, ca_chem_shift_2, cb_chem_shift_2): pass else: break @@ -52302,7 +52311,7 @@ def __mapCoordOtherBond2Nmr(self, bond_list: List[dict]) -> bool: nmr_chain_id_1, nmr_seq_id_1, nmr_comp_id_1, nmr_chain_id_2, nmr_seq_id_2, nmr_comp_id_2) - if ca_chem_shift_1 is None or cb_chem_shift_1 is None or ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1, ca_chem_shift_2, cb_chem_shift_2): pass else: break @@ -52459,7 +52468,7 @@ def __mapCoordOtherBond2Nmr__(self, file_name: str, file_type: str, content_subt elif atom_id == 'CB' and cb_chem_shift_2 is None: cb_chem_shift_2 = row[value_name] - if ca_chem_shift_1 is None or cb_chem_shift_1 is None or ca_chem_shift_2 is None or cb_chem_shift_2 is None: + if None in (ca_chem_shift_1, cb_chem_shift_1, ca_chem_shift_2, cb_chem_shift_2): pass else: break diff --git a/wwpdb/utils/nmr/NmrVrptUtility.py b/wwpdb/utils/nmr/NmrVrptUtility.py index 94e6abd6..00c7c234 100644 --- a/wwpdb/utils/nmr/NmrVrptUtility.py +++ b/wwpdb/utils/nmr/NmrVrptUtility.py @@ -15,6 +15,12 @@ @author: Masashi Yokochi @note: This class is alternative implementation of wwpdb.apps.validation.src.RestraintValidation.BMRBRestraintsAnalysis """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi, Kumaran Baskaran" +__email__ = "yokochi@protein.osaka-u.ac.jp, baskaran@uchc.edu" +__license__ = "Apache License 2.0" +__version__ = "v1.2" + import os import sys import gzip @@ -226,6 +232,7 @@ def dist_error(lower_bound: Optional[float], upper_bound: Optional[float], dist: """ Return distance outlier for given lower_bound and upper_bound. @author: Masashi Yokochi """ + error = 0.0 try: @@ -338,6 +345,7 @@ def angle_diff(x: float, y: float) -> float: @author: Kumaran Baskaran @see: wwpdb.apps.validation.src.RestraintValidation.BMRBRestraintsAnalysis.angle_diff.ac """ + if x < 0.0: x += 360.0 if y < 0.0: @@ -363,6 +371,7 @@ def check_angle_range_overlap(x, y, c, g, t: float = 0.5): @author: Kumaran Baskaran @see: wwpdb.apps.validation.src.RestraintValidation.BMRBRestraintsAnalysis.angle_diff.check_ac """ + l = angle_diff(x, c) # noqa: E741 r = angle_diff(y, c) @@ -432,6 +441,7 @@ def rdc_error(lower_bound: Optional[float], upper_bound: Optional[float], rdc: f """ Return RDC outlier for given lower_bound and upper_bound. 
@author: Masashi Yokochi """ + error = 0.0 try: @@ -512,8 +522,6 @@ class NmrVrptUtility: """ Wrapper class for NMR restraint analysis. """ - __version__ = "v1.2" - def __init__(self, verbose: bool = False, log: IO = sys.stderr, cR: Optional[CifReader] = None, caC: Optional[dict] = None, ccU: Optional[ChemCompUtil] = None, csStat: Optional[BMRBChemShiftStat] = None): @@ -1430,7 +1438,7 @@ def __extractGenDistConstraint(self) -> bool: lower_linear_limit = r.get('lower_linear_limit') upper_linear_limit = r.get('upper_linear_limit') - if atom_id_1 is None or atom_id_2 is None\ + if None in (atom_id_1, atom_id_2)\ or not isinstance(auth_seq_id_1, int) or not isinstance(auth_seq_id_2, int): if 'HOH' not in (comp_id_1, comp_id_2): self.__lfh.write(f"+{self.__class_name__}.__extractGenDistConstraint() ++ Error - distance restraint {rest_key} {r} is not interpretable, " @@ -1671,7 +1679,7 @@ def __extractTorsionAngleConstraint(self) -> bool: ins_code_3 = r.get('ins_code_3', '?') ins_code_4 = r.get('ins_code_4', '?') - if atom_id_1 is None or atom_id_2 is None or atom_id_3 is None or atom_id_4 is None\ + if None in (atom_id_1, atom_id_2, atom_id_3, atom_id_4)\ or not isinstance(auth_seq_id_1, int) or not isinstance(auth_seq_id_2, int)\ or not isinstance(auth_seq_id_3, int) or not isinstance(auth_seq_id_4, int): if angle_type not in ('PPA', 'UNNAMED'): @@ -1850,7 +1858,7 @@ def __extractRdcConstraint(self) -> bool: ins_code_1 = r.get('ins_code_1', '?') ins_code_2 = r.get('ins_code_2', '?') - if atom_id_1 is None or atom_id_2 is None\ + if None in (atom_id_1, atom_id_2)\ or not isinstance(auth_seq_id_1, int) or not isinstance(auth_seq_id_2, int): self.__lfh.write(f"+{self.__class_name__}.__extractRdcConstraint() ++ Error - RDC restraint {rest_key} {r} is not interpretable, " f"{os.path.basename(self.__nmrDataPath)}.\n") diff --git a/wwpdb/utils/nmr/ann/BMRBAnnTasks.py b/wwpdb/utils/nmr/ann/BMRBAnnTasks.py index 9d53f66f..66e70032 100644 --- a/wwpdb/utils/nmr/ann/BMRBAnnTasks.py +++ b/wwpdb/utils/nmr/ann/BMRBAnnTasks.py @@ -6,6 +6,12 @@ """ Wrapper class for BMRB annotation tasks. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -88,7 +94,7 @@ def __init__(self, verbose: bool, log: IO, self.__csStat = BMRBChemShiftStat(verbose, log, self.__ccU) if csStat is None else csStat # CifToNmrStar - self.__c2S = CifToNmrStar(verbose) if c2S is None else c2S + self.__c2S = CifToNmrStar(log) if c2S is None else c2S self.__defSfLabelTag = ['_Assigned_chem_shift_list.Sample_condition_list_label', '_Assigned_chem_shift_list.Chem_shift_reference_label', diff --git a/wwpdb/utils/nmr/ann/OneDepAnnTasks.py b/wwpdb/utils/nmr/ann/OneDepAnnTasks.py index e48f3328..e5fd8e48 100644 --- a/wwpdb/utils/nmr/ann/OneDepAnnTasks.py +++ b/wwpdb/utils/nmr/ann/OneDepAnnTasks.py @@ -7,6 +7,12 @@ """ Wrapper class for OneDep annotation tasks - merge NMRIF metadata. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys # import csv import pynmrstar @@ -145,7 +151,7 @@ def __init__(self, verbose: bool, log: IO, self.__entryId = entryId # CifToNmrStar - # self.__c2S = CifToNmrStar(verbose) if c2S is None else c2S + # self.__c2S = CifToNmrStar(log) if c2S is None else c2S # derived from wwpdb.apps.deposit.depui.constant.REQUIREMENTS self.__cifPages = ['nmrsample', diff --git a/wwpdb/utils/nmr/io/ChemCompReader.py b/wwpdb/utils/nmr/io/ChemCompReader.py index c0aa74f4..d81df08e 100644 --- a/wwpdb/utils/nmr/io/ChemCompReader.py +++ b/wwpdb/utils/nmr/io/ChemCompReader.py @@ -3,20 +3,19 @@ # Date: 31-May-2010 John Westbrook # # Update: -# 06-Aug-2010 - jdw - Generalized construction of methods to apply to any category -# Add accessors for lists of dictionaries -# 12-May-2011 - rps - Added check for None when asking for category Object in __getDataList() -# 2012-10-24 RPS Updated to reflect reorganization of modules in pdbx packages -# 17-Jan-2025 - MY - Added is_reserved_lig_code() from AlignUtil.py (DAOTHER-7204, 7388) ## -""" A collection of classes supporting chemical component dictionary data files +# 06-Aug-2010 - jdw - generalized construction of methods to apply to any category +# add accessors for lists of dictionaries +# 12-May-2011 - rps - added check for None when asking for category Object in __getDataList() +# 24-Oct-2012 - rps - updated to reflect reorganization of modules in pdbx packages +# 17-Jan-2025 - my - added is_reserved_lig_code() from AlignUtil.py (DAOTHER-7204, 7388) ## +""" A collection of classes for parsing CCD CIF files. """ __docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" +__author__ = "John Westbrook, Masashi Yokochi" +__email__ = "jwest@rcsb.rutgers.edu, yokochi@protein.osaka-u.ac.jp" __license__ = "Creative Commons Attribution 3.0 Unported" -__version__ = "V0.01" - +__version__ = "1.0.3" import sys import os @@ -25,7 +24,6 @@ from mmcif.io.PdbxReader import PdbxReader from typing import IO, List, Optional - try: from wwpdb.utils.nmr.AlignUtil import emptyValue except ImportError: @@ -36,10 +34,10 @@ def is_reserved_lig_code(comp_id: str) -> bool: - """ Return a given comp_id is reserved for new ligands (DAOTHER-7204, 7388) + """ Return whether a given comp_id is reserved for new ligands. (DAOTHER-7204, 7388) """ - if comp_id.upper() in ('LIG', 'DRG', 'INH'): + if comp_id in ('LIG', 'DRG', 'INH'): return True if len(comp_id) == 2 and comp_id[0].isdigit() and comp_id[1].isdigit() and comp_id != '00': @@ -49,7 +47,7 @@ def is_reserved_lig_code(comp_id: str) -> bool: class ChemCompReader: - """ Accessor methods chemical component definition data files + """ Accessor methods for parsing CCD CIF files. """ def __init__(self, verbose: bool = True, log: IO = sys.stdout): @@ -61,9 +59,11 @@ def __init__(self, verbose: bool = True, log: IO = sys.stdout): self.__dBlock = None self.__topCachePath = None - self.__ccU = None + self.__compId = None self.__filePath = None + self.__cachedCompId = None + self.__cDict = { 'chem_comp': [ ('_chem_comp.id', '%s', 'str', ''), @@ -140,23 +140,27 @@ def __init__(self, verbose: bool = True, log: IO = sys.stdout): } def setCachePath(self, topCachePath: str = '/data/components/ligand-dict-v4'): - """ Set the top file tree of chemical component dictionary + """ Set the top file tree of the CCD.
""" self.__topCachePath = topCachePath - def setCompId(self, compId: str) -> bool: - """ Set chemical component definition data file path of the input chemical component + def setCompId(self, compId: str, ligand: bool = True) -> bool: + """ Set chemical component definition data file path of the input compId. """ - if compId in emptyValue or not ccd_id_pattern.match(compId) or is_reserved_lig_code(compId): + if compId in emptyValue: return False - self.__ccU = compId.upper() + self.__compId = compId.upper() - hashKey = self.__ccU[-2:] if len(self.__ccU) > 3 else self.__ccU[0] + if not ccd_id_pattern.match(self.__compId) or (not ligand and is_reserved_lig_code(self.__compId)): + return False - self.__filePath = os.path.join(self.__topCachePath, hashKey, self.__ccU, self.__ccU + '.cif') + self.__filePath = os.path.join(self.__topCachePath, + self.__compId[-2:] if len(self.__compId) > 3 else self.__compId[0], + self.__compId, + self.__compId + '.cif') if not os.access(self.__filePath, os.R_OK): if self.__verbose: @@ -165,16 +169,19 @@ def setCompId(self, compId: str) -> bool: return True - def setFilePath(self, filePath: str, compId: Optional[str] = None) -> bool: - """ Set data file path directory with chemical component ID + def setFilePath(self, filePath: str, compId: str) -> bool: + """ Set data file path directory with compId. """ try: - if compId in emptyValue or not ccd_id_pattern.match(compId) or is_reserved_lig_code(compId): + if compId in emptyValue: return False - self.__ccU = str(compId).upper() + self.__compId = compId.upper() + + if not ccd_id_pattern.match(self.__compId) or is_reserved_lig_code(self.__compId): + return False self.__filePath = filePath @@ -191,7 +198,7 @@ def setFilePath(self, filePath: str, compId: Optional[str] = None) -> bool: return False def getAtomList(self) -> List[list]: - """ Get a list of list of data from the chem_comp_atom category + """ Get a list of list of data from the chem_comp_atom category. """ self.__getComp() @@ -199,7 +206,7 @@ def getAtomList(self) -> List[list]: return self.__getDataList(catName='chem_comp_atom') def getBonds(self) -> List[list]: - """ Get a list of list of data from the chem_comp_bond category + """ Get a list of list of data from the chem_comp_bond category. """ self.__getComp() @@ -207,46 +214,47 @@ def getBonds(self) -> List[list]: return self.__getDataList(catName='chem_comp_bond') def getChemCompDict(self) -> dict: - """ Get a list of dictionaries of a chem_comp category + """ Get a dictionary of the chem_comp category. """ try: self.__getComp() - dL = self.__getDictList(catName='chem_comp') - return dL[0] + return self.__getDictList(catName='chem_comp')[0] except Exception: return {} def __getComp(self) -> bool: - """ Get the definition data for the input chemical component - Data is read from chemical component definition file stored in the organization - of CVS repository for chemical components + """ Get the definition data for the input compId. 
@return: True for success or False otherwise """ + if self.__compId == self.__cachedCompId: + return True + try: - block = self.__getDataBlock(self.__filePath, self.__ccU) + if self.__setDataBlock(self.__getDataBlock()): + self.__cachedCompId = self.__compId + return True - return self.__setDataBlock(block) + return False except Exception as e: if self.__verbose: self.__lfh.write(f"+{self.__class_name__}.__getComp() ++ Error - {str(e)}\n") return False - def __getDataBlock(self, filePath: str, blockId: Optional[str] = None): - """ Worker method to read chemical component definition file and set the target datablock - corresponding to the target chemical component + def __getDataBlock(self, blockId: Optional[str] = None): + """ Worker method to read a CCD CIF file and set the target datablock. @return: the first datablock if no blockId is provided """ try: - with open(filePath, 'r', encoding='utf-8') as ifh: + with open(self.__filePath, 'r', encoding='utf-8') as ifh: myBlockList = [] pRd = PdbxReader(ifh) pRd.read(myBlockList) if blockId is not None: for block in myBlockList: if block.getType() == 'data' and block.getName() == blockId: if self.__verbose: block.printIt(self.__lfh) return block else: for block in myBlockList: if block.getType() == 'data': if self.__verbose: block.printIt(self.__lfh) return block - return None - except Exception as e: if self.__verbose: self.__lfh.write(f"+{self.__class_name__}.__getDataBlock() ++ Error - {str(e)}\n") - return None + + return None def __setDataBlock(self, dataBlock) -> bool: - """ Assigns the input datablock as the active internal datablock containing the - target chemical component definition + """ Assigns the input datablock as the active internal datablock. """ - ok = False + if dataBlock is None: + self.__dBlock = None + return False try: if dataBlock.getType() == 'data': self.__dBlock = dataBlock - ok = True - else: - self.__dBlock = None + return True except Exception: pass - return ok + self.__dBlock = None + + return False def __getDictList(self, catName: str = 'chem_comp') -> List[dict]: - """ Return a list of dictionaries of the input category + """ Return a list of dictionaries of the input category. """ - # Get category object - from current datablock - itTupList = self.__cDict[catName] catObj = self.__dBlock.getObj(catName) - # Get column name index + if catObj is None: + return [] + itDict = {} itNameList = catObj.getItemNameList() for idxIt, itName in enumerate(itNameList): itDict[itName] = idxIt - # Find the mapping to the local category definition colDict = {} + for itTup in self.__cDict[catName]: + colDict[itTup[0]] = itDict[itTup[0]] if itTup[0] in itDict else -1 - for _ii, itTup in enumerate(itTupList): - if itTup[0] in itDict: - colDict[itTup[0]] = itDict[itTup[0]] - else: - colDict[itTup[0]] = -1 - rowList = catObj.getRowList() dList = [] - for row in rowList: - tD = {} - for k, v in colDict.items(): - if v < 0: - tD[k] = '' - else: - tD[k] = row[v] + for row in catObj.getRowList(): + tD = {k: '' if v < 0 else row[v] for k, v in colDict.items()} dList.append(tD) return dList def __getDataList(self, catName: str = 'chem_comp_bond') -> List[list]: """ Return a list of lists of data from the input category including - data types and default value replacement + data types and default value replacement.
""" - itTupList = self.__cDict[catName] - dataList = [] catObj = self.__dBlock.getObj(catName) - if catObj is not None: - itDict = {} - itNameList = catObj.getItemNameList() - for idxIt, itName in enumerate(itNameList): - itDict[itName] = idxIt - - colTupList = [] - # (column index of data or -1, type name, [default value]) - for _ii, itTup in enumerate(itTupList): - if itTup[0] in itDict: - colTupList.append((itDict[itTup[0]], itTup[2], itTup[3])) - else: - colTupList.append((-1, itTup[2], itTup[3])) - - rowList = catObj.getRowList() + if catObj is None: + return [] - for row in rowList: - uR = [] - for cTup in colTupList: + def apply_type(ctype, default, val): + if val in emptyValue: + return default + if ctype == 'int': + return int(val) + if ctype == 'float': + return float(val) + return val - if cTup[0] < 0: - uR.append(self.__applyType(cTup[1], cTup[2], cTup[2])) - else: - uR.append(self.__applyType(cTup[1], cTup[2], row[cTup[0]])) - - dataList.append(uR) + itDict = {} + for idxIt, itName in enumerate(catObj.getItemNameList()): + itDict[itName] = idxIt - return dataList + colTupList = [] + for itTup in self.__cDict[catName]: + if itTup[0] in itDict: + colTupList.append((itDict[itTup[0]], itTup[2], itTup[3])) + else: + colTupList.append((-1, itTup[2], itTup[3])) - def __applyType(self, ctype: str, default, val): # pylint: disable=no-self-use - """ Apply type conversion to the input value and assign default values to missing values - """ + dList = [] + for row in catObj.getRowList(): + uR = [] + for cTup in colTupList: + if cTup[0] < 0: + uR.append(apply_type(cTup[1], cTup[2], cTup[2])) + else: + uR.append(apply_type(cTup[1], cTup[2], row[cTup[0]])) + dList.append(uR) - tval = val - if val is None: - tval = default - if isinstance(tval, str) and (len(tval) < 1 or tval in ('.', '?')): - tval = default - if ctype == "int": - return int(str(tval)) - if ctype == "float": - return float(str(tval)) - if ctype == "str": - return str(tval) - - return tval + return dList diff --git a/wwpdb/utils/nmr/io/CifReader.py b/wwpdb/utils/nmr/io/CifReader.py index d96e24a9..5e5f33d7 100644 --- a/wwpdb/utils/nmr/io/CifReader.py +++ b/wwpdb/utils/nmr/io/CifReader.py @@ -3,40 +3,45 @@ # Date: 31-May-2010 John Westbrook # # Update: -# 06-Aug-2010 - jdw - Generalized construction of methods to apply to any category -# Add accessors for lists of dictionaries -# 12-May-2011 - rps - Added check for None when asking for category Object in __getDataList() -# 2012-10-24 RPS Updated to reflect reorganization of modules in pdbx packages -# 23-Jul-2019 my - forked original code to wwpdb.util.nmr.CifReader -# 30-Jul-2019 my - add 'range-float' as filter item type -# 05-Aug-2019 my - add 'enum' as filter item type -# 28-Jan-2020 my - add 'withStructConf' option of getPolymerSequence -# 19-Mar-2020 my - add hasItem() -# 24-Mar-2020 my - add 'identical_chain_id' in results of getPolymerSequence() -# 15-Apr-2020 my - add 'total_models' option of getPolymerSequence (DAOTHER-4060) -# 19-Apr-2020 my - add random rotation test for detection of non-superimposed models (DAOTHER-4060) -# 08-May-2020 my - make sure parse() is run only once (DAOTHER-5654) -# 20-Nov-2020 my - additional support for insertion code in getPolymerSequence() (DAOTHER-6128) -# 29-Jun-2021 my - add 'auth_chain_id', 'identical_auth_chain_id' in results of getPolymerSequence() if possible (DAOTHER-7108) -# 14-Jan-2022 my - precise RMSD calculation with domain and medoid model identification (DAOTHER-4060, 7544) -# 02-Feb-2022 my - add 'abs-int', 'abs-float', 
'range-int', 'range-abs-int', 'range-abs-float' as filter item types and 'not_equal_to' range filter (NMR restraint remediation) -# 30-Mar-2022 my - add support for _atom_site.label_alt_id (DAOTHER-4060, 7544, NMR restraint remediation) -# 06-Apr-2022 my - add support for auth_comp_id (DAOTHER-7690) -# 04-Aug-2022 my - detect sequence gaps in auth_seq_id, 'gap_in_auth_seq' (NMR restraint remediation) -# 10-Feb-2023 my - add 'fetch_first_match' filter to process large assembly avoiding forced timeout (NMR restraint remediation) -# 14-Apr-2023 my - enable to use cache datablock (NMR restraint remediation) -# 19-Apr-2023 my - support multiple datablock (NMR restraint validation) -# 24-Apr-2023 my - add 'default' attribute for key items (NMR restraint validation) -# 18-Dec-2023 my - add calculate_uninstanced_coord() (DAOTHER-8945) -# 24-Jan-2024 my - add 'default-from' attribute for key/data items (D_1300043061) -# 21-Feb-2024 my - add support for discontinuous model_id (NMR restraint remediation, 2n6j) -# 07-Mar-2024 my - extract pdbx_poly_seq_scheme.auth_mon_id as alt_cmop_id to prevent sequence mismatch due to 5-letter CCD ID (DAOTHER-9158 vs D_1300043061) -# 20-Aug-2024 my - support truncated loop sequence in the model (DAOTHER-9644) -# 10-Sep-2024 my - ignore identical polymer sequence extensions within polynucleotide multiplexes (DAOTHER-9674) -# 18-Sep-2024 my - add 'starts-with-alnum' item type (DAOTHER-9694) +# 06-Aug-2010 - jdw - generalized construction of methods to apply to any category +# add accessors for lists of dictionaries +# 12-May-2011 - rps - added check for None when asking for category Object in __getDataList() +# 24-Oct-2012 - rps - updated to reflect reorganization of modules in pdbx packages +# 23-Jul-2019 - my - forked original code to wwpdb.util.nmr.CifReader +# 30-Jul-2019 - my - add 'range-float' as filter item type +# 05-Aug-2019 - my - add 'enum' as filter item type +# 28-Jan-2020 - my - add 'withStructConf' option of getPolymerSequence +# 19-Mar-2020 - my - add hasItem() +# 24-Mar-2020 - my - add 'identical_chain_id' in results of getPolymerSequence() +# 15-Apr-2020 - my - add 'total_models' option of getPolymerSequence (DAOTHER-4060) +# 19-Apr-2020 - my - add random rotation test for detection of non-superimposed models (DAOTHER-4060) +# 08-May-2020 - my - make sure parse() is run only once (DAOTHER-5654) +# 20-Nov-2020 - my - additional support for insertion code in getPolymerSequence() (DAOTHER-6128) +# 29-Jun-2021 - my - add 'auth_chain_id', 'identical_auth_chain_id' in results of getPolymerSequence() if possible (DAOTHER-7108) +# 14-Jan-2022 - my - precise RMSD calculation with domain and medoid model identification (DAOTHER-4060, 7544) +# 02-Feb-2022 - my - add 'abs-int', 'abs-float', 'range-int', 'range-abs-int', 'range-abs-float' as filter item types and 'not_equal_to' range filter (NMR restraint remediation) +# 30-Mar-2022 - my - add support for _atom_site.label_alt_id (DAOTHER-4060, 7544, NMR restraint remediation) +# 06-Apr-2022 - my - add support for auth_comp_id (DAOTHER-7690) +# 04-Aug-2022 - my - detect sequence gaps in auth_seq_id, 'gap_in_auth_seq' (NMR restraint remediation) +# 10-Feb-2023 - my - add 'fetch_first_match' filter to process large assembly avoiding forced timeout (NMR restraint remediation) +# 14-Apr-2023 - my - enable to use cache datablock (NMR restraint remediation) +# 19-Apr-2023 - my - support multiple datablock (NMR restraint validation) +# 24-Apr-2023 - my - add 'default' attribute for key items (NMR restraint validation) 
+# 18-Dec-2023 - my - add calculate_uninstanced_coord() (DAOTHER-8945)
+# 24-Jan-2024 - my - add 'default-from' attribute for key/data items (D_1300043061)
+# 21-Feb-2024 - my - add support for discontinuous model_id (NMR restraint remediation, 2n6j)
+# 07-Mar-2024 - my - extract pdbx_poly_seq_scheme.auth_mon_id as alt_comp_id to prevent sequence mismatch due to 5-letter CCD ID (DAOTHER-9158 vs D_1300043061)
+# 20-Aug-2024 - my - support truncated loop sequence in the model (DAOTHER-9644)
+# 10-Sep-2024 - my - ignore identical polymer sequence extensions within polynucleotide multiplexes (DAOTHER-9674)
+# 18-Sep-2024 - my - add 'starts-with-alnum' item type (DAOTHER-9694)
##
-""" A collection of classes for parsing CIF files
+""" A collection of classes for parsing CIF files, extracting polymer sequences, and calculating RMSD.
"""
+__docformat__ = "restructuredtext en"
+__author__ = "John Westbrook, Masashi Yokochi"
+__email__ = "jwest@rcsb.rutgers.edu, yokochi@protein.osaka-u.ac.jp"
+__license__ = "Creative Commons Attribution 3.0 Unported"
+__version__ = "1.0.3"

import sys
import os
@@ -62,6 +67,12 @@
                      reorder_hungarian, reorder_brute, reorder_distance, quaternion_rotate)

+try:
+    from wwpdb.utils.nmr.AlignUtil import emptyValue
+except ImportError:
+    from nmr.AlignUtil import emptyValue
+
+
# must be one of kabsch_rmsd, quaternion_rmsd, None
ROTATION_METHOD = quaternion_rmsd
# must be one of reorder_hungarian, reorder_brute, reorder_distance, None
@@ -85,7 +96,8 @@


def M(axis: list, theta: float) -> list:
-    """ Return the rotation matrix associated with counterclockwise rotation about the given axis by theta radians
+    """ Return the rotation matrix associated with counterclockwise rotation
+        about the given axis by theta radians.
    """

    axis = np.asarray(axis)
@@ -101,14 +113,15 @@ def M(axis: list, theta: float) -> list:


def to_np_array(a: dict) -> list:
-    """ Return Numpy array of a given Cartesian coordinate in {'x': float, 'y': float, 'z': float} format
+    """ Return a NumPy array of a given Cartesian coordinate
+        in {'x': float, 'y': float, 'z': float} format.
    """

    return np.asarray([a['x'], a['y'], a['z']], dtype=float)


def get_coordinates(p: list) -> [list, list]:
-    """ Convert list of atoms for RMSD calculation
+    """ Convert a list of atoms for RMSD calculation.
        @return: a vector set of the coordinates
    """

@@ -128,7 +141,7 @@ def get_coordinates(p: list) -> [list, list]:


def calculate_rmsd(p: list, q: list) -> float:
-    """ Calculate RMSD of two coordinates
+    """ Calculate RMSD of two coordinates.
        @return: RMSD value
    """

@@ -184,7 +197,8 @@ def calculate_rmsd(p: list, q: list) -> float:


def calculate_uninstanced_coord(p_coord: list, q_coord: list, s_coord: list) -> [list, float]:
-    """ Calculate RMSD of two reference coordinates (p_coord, q_coord) and complement missing coordinate (s_coord). (DAOTHER-8945)
+    """ Calculate RMSD of two reference coordinates (p_coord, q_coord)
+        and complement missing coordinate (s_coord). (DAOTHER-8945)
        @return: complemented coordinates, RMSD value
    """
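As a quick numerical check of M() as defined above (a sketch under the definitions in this file, not part of the patch): a counterclockwise rotation by pi/2 about the z axis should carry the x unit vector onto the y unit vector.

import numpy as np

axis, theta = [0.0, 0.0, 1.0], np.pi / 2.0
v = np.asarray([1.0, 0.0, 0.0])
# M() returns the counterclockwise rotation matrix for the given axis and angle
assert np.allclose(np.dot(M(axis, theta), v), [0.0, 1.0, 0.0], atol=1.0e-8)
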
""" def __init__(self, verbose: bool = True, log: IO = sys.stdout, @@ -244,7 +258,6 @@ def __init__(self, verbose: bool = True, log: IO = sys.stdout, self.__cachePath = None # preset values - self.emptyValue = (None, '', '.', '?', 'null') self.trueValue = ('true', 't', 'yes', 'y', '1') # allowed item types @@ -281,7 +294,7 @@ def __init__(self, verbose: bool = True, log: IO = sys.stdout, self.__rmsd_overlaid_exactly = 0.01 def parse(self, filePath: str, dirPath: Optional[str] = None) -> bool: - """ Parse CIF file, and set internal active datablock if possible + """ Parse CIF file, and set internal active datablock if possible. @return: True for success or False otherwise """ @@ -314,8 +327,8 @@ def parse(self, filePath: str, dirPath: Optional[str] = None) -> bool: return False def __getDataBlockFromFile(self, blockId: Optional[str] = None): - """ Worker method to read cif file and set the target datablock - If no blockId is provided return the first datablock + """ Worker method to read CIF file and set the target datablock. + If no blockId is provided return the first datablock. @return: target datablock """ @@ -361,36 +374,36 @@ def __getDataBlockFromFile(self, blockId: Optional[str] = None): return None def __setDataBlock(self, dataBlock: Optional[str] = None) -> bool: - """ Assigns the input datablock as the active internal datablock + """ Assigns the input datablock as the active internal datablock. @return: True for success or False otherwise """ - ok = False - try: if dataBlock.getType() == 'data': self.__dBlock = dataBlock - ok = True + if self.__use_cache and not os.path.exists(self.__cachePath): with open(self.__cachePath, 'wb') as ofh: pickle.dump(dataBlock, ofh) - else: - self.__dBlock = None + + return True except Exception: pass - return ok + self.__dBlock = None + + return False def getFilePath(self) -> str: - """ Return cif file path + """ Return CIF file path. """ return self.__filePath def getHashCode(self) -> str: - """ Return hash code of the cif file + """ Return hash code of the CIF file. """ if self.__hashCode is None: @@ -400,14 +413,14 @@ def getHashCode(self) -> str: return self.__hashCode def getDataBlockList(self) -> list: - """ Return whole list of datablock + """ Return whole list of datablock. """ return self.__dBlockList def getDataBlock(self, blockId: Optional[str] = None): - """ Return target datablock - Return None in case current blockId does not exist or no blockId does not match + """ Return target datablock. + Return None in case current blockId does not exist or no blockId does not match. @return: target datablock """ @@ -422,7 +435,7 @@ def getDataBlock(self, blockId: Optional[str] = None): return dBlock if self.__setDataBlock(dBlock) else None def hasCategory(self, catName: str, blockId: Optional[str] = None) -> bool: - """ Return whether a given category exists + """ Return whether a given category exists. """ if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: @@ -434,7 +447,7 @@ def hasCategory(self, catName: str, blockId: Optional[str] = None) -> bool: return catName in self.__dBlock.getObjNameList() def hasItem(self, catName: str, itName: str, blockId: Optional[str] = None) -> bool: - """ Return whether a given item exists in a category + """ Return whether a given item exists in a category. 
""" if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: @@ -443,7 +456,6 @@ def hasItem(self, catName: str, itName: str, blockId: Optional[str] = None) -> b if self.__dBlock is None: return False - # get category object catObj = self.__dBlock.getObj(catName) if catObj is None: @@ -454,7 +466,7 @@ def hasItem(self, catName: str, itName: str, blockId: Optional[str] = None) -> b return itName in [name[len_catName:] for name in catObj.getItemNameList()] def getItemTags(self, catName: str, blockId: Optional[str] = None) -> List[str]: - """ Return item tag names of a given category + """ Return item tag names of a given category. """ if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: @@ -463,7 +475,6 @@ def getItemTags(self, catName: str, blockId: Optional[str] = None) -> List[str]: if self.__dBlock is None: return [] - # get category object catObj = self.__dBlock.getObj(catName) if catObj is None: @@ -474,7 +485,7 @@ def getItemTags(self, catName: str, blockId: Optional[str] = None) -> List[str]: return [name[len_catName:] for name in catObj.getItemNameList()] def getRowLength(self, catName: str, blockId: Optional[str] = None) -> int: - """ Return length of rows of a given category + """ Return length of rows of a given category. """ if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: @@ -483,7 +494,6 @@ def getRowLength(self, catName: str, blockId: Optional[str] = None) -> int: if self.__dBlock is None: return 0 - # get category object catObj = self.__dBlock.getObj(catName) if catObj is not None: @@ -492,7 +502,7 @@ def getRowLength(self, catName: str, blockId: Optional[str] = None) -> int: return 0 def getRowList(self, catName: str, blockId: Optional[str] = None) -> List[list]: - """ Return length of rows of a given category + """ Return length of rows of a given category. """ if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: @@ -501,7 +511,6 @@ def getRowList(self, catName: str, blockId: Optional[str] = None) -> List[list]: if self.__dBlock is None: return [] - # get category object catObj = self.__dBlock.getObj(catName) if catObj is not None: @@ -510,43 +519,36 @@ def getRowList(self, catName: str, blockId: Optional[str] = None) -> List[list]: return [] def getDictList(self, catName: str, blockId: Optional[str] = None) -> List[dict]: - """ Return a list of dictionaries of a given category + """ Return a list of dictionaries of a given category. 
""" - dList = [] - if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: self.__setDataBlock(self.getDataBlock(blockId)) if self.__dBlock is None: - return dList + return [] - # get category object catObj = self.__dBlock.getObj(catName) - if catObj is not None: - len_catName = len(catName) + 2 + if catObj is None: + return [] - # get column name index - itDict = {} - itNameList = catObj.getItemNameList() - for idxIt, itName in enumerate(itNameList): - itDict[itName[len_catName:]] = idxIt + len_catName = len(catName) + 2 - # get row list - rowList = catObj.getRowList() + itDict = {} + for idxIt, itName in enumerate(catObj.getItemNameList()): + itDict[itName[len_catName:]] = idxIt - for row in rowList: - tD = {} - for k, v in itDict.items(): - tD[k] = row[v] - dList.append(tD) + dList = [] + for row in catObj.getRowList(): + tD = {k: row[v] for k, v in itDict.items()} + dList.append(tD) return dList def getDictListWithFilter(self, catName: str, dataItems: List[dict], filterItems: Optional[List[dict]] = None, blockId: Optional[str] = None) -> List[dict]: - """ Return a list of dictionaries of a given category with filter + """ Return a list of dictionaries of a given category with filter. """ dataNames = [d['name'] for d in dataItems] @@ -563,167 +565,162 @@ def getDictListWithFilter(self, catName: str, dataItems: List[dict], filterItems if f['type'] not in self.itemTypes: raise TypeError(f"Type {f['type']} of filter item {f['name']} must be one of {self.itemTypes}.") - dList = [] - if blockId is not None and self.__dBlock is not None and self.__dBlock.getName() != blockId: self.__setDataBlock(self.getDataBlock(blockId)) if self.__dBlock is None: - return dList + return [] - # get category object catObj = self.__dBlock.getObj(catName) - if catObj is not None: - len_catName = len(catName) + 2 + if catObj is None: + return [] - # get column name index - colDict, fcolDict, fetchDict = {}, {}, {} # 'fetch_first_match': True + len_catName = len(catName) + 2 - itNameList = [name[len_catName:] for name in catObj.getItemNameList()] + # get column name index + colDict, fcolDict, fetchDict = {}, {}, {} # 'fetch_first_match': True - for idxIt, itName in enumerate(itNameList): - if itName in dataNames: - colDict[itName] = idxIt - if filterNames is not None and itName in filterNames: - fcolDict[itName] = idxIt + itNameList = [name[len_catName:] for name in catObj.getItemNameList()] - if set(dataNames) & set(itNameList) != set(dataNames): - raise LookupError(f"Missing one of data items {dataNames}.") + for idxIt, itName in enumerate(itNameList): + if itName in dataNames: + colDict[itName] = idxIt + if filterNames is not None and itName in filterNames: + fcolDict[itName] = idxIt - if filterItems is not None and set(filterNames) & set(itNameList) != set(filterNames): - raise LookupError(f"Missing one of filter items {filterNames}.") + if set(dataNames) & set(itNameList) != set(dataNames): + raise LookupError(f"Missing one of data items {dataNames}.") - # get row list - rowList = catObj.getRowList() + if filterItems is not None and set(filterNames) & set(itNameList) != set(filterNames): + raise LookupError(f"Missing one of filter items {filterNames}.") - abort = False + abort = False - for row in rowList: - keep = True - if filterItems is not None: - for filterItem in filterItems: - name = filterItem['name'] - val = row[fcolDict[name]] - if val in self.emptyValue: - if 'value' in filterItem and filterItem['value'] not in self.emptyValue: - keep = False - break - else: - 
filterItemType = filterItem['type'] - if filterItemType in ('str', 'enum'): - pass - elif filterItemType == 'starts-with-alnum': - if not val[0].isalnum(): - keep = False - break - elif filterItemType == 'bool': - val = val.lower() in self.trueValue - elif filterItemType == 'int': - try: - val = int(val) - except ValueError: - keep = False - break - elif filterItemType == 'float': - try: - val = float(val) - except ValueError: - keep = False - break - elif filterItemType in ('abs-int', 'range-abs-int'): - try: - val = abs(int(val)) - except ValueError: - keep = False - break - else: # 'range-float', 'range-abs-float' - try: - val = abs(float(val)) - except ValueError: - keep = False - break - if filterItemType in ('range-int', 'range-abs-int', 'range-float', 'range-abs-float'): - _range = filterItem['range'] - if ('min_exclusive' in _range and val <= _range['min_exclusive'])\ - or ('min_inclusive' in _range and val < _range['min_inclusive'])\ - or ('max_inclusive' in _range and val > _range['max_inclusive'])\ - or ('max_exclusive' in _range and val >= _range['max_exclusive'])\ - or ('not_equal_to' in _range and val == _range['not_equal_to']): - keep = False - break - elif filterItemType == 'enum': - if val not in filterItem['enum']: - keep = False - break - if 'fetch_first_match' in filterItem and filterItem['fetch_first_match']: - if name not in fetchDict: - fetchDict[name] = val - elif val != fetchDict[name]: - keep = False - abort = True - break - else: - if val != filterItem['value']: - keep = False - break - if 'fetch_first_match' in filterItem and filterItem['fetch_first_match']: - if name not in fetchDict: - fetchDict[name] = val - elif val != fetchDict[name]: - keep = False - abort = True - break - - if keep: - tD = {} - for dataItem in dataItems: - val = row[colDict[dataItem['name']]] - if val in self.emptyValue: - if 'default-from' in dataItem and dataItem['default-from'] in colDict: - val = row[colDict[dataItem['default-from']]] - else: - val = dataItem.get('default') - dataItemType = dataItem['type'] - if dataItemType in ('str', 'enum'): + dList = [] + for row in catObj.getRowList(): + keep = True + if filterItems is not None: + for filterItem in filterItems: + name = filterItem['name'] + val = row[fcolDict[name]] + if val in emptyValue: + if 'value' in filterItem and filterItem['value'] not in emptyValue: + keep = False + break + else: + filterItemType = filterItem['type'] + if filterItemType in ('str', 'enum'): pass - elif dataItemType == 'starts-with-alnum': + elif filterItemType == 'starts-with-alnum': if not val[0].isalnum(): - val = None - elif dataItemType == 'bool': + keep = False + break + elif filterItemType == 'bool': val = val.lower() in self.trueValue - elif dataItemType == 'int' and val is not None: + elif filterItemType == 'int': try: val = int(val) except ValueError: - val = None - elif val is not None: - val = float(val) - if 'alt_name' in dataItem: - tD[dataItem['alt_name']] = val + keep = False + break + elif filterItemType == 'float': + try: + val = float(val) + except ValueError: + keep = False + break + elif filterItemType in ('abs-int', 'range-abs-int'): + try: + val = abs(int(val)) + except ValueError: + keep = False + break + else: # 'range-float', 'range-abs-float' + try: + val = abs(float(val)) + except ValueError: + keep = False + break + if filterItemType in ('range-int', 'range-abs-int', 'range-float', 'range-abs-float'): + _range = filterItem['range'] + if ('min_exclusive' in _range and val <= _range['min_exclusive'])\ + or ('min_inclusive' in 
_range and val < _range['min_inclusive'])\ + or ('max_inclusive' in _range and val > _range['max_inclusive'])\ + or ('max_exclusive' in _range and val >= _range['max_exclusive'])\ + or ('not_equal_to' in _range and val == _range['not_equal_to']): + keep = False + break + elif filterItemType == 'enum': + if val not in filterItem['enum']: + keep = False + break + if 'fetch_first_match' in filterItem and filterItem['fetch_first_match']: + if name not in fetchDict: + fetchDict[name] = val + elif val != fetchDict[name]: + keep = False + abort = True + break else: - tD[dataItem['name']] = val - dList.append(tD) + if val != filterItem['value']: + keep = False + break + if 'fetch_first_match' in filterItem and filterItem['fetch_first_match']: + if name not in fetchDict: + fetchDict[name] = val + elif val != fetchDict[name]: + keep = False + abort = True + break - elif abort: - break + if keep: + tD = {} + for dataItem in dataItems: + val = row[colDict[dataItem['name']]] + if val in emptyValue: + if 'default-from' in dataItem and dataItem['default-from'] in colDict: + val = row[colDict[dataItem['default-from']]] + else: + val = dataItem.get('default') + dataItemType = dataItem['type'] + if dataItemType in ('str', 'enum'): + pass + elif dataItemType == 'starts-with-alnum': + if not val[0].isalnum(): + val = None + elif dataItemType == 'bool': + val = val.lower() in self.trueValue + elif dataItemType == 'int' and val is not None: + try: + val = int(val) + except ValueError: + val = None + elif val is not None: + val = float(val) + if 'alt_name' in dataItem: + tD[dataItem['alt_name']] = val + else: + tD[dataItem['name']] = val + dList.append(tD) + + elif abort: + break return dList def getPolymerSequence(self, catName: str, keyItems: List[dict], withStructConf: bool = False, withRmsd: bool = False, alias: bool = False, totalModels: int = 1, effModelIds: Optional[List[int]] = None, repAltId: str = 'A') -> List[dict]: - """ Extracts sequence from a given loop in a CIF file + """ Extract sequence from a given loop in a CIF file. 
""" keyNames = [k['name'] for k in keyItems] lenKeyItems = len(keyItems) - asm = [] # assembly of a loop - if self.__dBlock is None: - return asm + return [] repModelId = effModelIds[0] if effModelIds is not None else 1 @@ -749,248 +746,325 @@ def getPolymerSequence(self, catName: str, keyItems: List[dict], del mis['test_auth_chain_id'] misPolyLink.append(mis) - # get category object catObj = self.__dBlock.getObj(catName) - if catObj is not None: - len_catName = len(catName) + 2 - - # get column name index - itDict, altDict = {}, {} + if catObj is None: + return [] - itNameList = [name[len_catName:] for name in catObj.getItemNameList()] + len_catName = len(catName) + 2 - for idxIt, itName in enumerate(itNameList): - itDict[itName] = idxIt - if itName in keyNames: - altDict[next(k['alt_name'] if 'alt_name' in k else itName for k in keyItems if k['name'] == itName)] = idxIt + # get column name index + itDict, altDict = {}, {} + + itNameList = [name[len_catName:] for name in catObj.getItemNameList()] + + for idxIt, itName in enumerate(itNameList): + itDict[itName] = idxIt + if itName in keyNames: + altDict[next(k['alt_name'] if 'alt_name' in k else itName for k in keyItems if k['name'] == itName)] = idxIt + + if set(keyNames) & set(itDict.keys()) != set(keyNames): + raise LookupError(f"Missing one of data items {keyNames}.") + + # get row list + rowList = catObj.getRowList() + _rowList = None + unmapSeqIds, unmapAuthSeqIds, mapAuthSeqIds = {}, {}, {} + chainIdWoDefault = set() + + entityPoly = self.getDictList('entity_poly') + + # DAOTHER-9674 + for row in rowList: + for j in range(lenKeyItems): + itCol = itDict[keyNames[j]] + if itCol < len(row) and row[itCol] in emptyValue: + if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: + if catName == 'pdbx_poly_seq_scheme': + if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': + c = row[altDict['chain_id']] + etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None) + if etype is not None and 'polypeptide' not in etype: + if c not in unmapSeqIds: + unmapSeqIds[c], unmapAuthSeqIds[c] = [], [] + compId = row[altDict['comp_id']] + if compId in emptyValue or not compId[0].isalnum(): # DAOTHER-9694 + continue + unmapSeqIds[c].append((row[altDict['seq_id']], compId)) + unmapAuthSeqIds[c].append(row[altDict['auth_seq_id']]) + if _rowList is None: + _rowList = copy.deepcopy(rowList) + continue + if 'default' not in keyItems[j] or keyItems[j]['default'] not in emptyValue: + raise ValueError(f"{keyNames[j]} must not be empty.") - if set(keyNames) & set(itDict.keys()) != set(keyNames): - raise LookupError(f"Missing one of data items {keyNames}.") + # DAOTHER-9674 + if catName == 'pdbx_poly_seq_scheme' and 'auth_comp_id' in altDict: + for row in rowList: + if row[altDict['auth_comp_id']] not in emptyValue: + c = row[altDict['chain_id']] + etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None) + if etype is not None and 'polypeptide' not in etype: + if c not in mapAuthSeqIds: + mapAuthSeqIds[c] = [] + compId = row[altDict['comp_id']] + if compId in emptyValue or not compId[0].isalnum(): # DAOTHER-9694 + continue + mapAuthSeqIds[c].append(row[altDict['auth_seq_id']]) + + # DAOTHER-9674 + if len(unmapSeqIds) > 1: + for (i, j) in itertools.combinations(unmapSeqIds.keys(), 2): + if (i not in chainIdWoDefault or j not in chainIdWoDefault)\ + and unmapSeqIds[i] == unmapSeqIds[j]\ + and 
(len(unmapAuthSeqIds[i]) % len(mapAuthSeqIds[i]) == 0 + or len(mapAuthSeqIds[i]) % len(unmapAuthSeqIds[i]) == 0): + chainIdWoDefault.add(i) + chainIdWoDefault.add(j) + + if len(chainIdWoDefault) > 1: + rowList = [] + + for row in _rowList: + skip = False + for j in range(lenKeyItems): + itCol = itDict[keyNames[j]] + if itCol < len(row) and row[itCol] in emptyValue: + if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: + if catName == 'pdbx_poly_seq_scheme': + if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': + c = row[altDict['chain_id']] + if c in chainIdWoDefault: + skip = True + break + row[itCol] = row[itDict[keyItems[j]['default-from']]] + if not skip: + rowList.append(row) + + for row in rowList: + for j in range(lenKeyItems): + itCol = itDict[keyNames[j]] + if itCol < len(row) and row[itCol] in emptyValue: + if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: + if catName == 'pdbx_poly_seq_scheme': + if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': + c = row[altDict['chain_id']] + row[itCol] = row[itDict[keyItems[j]['default-from']]] + continue - # get row list - rowList = catObj.getRowList() - _rowList = None - unmapSeqIds, unmapAuthSeqIds, mapAuthSeqIds = {}, {}, {} - chainIdWoDefault = set() + compDict, seqDict, insCodeDict, authSeqDict, labelSeqDict, authChainDict =\ + {}, {}, {}, {}, {}, {} - entityPoly = self.getDictList('entity_poly') + chain_id_col = altDict['chain_id'] + seq_id_col = altDict['seq_id'] + comp_id_col = altDict['comp_id'] + ins_code_col = -1 if 'ins_code' not in altDict else altDict['ins_code'] + label_seq_col = seq_id_col if 'label_seq_id' not in altDict else altDict['label_seq_id'] + auth_chain_id_col = -1 if 'auth_chain_id' not in altDict else altDict['auth_chain_id'] + auth_seq_id_col = -1 if 'auth_seq_id' not in altDict else altDict['auth_seq_id'] + auth_comp_id_col = -1 if 'auth_comp_id' not in altDict else altDict['auth_comp_id'] + alt_comp_id_col = -1 if 'alt_comp_id' not in altDict else altDict['alt_comp_id'] - # DAOTHER-9674 - for row in rowList: - for j in range(lenKeyItems): - itCol = itDict[keyNames[j]] - if itCol < len(row) and row[itCol] in self.emptyValue: - if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: - if catName == 'pdbx_poly_seq_scheme': - if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': - c = row[altDict['chain_id']] - etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None) - if etype is not None and 'polypeptide' not in etype: - if c not in unmapSeqIds: - unmapSeqIds[c], unmapAuthSeqIds[c] = [], [] - compId = row[altDict['comp_id']] - if compId in self.emptyValue or not compId[0].isalnum(): # DAOTHER-9694 - continue - unmapSeqIds[c].append((row[altDict['seq_id']], compId)) - unmapAuthSeqIds[c].append(row[altDict['auth_seq_id']]) - if _rowList is None: - _rowList = copy.deepcopy(rowList) - continue - if 'default' not in keyItems[j] or keyItems[j]['default'] not in self.emptyValue: - raise ValueError(f"{keyNames[j]} must not be empty.") - - # DAOTHER-9674 - if catName == 'pdbx_poly_seq_scheme' and 'auth_comp_id' in altDict: - for row in rowList: - if row[altDict['auth_comp_id']] not in self.emptyValue: - c = row[altDict['chain_id']] - etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None) - if etype is not None and 'polypeptide' not in etype: - if c not in 
mapAuthSeqIds: - mapAuthSeqIds[c] = [] - compId = row[altDict['comp_id']] - if compId in self.emptyValue or not compId[0].isalnum(): # DAOTHER-9694 - continue - mapAuthSeqIds[c].append(row[altDict['auth_seq_id']]) - - # DAOTHER-9674 - if len(unmapSeqIds) > 1: - for (i, j) in itertools.combinations(unmapSeqIds.keys(), 2): - if (i not in chainIdWoDefault or j not in chainIdWoDefault)\ - and unmapSeqIds[i] == unmapSeqIds[j]\ - and (len(unmapAuthSeqIds[i]) % len(mapAuthSeqIds[i]) == 0 - or len(mapAuthSeqIds[i]) % len(unmapAuthSeqIds[i]) == 0): - chainIdWoDefault.add(i) - chainIdWoDefault.add(j) - - if len(chainIdWoDefault) > 1: - rowList = [] - - for row in _rowList: - skip = False - for j in range(lenKeyItems): - itCol = itDict[keyNames[j]] - if itCol < len(row) and row[itCol] in self.emptyValue: - if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: - if catName == 'pdbx_poly_seq_scheme': - if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': - c = row[altDict['chain_id']] - if c in chainIdWoDefault: - skip = True - break - row[itCol] = row[itDict[keyItems[j]['default-from']]] - if not skip: - rowList.append(row) + authScheme = auth_seq_id_col != -1 - for row in rowList: - for j in range(lenKeyItems): - itCol = itDict[keyNames[j]] - if itCol < len(row) and row[itCol] in self.emptyValue: - if 'default-from' in keyItems[j] and keyItems[j]['default-from'] in keyNames: - if catName == 'pdbx_poly_seq_scheme': - if 'alt_name' in keyItems[j] and keyItems[j]['alt_name'] == 'auth_comp_id': - c = row[altDict['chain_id']] - row[itCol] = row[itDict[keyItems[j]['default-from']]] - continue + chainIds = sorted(set(row[chain_id_col] for row in rowList), key=lambda x: (len(x), x)) - compDict, seqDict, insCodeDict, authSeqDict, labelSeqDict, authChainDict =\ - {}, {}, {}, {}, {}, {} + if ins_code_col == -1: + if catName == 'pdbx_nonpoly_scheme': + sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[comp_id_col]) for row in rowList), + key=itemgetter(1)) + else: + sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[comp_id_col]) for row in rowList), + key=lambda x: (len(x[0]), x[0], x[1])) - chain_id_col = altDict['chain_id'] - seq_id_col = altDict['seq_id'] - comp_id_col = altDict['comp_id'] - ins_code_col = -1 if 'ins_code' not in altDict else altDict['ins_code'] - label_seq_col = seq_id_col if 'label_seq_id' not in altDict else altDict['label_seq_id'] - auth_chain_id_col = -1 if 'auth_chain_id' not in altDict else altDict['auth_chain_id'] - auth_seq_id_col = -1 if 'auth_seq_id' not in altDict else altDict['auth_seq_id'] - auth_comp_id_col = -1 if 'auth_comp_id' not in altDict else altDict['auth_comp_id'] - alt_comp_id_col = -1 if 'alt_comp_id' not in altDict else altDict['alt_comp_id'] + keyDict = {(row[chain_id_col], int(row[seq_id_col])): row[comp_id_col] for row in rowList} - authScheme = auth_seq_id_col != -1 + for row in rowList: + key = (row[chain_id_col], int(row[seq_id_col])) + if keyDict[key] != row[comp_id_col]: + raise KeyError(f"Sequence must be unique. 
{itNameList[chain_id_col]} {row[chain_id_col]}, " + f"{itNameList[seq_id_col]} {row[seq_id_col]}, " + f"{itNameList[comp_id_col]} {row[comp_id_col]} vs {keyDict[key]}.") - chainIds = sorted(set(row[chain_id_col] for row in rowList), key=lambda x: (len(x), x)) + for c in chainIds: + compDict[c] = [x[2] for x in sortedSeq if x[0] == c] + seqDict[c] = [x[1] for x in sortedSeq if x[0] == c] - if ins_code_col == -1: - if catName == 'pdbx_nonpoly_scheme': - sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[comp_id_col]) for row in rowList), - key=itemgetter(1)) + else: + if catName == 'pdbx_nonpoly_scheme': + sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col], row[comp_id_col]) for row in rowList), + key=itemgetter(1)) + else: + if all(row[label_seq_col].isdigit() for row in rowList): + sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], int(row[label_seq_col]), row[comp_id_col]) for row in rowList), + key=lambda x: (len(x[0]), x[0], x[3])) else: - sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[comp_id_col]) for row in rowList), + sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col], row[comp_id_col]) for row in rowList), key=lambda x: (len(x[0]), x[0], x[1])) - keyDict = {(row[chain_id_col], int(row[seq_id_col])): row[comp_id_col] for row in rowList} + keyDict = {(row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col]): row[comp_id_col] for row in rowList} + + for row in rowList: + key = (row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col]) + if keyDict[key] != row[comp_id_col]: + raise KeyError(f"Sequence must be unique. {itNameList[chain_id_col]} {row[chain_id_col]}, " + f"{itNameList[seq_id_col]} {row[seq_id_col]}, " + f"{itNameList[ins_code_col]} {row[ins_code_col]}, " + f"{itNameList[label_seq_col]} {row[label_seq_col]}, " + f"{itNameList[comp_id_col]} {row[comp_id_col]} vs {keyDict[key]}.") + + for c in chainIds: + compDict[c] = [x[4] for x in sortedSeq if x[0] == c] + seqDict[c] = [x[1] for x in sortedSeq if x[0] == c] + insCodeDict[c] = [x[2] for x in sortedSeq if x[0] == c] + labelSeqDict[c] = [x[3] for x in sortedSeq if x[0] == c] + + chainIds = [] + for x in sortedSeq: + if x[0] not in chainIds: + chainIds.append(x[0]) + + if auth_chain_id_col != -1: + for row in rowList: + c = row[chain_id_col] + if c not in authChainDict: + authChainDict[c] = row[auth_chain_id_col] + + if authScheme: + for c in chainIds: + authSeqDict[c] = [] + for s in seqDict[c]: + row = next((row for row in rowList if row[chain_id_col] == c and int(row[seq_id_col]) == s), None) + if row is not None: + if row[auth_seq_id_col] not in emptyValue: + try: + _s = int(row[auth_seq_id_col]) + except ValueError: + _s = None + authSeqDict[c].append(_s) + else: + authSeqDict[c].append(None) + + largeAssembly = catName == 'pdbx_poly_seq_scheme' and len(chainIds) > LEN_MAJOR_ASYM_ID - for row in rowList: - key = (row[chain_id_col], int(row[seq_id_col])) - if keyDict[key] != row[comp_id_col]: - raise KeyError(f"Sequence must be unique. 
{itNameList[chain_id_col]} {row[chain_id_col]}, " - f"{itNameList[seq_id_col]} {row[seq_id_col]}, " - f"{itNameList[comp_id_col]} {row[comp_id_col]} vs {keyDict[key]}.") + caRmsd = caWellDefinedRegion = None + polyPeptideChains, polyPeptideLengths = [], [] - for c in chainIds: - compDict[c] = [x[2] for x in sortedSeq if x[0] == c] - seqDict[c] = [x[1] for x in sortedSeq if x[0] == c] + _seqDict = copy.deepcopy(seqDict) - else: - if catName == 'pdbx_nonpoly_scheme': - sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col], row[comp_id_col]) for row in rowList), - key=itemgetter(1)) - else: - if all(row[label_seq_col].isdigit() for row in rowList): - sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], int(row[label_seq_col]), row[comp_id_col]) for row in rowList), - key=lambda x: (len(x[0]), x[0], x[3])) - else: - sortedSeq = sorted(set((row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col], row[comp_id_col]) for row in rowList), - key=lambda x: (len(x[0]), x[0], x[1])) - - keyDict = {(row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col]): row[comp_id_col] for row in rowList} - - for row in rowList: - key = (row[chain_id_col], int(row[seq_id_col]), row[ins_code_col], row[label_seq_col]) - if keyDict[key] != row[comp_id_col]: - raise KeyError(f"Sequence must be unique. {itNameList[chain_id_col]} {row[chain_id_col]}, " - f"{itNameList[seq_id_col]} {row[seq_id_col]}, " - f"{itNameList[ins_code_col]} {row[ins_code_col]}, " - f"{itNameList[label_seq_col]} {row[label_seq_col]}, " - f"{itNameList[comp_id_col]} {row[comp_id_col]} vs {keyDict[key]}.") - - for c in chainIds: - compDict[c] = [x[4] for x in sortedSeq if x[0] == c] - seqDict[c] = [x[1] for x in sortedSeq if x[0] == c] - insCodeDict[c] = [x[2] for x in sortedSeq if x[0] == c] - labelSeqDict[c] = [x[3] for x in sortedSeq if x[0] == c] - - chainIds = [] - for x in sortedSeq: - if x[0] not in chainIds: - chainIds.append(x[0]) + asm = [] # assembly of a loop + for i, c in enumerate(chainIds): + ent = {} # entity + ident = False + if len(asm) > 0 and largeAssembly: + _ent = asm[-1] + if 'identical_chain_id' in _ent and c in _ent['identical_chain_id']: + ident = True + + if ident: + ent = copy.copy(asm[-1]) + + ent['chain_id'] = ent['auth_chain_id'] = c if auth_chain_id_col != -1: - for row in rowList: - c = row[chain_id_col] - if c not in authChainDict: - authChainDict[c] = row[auth_chain_id_col] - - if authScheme: - for c in chainIds: - authSeqDict[c] = [] - for s in seqDict[c]: - row = next((row for row in rowList if row[chain_id_col] == c and int(row[seq_id_col]) == s), None) - if row is not None: - if row[auth_seq_id_col] not in self.emptyValue: - try: - _s = int(row[auth_seq_id_col]) - except ValueError: - _s = None - authSeqDict[c].append(_s) - else: - authSeqDict[c].append(None) + ent['auth_chain_id'] = authChainDict[c] - largeAssembly = catName == 'pdbx_poly_seq_scheme' and len(chainIds) > LEN_MAJOR_ASYM_ID + if not ident: - caRmsd = caWellDefinedRegion = None - polyPeptideChains, polyPeptideLengths = [], [] + etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None) - _seqDict = copy.deepcopy(seqDict) + # DAOTHER-9644: support for truncated loop in the model + if withRmsd and catName == 'pdbx_poly_seq_scheme' and len(authSeqDict) > 0: # avoid interference of ParserListenerUtils.coordAssemblyChecker() - for i, c in enumerate(chainIds): - ent = {} # 
entity
+            ident = False
+            if len(asm) > 0 and largeAssembly:
+                _ent = asm[-1]
+                if 'identical_chain_id' in _ent and c in _ent['identical_chain_id']:
+                    ident = True
+
+            if ident:
+                ent = copy.copy(asm[-1])
+
+            ent['chain_id'] = ent['auth_chain_id'] = c
            if auth_chain_id_col != -1:
-                for row in rowList:
-                    c = row[chain_id_col]
-                    if c not in authChainDict:
-                        authChainDict[c] = row[auth_chain_id_col]
-
-                if authScheme:
-                    for c in chainIds:
-                        authSeqDict[c] = []
-                        for s in seqDict[c]:
-                            row = next((row for row in rowList if row[chain_id_col] == c and int(row[seq_id_col]) == s), None)
-                            if row is not None:
-                                if row[auth_seq_id_col] not in self.emptyValue:
-                                    try:
-                                        _s = int(row[auth_seq_id_col])
-                                    except ValueError:
-                                        _s = None
-                                    authSeqDict[c].append(_s)
-                                else:
-                                    authSeqDict[c].append(None)
+                ent['auth_chain_id'] = authChainDict[c]

-                largeAssembly = catName == 'pdbx_poly_seq_scheme' and len(chainIds) > LEN_MAJOR_ASYM_ID
+            if not ident:

-                caRmsd = caWellDefinedRegion = None
-                polyPeptideChains, polyPeptideLengths = [], []
+                etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None)

-                _seqDict = copy.deepcopy(seqDict)
+                # DAOTHER-9644: support for truncated loop in the model
+                if withRmsd and catName == 'pdbx_poly_seq_scheme' and len(authSeqDict) > 0:  # avoid interference of ParserListenerUtils.coordAssemblyChecker()

-                for i, c in enumerate(chainIds):
-                    ent = {}  # entity
+                    if len(misPolyLink) > 0:

-                    ident = False
-                    if len(asm) > 0 and largeAssembly:
-                        _ent = asm[-1]
-                        if 'identical_chain_id' in _ent and c in _ent['identical_chain_id']:
-                            ident = True
+                        for mis in misPolyLink:

-                    if ident:
-                        ent = copy.copy(asm[-1])
+                            if mis['auth_chain_id'] != (authChainDict[c] if auth_chain_id_col != -1 else c):
+                                continue

-                    ent['chain_id'] = ent['auth_chain_id'] = c
-                    if auth_chain_id_col != -1:
-                        ent['auth_chain_id'] = authChainDict[c]
+                            auth_seq_id_1 = mis['auth_seq_id_1']
+                            auth_seq_id_2 = mis['auth_seq_id_2']

-                    if not ident:
+                            if auth_seq_id_1 in authSeqDict[c]\
+                               and auth_seq_id_2 in authSeqDict[c]\
+                               and auth_seq_id_1 < auth_seq_id_2:

-                        etype = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c in e['pdbx_strand_id'].split(',')), None)
+                                for auth_seq_id_ in range(auth_seq_id_1 + 1, auth_seq_id_2):
+                                    auth_seq_id_list = list(filter(None, authSeqDict[c]))

-                        # DAOTHER-9644: support for truncated loop in the model
-                        if withRmsd and catName == 'pdbx_poly_seq_scheme' and len(authSeqDict) > 0:  # avoid interference of ParserListenerUtils.coordAssemblyChecker()
+                                    if auth_seq_id_ < min(auth_seq_id_list):
+                                        pos = 0
+                                    elif auth_seq_id_ > max(auth_seq_id_list):
+                                        pos = len(auth_seq_id_list)
+                                    else:
+                                        for idx, _auth_seq_id_ in enumerate(auth_seq_id_list):
+                                            if _auth_seq_id_ < auth_seq_id_:
+                                                continue
+                                            pos = idx
+                                            break

-                            if len(misPolyLink) > 0:
+                                    authSeqDict[c].insert(pos, auth_seq_id_)
+                                    compDict[c].insert(pos, '.')  # DAOTHER-9644: comp_id must be specified at Macromolecule page
+                                    if ins_code_col != -1:
+                                        insCodeDict[c].insert(pos, '.')

-                                for mis in misPolyLink:
+                                # DAOTHER-9644: insert label_seq_id for truncated loop in the coordinates
+                                seqDict[c] = labelSeqDict[c] = list(range(1, len(authSeqDict[c]) + 1))

-                                    if mis['auth_chain_id'] != (authChainDict[c] if auth_chain_id_col != -1 else c):
-                                        continue
+                    # DAOTHER-9644: simulate pdbx_poly_seq_scheme category
+                    elif etype is not None:

-                                    auth_seq_id_1 = mis['auth_seq_id_1']
-                                    auth_seq_id_2 = mis['auth_seq_id_2']
+                        if 'polypeptide' in etype:
+                            BEG_ATOM = "C"
+                            END_ATOM = "N"
+                        else:
+                            BEG_ATOM = "O3'"
+                            END_ATOM = "P"
+
+                        has_ins_code = False
+
+                        for p in range(len(authSeqDict[c]) - 1):
+                            s_p = authSeqDict[c][p]
+                            s_q = authSeqDict[c][p + 1]
+
+                            if None in (s_p, s_q):
+                                continue

-                                    if auth_seq_id_1 in authSeqDict[c]\
-                                        and auth_seq_id_2 in authSeqDict[c]\
-                                        and auth_seq_id_1 < auth_seq_id_2:
+                            if s_p == s_q:
+                                has_ins_code = True
+                                continue
+
+                            if s_p + 1 != s_q:
+                                if has_ins_code:
+                                    has_ins_code = False
+                                    continue
+
+                                auth_seq_id_1 = s_p
+                                auth_seq_id_2 = s_q
+
+                                _beg =\
+                                    self.getDictListWithFilter('atom_site',
+                                                               CARTN_DATA_ITEMS,
+                                                               [{'name': 'label_asym_id', 'type': 'str', 'value': c},
+                                                                {'name': 'auth_seq_id', 'type': 'int', 'value': auth_seq_id_1},
+                                                                {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': BEG_ATOM},
+                                                                {'name': 'pdbx_PDB_model_num', 'type': 'int', 'value': repModelId},
+                                                                {'name': 'label_alt_id', 'type': 'enum', 'enum': (repAltId,)}
+                                                                ])
+
+                                _end =\
+                                    self.getDictListWithFilter('atom_site',
+                                                               CARTN_DATA_ITEMS,
+                                                               [{'name': 'label_asym_id', 'type': 'str', 'value': c},
+                                                                {'name': 'auth_seq_id', 'type': 'int', 'value': auth_seq_id_2},
+                                                                {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': END_ATOM},
+                                                                {'name': 'pdbx_PDB_model_num', 'type': 'int', 'value': repModelId},
+                                                                {'name': 'label_alt_id', 'type': 'enum', 'enum': (repAltId,)}
+                                                                ])
+
+                                if len(_beg) == 1 and len(_end) == 1 and np.linalg.norm(to_np_array(_beg[0]) - to_np_array(_end[0])) > 5.0:
                                    for auth_seq_id_ in
range(auth_seq_id_1 + 1, auth_seq_id_2): auth_seq_id_list = list(filter(None, authSeqDict[c])) @@ -1013,300 +1087,148 @@ def getPolymerSequence(self, catName: str, keyItems: List[dict], # DAOTHER-9644: insert label_seq_id for truncated loop in the coordinates seqDict[c] = labelSeqDict[c] = list(range(1, len(authSeqDict[c]) + 1)) - # DAOTHER-9644: simulate pdbx_poly_seq_scheme category - elif etype is not None: - - if 'polypeptide' in etype: - BEG_ATOM = "C" - END_ATOM = "N" - else: - BEG_ATOM = "O3'" - END_ATOM = "P" - - has_ins_code = False - - for p in range(len(authSeqDict[c]) - 1): - s_p = authSeqDict[c][p] - s_q = authSeqDict[c][p + 1] - - if s_p is None or s_q is None: - continue - - if s_p == s_q: - has_ins_code = True - continue - - if s_p + 1 != s_q: - - if has_ins_code: - has_ins_code = False - continue - - auth_seq_id_1 = s_p - auth_seq_id_2 = s_q - - _beg =\ - self.getDictListWithFilter('atom_site', - CARTN_DATA_ITEMS, - [{'name': 'label_asym_id', 'type': 'str', 'value': c}, - {'name': 'auth_seq_id', 'type': 'int', 'value': auth_seq_id_1}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': BEG_ATOM}, - {'name': 'pdbx_PDB_model_num', 'type': 'int', 'value': repModelId}, - {'name': 'label_alt_id', 'type': 'enum', 'enum': (repAltId,)} - ]) - - _end =\ - self.getDictListWithFilter('atom_site', - CARTN_DATA_ITEMS, - [{'name': 'label_asym_id', 'type': 'str', 'value': c}, - {'name': 'auth_seq_id', 'type': 'int', 'value': auth_seq_id_2}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': END_ATOM}, - {'name': 'pdbx_PDB_model_num', 'type': 'int', 'value': repModelId}, - {'name': 'label_alt_id', 'type': 'enum', 'enum': (repAltId,)} - ]) - - if len(_beg) == 1 and len(_end) == 1 and np.linalg.norm(to_np_array(_beg[0]) - to_np_array(_end[0])) > 5.0: - for auth_seq_id_ in range(auth_seq_id_1 + 1, auth_seq_id_2): - auth_seq_id_list = list(filter(None, authSeqDict[c])) - - if auth_seq_id_ < min(auth_seq_id_list): - pos = 0 - elif auth_seq_id_ > max(auth_seq_id_list): - pos = len(auth_seq_id_list) - else: - for idx, _auth_seq_id_ in enumerate(auth_seq_id_list): - if _auth_seq_id_ < auth_seq_id_: - continue - pos = idx - break - - authSeqDict[c].insert(pos, auth_seq_id_) - compDict[c].insert(pos, '.') # DAOTHER-9644: comp_id must be specified at Macromelucule page - if ins_code_col != -1: - insCodeDict[c].insert(pos, '.') - - # DAOTHER-9644: insert label_seq_id for truncated loop in the coordinates - seqDict[c] = labelSeqDict[c] = list(range(1, len(authSeqDict[c]) + 1)) - - ent['seq_id'] = seqDict[c] - ent['comp_id'] = compDict[c] - if c in insCodeDict: - if any(ic for ic in insCodeDict[c] if ic not in self.emptyValue): - ent['ins_code'] = insCodeDict[c] - if any(s for s in labelSeqDict[c] if s not in self.emptyValue): - if c in labelSeqDict and all(isinstance(s, int) for s in labelSeqDict[c]): - ent['auth_seq_id'] = authSeqDict[c] if authScheme else seqDict[c] - ent['label_seq_id'] = labelSeqDict[c] - ent['seq_id'] = ent['label_seq_id'] + ent['seq_id'] = seqDict[c] + ent['comp_id'] = compDict[c] + if c in insCodeDict: + if any(ic for ic in insCodeDict[c] if ic not in emptyValue): + ent['ins_code'] = insCodeDict[c] + if any(s for s in labelSeqDict[c] if s not in emptyValue): + if c in labelSeqDict and all(isinstance(s, int) for s in labelSeqDict[c]): + ent['auth_seq_id'] = authSeqDict[c] if authScheme else seqDict[c] + ent['label_seq_id'] = labelSeqDict[c] + ent['seq_id'] = ent['label_seq_id'] + + if authScheme: + ent['auth_seq_id'] = authSeqDict[c] + 
ent['gap_in_auth_seq'] = False + for p in range(len(authSeqDict[c]) - 1): + s_p = ent['auth_seq_id'][p] + s_q = ent['auth_seq_id'][p + 1] + if None in (s_p, s_q): + continue + if s_p + 1 != s_q: + ent['gap_in_auth_seq'] = True + break + if auth_comp_id_col != -1: + ent['auth_comp_id'] = [] if authScheme: - ent['auth_seq_id'] = authSeqDict[c] - ent['gap_in_auth_seq'] = False - for p in range(len(authSeqDict[c]) - 1): - s_p = ent['auth_seq_id'][p] - s_q = ent['auth_seq_id'][p + 1] - if s_p is None or s_q is None: - continue - if s_p + 1 != s_q: - ent['gap_in_auth_seq'] = True - break - - if auth_comp_id_col != -1: - ent['auth_comp_id'] = [] - if authScheme: - if ins_code_col != -1: - for s, ic in zip(authSeqDict[c], insCodeDict[c]): - row = next((row for row in rowList if row[chain_id_col] == c - and int(row[auth_seq_id_col]) == s and row[ins_code_col] == ic), None) - if row is not None: - comp_id = row[auth_comp_id_col] - if comp_id not in self.emptyValue: - ent['auth_comp_id'].append(comp_id) - else: - ent['auth_comp_id'].append('.') - else: - ent['auth_comp_id'].append('.') - else: - for s in authSeqDict[c]: - row = next((row for row in rowList if row[chain_id_col] == c - and int(row[auth_seq_id_col]) == s), None) - if row is not None: - comp_id = row[auth_comp_id_col] - if comp_id not in self.emptyValue: - ent['auth_comp_id'].append(comp_id) - else: - ent['auth_comp_id'].append('.') + if ins_code_col != -1: + for s, ic in zip(authSeqDict[c], insCodeDict[c]): + row = next((row for row in rowList if row[chain_id_col] == c + and int(row[auth_seq_id_col]) == s and row[ins_code_col] == ic), None) + if row is not None: + comp_id = row[auth_comp_id_col] + if comp_id not in emptyValue: + ent['auth_comp_id'].append(comp_id) else: ent['auth_comp_id'].append('.') + else: + ent['auth_comp_id'].append('.') else: - for s in seqDict[c]: + for s in authSeqDict[c]: row = next((row for row in rowList if row[chain_id_col] == c - and int(row[seq_id_col]) == s), None) + and int(row[auth_seq_id_col]) == s), None) if row is not None: comp_id = row[auth_comp_id_col] - if comp_id not in self.emptyValue: + if comp_id not in emptyValue: ent['auth_comp_id'].append(comp_id) else: ent['auth_comp_id'].append('.') else: ent['auth_comp_id'].append('.') else: - ent['auth_comp_id'] = ent['comp_id'] - - if alt_comp_id_col != -1: - ent['alt_comp_id'] = [] - if authScheme: - if ins_code_col != -1: - for s, ic in zip(authSeqDict[c], insCodeDict[c]): - row = next((row for row in rowList if row[chain_id_col] == c - and int(row[auth_seq_id_col]) == s and row[ins_code_col] == ic), None) - if row is not None: - comp_id = row[alt_comp_id_col] - if comp_id not in self.emptyValue: - ent['alt_comp_id'].append(comp_id) - else: - ent['alt_comp_id'].append('.') - else: - ent['alt_comp_id'].append('.') + for s in seqDict[c]: + row = next((row for row in rowList if row[chain_id_col] == c + and int(row[seq_id_col]) == s), None) + if row is not None: + comp_id = row[auth_comp_id_col] + if comp_id not in emptyValue: + ent['auth_comp_id'].append(comp_id) + else: + ent['auth_comp_id'].append('.') else: - for s in authSeqDict[c]: - row = next((row for row in rowList if row[chain_id_col] == c - and int(row[auth_seq_id_col]) == s), None) - if row is not None: - comp_id = row[alt_comp_id_col] - if comp_id not in self.emptyValue: - ent['alt_comp_id'].append(comp_id) - else: - ent['alt_comp_id'].append('.') + ent['auth_comp_id'].append('.') + else: + ent['auth_comp_id'] = ent['comp_id'] + + if alt_comp_id_col != -1: + ent['alt_comp_id'] = [] + if 
authScheme: + if ins_code_col != -1: + for s, ic in zip(authSeqDict[c], insCodeDict[c]): + row = next((row for row in rowList if row[chain_id_col] == c + and int(row[auth_seq_id_col]) == s and row[ins_code_col] == ic), None) + if row is not None: + comp_id = row[alt_comp_id_col] + if comp_id not in emptyValue: + ent['alt_comp_id'].append(comp_id) else: ent['alt_comp_id'].append('.') + else: + ent['alt_comp_id'].append('.') else: - for s in seqDict[c]: + for s in authSeqDict[c]: row = next((row for row in rowList if row[chain_id_col] == c - and int(row[seq_id_col]) == s), None) + and int(row[auth_seq_id_col]) == s), None) if row is not None: comp_id = row[alt_comp_id_col] - if comp_id not in self.emptyValue: + if comp_id not in emptyValue: ent['alt_comp_id'].append(comp_id) else: ent['alt_comp_id'].append('.') else: ent['alt_comp_id'].append('.') - - if withStructConf and i < LEN_MAJOR_ASYM_ID: # to process large assembly avoiding forced timeout - ent['struct_conf'] = self.__extractStructConf(c, authSeqDict[c] if authScheme else seqDict[c], not authScheme) - - # to process large assembly avoiding forced timeout (2ms7, 21 chains) - if withRmsd and etype is not None and totalModels > 1 and i < LEN_MAJOR_ASYM_ID / 2: - ent['type'] = etype - - randomM = None - if self.__random_rotaion_test: - randomM = {} - for model_id in effModelIds: - axis = [random.uniform(-1.0, 1.0), random.uniform(-1.0, 1.0), random.uniform(-1.0, 1.0)] - if self.__single_model_rotation_test: - theta = 0.0 if model_id > 1 else np.pi / 4.0 + else: + for s in seqDict[c]: + row = next((row for row in rowList if row[chain_id_col] == c + and int(row[seq_id_col]) == s), None) + if row is not None: + comp_id = row[alt_comp_id_col] + if comp_id not in emptyValue: + ent['alt_comp_id'].append(comp_id) else: - theta = random.uniform(-np.pi, np.pi) - randomM[model_id] = M(axis, theta) + ent['alt_comp_id'].append('.') + else: + ent['alt_comp_id'].append('.') + + if withStructConf and i < LEN_MAJOR_ASYM_ID: # to process large assembly avoiding forced timeout + ent['struct_conf'] = self.__extractStructConf(c, authSeqDict[c] if authScheme else seqDict[c], not authScheme) + + # to process large assembly avoiding forced timeout (2ms7, 21 chains) + if withRmsd and etype is not None and totalModels > 1 and i < LEN_MAJOR_ASYM_ID / 2: + ent['type'] = etype + + randomM = None + if self.__random_rotaion_test: + randomM = {} + for model_id in effModelIds: + axis = [random.uniform(-1.0, 1.0), random.uniform(-1.0, 1.0), random.uniform(-1.0, 1.0)] + if self.__single_model_rotation_test: + theta = 0.0 if model_id > 1 else np.pi / 4.0 + else: + theta = random.uniform(-np.pi, np.pi) + randomM[model_id] = M(axis, theta) - if 'polypeptide' in etype: + if 'polypeptide' in etype: - if caRmsd is None: + if caRmsd is None: - polyPeptideChains = [c] - polyPeptideLengths = [len(_seqDict[c])] + polyPeptideChains = [c] + polyPeptideLengths = [len(_seqDict[c])] - for c2 in chainIds: + for c2 in chainIds: - if c2 == c: - continue + if c2 == c: + continue - etype2 = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c2 in e['pdbx_strand_id'].split(',')), None) - - if etype2 is not None and 'polypeptide' in etype2: - polyPeptideChains.append(c2) - polyPeptideLengths.append(len(_seqDict[c2])) - - ca_atom_sites = self.getDictListWithFilter('atom_site', - [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, - {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, - {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, - {'name': 'label_asym_id', 
'type': 'str', 'alt_name': 'chain_id'}, - {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, - {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, - {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} - ], - [{'name': 'label_asym_id', 'type': 'enum', - 'enum': polyPeptideChains}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'CA'}, - {'name': 'label_alt_id', 'type': 'enum', - 'enum': (repAltId,)}, - {'name': 'type_symbol', 'type': 'str', 'value': 'C'}]) - - co_atom_sites = self.getDictListWithFilter('atom_site', - [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, - {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, - {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, - {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, - {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, - {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, - {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} - ], - [{'name': 'label_asym_id', 'type': 'enum', - 'enum': polyPeptideChains}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'C'}, - {'name': 'label_alt_id', 'type': 'enum', - 'enum': (repAltId,)}, - {'name': 'type_symbol', 'type': 'str', 'value': 'C'}]) - - bb_atom_sites = self.getDictListWithFilter('atom_site', - [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, - {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, - {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, - {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, - {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, - {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, - {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} - ], - [{'name': 'label_asym_id', 'type': 'enum', - 'enum': polyPeptideChains}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'N'}, - {'name': 'label_alt_id', 'type': 'enum', - 'enum': (repAltId,)}, - {'name': 'type_symbol', 'type': 'str', 'value': 'N'}]) - - bb_atom_sites.extend(ca_atom_sites) - bb_atom_sites.extend(co_atom_sites) - - caRmsd, caWellDefinedRegion = self.__calculateRmsd(polyPeptideChains, polyPeptideLengths, - totalModels, effModelIds, - ca_atom_sites, bb_atom_sites, randomM) - - if caRmsd is not None: - ent['ca_rmsd'] = caRmsd[polyPeptideChains.index(c)] - if caWellDefinedRegion is not None: - ent['well_defined_region'] = caWellDefinedRegion[polyPeptideChains.index(c)] - - elif 'ribonucleotide' in etype: - - p_atom_sites = self.getDictListWithFilter('atom_site', - [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, - {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, - {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, - {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, - {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, - {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, - {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} - ], - [{'name': 'label_asym_id', 'type': 'str', 'value': c}, - {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'P'}, - {'name': 'label_alt_id', 'type': 'enum', - 'enum': (repAltId,)}, - {'name': 'type_symbol', 'type': 'str', 'value': 'P'}]) + etype2 = next((e['type'] for e in entityPoly if 'pdbx_strand_id' in e and c2 in e['pdbx_strand_id'].split(',')), None) - bb_atom_sites = 
self.getDictListWithFilter('atom_site', + if etype2 is not None and 'polypeptide' in etype2: + polyPeptideChains.append(c2) + polyPeptideLengths.append(len(_seqDict[c2])) + + ca_atom_sites = self.getDictListWithFilter('atom_site', [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, @@ -1315,46 +1237,123 @@ def getPolymerSequence(self, catName: str, keyItems: List[dict], {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} ], - [{'name': 'label_asym_id', 'type': 'str', 'value': c}, - {'name': 'label_atom_id', 'type': 'enum', - 'enum': ("C5'", "C4'", "C3'")}, + [{'name': 'label_asym_id', 'type': 'enum', + 'enum': polyPeptideChains}, + {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'CA'}, {'name': 'label_alt_id', 'type': 'enum', 'enum': (repAltId,)}, {'name': 'type_symbol', 'type': 'str', 'value': 'C'}]) - bb_atom_sites.extend(p_atom_sites) - - pRmsd, pWellDefinedRegion = self.__calculateRmsd([c], [len(_seqDict[c])], - totalModels, effModelIds, - p_atom_sites, bb_atom_sites, randomM) - - if pRmsd is not None: - ent['p_rmsd'] = pRmsd[0] - if pWellDefinedRegion is not None: - ent['well_defined_region'] = pWellDefinedRegion[0] + co_atom_sites = self.getDictListWithFilter('atom_site', + [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, + {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, + {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, + {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, + {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, + {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, + {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} + ], + [{'name': 'label_asym_id', 'type': 'enum', + 'enum': polyPeptideChains}, + {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'C'}, + {'name': 'label_alt_id', 'type': 'enum', + 'enum': (repAltId,)}, + {'name': 'type_symbol', 'type': 'str', 'value': 'C'}]) - if len(chainIds) > 1: - identity = [] - for _c in chainIds: - if _c == c: - continue - if compDict[_c] == compDict[c]: - identity.append(_c) - if len(identity) > 0: - ent['identical_chain_id'] = identity - if auth_chain_id_col != -1: - ent['identical_auth_chain_id'] = [authChainDict[c] for c in identity] + bb_atom_sites = self.getDictListWithFilter('atom_site', + [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, + {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, + {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, + {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, + {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, + {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, + {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} + ], + [{'name': 'label_asym_id', 'type': 'enum', + 'enum': polyPeptideChains}, + {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'N'}, + {'name': 'label_alt_id', 'type': 'enum', + 'enum': (repAltId,)}, + {'name': 'type_symbol', 'type': 'str', 'value': 'N'}]) + + bb_atom_sites.extend(ca_atom_sites) + bb_atom_sites.extend(co_atom_sites) + + caRmsd, caWellDefinedRegion = self.__calculateRmsd(polyPeptideChains, polyPeptideLengths, + totalModels, effModelIds, + ca_atom_sites, bb_atom_sites, randomM) + + if caRmsd is not None: 
+ ent['ca_rmsd'] = caRmsd[polyPeptideChains.index(c)] + if caWellDefinedRegion is not None: + ent['well_defined_region'] = caWellDefinedRegion[polyPeptideChains.index(c)] + + elif 'ribonucleotide' in etype: + + p_atom_sites = self.getDictListWithFilter('atom_site', + [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, + {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, + {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, + {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, + {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, + {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, + {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} + ], + [{'name': 'label_asym_id', 'type': 'str', 'value': c}, + {'name': 'label_atom_id', 'type': 'starts-with-alnum', 'value': 'P'}, + {'name': 'label_alt_id', 'type': 'enum', + 'enum': (repAltId,)}, + {'name': 'type_symbol', 'type': 'str', 'value': 'P'}]) + + bb_atom_sites = self.getDictListWithFilter('atom_site', + [{'name': 'Cartn_x', 'type': 'float', 'alt_name': 'x'}, + {'name': 'Cartn_y', 'type': 'float', 'alt_name': 'y'}, + {'name': 'Cartn_z', 'type': 'float', 'alt_name': 'z'}, + {'name': 'label_asym_id', 'type': 'str', 'alt_name': 'chain_id'}, + {'name': 'auth_seq_id', 'type': 'int', 'alt_name': 'seq_id'}, + {'name': 'ndb_model' if alias else 'pdbx_PDB_model_num', 'type': 'int', 'alt_name': 'model_id'}, + {'name': 'type_symbol', 'type': 'str', 'alt_name': 'element'} + ], + [{'name': 'label_asym_id', 'type': 'str', 'value': c}, + {'name': 'label_atom_id', 'type': 'enum', + 'enum': ("C5'", "C4'", "C3'")}, + {'name': 'label_alt_id', 'type': 'enum', + 'enum': (repAltId,)}, + {'name': 'type_symbol', 'type': 'str', 'value': 'C'}]) + + bb_atom_sites.extend(p_atom_sites) + + pRmsd, pWellDefinedRegion = self.__calculateRmsd([c], [len(_seqDict[c])], + totalModels, effModelIds, + p_atom_sites, bb_atom_sites, randomM) + + if pRmsd is not None: + ent['p_rmsd'] = pRmsd[0] + if pWellDefinedRegion is not None: + ent['well_defined_region'] = pWellDefinedRegion[0] + + if len(chainIds) > 1: + identity = [] + for _c in chainIds: + if _c == c: + continue + if compDict[_c] == compDict[c]: + identity.append(_c) + if len(identity) > 0: + ent['identical_chain_id'] = identity + if auth_chain_id_col != -1: + ent['identical_auth_chain_id'] = [authChainDict[c] for c in identity] - if len(unmapSeqIds) > 0 and c in unmapSeqIds and c in chainIdWoDefault: - ent['unmapped_seq_id'] = [int(s) for s, r in unmapSeqIds[c]] - ent['unmapped_auth_seq_id'] = [int(s) for s in unmapAuthSeqIds[c]] + if len(unmapSeqIds) > 0 and c in unmapSeqIds and c in chainIdWoDefault: + ent['unmapped_seq_id'] = [int(s) for s, r in unmapSeqIds[c]] + ent['unmapped_auth_seq_id'] = [int(s) for s in unmapAuthSeqIds[c]] - asm.append(ent) + asm.append(ent) return asm def __extractStructConf(self, chain_id: str, seq_ids: List[int], label_scheme: bool = True) -> List[Optional[str]]: - """ Extract structure conformational annotations + """ Extract structure conformational annotations. 
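# Hedged sketch of the random-rotation test above: M(axis, theta) is assumed to
# build a 3x3 rotation matrix from a random axis and angle (Euler-Rodrigues /
# quaternion form). This stand-in is illustrative only, not the repository helper.
import numpy as np

def rotation_matrix(axis, theta):
    # Normalize the axis, then expand the unit quaternion (a, b, c, d).
    axis = np.asarray(axis, dtype=float)
    axis /= np.linalg.norm(axis)
    a = np.cos(theta / 2.0)
    b, c, d = -axis * np.sin(theta / 2.0)
    return np.array([[a * a + b * b - c * c - d * d, 2 * (b * c + a * d), 2 * (b * d - a * c)],
                     [2 * (b * c - a * d), a * a + c * c - b * b - d * d, 2 * (c * d + a * b)],
                     [2 * (b * d + a * c), 2 * (c * d - a * b), a * a + d * d - b * b - c * c]])

# Rotating by 2*pi about any axis is the identity (up to rounding).
assert np.allclose(rotation_matrix([1.0, 1.0, 0.0], 2.0 * np.pi), np.eye(3))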
""" ret = [None] * len(seq_ids) @@ -1396,10 +1395,10 @@ def __extractStructConf(self, chain_id: str, seq_ids: List[int], label_scheme: b def __calculateRmsd(self, chain_ids: List[str], lengths: List[int], total_models: int = 1, eff_model_ids: Optional[List[str]] = None, atom_sites: Optional[List[dict]] = None, bb_atom_sites: Optional[List[dict]] = None, randomM: Optional[List[list]] = None) -> Tuple[Optional[List[dict]], Optional[List[dict]]]: - """ Calculate RMSD of alpha carbons/phosphates in the ensemble + """ Calculate RMSD of alpha carbons/phosphates in the ensemble. """ - if atom_sites is None or bb_atom_sites is None: + if None in (atom_sites, bb_atom_sites): return None, None _atom_site_dict = {} @@ -1487,8 +1486,7 @@ def __calculateRmsd(self, chain_ids: List[str], lengths: List[int], total_models else: q = max(1.0 - math.sqrt(d_var[j, i] / max_d_var), 0.0) - d_ord[i, j] = q - d_ord[j, i] = q + d_ord[i, j] = d_ord[j, i] = q _, v = np.linalg.eig(d_ord) diff --git a/wwpdb/utils/nmr/io/mmCIFUtil.py b/wwpdb/utils/nmr/io/mmCIFUtil.py index ca45296f..336b8ae1 100644 --- a/wwpdb/utils/nmr/io/mmCIFUtil.py +++ b/wwpdb/utils/nmr/io/mmCIFUtil.py @@ -1,18 +1,19 @@ -""" -File: mmCIFUtil.py -Author: Zukang Feng -Update: 21-August-2012 -Version: 001 Initial version +## +# File: mmCIFUtil.py +# Date: 21-Aug-2012 Zukang Feng +# # Update: -# 07-Apr-2020 M. Yokochi - Re-write Zukang's version to being aware of multiple datablocks -# 30-May-2024 M. Yokochi - Resolve duplication of datablock/saveframe name (DAOTHER-9437) -# 16-Jan-2025 M. Yokochi - Abandon symbolic label representations in mmCIF for mutual format conversion +# 07-Apr-2020 M. Yokochi - re-write Zukang's version to being aware of multiple datablocks +# 30-May-2024 M. Yokochi - resolve duplication of datablock/saveframe name (DAOTHER-9437) +# 16-Jan-2025 M. Yokochi - abandon symbolic label representations in mmCIF for mutual format conversion ## +""" A collection of classes for manipulating CIF files containing multiple datablocks. """ - -__author__ = "Zukang Feng" -__email__ = "zfeng@rcsb.rutgers.edu" -__version__ = "V0.001" +__docformat__ = "restructuredtext en" +__author__ = "Zukang Feng, Masashi Yokochi" +__email__ = "zfeng@rcsb.rutgers.edu, yokochi@protein.osaka-u.ac.jp" +__license__ = "Creative Commons Attribution 3.0 Unported" +__version__ = "1.0.3" import sys import os @@ -26,19 +27,24 @@ from mmcif.io.PdbxReader import PdbxReader from mmcif.io.PdbxWriter import PdbxWriter +try: + from wwpdb.utils.nmr.AlignUtil import emptyValue +except ImportError: + from nmr.AlignUtil import emptyValue + label_symbol_pattern = re.compile(r'^\$[^\s\$\?\\\'\"\`;]+$') def get_ext_block_name(name: str, ext: int = 1) -> str: - """ Return unique block name avoiding duplication + """ Return unique block name avoiding duplication. """ return name if ext == 1 else f'{name}_{ext}' def abandon_symbolic_labels(containerList: list): - """ Abandon symbolic label representations that serve as saveframe pointers in NMR-STAR + """ Abandon symbolic label representations that serve as saveframe pointers in NMR-STAR. """ for container in containerList: @@ -57,10 +63,10 @@ def abandon_symbolic_labels(containerList: list): class mmCIFUtil: - """ Using pdbx mmCIF utility to parse mmCIF file + """ Accessor methods for manipulating CIF files containing multiple datablocks. 
""" - def __init__(self, verbose: bool = False, log: IO = sys.stderr, filePath: Optional[str] = None): # pylint: disable=unused-argument + def __init__(self, verbose: bool = False, log: IO = sys.stderr, filePath: Optional[str] = None): self.__class_name__ = self.__class__.__name__ self.__verbose = verbose @@ -71,7 +77,7 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr, filePath: Option self.__dataMap = {} self.__blockNameList = [] - if not self.__filePath: + if self.__filePath is None: return try: @@ -84,7 +90,7 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr, filePath: Option pRd = PdbxReader(ifh) pRd.read(self.__dataList) - if self.__dataList: + if len(self.__dataList) > 0: is_star = all(container.getType() == 'data' for container in self.__dataList) idx = 0 for container in self.__dataList: @@ -109,33 +115,33 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr, filePath: Option self.__lfh.write(f"+{self.__class_name__} ++ Error - Read {self.__filePath} failed {str(e)}\n") def GetBlockIDList(self) -> List[str]: - """ Return list of block ID + """ Return list of blockID. """ return self.__blockNameList def GetValueAndItemByBlock(self, blockName: str, catName: str, ext: int = 1) -> Tuple[List[dict], List[str]]: - """ Get category values and item names + """ Get category values and item names. """ - dList, iList = [], [] if blockName not in self.__dataMap: - return dList, iList + return [], [] catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: - return dList, iList + + if catObj is None: + return [], [] iList = catObj.getAttributeList() - rowList = catObj.getRowList() - for row in rowList: + dList = [] + for row in catObj.getRowList(): try: tD = {} for idxIt, itName in enumerate(iList): - if row[idxIt] != "?" and row[idxIt] != ".": + if row[idxIt] not in emptyValue: tD[itName] = row[idxIt] - if tD: + if len(tD) > 0: dList.append(tD) except IndexError: @@ -144,26 +150,24 @@ def GetValueAndItemByBlock(self, blockName: str, catName: str, ext: int = 1) -> return dList, iList def GetValue(self, blockName: str, catName: str, ext: int = 1) -> List[dict]: - """ Get category values in a given Data Block and Category - The results are stored in a list of dictionaries with item name as key + """ Get category values in a given datablock and category. """ return self.GetValueAndItemByBlock(blockName, catName, ext)[0] def GetSingleValue(self, blockName: str, catName: str, itemName: str, ext: int) -> Any: - """ Get the first value of a given Data Block, Category, Item + """ Get the first value of a given datablock, category, and item. """ - text = '' - dlist = self.GetValue(blockName, catName, ext) - if dlist: - if itemName in dlist[0]: - text = dlist[0][itemName] - return text + + if len(dlist) > 0 and itemName in dlist[0]: + return dlist[0][itemName] + + return '' def UpdateSingleRowValue(self, blockName: str, catName: str, itemName: str, rowIndex: int, value: Any, ext: int = 1): - """ Update value in single row + """ Update single row with a given value. 
""" if blockName not in self.__dataMap: @@ -171,13 +175,13 @@ def UpdateSingleRowValue(self, blockName: str, catName: str, itemName: str, rowI catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: + if catObj is None: return catObj.setValue(value, itemName, rowIndex) def UpdateMultipleRowsValue(self, blockName: str, catName: str, itemName: str, value: Any, ext: int = 1): - """ Update value in multiple rows + """ Update multiple rows with a given value. """ if blockName not in self.__dataMap: @@ -185,24 +189,21 @@ def UpdateMultipleRowsValue(self, blockName: str, catName: str, itemName: str, v catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: + if catObj is None: return - rowNo = catObj.getRowCount() - for rowIndex in range(0, rowNo): + for rowIndex in range(0, catObj.getRowCount()): catObj.setValue(value, itemName, rowIndex) def AddBlock(self, blockName: str, ext: int = 1): - """ Add Data Block + """ Add a datablock. """ - container = DataContainer(blockName) - self.__dataMap[get_ext_block_name(blockName, ext)] = len(self.__dataList) - self.__dataList.append(container) + self.__dataList.append(DataContainer(blockName)) def AddCategory(self, blockName: str, catName: str, items: List[str], ext: int = 1): - """ Add Category in a given Data Block + """ Add a category in a given datablock. """ if blockName not in self.__dataMap: @@ -216,7 +217,7 @@ def AddCategory(self, blockName: str, catName: str, items: List[str], ext: int = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].append(category) def RemoveCategory(self, blockName: str, catName: str, ext: int = 1): - """ Remove Category in a given Data Block + """ Remove a category in a given datablock. """ if blockName not in self.__dataMap: @@ -232,7 +233,7 @@ def RemoveCategory(self, blockName: str, catName: str, ext: int = 1): self.__dataList[idx].remove(catName) def MoveCategoryToTop(self, blockName: str, catName: str, ext: int = 1): - """ Move Category to top in a given Data Block + """ Move category to top in a given datablock. """ if blockName not in self.__dataMap: @@ -258,7 +259,7 @@ def MoveCategoryToTop(self, blockName: str, catName: str, ext: int = 1): self.__dataList[idx].append(_catObj) def InsertData(self, blockName: str, catName: str, dataList: list, ext: int = 1): - """ Insert data in a given Data Block and Category + """ Insert data in a given datablock and category. """ if blockName not in self.__dataMap: @@ -273,7 +274,7 @@ def InsertData(self, blockName: str, catName: str, dataList: list, ext: int = 1) catObj.append(data) def ExtendCategory(self, blockName: str, catName: str, items: List[str], dataList: list, col: int = -1, ext: int = 1): - """ Extend existing Category in a given Data Block + """ Extend existing category in a given datablock. 
""" if blockName not in self.__dataMap: @@ -286,7 +287,7 @@ def ExtendCategory(self, blockName: str, catName: str, items: List[str], dataLis append_items = col < 0 or col >= catObj.getAttributeCount() - data_len = len(dataList) + len_data = len(dataList) empty_row = ['.'] * len(items) if append_items: @@ -294,11 +295,8 @@ def ExtendCategory(self, blockName: str, catName: str, items: List[str], dataLis for item in items: catObj.appendAttribute(item) - rowList = catObj.getRowList() - - for idx, row in enumerate(rowList): - - if idx < data_len: + for idx, row in enumerate(catObj.getRowList()): + if idx < len_data: row.extend(dataList[idx]) else: row.extend(empty_row) @@ -306,17 +304,13 @@ def ExtendCategory(self, blockName: str, catName: str, items: List[str], dataLis else: attrNameList = catObj.getAttributeList() - _attrNameList = attrNameList[0:col] + items + attrNameList[col:] - rowList = catObj.getRowList() - _rowList = [] - - for idx, row in enumerate(rowList): + for idx, row in enumerate(catObj.getRowList()): _row = row[0:col] - if idx < data_len: + if idx < len_data: _row.extend(dataList[idx]) else: _row.extend(empty_row) @@ -329,10 +323,10 @@ def ExtendCategory(self, blockName: str, catName: str, items: List[str], dataLis catObj.setRowList(_rowList) def CopyValueInRow(self, blockName: str, catName: str, srcItems: List[str], dstItems: List[str], ext: int = 1): - """ Copy value from source items to destination items + """ Copy values of source items to destination items. """ - if srcItems is None or dstItems is None or len(srcItems) != len(dstItems): + if None in (srcItems, dstItems) or len(srcItems) != len(dstItems): return if blockName not in self.__dataMap: @@ -361,7 +355,7 @@ def CopyValueInRow(self, blockName: str, catName: str, srcItems: List[str], dstI row[dst_cols[j]] = row[src_col] def WriteCif(self, outputFilePath: Optional[str] = None): - """ Write out cif file + """ Write CIF file. """ if not outputFilePath: @@ -372,19 +366,13 @@ def WriteCif(self, outputFilePath: Optional[str] = None): pdbxW.write(self.__dataList) def GetCategories(self) -> dict: - """ Get all Categories in all Data Blocks + """ Get all categories in all datablocks. """ - data = {} - - for container in self.__dataList: - blockName = container.getName() - data[blockName] = container.getObjNameList() - - return data + return {container.getName(): container.getObjNameList() for container in self.__dataList} def GetAttributes(self, blockName: str, catName: str, ext: int = 1) -> List[str]: - """ Get item name in Data Block and Category + """ Get item name in given datablock and category. """ if blockName not in self.__dataMap: @@ -392,13 +380,13 @@ def GetAttributes(self, blockName: str, catName: str, ext: int = 1) -> List[str] catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: + if catObj is None: return [] return catObj.getAttributeList() def GetRowLength(self, blockName: str, catName: str, ext: int = 1) -> int: - """ Return length of rows of a given category + """ Return length of rows of a given datablock and category. 
""" if blockName not in self.__dataMap: @@ -406,13 +394,13 @@ def GetRowLength(self, blockName: str, catName: str, ext: int = 1) -> int: catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: + if catObj is None: return 0 return len(catObj.getRowList()) def GetRowList(self, blockName: str, catName: str, ext: int = 1) -> List[list]: - """ Get a list of list of a geven Data Block and Category + """ Get a list of list of a given datablock and category. """ if blockName not in self.__dataMap: @@ -420,13 +408,13 @@ def GetRowList(self, blockName: str, catName: str, ext: int = 1) -> List[list]: catObj = self.__dataList[self.__dataMap[get_ext_block_name(blockName, ext)]].getObj(catName) - if not catObj: + if catObj is None: return [] return catObj.getRowList() def GetDictList(self, blockName: str, catName: str, ext: int = 1) -> dict: - """ Get a list of dictionaries of a given Data Block and Category + """ Get a list of dictionary of a given datablock and category. """ dList, iList = self.GetValueAndItemByBlock(blockName, catName, ext) @@ -435,15 +423,15 @@ def GetDictList(self, blockName: str, catName: str, ext: int = 1) -> dict: return {catName: {"Items": iList, "Values": data}} def GetDataBlock(self, blockName: str, ext: int = 1) -> dict: - """ Get a list of dictionaries of a given Data Block + """ Get a dictionary of a given datablock. """ - data = {} - categories = self.GetCategories() + if blockName not in categories: - return data + return {} + data = {} for catName in categories[blockName]: data.update(self.GetDictList(blockName, catName, ext)) diff --git a/wwpdb/utils/nmr/mr/AmberMRParserListener.py b/wwpdb/utils/nmr/mr/AmberMRParserListener.py index a0cc6f16..861ea574 100644 --- a/wwpdb/utils/nmr/mr/AmberMRParserListener.py +++ b/wwpdb/utils/nmr/mr/AmberMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for AMBER MR files. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import copy import collections @@ -1412,7 +1418,7 @@ def exitRestraint_statement(self, ctx: AmberMRParser.Restraint_statementContext) if cca[self.__ccU.ccaAtomId] == dihed_factors[mis_idx - 1]['atom_id']), None) atomId2 = next((cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList if cca[self.__ccU.ccaAtomId] == dihed_factors[mis_idx + 1]['atom_id']), None) - if atomId1 is None or atomId2 is None: + if None in (atomId1, atomId2): rescued = False else: bondedTo1 = self.__ccU.getBondedAtoms(compId, atomId1) @@ -1473,7 +1479,7 @@ def exitRestraint_statement(self, ctx: AmberMRParser.Restraint_statementContext) if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -3676,7 +3682,7 @@ def exitRestraint_statement(self, ctx: AmberMRParser.Restraint_statementContext) if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -5187,6 +5193,7 @@ def getAtomNumberDictFromAmbmaskInfo(self, seqId: int, atomId: str, order: int = authChainId: Optional[str] = None) -> Optional[dict]: """ Return atom number dictionary like component from Amber 10 ambmask information. """ + if not self.__hasPolySeq and not self.__hasNonPolySeq: return None @@ -5653,6 +5660,7 @@ def getAtomNumberDictFromAmbmaskInfo(self, seqId: int, atomId: str, order: int = def reportSanderCommentIssue(self, subtype_name: str): """ Report Sander comment issue. """ + if self.lastComment is None: self.__f.append(f"[Missing data] {self.__getCurrentRestraint()}" "Failed to recognize AMBER atom numbers in the restraint file " @@ -5673,6 +5681,7 @@ def reportSanderCommentIssue(self, subtype_name: str): def updateSanderAtomNumberDict(self, factor: dict, cifCheck: bool = True, useDefault: bool = True, useAuthSeqScheme: bool = False) -> bool: """ Try to update Sander atom number dictionary. 
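# The recurring refactor in this patch rewrites chained "x is None or y is None"
# checks as a tuple membership test; the two agree for ordinary objects because
# membership tries identity before equality:
star_atom1, star_atom2 = None, {'atom_id': 'CA'}
assert (star_atom1 is None or star_atom2 is None) == (None in (star_atom1, star_atom2))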
""" + if not self.__hasPolySeq and not self.__hasNonPolySeq: return False @@ -6160,7 +6169,7 @@ def updateSanderAtomNumberDict(self, factor: dict, cifCheck: bool = True, useDef ligands += 1 if ligands == 1: authCompId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': authCompId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -6495,6 +6504,7 @@ def updateSanderAtomNumberDict(self, factor: dict, cifCheck: bool = True, useDef def updateSanderAtomNumberDictWithAmbigCode(self, factor: dict, cifCheck: bool = True, useDefault: bool = True, useAuthSeqScheme: bool = False) -> bool: """ Try to update Sander atom number dictionary. """ + if not self.__hasPolySeq and not self.__hasNonPolySeq: return False @@ -7232,6 +7242,7 @@ def updateSanderAtomNumberDictWithAmbigCode(self, factor: dict, cifCheck: bool = def checkDistSequenceOffset(self, seqId1: int, compId1: str, seqId2: int, compId2: str) -> bool: """ Try to find sequence offset from Sander comments. """ + if not self.__hasPolySeq or self.__cur_subtype != 'dist': return False @@ -7280,6 +7291,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -7394,6 +7406,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -7566,6 +7579,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, def getNeighborCandidateAtom(self, factor: dict, src_atom: dict, around: float) -> Optional[dict]: """ Try to find neighbor atom from given conditions. 
""" + if not self.__hasCoord: return None @@ -11297,13 +11311,13 @@ def __getCurrentRestraint(self, dataset: Optional[str] = None, n: Optional[int] if self.__cur_subtype == 'dihed': return f"[Check the {self.dihedRestraints}th row of torsional angle restraints] " if self.__cur_subtype == 'rdc': - if dataset is None or n is None: + if None in (dataset, n): return f"[Check the {self.rdcRestraints}th row of residual dipolar coupling restraints] " return f"[Check the {n}th row of residual dipolar coupling restraints (dataset={dataset})] " if self.__cur_subtype == 'plane': return f"[Check the {self.planeRestraints}th row of plane-point/plane angle restraints] " if self.__cur_subtype == 'noepk': - if dataset is None or n is None: + if None in (dataset, n): return f"[Check the {self.noepkRestraints}th row of NOESY volume restraints] " return f"[Check the {n}th row of NOESY volume restraints (dataset={dataset})] " if self.__cur_subtype == 'procs': @@ -11311,11 +11325,11 @@ def __getCurrentRestraint(self, dataset: Optional[str] = None, n: Optional[int] return f"[Check the {self.procsRestraints}th row of chemical shift restraints] " return f"[Check the {n}th row of chemical shift restraints] " if self.__cur_subtype == 'pcs': - if dataset is None or n is None: + if None in (dataset, n): return f"[Check the {self.pcsRestraints}th row of pseudocontact shift restraints] " return f"[Check the {n}th row of pseudocontact shift restraints (name of paramagnetic center={dataset})] " if self.__cur_subtype == 'csa': - if dataset is None or n is None: + if None in (dataset, n): return f"[Check the {self.csaRestraints}th row of residual CSA or pseudo-CSA restraints] " return f"[Check the {n}th row of residual CSA or pseudo-CSA restraints (dataset={dataset})] " if self.__cur_subtype == 'geo': @@ -11436,41 +11450,49 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of AMBER MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and AMBER MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and AMBER MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getAtomNumberDict(self) -> dict: """ Return AMBER atomic number dictionary. """ + return self.__atomNumberDict def getSanderAtomNumberDict(self) -> dict: """ Return AMBER atomic number dictionary based on Sander comments. """ + return self.__sanderAtomNumberDict def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing AMBER MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def hasComments(self) -> bool: """ Return whether Sander comments are available. """ + return self.__hasComments def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/AmberMRReader.py b/wwpdb/utils/nmr/mr/AmberMRReader.py index 409a352e..84c894e2 100644 --- a/wwpdb/utils/nmr/mr/AmberMRReader.py +++ b/wwpdb/utils/nmr/mr/AmberMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing AMBER MR files. 
""" +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os import copy diff --git a/wwpdb/utils/nmr/mr/AmberPTParserListener.py b/wwpdb/utils/nmr/mr/AmberPTParserListener.py index 4d0cd98e..b668c51c 100644 --- a/wwpdb/utils/nmr/mr/AmberPTParserListener.py +++ b/wwpdb/utils/nmr/mr/AmberPTParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for AMBER PT files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import collections @@ -72,6 +78,7 @@ def chunk_string(line: str, length: int = 4) -> List[str]: """ Split a string into fixed length chunks. """ + return [line[i:i + length] for i in range(0, len(line), length)] @@ -258,7 +265,7 @@ def exitAmber_pt(self, ctx: AmberPTParser.Amber_ptContext): # pylint: disable=u if not self.__hasPolySeqModel: return - if self.__residueLabel is None or self.__residuePointer is None or self.__atomName is None or self.__amberAtomType is None: + if None in (self.__residueLabel, self.__residuePointer, self.__atomName, self.__amberAtomType): return if len(self.__residueLabel) == 0 or len(self.__residuePointer) == 0 or len(self.__atomName) == 0 or len(self.__amberAtomType) == 0: @@ -462,7 +469,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -522,7 +529,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -1841,36 +1848,43 @@ def getVersionInfo(self) -> Tuple[Optional[str], Optional[str], Optional[str]]: """ Return version information of AMBER parameter/topology file. @return: version, date, time """ + return self.__version, self.__date, self.__time def getTitle(self) -> Optional[str]: """ Return title of AMBER parameter/topology file. """ + return self.__title def getRadiusSet(self) -> Optional[str]: """ Return radius set of AMBER parameter/topology file. """ + return self.__radiusSet def getAtomNumberDict(self) -> dict: """ Return AMBER atomic number dictionary. """ + return self.__atomNumberDict def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of AMBER parameter/topology file. """ + return None if self.__polySeqPrmTop is None or len(self.__polySeqPrmTop) == 0 else self.__polySeqPrmTop def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and AMBER parameter/topology. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and AMBER parameter/topology. 
""" + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign # del AmberPTParser diff --git a/wwpdb/utils/nmr/mr/AmberPTReader.py b/wwpdb/utils/nmr/mr/AmberPTReader.py index 42a989f3..0af216d4 100644 --- a/wwpdb/utils/nmr/mr/AmberPTReader.py +++ b/wwpdb/utils/nmr/mr/AmberPTReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing AMBER PT files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/AriaMRParserListener.py b/wwpdb/utils/nmr/mr/AriaMRParserListener.py index f9a34e0d..3651c526 100644 --- a/wwpdb/utils/nmr/mr/AriaMRParserListener.py +++ b/wwpdb/utils/nmr/mr/AriaMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for ARIA MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -849,7 +855,7 @@ def exitDistance_restraint(self, ctx: AriaMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -1372,7 +1378,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -1966,7 +1972,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2756,6 +2762,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -3113,26 +3120,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of ARIA MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and ARIA MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and ARIA MR. 
""" + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing ARIA MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/AriaMRReader.py b/wwpdb/utils/nmr/mr/AriaMRReader.py index f7696ae3..1432f574 100644 --- a/wwpdb/utils/nmr/mr/AriaMRReader.py +++ b/wwpdb/utils/nmr/mr/AriaMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing ARIA MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/BiosymMRParserListener.py b/wwpdb/utils/nmr/mr/BiosymMRParserListener.py index df95f9bc..17fd4a6c 100644 --- a/wwpdb/utils/nmr/mr/BiosymMRParserListener.py +++ b/wwpdb/utils/nmr/mr/BiosymMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for BIOSYM MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -830,7 +836,7 @@ def exitDistance_restraint(self, ctx: BiosymMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -969,7 +975,7 @@ def exitDistance_constraint(self, ctx: BiosymMRParser.Distance_constraintContext if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -1547,7 +1553,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2331,6 +2337,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. 
""" + if not self.__hasCoord: return atom1, atom2 @@ -2441,6 +2448,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -3439,26 +3447,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of BIOSYM MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and BIOSYM MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and BIOSYM MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing BIOSYM MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/BiosymMRReader.py b/wwpdb/utils/nmr/mr/BiosymMRReader.py index 22f5255b..a73a3595 100644 --- a/wwpdb/utils/nmr/mr/BiosymMRReader.py +++ b/wwpdb/utils/nmr/mr/BiosymMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing BIOSYM MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py b/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py index 752ffe3d..8f8a2499 100644 --- a/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py +++ b/wwpdb/utils/nmr/mr/CharmmCRDParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for CHARMM CRD files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import collections import copy @@ -353,7 +359,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -409,7 +415,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -993,21 +999,25 @@ def getContentSubtype(self) -> dict: def getAtomNumberDict(self) -> Optional[dict]: """ Return CHARMM atomic number dictionary. """ + return self.__atomNumberDict def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of CHARMM CRD file. 
""" + return None if self.__polySeqPrmTop is None or len(self.__polySeqPrmTop) == 0 else self.__polySeqPrmTop def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and CHARMM CRD. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and CHARMM CRD. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign diff --git a/wwpdb/utils/nmr/mr/CharmmCRDReader.py b/wwpdb/utils/nmr/mr/CharmmCRDReader.py index 5110bfa1..1423aa01 100644 --- a/wwpdb/utils/nmr/mr/CharmmCRDReader.py +++ b/wwpdb/utils/nmr/mr/CharmmCRDReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing CHARMM CRD files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/CharmmMRParserListener.py b/wwpdb/utils/nmr/mr/CharmmMRParserListener.py index cb542f3a..6bb2984e 100644 --- a/wwpdb/utils/nmr/mr/CharmmMRParserListener.py +++ b/wwpdb/utils/nmr/mr/CharmmMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for CHARMM MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -1320,7 +1326,7 @@ def exitDistance_restraint(self, ctx: CharmmMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -2700,6 +2706,7 @@ def consumeFactor_expressions(self, clauseName: str = 'atom selection expression def __consumeFactor_expressions(self, _factor: dict, clauseName: str = 'atom selection expression', cifCheck: bool = True) -> dict: """ Consume factor expressions as atom selection if possible. 
""" + if not self.__hasPolySeq and not self.__hasNonPolySeq: return _factor @@ -3716,7 +3723,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele elif pref_alt_auth_seq_id: continue - if self.__authToInsCode is None or len(self.__authToInsCode) == 0 or _compId_ is None: + if None in (self.__authToInsCode, _compId_) or len(self.__authToInsCode) == 0: seqKey, coordAtomSite = self.getCoordAtomSiteOf(chainId, seqId, compId, cifCheck=cifCheck) else: compId = _compId_ @@ -4420,7 +4427,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele return foundCompId def getRealCompId(self, compId: str) -> str: - if self.__ccU.updateChemCompDict(compId): + if self.__ccU.updateChemCompDict(compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' and '_chem_comp.pdbx_replaced_by' in self.__ccU.lastChemCompDict: replacedBy = self.__ccU.lastChemCompDict['_chem_comp.pdbx_replaced_by'] if replacedBy not in emptyValue and self.__ccU.updateChemCompDict(replacedBy): @@ -4681,6 +4688,7 @@ def doesNonPolySeqIdMatchWithPolySeqUnobs(self, chainId: str, seqId: int) -> boo def checkDistSequenceOffset(self, chainId: str, seqId: int, compId: str, origAtomId: str) -> bool: """ Try to find sequence offset. """ + if not self.__hasPolySeq or self.__cur_subtype != 'dist': return False @@ -4757,7 +4765,7 @@ def __intersectionFactor_expressions(self, _factor: dict, atomSelection: Optiona return _factor def __intersectionAtom_selections(self, _selection1: List[dict], _selection2: List[dict]) -> List[dict]: # pylint: disable=no-self-use - if _selection1 is None or len(_selection1) == 0 or _selection2 is None or len(_selection2) == 0: + if None in (_selection1, _selection2) or len(_selection1) == 0 or len(_selection2) == 0: return [] if isinstance(_selection2[0], str) and _selection2[0] == '*': @@ -6253,6 +6261,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -6363,6 +6372,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -6766,6 +6776,7 @@ def getContentSubtype(self) -> dict: def hasAnyRestraints(self) -> bool: """ Return whether any restraint is parsed successfully. """ + if self.__createSfDict: if len(self.sfDict) == 0: return False @@ -6779,26 +6790,31 @@ def hasAnyRestraints(self) -> bool: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of CHARMM MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and CHARMM MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and CHARMM MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing CHARMM MR file. 
""" + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/CharmmMRReader.py b/wwpdb/utils/nmr/mr/CharmmMRReader.py index f3d7e9b1..7e9b6751 100644 --- a/wwpdb/utils/nmr/mr/CharmmMRReader.py +++ b/wwpdb/utils/nmr/mr/CharmmMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing CHARMM MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/CnsMRParserListener.py b/wwpdb/utils/nmr/mr/CnsMRParserListener.py index a639f7dc..7b52b19e 100644 --- a/wwpdb/utils/nmr/mr/CnsMRParserListener.py +++ b/wwpdb/utils/nmr/mr/CnsMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for CNS MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -2022,7 +2028,7 @@ def exitNoe_assign(self, ctx: CnsMRParser.Noe_assignContext): # pylint: disable if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -4986,6 +4992,7 @@ def consumeFactor_expressions(self, clauseName: str = 'atom selection expression def __consumeFactor_expressions(self, _factor: dict, clauseName: str = 'atom selection expression', cifCheck: bool = True) -> dict: """ Consume factor expressions as atom selection if possible. 
""" + if not self.__hasPolySeq and not self.__hasNonPolySeq: return _factor @@ -5955,7 +5962,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele elif pref_alt_auth_seq_id: continue - if self.__authToInsCode is None or len(self.__authToInsCode) == 0 or _compId_ is None: + if None in (self.__authToInsCode, _compId_) or len(self.__authToInsCode) == 0: seqKey, coordAtomSite = self.getCoordAtomSiteOf(chainId, seqId, compId, cifCheck=cifCheck) else: compId = _compId_ @@ -6704,7 +6711,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele return foundCompId def getRealCompId(self, compId: str) -> str: - if self.__ccU.updateChemCompDict(compId): + if self.__ccU.updateChemCompDict(compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' and '_chem_comp.pdbx_replaced_by' in self.__ccU.lastChemCompDict: replacedBy = self.__ccU.lastChemCompDict['_chem_comp.pdbx_replaced_by'] if replacedBy not in emptyValue and self.__ccU.updateChemCompDict(replacedBy): @@ -6965,6 +6972,7 @@ def doesNonPolySeqIdMatchWithPolySeqUnobs(self, chainId: str, seqId: int) -> boo def checkDistSequenceOffset(self, chainId: str, seqId: int, compId: str, origAtomId: str) -> bool: """ Try to find sequence offset. """ + if not self.__hasPolySeq or self.__cur_subtype != 'dist': return False @@ -7041,7 +7049,7 @@ def __intersectionFactor_expressions(self, _factor: dict, atomSelection: Optiona return _factor def __intersectionAtom_selections(self, _selection1: List[dict], _selection2: List[dict]) -> List[dict]: # pylint: disable=no-self-use - if _selection1 is None or len(_selection1) == 0 or _selection2 is None or len(_selection2) == 0: + if None in (_selection1, _selection2) or len(_selection1) == 0 or len(_selection2) == 0: return [] if isinstance(_selection2[0], str) and _selection2[0] == '*': @@ -8943,6 +8951,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -9053,6 +9062,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -9221,6 +9231,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, def isRealisticDistanceRestraint(self, atom1: str, atom2: str, dst_func: dict) -> bool: """ Return whether a given distance restraint is realistic in the assembly. """ + if not self.__hasCoord: return True @@ -10187,6 +10198,7 @@ def getContentSubtype(self) -> dict: def hasAnyRestraints(self) -> bool: """ Return whether any restraint is parsed successfully. """ + if self.__createSfDict: if len(self.sfDict) == 0: return False @@ -10200,26 +10212,31 @@ def hasAnyRestraints(self) -> bool: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of CNS MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and CNS MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and CNS MR. 
""" + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing CNS MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/CnsMRReader.py b/wwpdb/utils/nmr/mr/CnsMRReader.py index cfe89e8f..3ad6c187 100644 --- a/wwpdb/utils/nmr/mr/CnsMRReader.py +++ b/wwpdb/utils/nmr/mr/CnsMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing CNS MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/CyanaMRParserListener.py b/wwpdb/utils/nmr/mr/CyanaMRParserListener.py index 62bb2356..6cbfcf9c 100644 --- a/wwpdb/utils/nmr/mr/CyanaMRParserListener.py +++ b/wwpdb/utils/nmr/mr/CyanaMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for CYANA MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -1447,7 +1453,7 @@ def exitDistance_restraint(self, ctx: CyanaMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -1622,7 +1628,7 @@ def exitDistance_restraint(self, ctx: CyanaMRParser.Distance_restraintContext): atom_id_2 = ccb[self.__ccU.ccbAtomId2] atom_id_3 = ccb[self.__ccU.ccbAtomId1] break - if atom_id_2 is None or atom_id_3 is None: + if None in (atom_id_2, atom_id_3): continue atom2 = copy.copy(atom1) atom2['atom_id'] = atom_id_2 @@ -2047,7 +2053,7 @@ def exitDistance_wo_comp_restraint(self, chainId1: str, seqId1: int, atomId1: st if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -2222,7 +2228,7 @@ def exitDistance_wo_comp_restraint(self, chainId1: str, seqId1: int, atomId1: st atom_id_2 = ccb[self.__ccU.ccbAtomId2] atom_id_3 = ccb[self.__ccU.ccbAtomId1] break - if atom_id_2 is None or atom_id_3 is None: + if None in (atom_id_2, atom_id_3): continue atom2 = copy.copy(atom1) 
atom2['atom_id'] = atom_id_2 @@ -2805,7 +2811,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -3447,7 +3453,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -4749,6 +4755,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -4859,6 +4866,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -6463,7 +6471,7 @@ def exitFixres_distance_restraint(self, ctx: CyanaMRParser.Fixres_distance_restr if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -6813,7 +6821,7 @@ def exitFixresw_distance_restraint(self, ctx: CyanaMRParser.Fixresw_distance_res if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -7070,7 +7078,7 @@ def exitFixresw2_distance_restraint(self, ctx: CyanaMRParser.Fixresw2_distance_r if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -7343,7 +7351,7 @@ def exitFixatm_distance_restraint(self, ctx: 
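[Editor's note: the two hunks above show the second recurring change, updateChemCompDict now receiving an extra positional False. The flag's meaning is defined in ChemCompUtil (touched elsewhere in this patch), so it is treated as opaque here; what the surrounding code does is resolve obsolete ("OBS") chemical components to their replacements. A self-contained sketch of that fallback, with CcuStub standing in for ChemCompUtil and EMPTY_VALUE for the package's emptyValue constant:

    class CcuStub:
        # stand-in for ChemCompUtil with two toy component definitions
        defs = {'ABC': {'_chem_comp.pdbx_release_status': 'OBS',
                        '_chem_comp.pdbx_replaced_by': 'XYZ'},
                'XYZ': {'_chem_comp.pdbx_release_status': 'REL'}}

        def updateChemCompDict(self, comp_id, *_):
            self.lastChemCompDict = self.defs.get(comp_id)
            return self.lastChemCompDict is not None

    EMPTY_VALUE = (None, '', '.', '?')

    def real_comp_id(ccU, comp_id):
        # mirrors getRealCompId in the CNS and XPLOR listeners above
        if ccU.updateChemCompDict(comp_id, False):
            cc = ccU.lastChemCompDict
            if cc['_chem_comp.pdbx_release_status'] == 'OBS' and '_chem_comp.pdbx_replaced_by' in cc:
                replaced_by = cc['_chem_comp.pdbx_replaced_by']
                if replaced_by not in EMPTY_VALUE and ccU.updateChemCompDict(replaced_by):
                    return replaced_by
        return comp_id

    assert real_comp_id(CcuStub(), 'ABC') == 'XYZ'   # obsolete, replaced
    assert real_comp_id(CcuStub(), 'XYZ') == 'XYZ'   # released, unchanged
]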
CyanaMRParser.Fixatm_distance_restr if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -7693,7 +7701,7 @@ def exitFixatmw_distance_restraint(self, ctx: CyanaMRParser.Fixatmw_distance_res if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -7950,7 +7958,7 @@ def exitFixatmw2_distance_restraint(self, ctx: CyanaMRParser.Fixatmw2_distance_r if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -8133,7 +8141,7 @@ def exitQconvr_distance_restraint(self, ctx: CyanaMRParser.Qconvr_distance_restr if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -8763,7 +8771,7 @@ def exitDistance_w_chain_restraint(self, ctx: CyanaMRParser.Distance_w_chain_res if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -9584,7 +9592,7 @@ def 
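[Editor's note: every exit*_distance_restraint handler repeats the guard being rewritten in these hunks: map both author-numbered atoms into NMR-STAR numbering, then skip the pair if either mapping fails or both endpoints collapse onto the same atom. A toy reduction with stand-ins for the package helpers getStarAtom and isIdenticalRestraint:

    def star_atom(mapping, atom):
        # stand-in for getStarAtom: None when no STAR equivalent exists
        key = (atom['chain_id'], atom['seq_id'])
        return dict(atom, seq_id=mapping[key]) if key in mapping else None

    def identical_restraint(a1, a2):
        # stand-in for isIdenticalRestraint: both endpoints are one atom
        return all(a1[k] == a2[k] for k in ('chain_id', 'seq_id', 'atom_id'))

    mapping = {('A', 101): 1, ('A', 102): 2}
    atom1 = {'chain_id': 'A', 'seq_id': 101, 'atom_id': 'HB2'}
    atom2 = {'chain_id': 'A', 'seq_id': 999, 'atom_id': 'HB2'}   # unmappable

    s1, s2 = star_atom(mapping, atom1), star_atom(mapping, atom2)
    skip = None in (s1, s2) or identical_restraint(s1, s2)
    assert skip

Ordering matters: the None test must stay first so the "or" short-circuits before identical_restraint ever sees a None operand.]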
exitCco_restraint(self, ctx: CyanaMRParser.Cco_restraintContext): atom_id_2 = ccb[self.__ccU.ccbAtomId2] atom_id_3 = ccb[self.__ccU.ccbAtomId1] break - if atom_id_2 is None or atom_id_3 is None: + if None in (atom_id_2, atom_id_3): continue atom2 = copy.copy(atom1) atom2['atom_id'] = atom_id_2 @@ -10689,26 +10697,31 @@ def getEffectiveContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of CYANA MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and CYANA MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and CYANA MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing CYANA MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getTypeOfDistanceRestraints(self) -> str: """ Return type of distance restraints of the CYANA MR file. """ + if self.__file_ext is not None: if self.__file_ext in ('upl', 'lol'): return self.__file_ext @@ -10733,6 +10746,7 @@ def getTypeOfDistanceRestraints(self) -> str: def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/CyanaMRReader.py b/wwpdb/utils/nmr/mr/CyanaMRReader.py index b6614300..b38e4033 100644 --- a/wwpdb/utils/nmr/mr/CyanaMRReader.py +++ b/wwpdb/utils/nmr/mr/CyanaMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing CYANA MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/CyanaNOAParserListener.py b/wwpdb/utils/nmr/mr/CyanaNOAParserListener.py index a788131b..4feb882b 100644 --- a/wwpdb/utils/nmr/mr/CyanaNOAParserListener.py +++ b/wwpdb/utils/nmr/mr/CyanaNOAParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for CYANA NOA files. 
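[Editor's note: getTypeOfDistanceRestraints above short-circuits on CYANA's conventional file extensions, upl for upper distance limits and lol for lower limits; the remainder of the function is elided by the hunk context. A hedged sketch of just the visible branch, with the elided logic reduced to a fallback parameter:

    def type_of_distance_restraints(file_ext, fallback=''):
        # visible branch only; content-based detection is elided upstream
        if file_ext is not None and file_ext in ('upl', 'lol'):
            return file_ext
        return fallback

    assert type_of_distance_restraints('upl') == 'upl'
    assert type_of_distance_restraints(None) == ''
]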
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -853,7 +859,7 @@ def exitNoe_assignments(self, ctx: CyanaNOAParser.Noe_assignmentsContext): # py if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -1388,7 +1394,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2170,6 +2176,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -2512,26 +2519,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of CYANA NOA file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and CYANA NOA. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and CYANA NOA. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing CYANA NOA file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/CyanaNOAReader.py b/wwpdb/utils/nmr/mr/CyanaNOAReader.py index 38264509..c274b2a4 100644 --- a/wwpdb/utils/nmr/mr/CyanaNOAReader.py +++ b/wwpdb/utils/nmr/mr/CyanaNOAReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing CYANA NOA files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/DynamoMRParserListener.py b/wwpdb/utils/nmr/mr/DynamoMRParserListener.py index e58d56db..0494ea3d 100644 --- a/wwpdb/utils/nmr/mr/DynamoMRParserListener.py +++ b/wwpdb/utils/nmr/mr/DynamoMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for DYNAMO/PALES/TALOS MR files. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -942,7 +948,7 @@ def exitDistance_restraint(self, ctx: DynamoMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -1117,7 +1123,7 @@ def exitDistance_restraint_sw_segid(self, ctx: DynamoMRParser.Distance_restraint if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -1291,7 +1297,7 @@ def exitDistance_restraint_ew_segid(self, ctx: DynamoMRParser.Distance_restraint if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -1863,7 +1869,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2725,6 +2731,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -2835,6 +2842,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. 
""" + if not self.__hasCoord: return dst_func @@ -5576,7 +5584,7 @@ def exitNumber(self, ctx: DynamoMRParser.NumberContext): def __getCurrentRestraint(self, n: Optional[int] = None, g: Optional[int] = None) -> str: if self.__cur_subtype == 'dist': - if n is None or g is None: + if None in (n, g): return f"[Check the {self.distRestraints}th row of distance restraints] " return f"[Check the {self.distRestraints}th row of distance restraints (index={n}, group={g})] " if self.__cur_subtype == 'dihed': @@ -5729,26 +5737,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of DYNAMO/PALES/TALOS MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and DYNAMO/PALES/TALOS MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and DYNAMO/PALES/TALOS MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing DYNAMO/PALES/TALOS MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/DynamoMRReader.py b/wwpdb/utils/nmr/mr/DynamoMRReader.py index 3ba6aee7..1ce6a21c 100644 --- a/wwpdb/utils/nmr/mr/DynamoMRReader.py +++ b/wwpdb/utils/nmr/mr/DynamoMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing DYNAMO/PALES/TALOS MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/GromacsMRParserListener.py b/wwpdb/utils/nmr/mr/GromacsMRParserListener.py index 98ef50c7..f360fe86 100644 --- a/wwpdb/utils/nmr/mr/GromacsMRParserListener.py +++ b/wwpdb/utils/nmr/mr/GromacsMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for GROMACS MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import copy import itertools @@ -500,7 +506,7 @@ def get_eff_digits(val): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -634,6 +640,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. 
""" + if not self.__hasCoord: return atom1, atom2 @@ -744,6 +751,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -1973,21 +1981,25 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of GROMACS MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and GROMACS MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and GROMACS MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/GromacsMRReader.py b/wwpdb/utils/nmr/mr/GromacsMRReader.py index 4caf55b8..1980b835 100644 --- a/wwpdb/utils/nmr/mr/GromacsMRReader.py +++ b/wwpdb/utils/nmr/mr/GromacsMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing GROMACS MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/GromacsPTParserListener.py b/wwpdb/utils/nmr/mr/GromacsPTParserListener.py index 459cc776..64e1695f 100644 --- a/wwpdb/utils/nmr/mr/GromacsPTParserListener.py +++ b/wwpdb/utils/nmr/mr/GromacsPTParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for GROMACS PT files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import collections import copy @@ -401,7 +407,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -461,7 +467,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -1697,31 +1703,37 @@ def getContentSubtype(self) -> dict: def getSystem(self) -> Optional[str]: """ Return system name of GROMACS parameter/topology file. """ + return self.__system def getMolecules(self) -> List[dict]: """ Return list of molecules and its number of copies in GROMACS parameter/topology file. """ + return self.__molecules def getAtomNumberDict(self) -> dict: """ Return GROMACS atomic number dictionary. 
""" + return self.__atomNumberDict def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of GROMACS parameter/topology file. """ + return None if self.__polySeqPrmTop is None or len(self.__polySeqPrmTop) == 0 else self.__polySeqPrmTop def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and GROMACS parameter/topology. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and GROMACS parameter/topology. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign # del GromacsPTParser diff --git a/wwpdb/utils/nmr/mr/GromacsPTReader.py b/wwpdb/utils/nmr/mr/GromacsPTReader.py index 6002f149..3b9f0cc5 100644 --- a/wwpdb/utils/nmr/mr/GromacsPTReader.py +++ b/wwpdb/utils/nmr/mr/GromacsPTReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing GROMACS PT files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/IsdMRParserListener.py b/wwpdb/utils/nmr/mr/IsdMRParserListener.py index 5a7810fb..9fc15a4e 100644 --- a/wwpdb/utils/nmr/mr/IsdMRParserListener.py +++ b/wwpdb/utils/nmr/mr/IsdMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for ISD MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -795,7 +801,7 @@ def exitDistance_restraint(self, ctx: IsdMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -1332,7 +1338,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2077,6 +2083,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -2331,26 +2338,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of ISD MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and ISD MR. 
""" + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and ISD MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing ISD MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/IsdMRReader.py b/wwpdb/utils/nmr/mr/IsdMRReader.py index bf4ff872..5e3a6881 100644 --- a/wwpdb/utils/nmr/mr/IsdMRReader.py +++ b/wwpdb/utils/nmr/mr/IsdMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing ISD MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/LexerErrorListener.py b/wwpdb/utils/nmr/mr/LexerErrorListener.py index d88d0990..bd5db627 100644 --- a/wwpdb/utils/nmr/mr/LexerErrorListener.py +++ b/wwpdb/utils/nmr/mr/LexerErrorListener.py @@ -6,6 +6,12 @@ """ Inheritance of ANTLR ErrorListener for Lexer. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + from antlr4.error.ErrorListener import ErrorListener from typing import List, Optional diff --git a/wwpdb/utils/nmr/mr/ParserErrorListener.py b/wwpdb/utils/nmr/mr/ParserErrorListener.py index 691611ab..c005a09d 100644 --- a/wwpdb/utils/nmr/mr/ParserErrorListener.py +++ b/wwpdb/utils/nmr/mr/ParserErrorListener.py @@ -6,6 +6,12 @@ """ Inheritance of ANTLR ErrorListener for Parser. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import re from antlr4.error.ErrorListener import ErrorListener diff --git a/wwpdb/utils/nmr/mr/ParserListenerUtil.py b/wwpdb/utils/nmr/mr/ParserListenerUtil.py index 5a6cbbbe..bf9cffcf 100644 --- a/wwpdb/utils/nmr/mr/ParserListenerUtil.py +++ b/wwpdb/utils/nmr/mr/ParserListenerUtil.py @@ -13,6 +13,12 @@ """ Utilities for MR/PT parser listener. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -3694,7 +3700,7 @@ def translateToStdResName(compId: str, refCompId: Optional[str] = None, ccU=None if ccU is not None and ccU.updateChemCompDict(compId[:3]): return compId[:3] - if ccU is not None and ccU.updateChemCompDict(compId): + if ccU is not None and ccU.updateChemCompDict(compId, False): if ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' and '_chem_comp.pdbx_replaced_by' in ccU.lastChemCompDict: replaced_by = ccU.lastChemCompDict['_chem_comp.pdbx_replaced_by'] if replaced_by not in emptyValue and ccU.updateChemCompDict(replaced_by): @@ -3745,7 +3751,7 @@ def coordAssemblyChecker(verbose: bool = True, log: IO = sys.stdout, nonPolyAuthMonIdName = 'auth_mon_id' if cR.hasItem('pdbx_nonpoly_scheme', 'auth_mon_id') else 'mon_id' branchedAuthMonIdName = 'auth_mon_id' if cR.hasItem('pdbx_branch_scheme', 'auth_mon_id') else 'mon_id' - if polySeq is None or misPolyLink is None or nmrExtPolySeq is None or modResidue is None or splitLigand is None: + if None in (polySeq, misPolyLink, nmrExtPolySeq, modResidue, splitLigand): changed = True # loop categories @@ -4031,7 +4037,7 @@ def coordAssemblyChecker(verbose: bool = True, log: IO = sys.stdout, s_p = ps['auth_seq_id'][p] s_q = ps['auth_seq_id'][p + 1] - if s_p is None or s_q is None: + if None in (s_p, s_q): continue if s_p == s_q: @@ -4303,7 +4309,7 @@ def coordAssemblyChecker(verbose: bool = True, log: IO = sys.stdout, for p in range(len(ent['auth_seq_id']) - 1): s_p = ent['auth_seq_id'][p] s_q = ent['auth_seq_id'][p + 1] - if s_p is None or s_q is None: + if None in (s_p, s_q): continue if s_p + 1 != s_q: ent['gap_in_auth_seq'] = True @@ -4491,8 +4497,7 @@ def coordAssemblyChecker(verbose: bool = True, log: IO = sys.stdout, altAuthCompId = 'pdbx_auth_comp_id' if 'pdbx_auth_comp_id' in tags else None altAuthAtomId = 'pdbx_auth_atom_name' if 'pdbx_auth_atom_name' in tags else None - if coordAtomSite is None or labelToAuthSeq is None or authToLabelSeq is None or chemCompAtom is None\ - or authAtomNameToId is None or authAtomNameToIdExt is None: + if None in (coordAtomSite, labelToAuthSeq, authToLabelSeq, chemCompAtom, authAtomNameToId, authAtomNameToIdExt): changed = True dataItems = [{'name': authAsymId, 'type': 'str', 'alt_name': 'chain_id'}, @@ -4865,7 +4870,7 @@ def coordAssemblyChecker(verbose: bool = True, log: IO = sys.stdout, if labelSeqKey not in labelToAuthSeq: labelToAuthSeq[labelSeqKey] = authSeqKey - if authToStarSeq is None or authToEntityType is None or entityAssembly is None or authToStarSeqAnn is None: + if None in (authToStarSeq, authToEntityType, entityAssembly, authToStarSeqAnn): changed = True authToStarSeq, authToOrigSeq, authToInsCode, authToEntityType, authToStarSeqAnn =\ @@ -6136,7 +6141,7 @@ def getTypeOfDihedralRestraint(polypeptide: bool, polynucleotide: bool, carbohyd # DAOTHER-9063: permit dihedral angle restraint across entities due to ligand split def is_connected(): - if cR is None or ccU is None: + if None in (cR, ccU): return False for idx, atom2 in enumerate(atoms): if idx == 0: @@ -6691,7 +6696,7 @@ def isCyclicPolymer(cR, polySeq: List[dict], authAsymId: str, """ Return whether a given chain is cyclic polymer based on coordinate annotation. 
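[Editor's note: coordAssemblyChecker above scans each entity's auth_seq_id list pairwise to flag gap_in_auth_seq; the rewritten guard skips a pair when either value is None instead of treating the unknown as a gap. A runnable reduction of that loop:

    def has_gap(auth_seq_ids):
        # pairwise scan from coordAssemblyChecker: consecutive author
        # sequence numbers must increase by exactly one
        for s_p, s_q in zip(auth_seq_ids, auth_seq_ids[1:]):
            if None in (s_p, s_q):
                continue            # unknown on either side: not a gap
            if s_p + 1 != s_q:
                return True
        return False

    assert has_gap([1, 2, 4]) is True
    assert has_gap([1, 2, None, 3]) is False
]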
""" - if cR is None or polySeq is None: + if None in (cR, polySeq): return False ps = next((ps for ps in polySeq if ps['auth_chain_id'] == authAsymId), None) @@ -6916,7 +6921,7 @@ def getCoordBondLength(cR, asymId1: str, seqId1: int, atomId1: str, a_1 = next((a for a in atom_site_1 if a['model_id'] == model_id), None) a_2 = next((a for a in atom_site_2 if a['model_id'] == model_id), None) - if a_1 is None or a_2 is None: + if None in (a_1, a_2): continue bond.append({'model_id': model_id, 'distance': float(f"{numpy.linalg.norm(to_np_array(a_1) - to_np_array(a_2)):.3f}")}) @@ -8557,6 +8562,7 @@ def getDstFuncForSsBond(atom1: dict, atom2: dict) -> dict: def getDstFuncAsNoe() -> dict: """ Return default upper/lower limits as an NOE. """ + return {'weight': '1.0', 'lower_limit': '2.0', 'upper_limit': str(DIST_AMBIG_MED)} @@ -10114,13 +10120,13 @@ def getDistConstraintType(atomSelectionSet: List[List[dict]], dstFunc: dict, csS atom1 = atomSelectionSet[0][0] atom2 = atomSelectionSet[1][0] - if atom1 is None or atom2 is None: + if None in (atom1, atom2): return None atom_id_1 = atom1['atom_id'] if 'atom_id' in atom1 else atom1['auth_atom_id'] atom_id_2 = atom2['atom_id'] if 'atom_id' in atom2 else atom2['auth_atom_id'] - if atom_id_1 is None or atom_id_2 is None: + if None in (atom_id_1, atom_id_2): return None if ('comp_id' in atom1 and atom1['comp_id'] == atom_id_1)\ diff --git a/wwpdb/utils/nmr/mr/RosettaMRParserListener.py b/wwpdb/utils/nmr/mr/RosettaMRParserListener.py index 12591668..8548f9ac 100644 --- a/wwpdb/utils/nmr/mr/RosettaMRParserListener.py +++ b/wwpdb/utils/nmr/mr/RosettaMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for ROSETTA MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -1115,7 +1121,7 @@ def exitAtom_pair_restraint(self, ctx: RosettaMRParser.Atom_pair_restraintContex if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -2140,6 +2146,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -2250,6 +2257,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. 
""" + if not self.__hasCoord: return dst_func @@ -4889,7 +4897,7 @@ def exitDisulfide_bond_linkage(self, ctx: RosettaMRParser.Disulfide_bond_linkage if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -5046,7 +5054,7 @@ def exitAtom_pair_w_chain_restraint(self, ctx: RosettaMRParser.Atom_pair_w_chain if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -5315,6 +5323,7 @@ def getEffectiveContentSubtype(self) -> dict: def hasAnyRestraints(self) -> bool: """ Return whether any restraint is parsed successfully. """ + if self.__createSfDict: if len(self.sfDict) == 0: return False @@ -5328,26 +5337,31 @@ def hasAnyRestraints(self) -> bool: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of ROSETTA MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and ROSETTA MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and ROSETTA MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing ROSETTA MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/RosettaMRReader.py b/wwpdb/utils/nmr/mr/RosettaMRReader.py index 568248dc..4b28bfba 100644 --- a/wwpdb/utils/nmr/mr/RosettaMRReader.py +++ b/wwpdb/utils/nmr/mr/RosettaMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing ROSETTA MR files. 
""" +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/SybylMRParserListener.py b/wwpdb/utils/nmr/mr/SybylMRParserListener.py index b30019d8..8aecae64 100644 --- a/wwpdb/utils/nmr/mr/SybylMRParserListener.py +++ b/wwpdb/utils/nmr/mr/SybylMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for SYBYL MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -800,7 +806,7 @@ def exitDistance_restraint(self, ctx: SybylMRParser.Distance_restraintContext): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if has_intra_chain and (atom1['chain_id'] != atom2['chain_id'] or atom1['chain_id'] not in rep_chain_id_set): continue @@ -1340,7 +1346,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId): + elif len(self.__nonPoly) == 1 and self.__ccU.updateChemCompDict(_compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.__nonPoly[0]['comp_id'][0] ligands = 1 @@ -2086,6 +2092,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -2358,26 +2365,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of SYBYL MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and SYBYL MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and SYBYL MR. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing SYBYL MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/SybylMRReader.py b/wwpdb/utils/nmr/mr/SybylMRReader.py index cec08b9f..3b8adfb2 100644 --- a/wwpdb/utils/nmr/mr/SybylMRReader.py +++ b/wwpdb/utils/nmr/mr/SybylMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing SYBYL MR files. 
""" +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/mr/XplorMRParserListener.py b/wwpdb/utils/nmr/mr/XplorMRParserListener.py index 3abcf0fe..04b1afc6 100644 --- a/wwpdb/utils/nmr/mr/XplorMRParserListener.py +++ b/wwpdb/utils/nmr/mr/XplorMRParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for XPLOR-NIH MR files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import itertools @@ -2520,7 +2526,7 @@ def exitNoe_assign(self, ctx: XplorMRParser.Noe_assignContext): # pylint: disab if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -8523,7 +8529,7 @@ def proc_as_if_noe_assign(): if self.__createSfDict and isinstance(memberId, int): star_atom1 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom1)) star_atom2 = getStarAtom(self.__authToStarSeq, self.__authToOrigSeq, self.__offsetHolder, copy.copy(atom2)) - if star_atom1 is None or star_atom2 is None or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): + if None in (star_atom1, star_atom2) or isIdenticalRestraint([star_atom1, star_atom2], self.__nefT): continue if self.__createSfDict and memberLogicCode == '.': altAtomId1, altAtomId2 = getAltProtonIdInBondConstraint(atoms, self.__csStat) @@ -9158,6 +9164,7 @@ def consumeFactor_expressions(self, clauseName: str = 'atom selection expression def __consumeFactor_expressions(self, _factor: dict, clauseName: str = 'atom selection expression', cifCheck: bool = True) -> dict: """ Consume factor expressions as atom selection if possible. 
""" + if not self.__hasPolySeq and not self.__hasNonPolySeq: return _factor @@ -10146,7 +10153,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele elif pref_alt_auth_seq_id: continue - if self.__authToInsCode is None or len(self.__authToInsCode) == 0 or _compId_ is None: + if None in (self.__authToInsCode, _compId_) or len(self.__authToInsCode) == 0: seqKey, coordAtomSite = self.getCoordAtomSiteOf(chainId, seqId, compId, cifCheck=cifCheck) else: compId = _compId_ @@ -10906,7 +10913,7 @@ def __consumeFactor_expressions__(self, _factor: dict, cifCheck: bool, _atomSele return foundCompId def getRealCompId(self, compId: str) -> str: - if self.__ccU.updateChemCompDict(compId): + if self.__ccU.updateChemCompDict(compId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS' and '_chem_comp.pdbx_replaced_by' in self.__ccU.lastChemCompDict: replacedBy = self.__ccU.lastChemCompDict['_chem_comp.pdbx_replaced_by'] if replacedBy not in emptyValue and self.__ccU.updateChemCompDict(replacedBy): @@ -11167,6 +11174,7 @@ def doesNonPolySeqIdMatchWithPolySeqUnobs(self, chainId: str, seqId: int) -> boo def checkDistSequenceOffset(self, chainId: str, seqId: int, compId: str, origAtomId: str) -> bool: """ Try to find sequence offset. """ + if not self.__hasPolySeq or self.__cur_subtype != 'dist': return False @@ -11243,7 +11251,7 @@ def __intersectionFactor_expressions(self, _factor: dict, atomSelection: Optiona return _factor def __intersectionAtom_selections(self, _selection1: List[dict], _selection2: List[dict]) -> List[dict]: # pylint: disable=no-self-use - if _selection1 is None or len(_selection1) == 0 or _selection2 is None or len(_selection2) == 0: + if None in (_selection1, _selection2) or len(_selection1) == 0 or len(_selection2) == 0: return [] if isinstance(_selection2[0], str) and _selection2[0] == '*': @@ -13059,6 +13067,7 @@ def selectRealisticBondConstraint(self, atom1: str, atom2: str, alt_atom_id1: st ) -> Tuple[str, str]: """ Return realistic bond constraint taking into account the current coordinates. """ + if not self.__hasCoord: return atom1, atom2 @@ -13169,6 +13178,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, ) -> dict: """ Return realistic chi2 angle constraint taking into account the current coordinates. """ + if not self.__hasCoord: return dst_func @@ -13337,6 +13347,7 @@ def selectRealisticChi2AngleConstraint(self, atom1: str, atom2: str, atom3: str, def isRealisticDistanceRestraint(self, atom1: str, atom2: str, dst_func: dict) -> bool: """ Return whether a given distance restraint is realistic in the assembly. """ + if not self.__hasCoord: return True @@ -14703,6 +14714,7 @@ def getContentSubtype(self) -> dict: def hasAnyRestraints(self) -> bool: """ Return whether any restraint is parsed successfully. """ + if self.__createSfDict: if len(self.sfDict) == 0: return False @@ -14716,26 +14728,31 @@ def hasAnyRestraints(self) -> bool: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of XPLOR-NIH MR file. """ + return None if self.__polySeqRst is None or len(self.__polySeqRst) == 0 else self.__polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and XPLOR-NIH MR. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and XPLOR-NIH MR. 
""" + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing XPLOR-NIH MR file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/mr/XplorMRReader.py b/wwpdb/utils/nmr/mr/XplorMRReader.py index 8d00bded..f91515ef 100644 --- a/wwpdb/utils/nmr/mr/XplorMRReader.py +++ b/wwpdb/utils/nmr/mr/XplorMRReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing XPLOR-NIH MR files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/nef/NEFTranslator.py b/wwpdb/utils/nmr/nef/NEFTranslator.py index 208ff148..9e7b915b 100644 --- a/wwpdb/utils/nmr/nef/NEFTranslator.py +++ b/wwpdb/utils/nmr/nef/NEFTranslator.py @@ -107,6 +107,12 @@ """ Bi-directional translator between NEF and NMR-STAR @author: Kumaran Baskaran, Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi, Kumaran Baskaran" +__email__ = "yokochi@protein.osaka-u.ac.jp, baskaran@uchc.edu" +__license__ = "Apache License 2.0" +__version__ = "4.1.1" + import sys import os import ntpath @@ -178,7 +184,6 @@ __package_name__ = 'wwpdb.utils.nmr' -__version__ = '4.1.1' __pynmrstar_v3_3_1__ = version.parse(pynmrstar.__version__) >= version.parse("3.3.1") @@ -1493,7 +1498,7 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr, self.__csStat = BMRBChemShiftStat(self.__verbose, self.__lfh) if csStat is None else csStat # CifToNmrStar - self.__c2S = CifToNmrStar(self.__verbose) if c2S is None else c2S + self.__c2S = CifToNmrStar(self.__lfh) if c2S is None else c2S # readable item type self.readableItemType = {'str': 'a string', @@ -1517,6 +1522,7 @@ def __init__(self, verbose: bool = False, log: IO = sys.stderr, def get_ccu(self): """ Get instance of ChemCompUtil. """ + return self.__ccU def set_remediation_mode(self, flag: bool): diff --git a/wwpdb/utils/nmr/pk/AriaPKParserListener.py b/wwpdb/utils/nmr/pk/AriaPKParserListener.py index 0f13c38f..1d44d690 100644 --- a/wwpdb/utils/nmr/pk/AriaPKParserListener.py +++ b/wwpdb/utils/nmr/pk/AriaPKParserListener.py @@ -7,6 +7,12 @@ @author: Masashi Yokochi @see: https://aria-test.pasteur.fr/documentation/input-format/version-2.1/spectrum """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys from antlr4 import ParseTreeListener diff --git a/wwpdb/utils/nmr/pk/AriaPKReader.py b/wwpdb/utils/nmr/pk/AriaPKReader.py index 26c6b8cb..342454ca 100644 --- a/wwpdb/utils/nmr/pk/AriaPKReader.py +++ b/wwpdb/utils/nmr/pk/AriaPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing ARIA PK files. 
""" +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/BasePKParserListener.py b/wwpdb/utils/nmr/pk/BasePKParserListener.py index 258fb196..c060889a 100644 --- a/wwpdb/utils/nmr/pk/BasePKParserListener.py +++ b/wwpdb/utils/nmr/pk/BasePKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister base class for any peak list file. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -3084,7 +3090,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.nonPoly) == 1 and self.ccU.updateChemCompDict(_compId): + elif len(self.nonPoly) == 1 and self.ccU.updateChemCompDict(_compId, False): if self.ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.nonPoly[0]['comp_id'][0] ligands = 1 @@ -3737,7 +3743,7 @@ def comp_id_in_polymer(np): ligands += 1 if ligands == 1: compId = _compId = __compId - elif len(self.nonPoly) == 1 and self.ccU.updateChemCompDict(_compId): + elif len(self.nonPoly) == 1 and self.ccU.updateChemCompDict(_compId, False): if self.ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = _compId = self.nonPoly[0]['comp_id'][0] ligands = 1 @@ -5061,26 +5067,31 @@ def getContentSubtype(self) -> dict: def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of PK file. """ + return None if self.polySeqRst is None or len(self.polySeqRst) == 0 else self.polySeqRst def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and PK. """ + return None if self.seqAlign is None or len(self.seqAlign) == 0 else self.seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and PK. """ + return None if self.chainAssign is None or len(self.chainAssign) == 0 else self.chainAssign def getReasonsForReparsing(self) -> Optional[dict]: """ Return reasons for re-parsing PK file. """ + return None if len(self.reasonsForReParsing) == 0 else self.reasonsForReParsing def getSfDict(self) -> Tuple[dict, Optional[dict]]: """ Return a dictionary of pynmrstar saveframes. """ + if len(self.sfDict) == 0: return self.__listIdCounter, None ign_keys = [] diff --git a/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py b/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py index 0659f31b..2d8a6d7e 100644 --- a/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py +++ b/wwpdb/utils/nmr/pk/NmrPipePKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for NMRPIPE PK files. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -239,7 +245,7 @@ def exitPeak_2d(self, ctx: NmrPipePKParser.Peak_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or type != 1: + if None in (x_ppm, y_ppm) or type != 1: self.peaks2D -= 1 return @@ -376,7 +382,7 @@ def exitPeak_3d(self, ctx: NmrPipePKParser.Peak_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or type != 1: + if None in (x_ppm, y_ppm, z_ppm) or type != 1: self.peaks3D -= 1 return @@ -526,7 +532,7 @@ def exitPeak_4d(self, ctx: NmrPipePKParser.Peak_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None or type != 1: + if None in (x_ppm, y_ppm, z_ppm, a_ppm) or type != 1: self.peaks4D -= 1 return diff --git a/wwpdb/utils/nmr/pk/NmrPipePKReader.py b/wwpdb/utils/nmr/pk/NmrPipePKReader.py index 1586438a..cf5a9790 100644 --- a/wwpdb/utils/nmr/pk/NmrPipePKReader.py +++ b/wwpdb/utils/nmr/pk/NmrPipePKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing NMRPIPE PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py b/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py index 6cdf1dd3..58d9afe0 100644 --- a/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py +++ b/wwpdb/utils/nmr/pk/NmrViewPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for NMRVIEW PK files. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -205,7 +211,7 @@ def exitPeak_2d(self, ctx: NmrViewPKParser.Peak_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or stat != 0: + if None in (P1, P2) or stat != 0: self.peaks2D -= 1 return @@ -309,7 +315,7 @@ def exitPeak_3d(self, ctx: NmrViewPKParser.Peak_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or P3 is None or stat != 0: + if None in (P1, P2, P3) or stat != 0: self.peaks3D -= 1 return @@ -428,7 +434,7 @@ def exitPeak_4d(self, ctx: NmrViewPKParser.Peak_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or P3 is None or P4 is None or stat != 0: + if None in (P1, P2, P3, P4) or stat != 0: self.peaks4D -= 1 return @@ -521,7 +527,7 @@ def exitPeak_wo_eju_2d(self, ctx: NmrViewPKParser.Peak_wo_eju_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or stat != 0: + if None in (P1, P2) or stat != 0: self.peaks2D -= 1 return @@ -611,7 +617,7 @@ def exitPeak_wo_eju_3d(self, ctx: NmrViewPKParser.Peak_wo_eju_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or P3 is None or stat != 0: + if None in (P1, P2, P3) or stat != 0: self.peaks3D -= 1 return @@ -713,7 +719,7 @@ def exitPeak_wo_eju_4d(self, ctx: NmrViewPKParser.Peak_wo_eju_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if P1 is None or P2 is None or P3 is None or P4 is None or stat != 0: + if None in (P1, P2, P3, P4) or stat != 0: self.peaks4D -= 1 return diff --git a/wwpdb/utils/nmr/pk/NmrViewPKReader.py b/wwpdb/utils/nmr/pk/NmrViewPKReader.py index 73e79488..f94ea6f3 100644 --- a/wwpdb/utils/nmr/pk/NmrViewPKReader.py +++ b/wwpdb/utils/nmr/pk/NmrViewPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing NMRVIEW PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/SparkyPKParserListener.py b/wwpdb/utils/nmr/pk/SparkyPKParserListener.py index 30f5b02c..23461780 100644 --- a/wwpdb/utils/nmr/pk/SparkyPKParserListener.py +++ b/wwpdb/utils/nmr/pk/SparkyPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for SPARKY PK files. 
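[Editor's note: the PK listeners above share one validation idiom, now written with the membership test: a peak row missing any chemical shift, or carrying a non-default status flag (type != 1 for NMRPipe, stat != 0 for NMRView), is discarded and the running peak counter is rolled back. A toy 2D version of that bookkeeping:

    class PeakCounter:
        # stand-in for the listener's peaks2D bookkeeping
        def __init__(self):
            self.peaks2D = 0

        def on_peak_2d(self, x_ppm, y_ppm, stat=0):
            self.peaks2D += 1       # counted when the row is entered
            if None in (x_ppm, y_ppm) or stat != 0:
                self.peaks2D -= 1   # rolled back for unusable rows
                return None
            return (x_ppm, y_ppm)

    pc = PeakCounter()
    assert pc.on_peak_2d(8.25, 121.4) == (8.25, 121.4)
    assert pc.on_peak_2d(None, 121.4) is None and pc.peaks2D == 1
]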
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys from antlr4 import ParseTreeListener @@ -135,7 +141,7 @@ def exitPeak_2d(self, ctx: SparkyPKParser.Peak_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None: + if None in (x_ppm, y_ppm): self.peaks2D -= 1 return @@ -210,7 +216,7 @@ def exitPeak_3d(self, ctx: SparkyPKParser.Peak_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None: + if None in (x_ppm, y_ppm, z_ppm): self.peaks3D -= 1 return @@ -287,7 +293,7 @@ def exitPeak_4d(self, ctx: SparkyPKParser.Peak_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None: + if None in (x_ppm, y_ppm, z_ppm, a_ppm): self.peaks4D -= 1 return @@ -366,7 +372,7 @@ def exitPeak_wo_assign(self, ctx: SparkyPKParser.Peak_wo_assignContext): # pyli if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None: + if None in (x_ppm, y_ppm): self.peaks2D -= 1 return @@ -416,7 +422,7 @@ def exitPeak_wo_assign(self, ctx: SparkyPKParser.Peak_wo_assignContext): # pyli if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None: + if None in (x_ppm, y_ppm, z_ppm): self.peaks3D -= 1 return @@ -468,7 +474,7 @@ def exitPeak_wo_assign(self, ctx: SparkyPKParser.Peak_wo_assignContext): # pyli if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None: + if None in (x_ppm, y_ppm, z_ppm, a_ppm): self.peaks4D -= 1 return diff --git a/wwpdb/utils/nmr/pk/SparkyPKReader.py b/wwpdb/utils/nmr/pk/SparkyPKReader.py index e6050275..5608a88d 100644 --- a/wwpdb/utils/nmr/pk/SparkyPKReader.py +++ b/wwpdb/utils/nmr/pk/SparkyPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing SPARKY PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py b/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py index 4801d6bd..b543620d 100644 --- a/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py +++ b/wwpdb/utils/nmr/pk/TopSpinPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for TOPSPIN PK files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys from antlr4 import ParseTreeListener diff --git a/wwpdb/utils/nmr/pk/TopSpinPKReader.py b/wwpdb/utils/nmr/pk/TopSpinPKReader.py index 87c23137..e7403bbe 100644 --- a/wwpdb/utils/nmr/pk/TopSpinPKReader.py +++ b/wwpdb/utils/nmr/pk/TopSpinPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing TOPSPIN PK files. 
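The self.peaks2D -= 1 (and 3D/4D) statements visible throughout these hunks are rollbacks: the listener appears to count a peak when it enters the row and uncounts it when the row turns out to be unusable, so the final tally covers only accepted peaks. A simplified sketch under that assumption; class and method names are hypothetical:

class PeakTally:
    def __init__(self):
        self.peaks2D = 0

    def enter_peak_2d(self):
        self.peaks2D += 1              # provisional count on entry

    def exit_peak_2d(self, x_ppm, y_ppm):
        if None in (x_ppm, y_ppm):
            self.peaks2D -= 1          # roll back: required shifts missing
            return None
        return (x_ppm, y_ppm)          # accepted peak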
""" +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/VnmrPKParserListener.py b/wwpdb/utils/nmr/pk/VnmrPKParserListener.py index 90483b89..08344f7d 100644 --- a/wwpdb/utils/nmr/pk/VnmrPKParserListener.py +++ b/wwpdb/utils/nmr/pk/VnmrPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for VNMR PK files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import copy import re @@ -207,7 +213,7 @@ def exitPeak_ll2d(self, ctx: VnmrPKParser.Peak_ll2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None: + if None in (x_ppm, y_ppm): self.peaks2D -= 1 return @@ -308,7 +314,7 @@ def exitPeak_ll3d(self, ctx: VnmrPKParser.Peak_ll3dContext): y_lw_hz = self.numberSelection[offset + 1] z_lw_hz = self.numberSelection[offset + 2] - if x_ppm is None or y_ppm is None or z_ppm is None: + if None in (x_ppm, y_ppm, z_ppm): self.peaks3D -= 1 return @@ -415,7 +421,7 @@ def exitPeak_ll4d(self, ctx: VnmrPKParser.Peak_ll4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None: + if None in (x_ppm, y_ppm, z_ppm, a_ppm): self.peaks4D -= 1 return @@ -536,7 +542,7 @@ def exitPeak_2d(self, ctx: VnmrPKParser.Peak_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None: + if None in (x_ppm, y_ppm): self.peaks2D -= 1 return @@ -639,7 +645,7 @@ def exitPeak_3d(self, ctx: VnmrPKParser.Peak_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None: + if None in (x_ppm, y_ppm, z_ppm): self.peaks3D -= 1 return @@ -745,7 +751,7 @@ def exitPeak_4d(self, ctx: VnmrPKParser.Peak_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None: + if None in (x_ppm, y_ppm, z_ppm, a_ppm): self.peaks4D -= 1 return diff --git a/wwpdb/utils/nmr/pk/VnmrPKReader.py b/wwpdb/utils/nmr/pk/VnmrPKReader.py index 05ea8855..57e9e7a2 100644 --- a/wwpdb/utils/nmr/pk/VnmrPKReader.py +++ b/wwpdb/utils/nmr/pk/VnmrPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing VNMR PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/XeasyPKParserListener.py b/wwpdb/utils/nmr/pk/XeasyPKParserListener.py index 5db75c9b..159536df 100644 --- a/wwpdb/utils/nmr/pk/XeasyPKParserListener.py +++ b/wwpdb/utils/nmr/pk/XeasyPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for XEASY PK files. 
@author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import re import copy @@ -203,7 +209,7 @@ def exitPeak_2d(self, ctx: XeasyPKParser.Peak_2dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or type != 0: + if None in (x_ppm, y_ppm) or type != 0: self.peaks2D -= 1 return @@ -298,7 +304,7 @@ def exitPeak_3d(self, ctx: XeasyPKParser.Peak_3dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or type != 0: + if None in (x_ppm, y_ppm, z_ppm) or type != 0: self.peaks3D -= 1 return @@ -395,7 +401,7 @@ def exitPeak_4d(self, ctx: XeasyPKParser.Peak_4dContext): if not self.hasPolySeq and not self.hasNonPolySeq: return - if x_ppm is None or y_ppm is None or z_ppm is None or a_ppm is None or type != 0: + if None in (x_ppm, y_ppm, z_ppm, a_ppm) or type != 0: self.peaks4D -= 1 return diff --git a/wwpdb/utils/nmr/pk/XeasyPKReader.py b/wwpdb/utils/nmr/pk/XeasyPKReader.py index 3788bd0f..65f27d0a 100644 --- a/wwpdb/utils/nmr/pk/XeasyPKReader.py +++ b/wwpdb/utils/nmr/pk/XeasyPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing XEASY PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py b/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py index 68f4f6bb..0080a4e5 100644 --- a/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py +++ b/wwpdb/utils/nmr/pk/XeasyPROTParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for XEASY PROT files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import collections import copy @@ -203,7 +209,7 @@ def exitXeasy_prot(self, ctx: XeasyPROTParser.Xeasy_protContext): # pylint: dis NON_METAL_ELEMENTS = ('H', 'C', 'N', 'O', 'P', 'S') def is_segment(prev_asym_id, prev_comp_id, prev_atom_name, asym_id, comp_id, atom_name): - if prev_asym_id is None or prev_comp_id is None: + if None in (prev_asym_id, prev_comp_id): return False if prev_asym_id != asym_id: return True @@ -372,7 +378,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -428,7 +434,7 @@ def is_metal_elem(prev_atom_name, prev_seq_id, seq_id): ligands += 1 if ligands == 1: compId = __compId - elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId): + elif len(self.__nonPolyModel) == 1 and self.__ccU.updateChemCompDict(authCompId, False): if self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS': compId = self.__nonPolyModel[0]['comp_id'][0] @@ -1017,21 +1023,25 @@ def getContentSubtype(self) -> dict: def getAtomNumberDict(self) -> dict: """ Return XEASY atomic number dictionary. 
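Both XEASY PROT hunks pass a new second argument (False) to updateChemCompDict while keeping the fallback for obsolete ligands: when the authored residue name resolves to a chemical component whose _chem_comp.pdbx_release_status is OBS and the model holds exactly one non-polymer entity, that entity's comp_id is substituted. A hedged sketch of the control flow with stand-in names; the patch does not state what the False flag means:

def resolve_comp_id(auth_comp_id, ccu, non_poly_model):
    # Fall back to the model's sole non-polymer comp_id when the authored
    # name points at an obsolete (OBS) chemical component definition.
    if len(non_poly_model) == 1 and ccu.updateChemCompDict(auth_comp_id, False):
        if ccu.lastChemCompDict['_chem_comp.pdbx_release_status'] == 'OBS':
            return non_poly_model[0]['comp_id'][0]
    return auth_comp_id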
""" + return self.__atomNumberDict def getPolymerSequence(self) -> Optional[List[dict]]: """ Return polymer sequence of XEASY PROT file. """ + return None if self.__polySeqPrmTop is None or len(self.__polySeqPrmTop) == 0 else self.__polySeqPrmTop def getSequenceAlignment(self) -> Optional[List[dict]]: """ Return sequence alignment between coordinates and XEASY PROT. """ + return None if self.__seqAlign is None or len(self.__seqAlign) == 0 else self.__seqAlign def getChainAssignment(self) -> Optional[List[dict]]: """ Return chain assignment between coordinates and XEASY PROT. """ + return None if self.__chainAssign is None or len(self.__chainAssign) == 0 else self.__chainAssign diff --git a/wwpdb/utils/nmr/pk/XeasyPROTReader.py b/wwpdb/utils/nmr/pk/XeasyPROTReader.py index 4f4b39c1..647672eb 100644 --- a/wwpdb/utils/nmr/pk/XeasyPROTReader.py +++ b/wwpdb/utils/nmr/pk/XeasyPROTReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing XEASY PROT files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/pk/XwinNmrPKParserListener.py b/wwpdb/utils/nmr/pk/XwinNmrPKParserListener.py index b78e4592..c9fb2bf7 100644 --- a/wwpdb/utils/nmr/pk/XwinNmrPKParserListener.py +++ b/wwpdb/utils/nmr/pk/XwinNmrPKParserListener.py @@ -6,6 +6,12 @@ """ ParserLister class for XWINNMR PK files. @author: Masashi Yokochi """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys from antlr4 import ParseTreeListener diff --git a/wwpdb/utils/nmr/pk/XwinNmrPKReader.py b/wwpdb/utils/nmr/pk/XwinNmrPKReader.py index 5bd64cae..37deb534 100644 --- a/wwpdb/utils/nmr/pk/XwinNmrPKReader.py +++ b/wwpdb/utils/nmr/pk/XwinNmrPKReader.py @@ -5,6 +5,12 @@ ## """ A collection of classes for parsing XWINNMR PK files. """ +__docformat__ = "restructuredtext en" +__author__ = "Masashi Yokochi" +__email__ = "yokochi@protein.osaka-u.ac.jp" +__license__ = "Apache License 2.0" +__version__ = "1.0.0" + import sys import os diff --git a/wwpdb/utils/nmr/rci/RCI.py b/wwpdb/utils/nmr/rci/RCI.py index eb455e11..96b2b02b 100644 --- a/wwpdb/utils/nmr/rci/RCI.py +++ b/wwpdb/utils/nmr/rci/RCI.py @@ -1,13 +1,23 @@ ## -# File: RCI.py +# File: RCI.py derived from RCI v_1n_10_6_12_A with Python 2 to 3 conversion and simplified API # Date: 12-Jul-2021 # # Updates: # 13-Oct-2021 M. Yokochi - code revision according to PEP8 using Pylint (DAOTHER-7389, issue #5) ## """ Wrapper class for Random Coil Index calculation. - @author: Masashi Yokochi + @author: Gary Strangman, Masashi Yokochi + Reference: + Mark V. Berjanskii, David S. Wishart (2005) A Simple Method To Predict Protein + Flexibility Using Secondary Chemical Shifts. Journal of the American Chemical + DOI: 10.1021/ja054842f """ +__docformat__ = "restructuredtext en" +__author__ = "Gary Strangman, Masashi Yokochi" +__email__ = "strang@nmr.mgh.harvard.edu, yokochi@protein.osaka-u.ac.jp" +__license__ = "General Public License (GPL) v2" +__version__ = "v_1n_10_6_12_A" + import sys import numpy as np @@ -6186,6 +6196,7 @@ def __gap_fill2(self, L_list: list, l_atom: list) -> List[list]: def __smoothing(self, L_smooth: list, L_list: list, l_atom_type: str) -> List[list]: """ Smoothing function. 
""" + # Building residue number list first_residue = last_residue = first_residue_flag = 0 smooth_factor = L_smooth[2] @@ -7269,6 +7280,7 @@ def __end_effect5(self, l_list: list) -> list: def __final_smoothing(self, L_smooth: int, L_list: list) -> List[list]: """ Smoothing function for final result. """ + # Building residue number list first_residue = last_residue = first_residue_flag = 0 smooth_factor = L_smooth