diff --git a/wwpdb/utils/nmr/CifToNmrStar.py b/wwpdb/utils/nmr/CifToNmrStar.py index b85f0d75..1a9075af 100644 --- a/wwpdb/utils/nmr/CifToNmrStar.py +++ b/wwpdb/utils/nmr/CifToNmrStar.py @@ -107,7 +107,7 @@ def get_first_sf_tag(sf: pynmrstar.Saveframe, tag: str, default: str = '') -> An def set_sf_tag(sf: pynmrstar.Saveframe, tag: str, value: Any): - """ Set saveframe tag. + """ Set saveframe tag with a given value. """ tagNames = [t[0] for t in sf.tags] @@ -130,6 +130,23 @@ def set_sf_tag(sf: pynmrstar.Saveframe, tag: str, value: Any): sf.tags[tagNames.index(tag)][1] = value +def set_lp_tag(lp: pynmrstar.Loop, tag: str, value: Any): + """ Set loop tag with a given value. + """ + + if tag not in lp.tags: + lp.add_tag(tag) + + for row in lp: + row.append(value) + + else: + col = lp.tags.index(tag) + + for row in lp: + row[col] = value + + def retrieve_symbolic_labels(strData: pynmrstar.Entry): """ Retrieve symbolic label representations that serve as saveframe pointers in NMR-STAR. """ @@ -665,19 +682,7 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - if 'Entry_ID' not in lp.tags: - entry_id_tag = lp.category + '.Entry_ID' - - lp.add_tag(entry_id_tag) - - for row in lp: - row.append(entryId) - - else: - col = lp.tags.index('Entry_ID') - - for row in lp: - row[col] = entryId + set_lp_tag(lp, 'Entry_ID', entryId) modified = True @@ -725,19 +730,7 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - if 'Entry_ID' not in lp.tags: - entry_id_tag = lp.category + '.Entry_ID' - - lp.add_tag(entry_id_tag) - - for row in lp: - row.append(entryId) - - else: - col = lp.tags.index('Entry_ID') - - for row in lp: - row[col] = entryId + set_lp_tag(lp, 'Entry_ID', entryId) modified = True @@ -759,19 +752,7 @@ def set_entry_id(self, strData: Union[pynmrstar.Entry, pynmrstar.Saveframe, pynm pass if not filled: - if 'Entry_ID' not in lp.tags: - entry_id_tag = lp.category + '.Entry_ID' - - lp.add_tag(entry_id_tag) - - for row in lp: - row.append(entryId) - - else: - col = lp.tags.index('Entry_ID') - - for row in lp: - row[col] = entryId + set_lp_tag(lp, 'Entry_ID', entryId) modified = True diff --git a/wwpdb/utils/nmr/NmrDpUtility.py b/wwpdb/utils/nmr/NmrDpUtility.py index 7fa7f92f..1b6756cb 100644 --- a/wwpdb/utils/nmr/NmrDpUtility.py +++ b/wwpdb/utils/nmr/NmrDpUtility.py @@ -20137,7 +20137,7 @@ def __validateAtomNomenclature__(self, file_name: str, file_type: str, content_s and atom_id == "HO5'": continue - err = f"Invalid atom_id {atom_id!r} (comp_id {comp_id!r}) in a loop {lp_category}." + err = f"Invalid atom name {atom_id!r} (comp_id {comp_id!r}) in a loop {lp_category}." self.report.error.appendDescription('invalid_atom_nomenclature', {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, @@ -20240,15 +20240,29 @@ def __validateAtomNomenclature__(self, file_name: str, file_type: str, content_s cc_name = f"(Not available due to CCD status code {cc_rel_status})" cc_name = '' if cc_name is None else ', ' + cc_name - err = f"Invalid atom_id {atom_id!r} (comp_id {comp_id!r}{cc_name}) in a loop {lp_category}." + if content_subtype.startswith('spectral_peak'): - self.report.error.appendDescription('invalid_atom_nomenclature', - {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, - 'description': err}) - self.report.setError() + err = f"Unmatched atom name {atom_id!r} (comp_id {comp_id!r}{cc_name}) in a loop {lp_category}." - if self.__verbose: - self.__lfh.write(f"+{self.__class_name__}.__validateAtomNomenclature() ++ Error - {err}\n") + self.report.warning.appendDescription('atom_nomenclature_mismatch', + {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, + 'description': err}) + self.report.setWarning() + + if self.__verbose: + self.__lfh.write(f"+{self.__class_name__}.__validateAtomNomenclature() ++ Warning - {err}\n") + + else: + + err = f"Invalid atom name {atom_id!r} (comp_id {comp_id!r}{cc_name}) in a loop {lp_category}." + + self.report.error.appendDescription('invalid_atom_nomenclature', + {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, + 'description': err}) + self.report.setError() + + if self.__verbose: + self.__lfh.write(f"+{self.__class_name__}.__validateAtomNomenclature() ++ Error - {err}\n") # non-standard residue else: @@ -20858,7 +20872,7 @@ def __validateAtomTypeOfCsLoop__(self, file_name: str, file_type: str, if self.__remediation_mode and 1 in isotope_nums and atom_id[0] in ('Q', 'M'): # DAOTHER-8663, 8751, 9520 continue - err = f"Invalid atom_id {atom_id!r} (atom_type {atom_type!r}) in a loop {lp_category}." + err = f"Invalid atom name {atom_id!r} (atom_type {atom_type!r}) in a loop {lp_category}." self.report.error.appendDescription('invalid_atom_nomenclature', {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, @@ -21110,8 +21124,7 @@ def __validateAmbigCodeOfCsLoop__(self, file_name: str, aux_tags = [aux_lp_category + '.' + item for item in aux_items] - for tag in aux_tags: - aux_lp.add_tag(tag) + aux_lp.add_tag(aux_tags) inter_residue_seq_id = {} @@ -25475,8 +25488,7 @@ def delete_aux_loop(): tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for idx, row in enumerate(loop): @@ -26306,8 +26318,7 @@ def fill_cs_row(lp, index, _row, prefer_auth_atom_name, coord_atom_site, _seq_ke tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) prefer_auth_atom_name = False @@ -27931,8 +27942,7 @@ def test_seq_id_offset_as_is(lp, index, _row, _idx, chain_id, seq_id, comp_id, o aux_tags = [aux_lp_category + '.' + item for item in aux_items] - for tag in aux_tags: - aux_lp.add_tag(tag) + aux_lp.add_tag(aux_tags) inter_residue_seq_id = {} @@ -28576,8 +28586,7 @@ def __testCsPseudoAtomNameConsistencyInMrLoop(self) -> bool: lp = pynmrstar.Loop.from_scratch(lp_category) - for tag in loop.tags: - lp.add_tag(lp_category + '.' + tag) + lp.add_tag(loop.tags) chain_id_col = loop.tags.index(cs_chain_id_name) seq_id_col = loop.tags.index(cs_seq_id_name) @@ -32331,8 +32340,7 @@ def __updateGenDistConstIdInMrStr(self, sf_item: dict) -> bool: lp = pynmrstar.Loop.from_scratch(loop.category) - for tag in loop.tags: - lp.add_tag(loop.category + '.' + tag) + lp.add_tag(loop.tags) id_col = loop.tags.index('ID') if 'Index_ID' not in loop.tags: @@ -32600,8 +32608,7 @@ def __updateTorsionAngleConstIdInMrStr(self, sf_item: dict) -> bool: # pylint: lp = pynmrstar.Loop.from_scratch(loop.category) - for tag in loop.tags: - lp.add_tag(loop.category + '.' + tag) + lp.add_tag(loop.tags) id_col = loop.tags.index('ID') if 'Index_ID' not in loop.tags: @@ -36466,8 +36473,7 @@ def get_auth_seq_scheme(chain_id, seq_id): tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) prefer_auth_atom_name = False @@ -46940,13 +46946,24 @@ def get_coord_atom_site_of(chain_id, seq_id, comp_id): if seq_key in coord_unobs_atom and atom_id_ in coord_unobs_atom[seq_key]['atom_ids']: coord_issue = True - self.report.error.appendDescription('hydrogen_not_instantiated' if checked else 'coordinate_issue' if coord_issue else 'atom_not_found', - {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, - 'description': err}) - self.report.setError() + if content_subtype.startswith('spectral_peak'): + self.report.warning.appendDescription('atom_nomenclature_mismatch', + {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, + 'description': err}) + self.report.setWarning() - if self.__verbose: - self.__lfh.write(f"+{self.__class_name__}.__testCoordAtomIdConsistency() ++ Error - {err}\n") + if self.__verbose: + self.__lfh.write(f"+{self.__class_name__}.__testCoordAtomIdConsistency() ++ Warning - {err}\n") + + else: + + self.report.error.appendDescription('hydrogen_not_instantiated' if checked else 'coordinate_issue' if coord_issue else 'atom_not_found', + {'file_name': file_name, 'sf_framecode': sf_framecode, 'category': lp_category, + 'description': err}) + self.report.setError() + + if self.__verbose: + self.__lfh.write(f"+{self.__class_name__}.__testCoordAtomIdConsistency() ++ Error - {err}\n") else: @@ -47297,9 +47314,7 @@ def __appendIndexTag(self) -> bool: lp = pynmrstar.Loop.from_scratch(lp_category) lp.add_tag(lp_tag) - - for tag in loop.tags: - lp.add_tag(lp_category + '.' + tag) + lp.add_tag(loop.tags) for idx, row in enumerate(loop, start=1): lp.add_data([str(idx)] + row) @@ -47913,8 +47928,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in ea_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in ea_data_items]) - for tag in tags: - ea_loop.add_tag(tag) + ea_loop.add_tag(tags) for item in entity_assembly: entity_id = item['entity_id'] @@ -48232,8 +48246,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in b_key_items] - for tag in tags: - b_loop.add_tag(tag) + b_loop.add_tag(tags) bonds = self.__cR.getDictList('struct_conn') @@ -48547,8 +48560,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in b_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in b_data_items]) - for tag in tags: - b_loop.add_tag(tag) + b_loop.add_tag(tags) bonds = self.__cR.getDictList('struct_conn') @@ -48681,8 +48693,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in eda_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in eda_data_items]) - for tag in tags: - eda_loop.add_tag(tag) + eda_loop.add_tag(tags) index = 1 @@ -49329,8 +49340,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in ecn_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in ecn_data_items]) - for tag in tags: - ecn_loop.add_tag(tag) + ecn_loop.add_tag(tags) ent_name_coms = self.__cR.getDictList('entity_name_com') for ent_name_com in ent_name_coms: @@ -49363,8 +49373,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in esn_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in esn_data_items]) - for tag in tags: - esn_loop.add_tag(tag) + esn_loop.add_tag(tags) ent_name_syss = self.__cR.getDictList('entity_name_sys') for ent_name_sys in ent_name_syss: @@ -49394,8 +49403,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in ek_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in ek_data_items]) - for tag in tags: - ek_loop.add_tag(tag) + ek_loop.add_tag(tags) ent_keys = self.__cR.getDictList('entity_keywords') for ent_key in ent_keys: @@ -49427,8 +49435,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in eci_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in eci_data_items]) - for tag in tags: - eci_loop.add_tag(tag) + eci_loop.add_tag(tags) index = 1 @@ -49539,8 +49546,7 @@ def __updatePolymerSequence(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in eps_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in eps_data_items]) - for tag in tags: - eps_loop.add_tag(tag) + eps_loop.add_tag(tags) seq_keys = set() @@ -55571,8 +55577,7 @@ def __sortCsLoop(self) -> bool: lp = pynmrstar.Loop.from_scratch(lp_category) - for tag in loop.tags: - lp.add_tag(lp_category + '.' + tag) + lp.add_tag(loop.tags) dat = [int(idx) for idx in loop.get_tag([idx_name])] @@ -56879,8 +56884,7 @@ def __mergeLegacyCsAndMr(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in cf_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in cf_data_items]) - for tag in tags: - cf_loop.add_tag(tag) + cf_loop.add_tag(tags) # inspect _Software saveframes to extend Software_ID in _Constraint_file loop @@ -57243,8 +57247,7 @@ def __mergeLegacyCsAndMr(self) -> bool: tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for content_subtype in self.nmr_rep_content_subtypes: sf_category = self.sf_categories[file_type][content_subtype] @@ -57275,8 +57278,7 @@ def __mergeLegacyCsAndMr(self) -> bool: tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) datum_counter = self.__getDatumCounter(master_entry) @@ -58732,8 +58734,7 @@ def __updateConstraintStats(self) -> bool: tags = [lp_category + '.' + _item['name'] for _item in cf_key_items] tags.extend([lp_category + '.' + _item['name'] for _item in cf_data_items]) - for tag in tags: - cf_loop.add_tag(tag) + cf_loop.add_tag(tags) if has_key_value(input_source_dic, 'content_subtype'): @@ -58803,8 +58804,7 @@ def __updateConstraintStats(self) -> bool: tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for content_subtype in self.nmr_rep_content_subtypes: sf_category = self.sf_categories[file_type][content_subtype] @@ -58836,8 +58836,7 @@ def __updateConstraintStats(self) -> bool: tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) datum_counter = self.__getDatumCounter(master_entry) diff --git a/wwpdb/utils/nmr/ann/BMRBAnnTasks.py b/wwpdb/utils/nmr/ann/BMRBAnnTasks.py index 66e70032..1732fac2 100644 --- a/wwpdb/utils/nmr/ann/BMRBAnnTasks.py +++ b/wwpdb/utils/nmr/ann/BMRBAnnTasks.py @@ -347,8 +347,7 @@ def not_title(title: str): tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for exp in exp_list: row = exp @@ -674,8 +673,7 @@ def not_title(title: str): tags = ['Hetero', 'Mon_ID', 'Num', 'Comp_index_ID', 'Entry_ID', 'Entity_ID'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) dat = lp.get_tag(['ID', 'Comp_ID']) @@ -1125,8 +1123,7 @@ def not_title(title: str): tags = ['Type', 'Val', 'Val_err', 'Val_units'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) data = lp.get_tag(tags) @@ -1677,8 +1674,7 @@ def is_natural_abundance(isotopic_labeling: str): _lp = pynmrstar.Loop.from_scratch(lp_category) - for tag in lp.tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(lp.tags) cur_id = 1 @@ -1786,8 +1782,7 @@ def is_natural_abundance(isotopic_labeling: str): 'Concentration_val_units', 'Concentration_val_err'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) data = lp.get_tag(tags) @@ -1836,8 +1831,7 @@ def is_natural_abundance(isotopic_labeling: str): 'Concentration_val_units', 'Concentration_val_err'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) data = lp.get_tag(tags) @@ -2098,8 +2092,7 @@ def is_natural_abundance(isotopic_labeling: str): tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for n in isotope_numbers: if n in ALLOWED_ISOTOPE_NUMBERS: @@ -2180,8 +2173,7 @@ def is_natural_abundance(isotopic_labeling: str): tags = [lp_category + '.' + item for item in items] - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) for n in isotope_numbers: if n in ALLOWED_ISOTOPE_NUMBERS: @@ -2605,8 +2597,7 @@ def is_natural_abundance(isotopic_labeling: str): 'Entity_assembly_name_1', 'Comp_ID_1', 'Seq_ID_1', 'Atom_ID_1', 'Entity_assembly_name_2', 'Comp_ID_2', 'Seq_ID_2', 'Atom_ID_2'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) dat = lp.get_tag(tags) @@ -2620,8 +2611,7 @@ def is_natural_abundance(isotopic_labeling: str): 'Comp_ID_1', 'Seq_ID_1', 'Atom_ID_1', 'Comp_ID_2', 'Seq_ID_2', 'Atom_ID_2'] - for tag in tags: - _lp.add_tag(lp_category + '.' + tag) + _lp.add_tag(tags) _lp.add_data(['?'] * len(tags)) diff --git a/wwpdb/utils/nmr/io/ChemCompReader.py b/wwpdb/utils/nmr/io/ChemCompReader.py index d81df08e..b1bc1f21 100644 --- a/wwpdb/utils/nmr/io/ChemCompReader.py +++ b/wwpdb/utils/nmr/io/ChemCompReader.py @@ -62,7 +62,7 @@ def __init__(self, verbose: bool = True, log: IO = sys.stdout): self.__compId = None self.__filePath = None - self.__cachedCompId = None + self.__lastCompId = None self.__cDict = { 'chem_comp': [ @@ -231,13 +231,13 @@ def __getComp(self) -> bool: @return: True for success or False otherwise """ - if self.__compId == self.__cachedCompId: + if self.__compId == self.__lastCompId: return True try: if self.__setDataBlock(self.__getDataBlock()): - self.__cachedCompId = self.__compId + self.__lastCompId = self.__compId return True return False diff --git a/wwpdb/utils/nmr/mr/ParserListenerUtil.py b/wwpdb/utils/nmr/mr/ParserListenerUtil.py index bf9cffcf..44766955 100644 --- a/wwpdb/utils/nmr/mr/ParserListenerUtil.py +++ b/wwpdb/utils/nmr/mr/ParserListenerUtil.py @@ -7321,8 +7321,7 @@ def getLoop(mrSubtype: str, reduced: bool = True, hasInsCode: bool = False) -> O else: tags.extend([prefix + item['name'] for item in NMR_STAR_LP_DATA_ITEMS[contentSubtype]]) - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) return lp @@ -7347,8 +7346,7 @@ def getPkLoop(pkSubtype: str) -> Optional[pynmrstar.Loop]: tags = [prefix + item['name'] for item in NMR_STAR_LP_KEY_ITEMS[contentSubtype]] tags.extend([prefix + item['name'] for item in NMR_STAR_LP_DATA_ITEMS[pkSubtype]]) - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) return lp @@ -7377,8 +7375,7 @@ def getAltLoops(mrSubtype: str) -> Optional[List[pynmrstar.Loop]]: tags = [prefix + item['name'] for item in NMR_STAR_ALT_LP_KEY_ITEMS[contentSubtype][catName]] tags.extend([prefix + item['name'] for item in NMR_STAR_ALT_LP_DATA_ITEMS[contentSubtype][catName]]) - for tag in tags: - alt_lp.add_tag(tag) + alt_lp.add_tag(tags) alt_lps.append(alt_lp) @@ -7409,8 +7406,7 @@ def getAuxLoops(mrSubtype: str) -> Optional[List[pynmrstar.Loop]]: tags = [prefix + item['name'] for item in NMR_STAR_AUX_LP_KEY_ITEMS[contentSubtype][catName]] tags.extend([prefix + item['name'] for item in NMR_STAR_AUX_LP_DATA_ITEMS[contentSubtype][catName]]) - for tag in tags: - aux_lp.add_tag(tag) + aux_lp.add_tag(tags) aux_lps.append(aux_lp) diff --git a/wwpdb/utils/nmr/nef/NEFTranslator.py b/wwpdb/utils/nmr/nef/NEFTranslator.py index 9e7b915b..e8137454 100644 --- a/wwpdb/utils/nmr/nef/NEFTranslator.py +++ b/wwpdb/utils/nmr/nef/NEFTranslator.py @@ -10225,8 +10225,7 @@ def __star2nef_peak_can(self, in_sf: pynmrstar.Saveframe, out_sf: pynmrstar.Save out_tags.append(f"_nef_peak.residue_name_{d}") out_tags.append(f"_nef_peak.atom_name_{d}") - for tag in out_tags: - out_lp.add_tag(tag) + out_lp.add_tag(out_tags) pk_tags = pk_loop.get_tag_names() @@ -10495,8 +10494,7 @@ def get_red_sf_framecode(sf): if len(tags) == 0: continue - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) if loop.category == '_nef_sequence': if self.authSeqMap is None: @@ -10747,8 +10745,7 @@ def get_red_sf_framecode(sf): if len(tags) == 0: continue - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) if loop.category == '_nef_sequence': if self.authSeqMap is None: @@ -11001,8 +10998,7 @@ def get_nef_sf_framecode(sf, prefix): if len(tags) == 0: continue - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) if loop.category == '_Chem_comp_assembly': if self.authSeqMap is None: @@ -11202,8 +11198,7 @@ def get_nef_sf_framecode(sf, prefix): if len(tags) == 0: continue - for tag in tags: - lp.add_tag(tag) + lp.add_tag(tags) if loop.category == '_Chem_comp_assembly': if self.authSeqMap is None: