Report inconsistency between BMRB CS statistics and CCD, Fix MEA atom…

… nomenclature of BMRB CS statistics (DAOTHER-9317)
wwPDB · Apr 19, 2024 · 3f59a79 · 3f59a79
1 parent fc59fd3
commit 3f59a79
Show file tree

Hide file tree

Showing 5 changed files with 184 additions and 3 deletions.
diff --git a/wwpdb/utils/nmr/BMRBChemShiftStat.py b/wwpdb/utils/nmr/BMRBChemShiftStat.py
@@ -16,6 +16,7 @@
 # 11-Nov-2022  M. Yokochi - add getProtonsInSameGroup() (NMR restraint remediation)
 # 20-Apr-2023  M. Yokochi - change backbone definition to be consistent with NMR restraint validation
 # 13-Dec-2023  M. Yokochi - support peptide-like residues containing symmetric aromatic ring (DAOTHER-8945)
+# 19-Apr-2024  M. Yokochi - add testAtomNomenclatureOfLibrary (DAOTHER-9317)
 ##
 """ Wrapper class for retrieving BMRB chemical shift statistics.
     @author: Masashi Yokochi
@@ -1044,8 +1045,44 @@ def __checkAtomNomenclature(self, atom_id):
                         and a[self.__ccU.ccaCTerminalAtomFlag] == 'N'))):
             return True
 
+        comp_id = self.__ccU.lastCompId
+        ref_atom_ids = [a[self.__ccU.ccaAtomId] for a in self.__ccU.lastAtomList]
+        ref_alt_atom_ids = [a[self.__ccU.ccaAltAtomId] for a in self.__ccU.lastAtomList]
+
+        if len(ref_atom_ids) == 0:
+            if self.__verbose:
+                cc_rel_status = self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status']
+                self.__lfh.write(f"+BMRBChemShiftStat.__checkAtomNomenclature() ++ Error  - {comp_id} is not valid CCD ID, status code: {cc_rel_status}\n")
+            return False
+
+        if atom_id in ref_atom_ids and atom_id in ref_alt_atom_ids:
+            _ref_atom_id = next(a[self.__ccU.ccaAtomId] for a in self.__ccU.lastAtomList
+                                if a[self.__ccU.ccaAltAtomId] == atom_id)
+
+            if atom_id == _ref_atom_id:
+                return True
+
+            if self.__verbose:
+                self.__lfh.write(f"+BMRBChemShiftStat.__checkAtomNomenclature() ++ Warning  - {comp_id}:{atom_id} is valid, "
+                                 f"but _chem_comp.alt_atom_id matched with different atom_id {_ref_atom_id}\n")
+
+            return True
+
+        if atom_id in ref_atom_ids and atom_id not in ref_alt_atom_ids:
+            return True
+
+        if atom_id not in ref_alt_atom_ids and atom_id in ref_alt_atom_ids:
+            _ref_atom_id = next(a[self.__ccU.ccaAtomId] for a in self.__ccU.lastAtomList
+                                if a[self.__ccU.ccaAltAtomId] == atom_id)
+
+            if self.__verbose:
+                self.__lfh.write(f"+BMRBChemShiftStat.__checkAtomNomenclature() ++ Error  - {comp_id}:{atom_id} matched with _chem_comp.alt_atom_id only. "
+                                 f"It should be {_ref_atom_id}\n")
+
+            return False
+
         if self.__verbose:
-            self.__lfh.write(f"+BMRBChemShiftStat.__checkAtomNomenclature() ++ Error  - Invalid atom nomenclature {atom_id}, comp_id {self.__ccU.lastCompId}\n")
+            self.__lfh.write(f"+BMRBChemShiftStat.__checkAtomNomenclature() ++ Error  - {comp_id}:{atom_id} did not match with any atom in CCD\n")
 
         return False
 
@@ -1628,6 +1665,146 @@ def __updateCompIdSet(self):
 
         self.__all_comp_ids |= self.__oth_comp_ids
 
+    def testAtomNomenclatureOfLibrary(self):
+        """ Report inconsistencies between BMRB chemical shift statistics and current CCD.
+            Each statistics table (aa_filt, dna_filt, rna_filt, aa_full, dna_full, rna_full, others)
+            is checked in turn and its findings plus a one-line summary are printed to stdout.
+            @return: True if no table produced an error, False otherwise (warnings alone do not fail)
+        """
+
+        def check_bmrb_cs_stat(atm_list):
+            """ Compare every comp_id/atom_id pair of a statistics table with CCD and print each finding.
+                @return: dictionary holding the number of 'warning' and 'error' findings
+            """
+
+            ret = {'warning': 0, 'error': 0}
+
+            # group rows by comp_id in a single pass (first-appearance order keeps the report reproducible)
+            rows_per_comp = {}
+            for row in atm_list:
+                rows_per_comp.setdefault(row['comp_id'], []).append(row)
+
+            for comp_id, rows in rows_per_comp.items():
+
+                if not self.__ccU.updateChemCompDict(comp_id):
+                    print(f'[Error] {comp_id} does not match with any CCD ID.')
+                    ret['error'] += 1
+                    continue
+
+                ref_atom_ids = {cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList}
+
+                # the chemical component was found, but carries no atom
+                if len(ref_atom_ids) == 0:
+                    cc_rel_status = self.__ccU.lastChemCompDict['_chem_comp.pdbx_release_status']
+                    print(f'[Error] {comp_id} is not valid CCD ID, status code: {cc_rel_status}.')
+                    ret['error'] += 1
+                    continue
+
+                # map each alternative atom name to the first atom_id that carries it
+                alt_to_atom_id = {}
+                for cca in self.__ccU.lastAtomList:
+                    alt_to_atom_id.setdefault(cca[self.__ccU.ccaAltAtomId], cca[self.__ccU.ccaAtomId])
+
+                peptide_like = self.__ccU.peptideLike()
+
+                # atoms flagged as leaving, except non-terminal atoms of peptide-like residues
+                leaving_atom_ids = {cca[self.__ccU.ccaAtomId] for cca in self.__ccU.lastAtomList
+                                    if not (cca[self.__ccU.ccaLeavingAtomFlag] != 'Y'
+                                            or (peptide_like
+                                                and cca[self.__ccU.ccaNTerminalAtomFlag] == 'N'
+                                                and cca[self.__ccU.ccaCTerminalAtomFlag] == 'N'))}
+
+                for row in rows:
+                    atom_id = row['atom_id']
+
+                    # reported in addition to, not instead of, the name checks below
+                    if atom_id in leaving_atom_ids:
+                        print(f'[Warning] {comp_id}:{atom_id} is leaving atom.')
+                        ret['warning'] += 1
+
+                    if atom_id in ref_atom_ids:
+                        # the name is valid; warn only if it is also the alternative name of a different atom
+                        _ref_atom_id = alt_to_atom_id.get(atom_id, atom_id)
+                        if atom_id != _ref_atom_id:
+                            print(f'[Warning] {comp_id}:{atom_id} is valid, but _chem_comp.alt_atom_id matched with different atom_id {_ref_atom_id}.')
+                            ret['warning'] += 1
+
+                    elif atom_id in alt_to_atom_id:
+                        # matches an alternative name only: report the atom_id that should be used
+                        _ref_atom_id = alt_to_atom_id[atom_id]
+                        print(f'[Error] {comp_id}:{atom_id} matched with _chem_comp.alt_atom_id only. It should be {_ref_atom_id}.')
+                        ret['error'] += 1
+
+                    else:
+                        print(f'[Error] {comp_id}:{atom_id} did not match with any atom in CCD.')
+                        ret['error'] += 1
+
+            return ret
+
+        status = True
+
+        stat_tables = (('aa_filt', self.aa_filt),
+                       ('dna_filt', self.dna_filt),
+                       ('rna_filt', self.rna_filt),
+                       ('aa_full', self.aa_full),
+                       ('dna_full', self.dna_full),
+                       ('rna_full', self.rna_full),
+                       ('others', self.others))
+
+        # a single loop guarantees that an error in any table, rna_filt included, fails the check
+        for stat_name, atm_list in stat_tables:
+            print(f'\nBMRB CS statistics name: {stat_name}')
+            result = check_bmrb_cs_stat(atm_list)
+            if result['error'] > 0:
+                print(f"{result['error']} Error, {result['warning']} Warning")
+                status = False
+            elif result['warning'] > 0:
+                print(f"{result['warning']} Warning")
+            else:
+                print('OK')
+
+        return status
+
     def getAtomLikeNameSet(self, excl_minor_atom=False, primary=False, minimum_len=1):
         """ Return atom like names of all standard residues.
         """

diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/aa_full.pkl b/wwpdb/utils/nmr/bmrb_cs_stat/aa_full.pkl
diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/others.csv b/wwpdb/utils/nmr/bmrb_cs_stat/others.csv
@@ -3169,8 +3169,8 @@ MC,HM'2,4,3.579,3.671,3.622,0.041
 MC,HM'3,4,3.579,3.671,3.622,0.041
 MEA,H,2,2.25,3.08,2.665,0.587
 MEA,HA,2,4.95,5.22,5.085,0.191
-MEA,HB2,2,3.04,3.15,3.095,0.078
-MEA,HB3,2,3.22,3.36,3.290,0.099
+MEA,HB1,2,3.04,3.15,3.095,0.078
+MEA,HB2,2,3.22,3.36,3.290,0.099
 MEA,HD1,2,7.22,7.25,7.235,0.021
 MEA,HE1,2,7.31,7.36,7.335,0.035
 MEG,H,2,8.03,8.33,8.180,0.212

diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/others.pkl b/wwpdb/utils/nmr/bmrb_cs_stat/others.pkl
diff --git a/wwpdb/utils/tests-nmr/test_BMRBChemShiftStat.py b/wwpdb/utils/tests-nmr/test_BMRBChemShiftStat.py
@@ -6,6 +6,7 @@
 # 16-Apr-2020  M. Yokochi - fix ambiguity code of atom name starts with 'Q' (e.g. LYZ:QZ)
 # 20-Nov-2020  M. Yokochi - add unit test for HEM, HEB, HEC (DAOTHER-6366)
 # 13-Oct-2021  M. Yokochi - code refactoring according to PEP8 using Pylint (DAOTHER-7389, issue #5)
+# 19-Apr-2024  M. Yokochi - add unit test to verify BMRB CS statistics are filtered by CCD (DAOTHER-9317)
 ##
 import unittest
 
@@ -163,6 +164,9 @@ def test_peptide_line(self):
         self.assertEqual(self.bmrb_cs_stat.peptideLike('D4P'), True)
         self.assertEqual(self.bmrb_cs_stat.peptideLike('GHP'), True)
 
+    def test_atom_nomenclature(self):
+        """ Expect testAtomNomenclatureOfLibrary() to return True for the loaded BMRB CS statistics.
+        """
+        is_consistent = self.bmrb_cs_stat.testAtomNomenclatureOfLibrary()
+        self.assertEqual(is_consistent, True)
+
 
 if __name__ == '__main__':
     unittest.main()