DAOTHER-9317: Add support for atom name conversion for general methyl…

…, methylene, amino groups in BMRB chemical shift statistics
wwPDB · Apr 19, 2024 · d17d7ea · d17d7ea
1 parent c3bab52
commit d17d7ea
Show file tree

Hide file tree

Showing 6 changed files with 124 additions and 2 deletions.
diff --git a/wwpdb/utils/nmr/BMRBChemShiftStat.py b/wwpdb/utils/nmr/BMRBChemShiftStat.py
@@ -793,6 +793,11 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
                 if not self.__ccU.updateChemCompDict(comp_id):
                     continue
 
+                rep_methyl_protons = self.__ccU.getRepMethylProtons(comp_id)
+                non_rep_methyl_protons = self.__ccU.getNonRepMethylProtons(comp_id)
+                rep_methylene_protons = self.__ccU.getRepMethyleneOrAminoProtons(comp_id)
+                non_rep_methylene_protons = self.__ccU.getNonRepMethyleneOrAminoProtons(comp_id)
+
                 _atom_id = row['atom_id']
 
                 # methyl proton group
@@ -921,6 +926,42 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
                         if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
                             atm_list.append(_row)
 
+                # DAOTHER-9317: representative methyl group
+                elif any(rep_methyl_proton.startswith(_atom_id) for rep_methyl_proton in rep_methyl_protons
+                         if rep_methyl_proton != _atom_id and 0 <= len(rep_methyl_proton) - len(_atom_id) <= 1 and _atom_id not in non_rep_methyl_protons):
+
+                    rep_methyl_proton = next(rep_methyl_proton for rep_methyl_proton in rep_methyl_protons
+                                             if rep_methyl_proton.startswith(_atom_id))
+
+                    for _atom_id in self.__ccU.getProtonsInSameGroup(comp_id, rep_methyl_proton):
+                        _row = {}
+                        _row['comp_id'] = comp_id
+                        _row['atom_id'] = _atom_id
+
+                        __status, __comp_id, __atom_id = self.checkAtomNomenclature(_row['atom_id'])
+                        if not __status:
+                            continue
+
+                        if _row['comp_id'] != __comp_id:
+                            _row['comp_id'] = __comp_id
+                        if _row['atom_id'] != __atom_id:
+                            _row['atom_id'] = __atom_id
+
+                        _row['count'] = int(row['count'])
+                        _row['avg'] = float(row['avg'])
+                        try:
+                            _row['std'] = float(row['std'])
+                        except ValueError:
+                            _row['std'] = None
+                        _row['min'] = float(row['min'])
+                        _row['max'] = float(row['max'])
+                        _row['desc'] = 'methyl'
+                        _row['primary'] = False
+                        _row['norm_freq'] = None
+
+                        if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
+                            atm_list.append(_row)
+
                 # geminal proton group
                 elif _atom_id.startswith('Q'):
                     _atom_id = re.sub(r'^Q', 'H', _atom_id)
@@ -947,7 +988,7 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
                             _row['std'] = None
                         _row['min'] = float(row['min'])
                         _row['max'] = float(row['max'])
-                        _row['desc'] = 'methyl'
+                        _row['desc'] = 'isolated'
                         _row['primary'] = False
                         _row['norm_freq'] = None
 
@@ -987,6 +1028,42 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
                     if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
                         atm_list.append(_row)
 
+                # DAOTHER-9317: general methylene/amino group
+                elif any(rep_methylene_proton.startswith(_atom_id) for rep_methylene_proton in rep_methylene_protons
+                         if rep_methylene_proton != _atom_id and 0 <= len(rep_methylene_proton) - len(_atom_id) <= 1 and _atom_id not in non_rep_methylene_protons):
+
+                    rep_methylene_proton = next(rep_methylene_proton for rep_methylene_proton in rep_methylene_protons
+                                                if rep_methylene_proton.startswith(_atom_id))
+
+                    for _atom_id in self.__ccU.getProtonsInSameGroup(comp_id, rep_methylene_proton):
+                        _row = {}
+                        _row['comp_id'] = comp_id
+                        _row['atom_id'] = _atom_id
+
+                        __status, __comp_id, __atom_id = self.checkAtomNomenclature(_row['atom_id'])
+                        if not __status:
+                            continue
+
+                        if _row['comp_id'] != __comp_id:
+                            _row['comp_id'] = __comp_id
+                        if _row['atom_id'] != __atom_id:
+                            _row['atom_id'] = __atom_id
+
+                        _row['count'] = int(row['count'])
+                        _row['avg'] = float(row['avg'])
+                        try:
+                            _row['std'] = float(row['std'])
+                        except ValueError:
+                            _row['std'] = None
+                        _row['min'] = float(row['min'])
+                        _row['max'] = float(row['max'])
+                        _row['desc'] = 'isolated'
+                        _row['primary'] = False
+                        _row['norm_freq'] = None
+
+                        if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
+                            atm_list.append(_row)
+
         comp_ids = set(item['comp_id'] for item in atm_list)
 
         if secondary_th is not None:  # extract rest of atoms for non-standard residues

diff --git a/wwpdb/utils/nmr/ChemCompUtil.py b/wwpdb/utils/nmr/ChemCompUtil.py
@@ -8,6 +8,7 @@
 # 13-Jun-2023  M. Yokochi - add getEffectiveFormulaWeight()
 # 07-Dec-2023  M. Yokochi - add support for PTM items (backbone, n_terminal, c_terminal atom flags)
 # 13-Dec-2023  M. Yokochi - add getAtomsBasedOnGreekLetterSystem(), peptideLike() and getTypeOfCompId() (DAOTHER-8945)
+# 19-Apr-2024  M. Yokochi - add getRepMethyleneOrAminoProtons() and getNonRepMethyleneOrAminoProtons() (DAOTHER-9317)
 ##
 """ Wrapper class for retrieving chemical component dictionary.
     @author: Masashi Yokochi
@@ -246,6 +247,50 @@ def getNonRepMethylProtons(self, compId):
 
         return atmList
 
+    def getRepMethyleneOrAminoProtons(self, compId):
+        """ Return representative protons in methylene/amino group of a given comp_id.
+            For each C or N atom bonded to exactly two protons, the first of the two (in self.lastBonds order) is reported.
+        """
+        if compId != self.lastCompId and not self.updateChemCompDict(compId):  # only calls updateChemCompDict() when compId differs from self.lastCompId
+            return []
+
+        atmList = []
+        # 'corn' = C or N: atom IDs of every atom whose type symbol is carbon or nitrogen (lazy generator)
+        corns = (a[self.ccaAtomId] for a in self.lastAtomList if a[self.ccaTypeSymbol] in ('C', 'N'))
+        # per heavy atom, gather the partner atom ID of each bond whose partner name starts with a character in protonBeginCode
+        for corn in corns:
+            protons = [(b[self.ccbAtomId1] if b[self.ccbAtomId1] != corn else b[self.ccbAtomId2])
+                       for b in self.lastBonds
+                       if (b[self.ccbAtomId1] == corn and b[self.ccbAtomId2][0] in protonBeginCode)
+                       or (b[self.ccbAtomId2] == corn and b[self.ccbAtomId1][0] in protonBeginCode)]
+            if len(protons) != 2:  # keep only heavy atoms carrying exactly two protons
+                continue
+            atmList.append(protons[0])  # the first proton found is the representative of the pair
+
+        return atmList
+
+    def getNonRepMethyleneOrAminoProtons(self, compId):
+        """ Return non-representative protons in methylene/amino group of a given comp_id.
+            For each C or N atom bonded to exactly two protons, the second of the two (in self.lastBonds order) is reported.
+        """
+        if compId != self.lastCompId and not self.updateChemCompDict(compId):  # only calls updateChemCompDict() when compId differs from self.lastCompId
+            return []
+
+        atmList = []
+        # 'corn' = C or N: atom IDs of every atom whose type symbol is carbon or nitrogen (lazy generator)
+        corns = (a[self.ccaAtomId] for a in self.lastAtomList if a[self.ccaTypeSymbol] in ('C', 'N'))
+        # per heavy atom, gather the partner atom ID of each bond whose partner name starts with a character in protonBeginCode
+        for corn in corns:
+            protons = [(b[self.ccbAtomId1] if b[self.ccbAtomId1] != corn else b[self.ccbAtomId2])
+                       for b in self.lastBonds
+                       if (b[self.ccbAtomId1] == corn and b[self.ccbAtomId2][0] in protonBeginCode)
+                       or (b[self.ccbAtomId2] == corn and b[self.ccbAtomId1][0] in protonBeginCode)]
+            if len(protons) != 2:  # keep only heavy atoms carrying exactly two protons
+                continue
+            atmList.extend(protons[1:])  # with exactly two protons this adds only the second one
+
+        return atmList
+
     def getBondedAtoms(self, compId, atomId, exclProton=False, onlyProton=False):
         """ Return bonded atoms to a given atom.
         """

diff --git a/wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py b/wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py
@@ -5058,7 +5058,7 @@ def guess_ambiguity_code(atom_list):
                             if atom_list[0] in v:
                                 len_v = len(v)
                                 if len_v == 2:
-                                    return 2  # methylen/amino
+                                    return 2  # methylene/amino
                                 if len_v == 1:
                                     if k[0] == 'C' and self.chemCompTopo is not None and comp_id in self.chemCompTopo\
                                        and any(len(tv) == 2 and tv[0][0] == 'C' and tv[1][0] == 'C'

diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/aa_filt.pkl b/wwpdb/utils/nmr/bmrb_cs_stat/aa_filt.pkl
diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/aa_full.pkl b/wwpdb/utils/nmr/bmrb_cs_stat/aa_full.pkl
diff --git a/wwpdb/utils/nmr/bmrb_cs_stat/others.pkl b/wwpdb/utils/nmr/bmrb_cs_stat/others.pkl