Skip to content

Commit

Permalink
DAOTHER-9317: Add support for atom name conversion for general methyl…
Browse files Browse the repository at this point in the history
…, methylene, amino groups in BMRB chemical shift statistics
  • Loading branch information
yokochi47 committed Apr 19, 2024
1 parent c3bab52 commit d17d7ea
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 2 deletions.
79 changes: 78 additions & 1 deletion wwpdb/utils/nmr/BMRBChemShiftStat.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,11 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
if not self.__ccU.updateChemCompDict(comp_id):
continue

rep_methyl_protons = self.__ccU.getRepMethylProtons(comp_id)
non_rep_methyl_protons = self.__ccU.getNonRepMethylProtons(comp_id)
rep_methylene_protons = self.__ccU.getRepMethyleneOrAminoProtons(comp_id)
non_rep_methylene_protons = self.__ccU.getNonRepMethyleneOrAminoProtons(comp_id)

_atom_id = row['atom_id']

# methyl proton group
Expand Down Expand Up @@ -921,6 +926,42 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
atm_list.append(_row)

# DAOTHER-9317: representative methyl group
elif any(rep_methyl_proton.startswith(_atom_id) for rep_methyl_proton in rep_methyl_protons
if rep_methyl_proton != _atom_id and 0 <= len(rep_methyl_proton) - len(_atom_id) <= 1 and _atom_id not in non_rep_methyl_protons):

rep_methyl_proton = next(rep_methyl_proton for rep_methyl_proton in rep_methyl_protons
if rep_methyl_proton.startswith(_atom_id))

for _atom_id in self.__ccU.getProtonsInSameGroup(comp_id, rep_methyl_proton):
_row = {}
_row['comp_id'] = comp_id
_row['atom_id'] = _atom_id

__status, __comp_id, __atom_id = self.checkAtomNomenclature(_row['atom_id'])
if not __status:
continue

if _row['comp_id'] != __comp_id:
_row['comp_id'] = __comp_id
if _row['atom_id'] != __atom_id:
_row['atom_id'] = __atom_id

_row['count'] = int(row['count'])
_row['avg'] = float(row['avg'])
try:
_row['std'] = float(row['std'])
except ValueError:
_row['std'] = None
_row['min'] = float(row['min'])
_row['max'] = float(row['max'])
_row['desc'] = 'methyl'
_row['primary'] = False
_row['norm_freq'] = None

if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
atm_list.append(_row)

# geminal proton group
elif _atom_id.startswith('Q'):
_atom_id = re.sub(r'^Q', 'H', _atom_id)
Expand All @@ -947,7 +988,7 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
_row['std'] = None
_row['min'] = float(row['min'])
_row['max'] = float(row['max'])
_row['desc'] = 'methyl'
_row['desc'] = 'isolated'
_row['primary'] = False
_row['norm_freq'] = None

Expand Down Expand Up @@ -987,6 +1028,42 @@ def loadStatFromCsvFile(self, file_name, primary_th, secondary_th=None, comp_id_
if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
atm_list.append(_row)

# DAOTHER-9317: general methylene/amino group
elif any(rep_methylene_proton.startswith(_atom_id) for rep_methylene_proton in rep_methylene_protons
if rep_methylene_proton != _atom_id and 0 <= len(rep_methylene_proton) - len(_atom_id) <= 1 and _atom_id not in non_rep_methylene_protons):

rep_methylene_proton = next(rep_methylene_proton for rep_methylene_proton in rep_methylene_protons
if rep_methylene_proton.startswith(_atom_id))

for _atom_id in self.__ccU.getProtonsInSameGroup(comp_id, rep_methylene_proton):
_row = {}
_row['comp_id'] = comp_id
_row['atom_id'] = _atom_id

__status, __comp_id, __atom_id = self.checkAtomNomenclature(_row['atom_id'])
if not __status:
continue

if _row['comp_id'] != __comp_id:
_row['comp_id'] = __comp_id
if _row['atom_id'] != __atom_id:
_row['atom_id'] = __atom_id

_row['count'] = int(row['count'])
_row['avg'] = float(row['avg'])
try:
_row['std'] = float(row['std'])
except ValueError:
_row['std'] = None
_row['min'] = float(row['min'])
_row['max'] = float(row['max'])
_row['desc'] = 'isolated'
_row['primary'] = False
_row['norm_freq'] = None

if not any(a['comp_id'] == _row['comp_id'] and a['atom_id'] == _row['atom_id'] for a in atm_list):
atm_list.append(_row)

comp_ids = set(item['comp_id'] for item in atm_list)

if secondary_th is not None: # extract rest of atoms for non-standard residues
Expand Down
45 changes: 45 additions & 0 deletions wwpdb/utils/nmr/ChemCompUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# 13-Jun-2023 M. Yokochi - add getEffectiveFormulaWeight()
# 07-Dec-2023 M. Yokochi - add support for PTM items (backbone, n_terminal, c_terminal atom flags)
# 13-Dec-2023 M. Yokochi - add getAtomsBasedOnGreekLetterSystem(), peptideLike() and getTypeOfCompId() (DAOTHER-8945)
# 19-Apr-2024 M. Yokochi - add getRepMethyleneOrAminoProtons() and getNonRepMethyleneOrAminoProtons() (DAOTHER-9317)
##
""" Wrapper class for retrieving chemical component dictionary.
@author: Masashi Yokochi
Expand Down Expand Up @@ -246,6 +247,50 @@ def getNonRepMethylProtons(self, compId):

return atmList

def getRepMethyleneOrAminoProtons(self, compId):
    """ Return the representative proton of each methylene/amino group of a given comp_id.

        Every atom whose type symbol is 'C' or 'N' and that is bonded to exactly two
        protons contributes the proton that appears first in the bond list.
        An empty list is returned when the given comp_id is not the currently loaded
        one and updateChemCompDict() fails for it.
    """

    # reuse the loaded chemical component if possible, otherwise try to load it
    if compId != self.lastCompId:
        if not self.updateChemCompDict(compId):
            return []

    repProtons = []

    for atom in self.lastAtomList:

        # only carbon and nitrogen can be the center of a methylene/amino group
        if atom[self.ccaTypeSymbol] not in ('C', 'N'):
            continue

        center = atom[self.ccaAtomId]

        # collect the proton found at the opposite end of each bond of the center atom
        bonded = []
        for bond in self.lastBonds:
            atomId1 = bond[self.ccbAtomId1]
            atomId2 = bond[self.ccbAtomId2]
            if atomId1 == center and atomId2[0] in protonBeginCode:
                bonded.append(atomId2)
            elif atomId2 == center and atomId1[0] in protonBeginCode:
                bonded.append(atomId1)

        # exactly two protons: methylene (CH2) or amino (NH2) group
        if len(bonded) == 2:
            repProtons.append(bonded[0])

    return repProtons

def getNonRepMethyleneOrAminoProtons(self, compId):
    """ Return the non-representative proton of each methylene/amino group of a given comp_id.

        Every atom whose type symbol is 'C' or 'N' and that is bonded to exactly two
        protons contributes the proton that appears second in the bond list.
        An empty list is returned when the given comp_id is not the currently loaded
        one and updateChemCompDict() fails for it.
    """

    # the dictionary is refreshed only when a different comp_id is requested
    available = compId == self.lastCompId or self.updateChemCompDict(compId)
    if not available:
        return []

    nonRepProtons = []

    for atom in self.lastAtomList:

        if atom[self.ccaTypeSymbol] in ('C', 'N'):
            center = atom[self.ccaAtomId]

            # protons bonded to the center atom, in bond list order
            partners = []
            for bond in self.lastBonds:
                first, second = bond[self.ccbAtomId1], bond[self.ccbAtomId2]
                if first == center and second[0] in protonBeginCode:
                    partners.append(second)
                elif second == center and first[0] in protonBeginCode:
                    partners.append(first)

            # groups with exactly two protons only; the first proton is the representative one
            if len(partners) == 2:
                nonRepProtons.append(partners[1])

    return nonRepProtons

def getBondedAtoms(self, compId, atomId, exclProton=False, onlyProton=False):
""" Return bonded atoms to a given atom.
"""
Expand Down
2 changes: 1 addition & 1 deletion wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5058,7 +5058,7 @@ def guess_ambiguity_code(atom_list):
if atom_list[0] in v:
len_v = len(v)
if len_v == 2:
return 2 # methylen/amino
return 2 # methylene/amino
if len_v == 1:
if k[0] == 'C' and self.chemCompTopo is not None and comp_id in self.chemCompTopo\
and any(len(tv) == 2 and tv[0][0] == 'C' and tv[1][0] == 'C'
Expand Down
Binary file modified wwpdb/utils/nmr/bmrb_cs_stat/aa_filt.pkl
Binary file not shown.
Binary file modified wwpdb/utils/nmr/bmrb_cs_stat/aa_full.pkl
Binary file not shown.
Binary file modified wwpdb/utils/nmr/bmrb_cs_stat/others.pkl
Binary file not shown.

0 comments on commit d17d7ea

Please sign in to comment.