Skip to content

Commit

Permalink
Fix ambiguous residue prediction (2mpg)
Browse files: browse the repository at this point in the history
  • Loading branch information
yokochi47 committed Jul 29, 2024
1 parent c1317f6 commit 3e4a177
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 78 deletions.
21 changes: 17 additions & 4 deletions wwpdb/utils/nmr/AlignUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ def updatePolySeqRst(polySeqRst, chainId, seqId, compId: str, authCompId=None):
ps['auth_comp_id'].append(compId if authCompId in emptyValue else authCompId)


def updatePolySeqRstAmbig(polySeqRstAmb, chainId, seqId, compIds: list, polySeqRst):
def updatePolySeqRstAmbig(polySeqRstAmb, chainId, seqId, compIds: list):
""" Update polymer sequence of the current MR file.
"""

Expand All @@ -707,9 +707,6 @@ def updatePolySeqRstAmbig(polySeqRstAmb, chainId, seqId, compIds: list, polySeqR
ps['comp_ids'].append(_compIds)
else:
ps['comp_ids'][ps['seq_id'].index(seqId)] &= _compIds
_compId = ps['comp_ids'][ps['seq_id'].index(seqId)]
if len(_compId) == 1:
updatePolySeqRst(polySeqRst, chainId, seqId, list(_compId)[0])


def mergePolySeqRstAmbig(polySeqRst, polySeqRstAmb):
Expand All @@ -719,6 +716,22 @@ def mergePolySeqRstAmbig(polySeqRst, polySeqRstAmb):
if len(polySeqRstAmb) == 0:
return

for _ps in polySeqRstAmb:
chainId = _ps['chain_id']

ps = next((ps for ps in polySeqRst if ps['chain_id'] == chainId), None)

if ps is not None:
continue

__ps = copy.copy(_ps)

for idx, (seqId, compIds) in enumerate(zip(__ps['seq_id'], __ps['comp_ids'])):
if len(compIds) == 1:
updatePolySeqRst(polySeqRst, chainId, seqId, list(compIds)[0])
del _ps['seq_id'][idx]
del _ps['comp_ids'][idx]

for ps in polySeqRst:
chainId = ps['chain_id']

Expand Down
5 changes: 0 additions & 5 deletions wwpdb/utils/nmr/mr/AriaMRParserListener.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
zincIonCode,
isReservedLigCode,
updatePolySeqRst,
mergePolySeqRstAmbig,
sortPolySeqRst,
alignPolymerSequence,
assignPolymerSequence,
Expand Down Expand Up @@ -135,7 +134,6 @@
zincIonCode,
isReservedLigCode,
updatePolySeqRst,
mergePolySeqRstAmbig,
sortPolySeqRst,
alignPolymerSequence,
assignPolymerSequence,
Expand Down Expand Up @@ -672,9 +670,6 @@ def exitAria_mr(self, ctx: AriaMRParser.Aria_mrContext): # pylint: disable=unus
# del self.reasonsForReParsing['seq_id_remap']
# """
if 'local_seq_scheme' in self.reasonsForReParsing and len(self.reasonsForReParsing) == 1:
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)
if len(self.__polySeqRstFailed) > 0:
self.reasonsForReParsing['extend_seq_scheme'] = self.__polySeqRstFailed
Expand Down
5 changes: 0 additions & 5 deletions wwpdb/utils/nmr/mr/BiosymMRParserListener.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@
zincIonCode,
isReservedLigCode,
updatePolySeqRst,
mergePolySeqRstAmbig,
sortPolySeqRst,
alignPolymerSequence,
assignPolymerSequence,
Expand Down Expand Up @@ -149,7 +148,6 @@
zincIonCode,
isReservedLigCode,
updatePolySeqRst,
mergePolySeqRstAmbig,
sortPolySeqRst,
alignPolymerSequence,
assignPolymerSequence,
Expand Down Expand Up @@ -681,9 +679,6 @@ def exitBiosym_mr(self, ctx: BiosymMRParser.Biosym_mrContext): # pylint: disabl
# del self.reasonsForReParsing['seq_id_remap']
# """
if 'local_seq_scheme' in self.reasonsForReParsing and len(self.reasonsForReParsing) == 1:
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)
if len(self.__polySeqRstFailed) > 0:
self.reasonsForReParsing['extend_seq_scheme'] = self.__polySeqRstFailed
Expand Down
23 changes: 12 additions & 11 deletions wwpdb/utils/nmr/mr/CharmmMRParserListener.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,9 +668,8 @@ def set_label_seq_scheme():
if 'branched_remap' not in self.reasonsForReParsing:
self.reasonsForReParsing['branched_remap'] = branchedMapping

mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand All @@ -682,6 +681,8 @@ def set_label_seq_scheme():
if _ps is None:
continue
for seqId, compIds in zip(_ps['seq_id'], _ps['comp_ids']):
_matched = 0
_compId = None
for compId in list(compIds):
_polySeqRstFailed = copy.deepcopy(self.__polySeqRstFailed)
updatePolySeqRst(_polySeqRstFailed, chainId, seqId, compId)
Expand All @@ -690,7 +691,11 @@ def set_label_seq_scheme():
_sa = next((_sa for _sa in _seqAlignFailed if _sa['test_chain_id'] == chainId), None)
if _sa is None or _sa['conflict'] > 0:
continue
updatePolySeqRst(self.__polySeqRstFailed, chainId, seqId, compId)
if _sa['matched'] > _matched:
_matched = _sa['matched']
_compId = compId
if _compId is not None:
updatePolySeqRst(self.__polySeqRstFailed, chainId, seqId, _compId)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand Down Expand Up @@ -774,7 +779,7 @@ def set_label_seq_scheme():
offsets[auth_seq_id - offset] = ref_auth_seq_id - auth_seq_id
if 'global_auth_sequence_offset' not in self.reasonsForReParsing:
self.reasonsForReParsing['global_auth_sequence_offset'] = {}
self.reasonsForReParsing['global_auth_sequence_offset'][ref_chain_id] = offsets
self.reasonsForReParsing['global_auth_sequence_offset'][ref_chain_id] = offsets

if len(chainAssignFailed) == 0:
valid_auth_seq = valid_label_seq = True
Expand Down Expand Up @@ -820,10 +825,9 @@ def set_label_seq_scheme():

# attempt to resolve case where there is no valid restraint, but only insufficient atom selection errors
# due to arbitrary shift of sequence number that does not match with any coordinate sequence schemes (2lzs)
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
if self.__reasons is None and len(self.__polySeqRst) == 0 and len(self.__polySeqRstFailed) > 0\
and any(f for f in self.__f if '[Insufficient atom selection]' in f):
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand Down Expand Up @@ -1040,9 +1044,7 @@ def set_label_seq_scheme():
self.reasonsForReParsing['chain_id_remap'] = chainIdRemap

if 'local_seq_scheme' in self.reasonsForReParsing and len(self.reasonsForReParsing) == 1:
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)
if len(self.__polySeqRstFailed) > 0:
self.reasonsForReParsing['extend_seq_scheme'] = self.__polySeqRstFailed
Expand Down Expand Up @@ -3384,8 +3386,7 @@ def update_np_seq_id_remap_request(np, ligands):
if len(compIds) == 1:
updatePolySeqRst(self.__polySeqRstFailed, _factor['chain_id'][0], _factor['seq_id'][0], compIds[0])
else:
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, _factor['chain_id'][0], _factor['seq_id'][0], compIds,
self.__polySeqRstFailed)
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, _factor['chain_id'][0], _factor['seq_id'][0], compIds)

if ligands == 0:
self.__preferAuthSeq = not self.__preferAuthSeq
Expand Down
23 changes: 12 additions & 11 deletions wwpdb/utils/nmr/mr/CnsMRParserListener.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,9 +821,8 @@ def set_label_seq_scheme():
if 'branched_remap' not in self.reasonsForReParsing:
self.reasonsForReParsing['branched_remap'] = branchedMapping

mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand All @@ -835,6 +834,8 @@ def set_label_seq_scheme():
if _ps is None:
continue
for seqId, compIds in zip(_ps['seq_id'], _ps['comp_ids']):
_matched = 0
_compId = None
for compId in list(compIds):
_polySeqRstFailed = copy.deepcopy(self.__polySeqRstFailed)
updatePolySeqRst(_polySeqRstFailed, chainId, seqId, compId)
Expand All @@ -843,7 +844,11 @@ def set_label_seq_scheme():
_sa = next((_sa for _sa in _seqAlignFailed if _sa['test_chain_id'] == chainId), None)
if _sa is None or _sa['conflict'] > 0:
continue
updatePolySeqRst(self.__polySeqRstFailed, chainId, seqId, compId)
if _sa['matched'] > _matched:
_matched = _sa['matched']
_compId = compId
if _compId is not None:
updatePolySeqRst(self.__polySeqRstFailed, chainId, seqId, _compId)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand Down Expand Up @@ -927,7 +932,7 @@ def set_label_seq_scheme():
offsets[auth_seq_id - offset] = ref_auth_seq_id - auth_seq_id
if 'global_auth_sequence_offset' not in self.reasonsForReParsing:
self.reasonsForReParsing['global_auth_sequence_offset'] = {}
self.reasonsForReParsing['global_auth_sequence_offset'][ref_chain_id] = offsets
self.reasonsForReParsing['global_auth_sequence_offset'][ref_chain_id] = offsets

if len(chainAssignFailed) == 0:
valid_auth_seq = valid_label_seq = True
Expand Down Expand Up @@ -973,10 +978,9 @@ def set_label_seq_scheme():

# attempt to resolve case where there is no valid restraint, but only insufficient atom selection errors
# due to arbitrary shift of sequence number that does not match with any coordinate sequence schemes (2lzs)
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
if self.__reasons is None and len(self.__polySeqRst) == 0 and len(self.__polySeqRstFailed) > 0\
and any(f for f in self.__f if '[Insufficient atom selection]' in f):
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand Down Expand Up @@ -1193,9 +1197,7 @@ def set_label_seq_scheme():
self.reasonsForReParsing['chain_id_remap'] = chainIdRemap

if 'local_seq_scheme' in self.reasonsForReParsing and len(self.reasonsForReParsing) == 1:
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)
if len(self.__polySeqRstFailed) > 0:
self.reasonsForReParsing['extend_seq_scheme'] = self.__polySeqRstFailed
Expand Down Expand Up @@ -5607,8 +5609,7 @@ def update_np_seq_id_remap_request(np, ligands):
if len(compIds) == 1:
updatePolySeqRst(self.__polySeqRstFailed, _factor['chain_id'][0], _factor['seq_id'][0], compIds[0])
else:
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, _factor['chain_id'][0], _factor['seq_id'][0], compIds,
self.__polySeqRstFailed)
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, _factor['chain_id'][0], _factor['seq_id'][0], compIds)

if ligands == 0 and not self.__has_nx:
self.__preferAuthSeq = not self.__preferAuthSeq
Expand Down
13 changes: 4 additions & 9 deletions wwpdb/utils/nmr/mr/CyanaMRParserListener.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,9 +688,8 @@ def exitCyana_mr(self, ctx: CyanaMRParser.Cyana_mrContext): # pylint: disable=u
if 'branched_remap' not in self.reasonsForReParsing:
self.reasonsForReParsing['branched_remap'] = branchedMapping

mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)

seqAlignFailed, _ = alignPolymerSequence(self.__pA, self.__polySeq, self.__polySeqRstFailed)
Expand Down Expand Up @@ -858,9 +857,7 @@ def exitCyana_mr(self, ctx: CyanaMRParser.Cyana_mrContext): # pylint: disable=u
# del self.reasonsForReParsing['seq_id_remap']
# """
if 'local_seq_scheme' in self.reasonsForReParsing and len(self.reasonsForReParsing) == 1:
if len(self.__polySeqRstFailed) > 0:
if len(self.__polySeqRstFailedAmbig) > 0:
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
mergePolySeqRstAmbig(self.__polySeqRstFailed, self.__polySeqRstFailedAmbig)
sortPolySeqRst(self.__polySeqRstFailed)
if len(self.__polySeqRstFailed) > 0:
self.reasonsForReParsing['extend_seq_scheme'] = self.__polySeqRstFailed
Expand Down Expand Up @@ -3591,8 +3588,7 @@ def assignCoordPolymerSequenceWithoutCompId(self, seqId, atomId=None):
if len(compIds) == 1:
updatePolySeqRst(self.__polySeqRstFailed, chainId, seqId, compIds[0])
else:
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, chainId, seqId, compIds,
self.__polySeqRstFailed)
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, chainId, seqId, compIds)

return list(chainAssign)

Expand Down Expand Up @@ -3809,8 +3805,7 @@ def assignCoordPolymerSequenceWithChainIdWithoutCompId(self, fixedChainId, seqId
if len(compIds) == 1:
updatePolySeqRst(self.__polySeqRstFailed, fixedChainId, seqId, compIds[0])
else:
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, fixedChainId, seqId, compIds,
self.__polySeqRstFailed)
updatePolySeqRstAmbig(self.__polySeqRstFailedAmbig, fixedChainId, seqId, compIds)

return list(chainAssign)

Expand Down
Loading

0 comments on commit 3e4a177

Please sign in to comment.