Skip to content

Commit

Permalink
Empirical remedy for case lexer error listener returns inconsistent values (2n07, 2bgo, 5xi1)
Browse files Browse the repository at this point in the history
  • Loading branch information
yokochi47 committed Nov 25, 2024
1 parent 5dd0a2e commit 3f0efeb
Show file tree
Hide file tree
Showing 15 changed files with 296 additions and 245 deletions.
14 changes: 14 additions & 0 deletions wwpdb/utils/nmr/BMRBChemShiftStat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import copy
import pickle
import collections
import shutil

from operator import itemgetter

Expand Down Expand Up @@ -2323,6 +2324,19 @@ def update_csv_file(csv_file):
r = requests.get(self.url_for_bmrb_cs_stat_dir + csv_file, timeout=5.0)
with open(os.path.join(self.stat_dir + csv_file), 'w') as f_out:
f_out.write(r.text)
if csv_file in ('rna_filt.csv', 'rna_full.csv'):
src_csv_file = os.path.join(self.stat_dir, csv_file)
dst_csv_file = src_csv_file + '~'
shutil.copyfile(src_csv_file, dst_csv_file)
with open(dst_csv_file, 'r') as f_in, \
open(src_csv_file, 'w') as f_out:
for line in f_in:
row = line.split(',')
if row[0] in ('A', 'C', 'G', 'U') and row[1] == '"H5"""':
continue
f_out.write(line)
os.remove(dst_csv_file)

except Exception as e:
self.__lfh.write(f"+BMRBChemShiftStat.updateStatCsvFiles() ++ Error - {e}\n")

Expand Down
74 changes: 49 additions & 25 deletions wwpdb/utils/nmr/NmrDpUtility.py
Original file line number Diff line number Diff line change
Expand Up @@ -11270,6 +11270,7 @@ def __detectContentSubTypeOfLegacyMr(self):
try:

if file_type in parsable_mr_file_types:

sll_pred = False
if file_path in self.__sll_pred_holder and file_type in self.__sll_pred_holder[file_path]:
sll_pred = self.__sll_pred_holder[file_path][file_type]
Expand Down Expand Up @@ -12780,11 +12781,11 @@ def __divideLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path,

test_reader = self.__getSimpleMrPtFileReader('nm-res-gro', False)

_, parser_err_listener, lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)
_, _parser_err_listener, _lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)

has_lexer_error = lexer_err_listener is not None and lexer_err_listener.getMessageList() is not None
_has_lexer_error = _lexer_err_listener is not None and _lexer_err_listener.getMessageList() is not None

if not has_lexer_error:
if not _has_lexer_error:

if self.__mr_debug:
self.__lfh.write('DIV-MR-EXIT #3-6\n')
Expand All @@ -12793,11 +12794,11 @@ def __divideLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path,

test_reader = self.__getSimpleMrPtFileReader('nm-aux-gro', False)

_, parser_err_listener, lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)
_, _parser_err_listener, _lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)

has_lexer_error = lexer_err_listener is not None and lexer_err_listener.getMessageList() is not None
_has_lexer_error = _lexer_err_listener is not None and _lexer_err_listener.getMessageList() is not None

if not has_lexer_error:
if not _has_lexer_error:

if self.__mr_debug:
self.__lfh.write('DIV-MR-EXIT #3-7\n')
Expand Down Expand Up @@ -13046,7 +13047,6 @@ def __divideLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path,
if err_column_position == 0 and file_type not in linear_mr_file_types:

for test_file_type in linear_mr_file_types:

test_reader = self.__getSimpleMrPtFileReader(test_file_type, False)

listener, parser_err_listener, lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)
Expand Down Expand Up @@ -13213,6 +13213,9 @@ def __divideLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path,
if div_src:
os.remove(file_path)

if self.__mr_debug:
self.__lfh.write(f'{valid_types} {possible_types}\n')

os.rename(div_try_file, div_dst_file)

file_path = div_dst_file
Expand All @@ -13234,6 +13237,9 @@ def __divideLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path,
if set_valid_types == {'nm-res-cha', 'nm-res-cns', 'nm-res-xpl'}:
file_type = 'nm-res-cha'

if self.__mr_debug:
self.__lfh.write(f' -> {file_type}\n')

self.__testFormatValidityOfLegacyMr(file_path, file_type, src_path, offset)

if self.__mr_debug:
Expand Down Expand Up @@ -13350,12 +13356,12 @@ def __peelLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path, of

test_reader = self.__getSimpleMrPtFileReader(test_file_type, False)

_, parser_err_listener, lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)
_, _parser_err_listener, _lexer_err_listener = test_reader.parse(test_line, None, isFilePath=False)

has_lexer_error = lexer_err_listener is not None and lexer_err_listener.getMessageList() is not None
has_parser_error = parser_err_listener is not None and parser_err_listener.getMessageList() is not None
_has_lexer_error = _lexer_err_listener is not None and _lexer_err_listener.getMessageList() is not None
_has_parser_error = _parser_err_listener is not None and _parser_err_listener.getMessageList() is not None

if not has_lexer_error and not has_parser_error:
if not _has_lexer_error and not _has_parser_error:

if self.__mr_debug:
self.__lfh.write('PEEL-MR-EXIT #2\n')
Expand Down Expand Up @@ -13405,11 +13411,12 @@ def __peelLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path, of
has_content = bool(listener is not None and len(listener.getContentSubtype()) > 0)

if has_lexer_error or has_parser_error or not has_content:

test_reader = self.__getSimpleMrPtFileReader('nm-res-xpl', False)

_, _, lexer_err_listener = test_reader.parse(prev_input, None, isFilePath=False)
_, _parser_err_listener, _lexer_err_listener = test_reader.parse(prev_input, None, isFilePath=False)

has_lexer_error = lexer_err_listener is not None and lexer_err_listener.getMessageList() is not None
_has_lexer_error = _lexer_err_listener is not None and _lexer_err_listener.getMessageList() is not None

if not has_lexer_error:
err_line_number -= 1
Expand Down Expand Up @@ -13610,6 +13617,9 @@ def __peelLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path, of
if j3 == 0:
os.remove(div_try_file)

if self.__mr_debug:
self.__lfh.write(f'{valid_types} {possible_types}\n')

file_path = div_dst_file

if len_valid_types == 1:
Expand All @@ -13629,6 +13639,9 @@ def __peelLegacyMrIfNecessary(self, file_path, file_type, err_desc, src_path, of
if set_valid_types == {'nm-res-cha', 'nm-res-cns', 'nm-res-xpl'}:
file_type = 'nm-res-cha'

if self.__mr_debug:
self.__lfh.write(f' -> {file_type}\n')

self.__testFormatValidityOfLegacyMr(file_path, file_type, src_path, offset)

if self.__mr_debug:
Expand Down Expand Up @@ -13870,7 +13883,7 @@ def __divideLegacyMr(self, file_path, file_type, err_desc, src_path, offset):
if reader is None:
reader = self.__getSimpleMrPtFileReader(file_type, False)

_, _, lexer_err_listener = reader.parse(test_line, None, isFilePath=False)
_, parser_err_listener, lexer_err_listener = reader.parse(test_line, None, isFilePath=False)

has_lexer_error = lexer_err_listener is not None and lexer_err_listener.getMessageList() is not None

Expand Down Expand Up @@ -14140,6 +14153,9 @@ def __divideLegacyMr(self, file_path, file_type, err_desc, src_path, offset):

os.rename(div_try_file, div_dst_file)

if self.__mr_debug:
self.__lfh.write(f'{valid_types} {possible_types}\n')

file_path = div_dst_file

if len_valid_types == 1:
Expand All @@ -14159,6 +14175,9 @@ def __divideLegacyMr(self, file_path, file_type, err_desc, src_path, offset):
if set_valid_types == {'nm-res-cha', 'nm-res-cns', 'nm-res-xpl'}:
file_type = 'nm-res-cha'

if self.__mr_debug:
self.__lfh.write(f' -> {file_type}\n')

self.__testFormatValidityOfLegacyMr(file_path, file_type, src_path, offset)

if self.__mr_debug:
Expand Down Expand Up @@ -14273,10 +14292,9 @@ def __detectOtherPossibleFormatAsErrorOfLegacyMr(self, file_path, file_name, fil
valid_types.update(_valid_types)
possible_types.update(_possible_types)

if (not is_valid or multiple_check) and file_type != 'nm-res-cha':
if (not is_valid or 'Syntax error' in err) and file_type != 'nm-res-cha':
_is_valid, _err, _genuine_type, _valid_types, _possible_types =\
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-cha',
agreed_w_cns=agreed_w_cns)
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-cha')

is_valid |= _is_valid
err += _err
Expand Down Expand Up @@ -14384,9 +14402,9 @@ def __detectOtherPossibleFormatAsErrorOfLegacyMr(self, file_path, file_name, fil
valid_types.update(_valid_types)
possible_types.update(_possible_types)

if (not is_valid or multiple_check) and file_type != 'nm-res-noa':
if (not is_valid or multiple_check) and file_type != 'nm-res-ros':
_is_valid, _err, _genuine_type, _valid_types, _possible_types =\
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-noa')
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-ros')

is_valid |= _is_valid
err += _err
Expand All @@ -14395,9 +14413,9 @@ def __detectOtherPossibleFormatAsErrorOfLegacyMr(self, file_path, file_name, fil
valid_types.update(_valid_types)
possible_types.update(_possible_types)

if (not is_valid or multiple_check) and file_type != 'nm-res-ros':
if (not is_valid or multiple_check) and file_type != 'nm-res-syb':
_is_valid, _err, _genuine_type, _valid_types, _possible_types =\
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-ros')
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-syb')

is_valid |= _is_valid
err += _err
Expand All @@ -14406,9 +14424,9 @@ def __detectOtherPossibleFormatAsErrorOfLegacyMr(self, file_path, file_name, fil
valid_types.update(_valid_types)
possible_types.update(_possible_types)

if (not is_valid or multiple_check) and file_type != 'nm-res-syb':
if not is_valid and file_type != 'nm-res-noa':
_is_valid, _err, _genuine_type, _valid_types, _possible_types =\
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-syb')
self.__detectOtherPossibleFormatAsErrorOfLegacyMr__(file_path, file_name, file_type, dismiss_err_lines, 'nm-res-noa')

is_valid |= _is_valid
err += _err
Expand Down Expand Up @@ -15541,6 +15559,9 @@ def split_concat_comp_id_seq_id(string):
len_valid_types = len(valid_types)
len_possible_types = len(possible_types)

if self.__mr_debug:
self.__lfh.write(f'{valid_types} {possible_types}\n')

if len_valid_types == 0 and len_possible_types == 0:

ins_msg = ''
Expand Down Expand Up @@ -16252,6 +16273,9 @@ def split_concat_comp_id_seq_id(string):
len_valid_types = len(valid_types)
len_possible_types = len(possible_types)

if self.__mr_debug:
self.__lfh.write(f'{valid_types} {possible_types}\n')

if len_valid_types == 0 and len_possible_types == 0:

ins_msg = ''
Expand Down Expand Up @@ -37866,7 +37890,7 @@ def __calculateStatsOfAssignedChemShift(self, file_list_id, sf_framecode, lp_dat
if len(rci) > 0:
ent['random_coil_index'] = rci

if file_type == 'nmr-star' and self.__star_data_type[file_list_id] != 'Loop':
if file_type == 'nmr-star' and self.__star_data_type[file_list_id] == 'Entry':
lp_category = self.lp_categories[file_type]['chem_shift']
sf = self.__star_data[file_list_id].get_saveframe_by_name(sf_framecode)
lp = next(lp for lp in sf.loops if lp.category == lp_category)
Expand Down Expand Up @@ -38772,7 +38796,7 @@ def __calculateStatsOfDistanceRestraint(self, file_list_id, sf_framecode, lp_dat
if len(range_of_vals) > 1:
ent['histogram_of_discrepancy'] = {'range_of_values': range_of_vals, 'number_of_values': transposed, 'annotations': dist_ann}

if file_type == 'nmr-star' and self.__star_data_type[file_list_id] != 'Loop':
if file_type == 'nmr-star' and self.__star_data_type[file_list_id] == 'Entry':
lp_category = self.lp_categories[file_type]['dist_restraint']
sf = self.__star_data[file_list_id].get_saveframe_by_name(sf_framecode)
lp = next(lp for lp in sf.loops if lp.category == lp_category)
Expand Down
10 changes: 5 additions & 5 deletions wwpdb/utils/nmr/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ where **setSource()** and **setLog()** are methods to add unified NMR data file

3. Add input file path and parameters

Any input file paths and parameters should be set through *addInput()* method:
Any input file paths and parameters should be set through the **addInput()** method:

```python
def addInput(self, name=None, value=None, type='file')
Expand Down Expand Up @@ -280,19 +280,19 @@ nm-aux-amb|nm-aux-amb|nmr-restraints/any|Topology file in AMBER format
nm-res-amb|nm-res-amb|nmr-restraints/amber|Restraint file in AMBER format
nm-res-ari|nm-res-ari|nmr-restraints/aria|Restraint file in ARIA format
nm-res-bio|nm-res-bio|nmr-restraints/biosym|Restraint file in BIOSYM format
nm-aux-cha|not applicable|nmr-restraints/any|Topology file in CHARMM format (aka. CHARMM extended CRD)
nm-aux-cha|**not applicable**|nmr-restraints/any|Topology file in CHARMM format (aka. CHARMM extended CRD)
nm-res-cha|nm-res-cha|nmr-restraints/charmm|Restraint file in CHARMM format
nm-res-cns|nm-res-cns|nmr-restraints/cns|Restraint file in CNS format
nm-res-cya|nm-res-cya|nmr-restraints/cyana|Restraint file in CYANA format
nm-res-dyn|nm-res-dyn|nmr-restraints/dynamo|Restraint file in DYNAMO/PALES/TALOS format
nm-aux-gro|nm-aux-gro|nmr-restraints/any|Topology file in GROMACS format
nm-res-gro|nm-res-gro|nmr-restraints/gromacs|Restraint file in GROMACS format
nm-res-isd|nm-res-isd|nmr-restraints/isd|Restraint file in ISD format
nm-res-noa|not applicable|nmr-restraints/cyana|Restraint file in CYANA NOA format
nm-res-noa|**not applicable**|nmr-restraints/cyana|Restraint file in CYANA NOA format
nm-res-ros|nm-res-ros|nmr-restraints/rosetta|Restraint file in ROSETTA format
nm-res-syb|nm-res-syb|nmr-restraints/sybyl|Restraint file in SYBYL format
nm-res-xpl|nm-res-xpl|nmr-restraints/xplor-nih|Restraint file in XPLOR-NIH format
nm-res-oth|nm-res-oth|nmr-restraints/any|Restraint file in other format
nm-res-mr|not applicable|nmr-restraints/pdb-mr|Restraint file in PDB-MR format
nm-res-sax|not applicable|nmr-restraints/any|SAX CSV file
nm-res-mr|**not applicable**|nmr-restraints/pdb-mr|Restraint file in PDB-MR format
nm-res-sax|**not applicable**|nmr-restraints/any|SAX CSV file
nm-pea-any|nm-pea-any|nmr-peaks/any|Any spectral peak list file
Loading

0 comments on commit 3f0efeb

Please sign in to comment.