Skip to content

Commit

Permalink
Add spectral peak list lexer and parser grammars for NmrPipe, NmrView…
Browse files Browse the repository at this point in the history
…, Sparky, and Xeasy

Add spectral peak list file types for ARIA (nm-pea-ari) and XWIN-NMR (nm-pea-xwi)
  • Loading branch information
yokochi47 committed Nov 28, 2024
1 parent d2f5c32 commit bdfa21d
Show file tree
Hide file tree
Showing 19 changed files with 816 additions and 59 deletions.
16 changes: 5 additions & 11 deletions wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4707,9 +4707,7 @@ def skip_empty_value_error(lp, idx):
for idx, row in enumerate(tag_data):
ent = {} # entity

missing_mandatory_data = False
remove_bad_pattern = False
clear_bad_pattern = False
missing_mandatory_data = remove_bad_pattern = clear_bad_pattern = False

for j in range(tag_len):
name = tags[j]
Expand Down Expand Up @@ -12391,8 +12389,7 @@ def get_nef_sf_framecode(sf, prefix):

entity_del_atom_loop = next((loop for loop in saveframe if loop.category == '_Entity_deleted_atom'), None)

has_pk_can_format = False
has_pk_row_format = False
has_pk_can_format = has_pk_row_format = False

for loop in saveframe:

Expand Down Expand Up @@ -12453,8 +12450,7 @@ def get_nef_sf_framecode(sf, prefix):
pass

if saveframe.category == 'entry_information':
has_format_name = False
has_format_ver = False
has_format_name = has_format_ver = False
for tags in sf.tags:
if tags[0] == 'format_name':
has_format_name = True
Expand Down Expand Up @@ -12594,8 +12590,7 @@ def get_nef_sf_framecode(sf, prefix):

entity_del_atom_loop = next((loop for loop in saveframe if loop.category == '_Entity_deleted_atom'), None)

has_pk_can_format = False
has_pk_row_format = False
has_pk_can_format = has_pk_row_format = False

for loop in saveframe:

Expand Down Expand Up @@ -12656,8 +12651,7 @@ def get_nef_sf_framecode(sf, prefix):
pass

if sf.category == 'entry_information':
has_format_name = False
has_format_ver = False
has_format_name = has_format_ver = False
for tags in sf.tags:
if tags[0] == 'format_name':
has_format_name = True
Expand Down
65 changes: 18 additions & 47 deletions wwpdb/utils/nmr/NmrDpUtility.py
Original file line number Diff line number Diff line change
Expand Up @@ -10285,35 +10285,20 @@ def __detectContentSubTypeOfLegacyMr(self):
'nm-res-isd', 'nm-res-ari', 'nm-res-oth') or is_aux_amb or is_aux_gro or is_aux_cha else 1))
cs_atom_like_names = list(filter(is_half_spin_nuclei, atom_like_names)) # DAOTHER-7491

has_chem_shift = False
has_dist_restraint = False
has_dihed_restraint = False
has_rdc_restraint = False
has_plane_restraint = False
has_hbond_restraint = False
has_ssbond_restraint = False
has_rdc_origins = False
has_spectral_peak = False

has_coordinate = False
has_amb_coord = False
has_amb_inpcrd = False
has_ens_coord = False
has_topology = False

has_first_atom = False
has_chem_shift = has_dist_restraint = has_dihed_restraint = has_rdc_restraint =\
has_plane_restraint = has_hbond_restraint = has_ssbond_restraint = has_rdc_origins = has_spectral_peak =\
has_coordinate = has_amb_coord = has_amb_inpcrd = has_ens_coord = has_topology = has_first_atom = False

if file_type in ('nm-res-xpl', 'nm-res-cns'):

with open(file_path, 'r', encoding='utf-8') as ifh:

atom_likes = atom_unlikes = cs_atom_likes = resid_likes = real_likes = 0
names, resids = [], []
cs_range_like = dist_range_like = dihed_range_like = rdc_range_like = False

rdc_atom_names = set()

cs_range_like = dist_range_like = dihed_range_like = rdc_range_like = False

for line in ifh:

if line.startswith('ATOM ') and line.count('.') >= 3:
Expand Down Expand Up @@ -10475,15 +10460,13 @@ def __detectContentSubTypeOfLegacyMr(self):
elif file_type == 'nm-res-amb':

with open(file_path, 'r', encoding='utf-8') as ifh:

in_rst = in_iat = in_igr1 = in_igr2 = False
in_rst = in_iat = in_igr1 = in_igr2 =\
dist_range_like = dihed_range_like = rdc_range_like = False

names, values = [], []

pos = 0

dist_range_like = dihed_range_like = rdc_range_like = False

for line in ifh:

if line.startswith('ATOM ') and line.count('.') >= 3:
Expand Down Expand Up @@ -10663,19 +10646,16 @@ def __detectContentSubTypeOfLegacyMr(self):

if is_aux_amb:

has_atom_name = has_residue_label = has_residue_pointer = has_amb_atom_type = False

chk_atom_name_format = chk_residue_label_format = chk_residue_pointer_format = chk_amb_atom_type_format = False

in_atom_name = in_residue_label = in_residue_pointer = in_amb_atom_type = False
has_atom_name = has_residue_label = has_residue_pointer = has_amb_atom_type =\
chk_atom_name_format = chk_residue_label_format = chk_residue_pointer_format = chk_amb_atom_type_format =\
in_atom_name = in_residue_label = in_residue_pointer = in_amb_atom_type = False

atom_names = residue_labels = residue_pointers = amb_atom_types = 0

elif is_aux_gro:

has_system = has_molecules = has_atoms = False

in_system = in_molecules = in_atoms = False
has_system = has_molecules = has_atoms =\
in_system = in_molecules = in_atoms = False

system_names = molecule_names = atom_names = 0

Expand Down Expand Up @@ -11815,9 +11795,8 @@ def __detectContentSubTypeOfLegacyPk(self):
try:

header = True
in_header = pdb_record = cs_str = mr_str = False

has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
in_header = pdb_record = cs_str = mr_str =\
has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False

first_str_line_num = last_str_line_num = -1

Expand Down Expand Up @@ -14657,9 +14636,8 @@ def __extractPublicMrFileIntoLegacyMr(self):
try:

header = True
in_header = pdb_record = footer = False

has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
in_header = pdb_record = footer =\
has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False

first_str_line_num = last_str_line_num = -1

Expand Down Expand Up @@ -18276,7 +18254,6 @@ def __appendPolymerSequenceAlignment(self):
"""

is_done = True

update_poly_seq = False

self.__alt_chain = False
Expand Down Expand Up @@ -25760,9 +25737,7 @@ def fill_cs_row(lp, index, _row, prefer_auth_atom_name, coord_atom_site, _seq_ke

while True:

regenerate_request = False # DAOTHER-9065

can_auth_asym_id_mapping_failed = False # DAOTHER-9158
regenerate_request = can_auth_asym_id_mapping_failed = False # DAOTHER-9065, DAOTHER-9158

lp.clear_data()

Expand Down Expand Up @@ -52306,13 +52281,11 @@ def __resetBoolValueInLoop(self):
key_items = self.key_items[file_type][content_subtype]
data_items = self.data_items[file_type][content_subtype]

has_bool_key = False
has_bool_key = has_bool_data = False

if key_items is not None:
has_bool_key = next((k['type'] == 'bool' for k in key_items if k['type'] == 'bool'), False)

has_bool_data = False

if data_items is not None:
has_bool_data = next((d['type'] == 'bool' for d in data_items if d['type'] == 'bool'), False)

Expand Down Expand Up @@ -52408,13 +52381,11 @@ def __resetBoolValueInAuxLoop(self):
key_items = self.aux_key_items[file_type][content_subtype][lp_category]
data_items = self.aux_data_items[file_type][content_subtype][lp_category]

has_bool_key = False
has_bool_key = has_bool_data = False

if key_items is not None:
has_bool_key = next((k['type'] == 'bool' for k in key_items if k['type'] == 'bool'), False)

has_bool_data = False

if data_items is not None:
has_bool_data = next((d['type'] == 'bool' for d in data_items if d['type'] == 'bool'), False)

Expand Down
167 changes: 167 additions & 0 deletions wwpdb/utils/tests-nmr/antlr-grammars-v4.10/NmrPipePKLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
NmrPipe PK (Spectral peak list) lexer grammar for ANTLR v4.
Copyright 2024 Masashi Yokochi
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

lexer grammar NmrPipePKLexer;

/* NmrPipe: Syntax
See also https://spin.niddk.nih.gov/NMRPipe/
*/

// ---------------------------------------------------------------------------
// Default mode.
// Rule order in this file is significant: when two rules match the same
// number of characters, the rule declared first wins.
// ---------------------------------------------------------------------------

// Header keywords. Each one pushes a dedicated mode for the remainder of the
// header line; every such mode pops back to the default mode on a line break.
Data: 'DATA' -> pushMode(DATA_MODE);

Vars: 'VARS' -> pushMode(VARS_MODE);

Format: 'FORMAT' -> pushMode(FORMAT_MODE);

Null_value: 'NULLVALUE' -> pushMode(NULL_VALUE_MODE);
Null_string: 'NULLSTRING' -> pushMode(NULL_STRING_MODE);

// Numeric literals. The three rules overlap on purpose:
//   - a bare digit string matches all three with equal length and is emitted
//     as Integer (declared first);
//   - a value with a decimal point but no exponent matches Float and Real and
//     is emitted as Float;
//   - only a value carrying an exponent ([Ee], optional sign, digits) is
//     emitted as Real.
Integer: ('+' | '-')? DECIMAL;
Float: ('+' | '-')? (DECIMAL | DEC_DOT_DEC);
Real: ('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;
// Digits are mandatory after the point ('.5' and '1.5' match, '1.' does not).
fragment DEC_DOT_DEC: (DECIMAL '.' DECIMAL) | ('.' DECIMAL);
fragment DEC_DIGIT: [0-9];
fragment DECIMAL: DEC_DIGIT+;

// Whole-line comments introduced by '#', '!' or ';', sent to the HIDDEN
// channel. The trailing "marker* ~[\r\n]*" part is already covered by the
// first ~[\r\n]*, so each rule effectively reads: marker+ then rest of line.
SHARP_COMMENT: '#'+ ~[\r\n]* '#'* ~[\r\n]* -> channel(HIDDEN);
EXCLM_COMMENT: '!'+ ~[\r\n]* '!'* ~[\r\n]* -> channel(HIDDEN);
SMCLN_COMMENT: ';'+ ~[\r\n]* ';'* ~[\r\n]* -> channel(HIDDEN);

// Generic name token: one START_CHAR followed by any number of NAME_CHARs.
Simple_name: SIMPLE_NAME;
//Residue_number: Integer;
//Residue_name: SIMPLE_NAME;
//Atom_name: ALPHA_NUM ATM_NAME_CHAR*;

fragment ALPHA: [A-Za-z];
fragment ALPHA_NUM: ALPHA | DEC_DIGIT;
// Characters allowed at the start of a name.
fragment START_CHAR: ALPHA_NUM | '_' | '-' | '+' | '.' | '*' | '#' | '?';
// Quote characters are allowed inside a name but cannot start one.
fragment NAME_CHAR: START_CHAR | '\'' | '"';
//fragment ATM_NAME_CHAR: ALPHA_NUM | '\'';
fragment SIMPLE_NAME: START_CHAR NAME_CHAR*;

// In the default mode line breaks are plain whitespace and are skipped
// (the header modes instead use the line break to pop the mode).
SPACE: [ \t\r\n]+ -> skip;
// Brace-delimited comment; may nest and is matched non-greedily.
ENCLOSE_COMMENT: '{' (ENCLOSE_COMMENT | .)*? '}' -> channel(HIDDEN);
// SECTION_COMMENT: a comment marker followed only by optional spaces and the
// line break(s). LINE_COMMENT: the same markers followed by the rest of the
// line. Both go to the HIDDEN channel. They compete with the *_COMMENT rules
// above by longest match, with the earlier rule winning on equal length.
SECTION_COMMENT: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
LINE_COMMENT: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);

// ---------------------------------------------------------------------------
// DATA_MODE: entered by the 'DATA' keyword, left at the next line break.
// Token names in this mode carry the _DA suffix.
// ---------------------------------------------------------------------------
mode DATA_MODE;

// Axis selector keywords.
X_axis_DA: 'X_AXIS';
Y_axis_DA: 'Y_AXIS';
Z_axis_DA: 'Z_AXIS';
A_axis_DA: 'A_AXIS';

// A Float (optional sign, digits, optional decimal part) immediately followed
// by the literal 'ppm', e.g. '10.5ppm'. Declared before Simple_name_DA so it
// wins the equal-length tie with that rule.
Ppm_value_DA: Float 'ppm';

// Same numeric patterns as the default-mode Integer/Float/Real rules, with
// the same declaration-order tie-breaking.
Integer_DA: ('+' | '-')? DECIMAL;
Float_DA: ('+' | '-')? (DECIMAL | DEC_DOT_DEC);
Real_DA: ('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;

Simple_name_DA: SIMPLE_NAME;

// Only spaces and tabs are skipped here; the line break is a real token.
SPACE_DA: [ \t]+ -> skip;
// Line break ends the DATA line: the token is emitted (it is not skipped or
// hidden) and the lexer returns to the previous mode.
RETURN_DA: [\r\n]+ -> popMode;

//SECTION_COMMENT_DA: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
// Comment marker followed by the rest of the line (line break excluded, so
// RETURN_DA still fires afterwards).
LINE_COMMENT_DA: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);

// ---------------------------------------------------------------------------
// VARS_MODE: entered by the 'VARS' keyword, left at the next line break.
// The keyword rules below are the column names this lexer recognises on the
// VARS line; any other name falls through to Simple_name_VA.
// NOTE(review): the physical meaning of each column is defined by NMRPipe,
// not by this file - consult the NMRPipe peak table documentation.
// ---------------------------------------------------------------------------
mode VARS_MODE;

Index: 'INDEX';
X_axis: 'X_AXIS';
Y_axis: 'Y_AXIS';
Z_axis: 'Z_AXIS';
A_axis: 'A_AXIS';

// Note: Da is the column literal 'DA'; it is unrelated to the _DA suffix
// used for DATA_MODE token names.
Dx: 'DX';
Dy: 'DY';
Dz: 'DZ';
Da: 'DA';

X_ppm: 'X_PPM';
Y_ppm: 'Y_PPM';
Z_ppm: 'Z_PPM';
A_ppm: 'A_PPM';

X_hz: 'X_HZ';
Y_hz: 'Y_HZ';
Z_hz: 'Z_HZ';
A_hz: 'A_HZ';

// 'XW' is a prefix of 'XW_HZ' (likewise for Y/Z/A); the longer literal is
// selected by longest match, so the relative order of these two groups does
// not matter.
Xw: 'XW';
Yw: 'YW';
Zw: 'ZW';
Aw: 'AW';

Xw_hz: 'XW_HZ';
Yw_hz: 'YW_HZ';
Zw_hz: 'ZW_HZ';
Aw_hz: 'AW_HZ';

X1: 'X1';
X3: 'X3';
Y1: 'Y1';
Y3: 'Y3';
Z1: 'Z1';
Z3: 'Z3';
A1: 'A1';
A3: 'A3';

Height: 'HEIGHT';
DHeight: 'DHEIGHT';
Vol: 'VOL';
Pchi2: 'PCHI2';
Type: 'TYPE';
Ass: 'ASS';
ClustId: 'CLUSTID';
Memcnt: 'MEMCNT';

// Same numeric patterns as the default-mode Integer/Float/Real rules, with
// the same declaration-order tie-breaking.
Integer_VA: ('+' | '-')? DECIMAL;
Float_VA: ('+' | '-')? (DECIMAL | DEC_DOT_DEC);
Real_VA: ('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;

// Fallback for any name that is not one of the keywords above. It must stay
// after the keyword rules: an exact keyword ties with this rule on length
// and is won by the earlier declaration.
Simple_name_VA: SIMPLE_NAME;

// Only spaces and tabs are skipped here; the line break is a real token.
SPACE_VA: [ \t]+ -> skip;
// Line break ends the VARS line: the token is emitted and the mode is popped.
RETURN_VA: [\r\n]+ -> popMode;

//SECTION_COMMENT_VA: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
// Comment marker followed by the rest of the line (line break excluded).
LINE_COMMENT_VA: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);

// ---------------------------------------------------------------------------
// FORMAT_MODE: entered by the 'FORMAT' keyword, left at the next line break.
// ---------------------------------------------------------------------------
mode FORMAT_MODE;

// printf-style conversion: '%', optional width digits, then one of
//   's' | 'd' | '.' digits 'f' | optional '+' followed by 'e'
// e.g. '%s', '%5d', '%9.3f', '%.5f', '%+e'.
// This mode declares no general name rule, so the only other accepted input
// is whitespace, a line comment, or the line break.
// NOTE(review): forms such as '%8.3e' or '%-5d' are not covered - confirm
// they never occur in the FORMAT lines this lexer has to read.
Format_code: '%' DECIMAL? ('s' | 'd' | '.' DECIMAL 'f' | '+'? 'e');

// Only spaces and tabs are skipped here; the line break is a real token.
SPACE_FO: [ \t]+ -> skip;
// Line break ends the FORMAT line: the token is emitted and the mode is popped.
RETURN_FO: [\r\n]+ -> popMode;

//SECTION_COMMENT_FO: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
// Comment marker followed by the rest of the line (line break excluded).
LINE_COMMENT_FO: ('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);

// ---------------------------------------------------------------------------
// NULL_VALUE_MODE: entered by the 'NULLVALUE' keyword, left at the next line
// break.
// ---------------------------------------------------------------------------
mode NULL_VALUE_MODE;

// Any run of non-whitespace characters. No comment rules are declared in this
// mode, so characters such as '#' or '!' become part of the value.
Any_name_NV: ~[ \t\r\n]+;

// Only spaces and tabs are skipped here; the line break is a real token.
SPACE_NV: [ \t]+ -> skip;
// Line break ends the NULLVALUE line: the token is emitted and the mode is
// popped.
RETURN_NV: [\r\n]+ -> popMode;

// ---------------------------------------------------------------------------
// NULL_STRING_MODE: entered by the 'NULLSTRING' keyword, left at the next
// line break.
// ---------------------------------------------------------------------------
mode NULL_STRING_MODE;

// Any run of non-whitespace characters. No comment rules are declared in this
// mode, so characters such as '#' or '*' become part of the value.
Any_name_NS: ~[ \t\r\n]+;

// Only spaces and tabs are skipped here; the line break is a real token.
SPACE_NS: [ \t]+ -> skip;
// Line break ends the NULLSTRING line: the token is emitted and the mode is
// popped.
RETURN_NS: [\r\n]+ -> popMode;

Loading

0 comments on commit bdfa21d

Please sign in to comment.