Add spectral peak list lexer and parser grammars for NmrPipe, NmrView, Sparky, and Xeasy

Add spectral peak list file types for ARIA (nm-pea-ari) and XWIN-NMR (nm-pea-xwi)
wwPDB · Nov 28, 2024 · bdfa21d · bdfa21d
1 parent d2f5c32
commit bdfa21d
Show file tree

Hide file tree

Showing 19 changed files with 816 additions and 59 deletions.
diff --git a/wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py b/wwpdb/utils/nmr/NEFTranslator/NEFTranslator.py
@@ -4707,9 +4707,7 @@ def skip_empty_value_error(lp, idx):
                 for idx, row in enumerate(tag_data):
                     ent = {}  # entity
 
-                    missing_mandatory_data = False
-                    remove_bad_pattern = False
-                    clear_bad_pattern = False
+                    missing_mandatory_data = remove_bad_pattern = clear_bad_pattern = False
 
                     for j in range(tag_len):
                         name = tags[j]
@@ -12391,8 +12389,7 @@ def get_nef_sf_framecode(sf, prefix):
 
                 entity_del_atom_loop = next((loop for loop in saveframe if loop.category == '_Entity_deleted_atom'), None)
 
-                has_pk_can_format = False
-                has_pk_row_format = False
+                has_pk_can_format = has_pk_row_format = False
 
                 for loop in saveframe:
 
@@ -12453,8 +12450,7 @@ def get_nef_sf_framecode(sf, prefix):
                         pass
 
                 if saveframe.category == 'entry_information':
-                    has_format_name = False
-                    has_format_ver = False
+                    has_format_name = has_format_ver = False
                     for tags in sf.tags:
                         if tags[0] == 'format_name':
                             has_format_name = True
@@ -12594,8 +12590,7 @@ def get_nef_sf_framecode(sf, prefix):
 
             entity_del_atom_loop = next((loop for loop in saveframe if loop.category == '_Entity_deleted_atom'), None)
 
-            has_pk_can_format = False
-            has_pk_row_format = False
+            has_pk_can_format = has_pk_row_format = False
 
             for loop in saveframe:
 
@@ -12656,8 +12651,7 @@ def get_nef_sf_framecode(sf, prefix):
                     pass
 
             if sf.category == 'entry_information':
-                has_format_name = False
-                has_format_ver = False
+                has_format_name = has_format_ver = False
                 for tags in sf.tags:
                     if tags[0] == 'format_name':
                         has_format_name = True

diff --git a/wwpdb/utils/nmr/NmrDpUtility.py b/wwpdb/utils/nmr/NmrDpUtility.py
@@ -10285,35 +10285,20 @@ def __detectContentSubTypeOfLegacyMr(self):
                                                                                  'nm-res-isd', 'nm-res-ari', 'nm-res-oth') or is_aux_amb or is_aux_gro or is_aux_cha else 1))
             cs_atom_like_names = list(filter(is_half_spin_nuclei, atom_like_names))  # DAOTHER-7491
 
-            has_chem_shift = False
-            has_dist_restraint = False
-            has_dihed_restraint = False
-            has_rdc_restraint = False
-            has_plane_restraint = False
-            has_hbond_restraint = False
-            has_ssbond_restraint = False
-            has_rdc_origins = False
-            has_spectral_peak = False
-
-            has_coordinate = False
-            has_amb_coord = False
-            has_amb_inpcrd = False
-            has_ens_coord = False
-            has_topology = False
-
-            has_first_atom = False
+            has_chem_shift = has_dist_restraint = has_dihed_restraint = has_rdc_restraint =\
+                has_plane_restraint = has_hbond_restraint = has_ssbond_restraint = has_rdc_origins = has_spectral_peak =\
+                has_coordinate = has_amb_coord = has_amb_inpcrd = has_ens_coord = has_topology = has_first_atom = False
 
             if file_type in ('nm-res-xpl', 'nm-res-cns'):
 
                 with open(file_path, 'r', encoding='utf-8') as ifh:
 
                     atom_likes = atom_unlikes = cs_atom_likes = resid_likes = real_likes = 0
                     names, resids = [], []
+                    cs_range_like = dist_range_like = dihed_range_like = rdc_range_like = False
 
                     rdc_atom_names = set()
 
-                    cs_range_like = dist_range_like = dihed_range_like = rdc_range_like = False
-
                     for line in ifh:
 
                         if line.startswith('ATOM ') and line.count('.') >= 3:
@@ -10475,15 +10460,13 @@ def __detectContentSubTypeOfLegacyMr(self):
             elif file_type == 'nm-res-amb':
 
                 with open(file_path, 'r', encoding='utf-8') as ifh:
-
-                    in_rst = in_iat = in_igr1 = in_igr2 = False
+                    in_rst = in_iat = in_igr1 = in_igr2 =\
+                        dist_range_like = dihed_range_like = rdc_range_like = False
 
                     names, values = [], []
 
                     pos = 0
 
-                    dist_range_like = dihed_range_like = rdc_range_like = False
-
                     for line in ifh:
 
                         if line.startswith('ATOM ') and line.count('.') >= 3:
@@ -10663,19 +10646,16 @@ def __detectContentSubTypeOfLegacyMr(self):
 
                 if is_aux_amb:
 
-                    has_atom_name = has_residue_label = has_residue_pointer = has_amb_atom_type = False
-
-                    chk_atom_name_format = chk_residue_label_format = chk_residue_pointer_format = chk_amb_atom_type_format = False
-
-                    in_atom_name = in_residue_label = in_residue_pointer = in_amb_atom_type = False
+                    has_atom_name = has_residue_label = has_residue_pointer = has_amb_atom_type =\
+                        chk_atom_name_format = chk_residue_label_format = chk_residue_pointer_format = chk_amb_atom_type_format =\
+                        in_atom_name = in_residue_label = in_residue_pointer = in_amb_atom_type = False
 
                     atom_names = residue_labels = residue_pointers = amb_atom_types = 0
 
                 elif is_aux_gro:
 
-                    has_system = has_molecules = has_atoms = False
-
-                    in_system = in_molecules = in_atoms = False
+                    has_system = has_molecules = has_atoms =\
+                        in_system = in_molecules = in_atoms = False
 
                     system_names = molecule_names = atom_names = 0
 
@@ -11815,9 +11795,8 @@ def __detectContentSubTypeOfLegacyPk(self):
             try:
 
                 header = True
-                in_header = pdb_record = cs_str = mr_str = False
-
-                has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
+                in_header = pdb_record = cs_str = mr_str =\
+                    has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
 
                 first_str_line_num = last_str_line_num = -1
 
@@ -14657,9 +14636,8 @@ def __extractPublicMrFileIntoLegacyMr(self):
             try:
 
                 header = True
-                in_header = pdb_record = footer = False
-
-                has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
+                in_header = pdb_record = footer =\
+                    has_datablock = has_anonymous_saveframe = has_save = has_loop = has_stop = False
 
                 first_str_line_num = last_str_line_num = -1
 
@@ -18276,7 +18254,6 @@ def __appendPolymerSequenceAlignment(self):
         """
 
         is_done = True
-
         update_poly_seq = False
 
         self.__alt_chain = False
@@ -25760,9 +25737,7 @@ def fill_cs_row(lp, index, _row, prefer_auth_atom_name, coord_atom_site, _seq_ke
 
             while True:
 
-                regenerate_request = False  # DAOTHER-9065
-
-                can_auth_asym_id_mapping_failed = False  # DAOTHER-9158
+                regenerate_request = can_auth_asym_id_mapping_failed = False  # DAOTHER-9065, DAOTHER-9158
 
                 lp.clear_data()
 
@@ -52306,13 +52281,11 @@ def __resetBoolValueInLoop(self):
                     key_items = self.key_items[file_type][content_subtype]
                     data_items = self.data_items[file_type][content_subtype]
 
-                has_bool_key = False
+                has_bool_key = has_bool_data = False
 
                 if key_items is not None:
                     has_bool_key = next((k['type'] == 'bool' for k in key_items if k['type'] == 'bool'), False)
 
-                has_bool_data = False
-
                 if data_items is not None:
                     has_bool_data = next((d['type'] == 'bool' for d in data_items if d['type'] == 'bool'), False)
 
@@ -52408,13 +52381,11 @@ def __resetBoolValueInAuxLoop(self):
                         key_items = self.aux_key_items[file_type][content_subtype][lp_category]
                         data_items = self.aux_data_items[file_type][content_subtype][lp_category]
 
-                        has_bool_key = False
+                        has_bool_key = has_bool_data = False
 
                         if key_items is not None:
                             has_bool_key = next((k['type'] == 'bool' for k in key_items if k['type'] == 'bool'), False)
 
-                        has_bool_data = False
-
                         if data_items is not None:
                             has_bool_data = next((d['type'] == 'bool' for d in data_items if d['type'] == 'bool'), False)
 

diff --git a/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/NmrPipePKLexer.g4 b/wwpdb/utils/tests-nmr/antlr-grammars-v4.10/NmrPipePKLexer.g4
@@ -0,0 +1,167 @@
+/*
+ NmrPipe PK (Spectral peak list) lexer grammar for ANTLR v4.
+ Copyright 2024 Masashi Yokochi
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+lexer grammar NmrPipePKLexer;
+
+/* NmrPipe: Syntax
+ See also https://spin.niddk.nih.gov/NMRPipe/
+*/
+
+Data:			'DATA' -> pushMode(DATA_MODE);
+
+Vars:			'VARS' -> pushMode(VARS_MODE);
+
+Format:			'FORMAT' -> pushMode(FORMAT_MODE);
+
+Null_value:		'NULLVALUE' -> pushMode(NULL_VALUE_MODE);
+Null_string:		'NULLSTRING' -> pushMode(NULL_STRING_MODE);
+
+Integer:		('+' | '-')? DECIMAL;
+Float:			('+' | '-')? (DECIMAL | DEC_DOT_DEC);
+Real:			('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;
+fragment DEC_DOT_DEC:	(DECIMAL '.' DECIMAL) | ('.' DECIMAL);
+fragment DEC_DIGIT:	[0-9];
+fragment DECIMAL:	DEC_DIGIT+;
+
+SHARP_COMMENT:		'#'+ ~[\r\n]* '#'* ~[\r\n]* -> channel(HIDDEN);
+EXCLM_COMMENT:		'!'+ ~[\r\n]* '!'* ~[\r\n]* -> channel(HIDDEN);
+SMCLN_COMMENT:		';'+ ~[\r\n]* ';'* ~[\r\n]* -> channel(HIDDEN);
+
+Simple_name:		SIMPLE_NAME;
+//Residue_number:	Integer;
+//Residue_name:		SIMPLE_NAME;
+//Atom_name:		ALPHA_NUM ATM_NAME_CHAR*;
+
+fragment ALPHA:		[A-Za-z];
+fragment ALPHA_NUM:	ALPHA | DEC_DIGIT;
+fragment START_CHAR:	ALPHA_NUM | '_' | '-' | '+' | '.' | '*' | '#' | '?';
+fragment NAME_CHAR:	START_CHAR | '\'' | '"';
+//fragment ATM_NAME_CHAR:	ALPHA_NUM | '\'';
+fragment SIMPLE_NAME:	START_CHAR NAME_CHAR*;
+
+SPACE:			[ \t\r\n]+ -> skip;
+ENCLOSE_COMMENT:	'{' (ENCLOSE_COMMENT | .)*? '}' -> channel(HIDDEN);
+SECTION_COMMENT:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
+LINE_COMMENT:		('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);
+
+mode DATA_MODE;
+
+X_axis_DA:		'X_AXIS';
+Y_axis_DA:		'Y_AXIS';
+Z_axis_DA:		'Z_AXIS';
+A_axis_DA:		'A_AXIS';
+
+Ppm_value_DA:		Float 'ppm';
+
+Integer_DA:		('+' | '-')? DECIMAL;
+Float_DA:		('+' | '-')? (DECIMAL | DEC_DOT_DEC);
+Real_DA:		('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;
+
+Simple_name_DA:		SIMPLE_NAME;
+
+SPACE_DA:		[ \t]+ -> skip;
+RETURN_DA:		[\r\n]+ -> popMode;
+
+//SECTION_COMMENT_DA:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
+LINE_COMMENT_DA:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);
+
+mode VARS_MODE;
+
+Index:			'INDEX';
+X_axis:			'X_AXIS';
+Y_axis:			'Y_AXIS';
+Z_axis:			'Z_AXIS';
+A_axis:			'A_AXIS';
+
+Dx:			'DX';
+Dy:			'DY';
+Dz:			'DZ';
+Da:			'DA';
+
+X_ppm:			'X_PPM';
+Y_ppm:			'Y_PPM';
+Z_ppm:			'Z_PPM';
+A_ppm:			'A_PPM';
+
+X_hz:			'X_HZ';
+Y_hz:			'Y_HZ';
+Z_hz:			'Z_HZ';
+A_hz:			'A_HZ';
+
+Xw:			'XW';
+Yw:			'YW';
+Zw:			'ZW';
+Aw:			'AW';
+
+Xw_hz:			'XW_HZ';
+Yw_hz:			'YW_HZ';
+Zw_hz:			'ZW_HZ';
+Aw_hz:			'AW_HZ';
+
+X1:			'X1';
+X3:			'X3';
+Y1:			'Y1';
+Y3:			'Y3';
+Z1:			'Z1';
+Z3:			'Z3';
+A1:			'A1';
+A3:			'A3';
+
+Height:			'HEIGHT';
+DHeight:		'DHEIGHT';
+Vol:			'VOL';
+Pchi2:			'PCHI2';
+Type:			'TYPE';
+Ass:			'ASS';
+ClustId:		'CLUSTID';
+Memcnt:			'MEMCNT';
+
+Integer_VA:		('+' | '-')? DECIMAL;
+Float_VA:		('+' | '-')? (DECIMAL | DEC_DOT_DEC);
+Real_VA:		('+' | '-')? (DECIMAL | DEC_DOT_DEC) ([Ee] ('+' | '-')? DECIMAL)?;
+
+Simple_name_VA:		SIMPLE_NAME;
+
+SPACE_VA:		[ \t]+ -> skip;
+RETURN_VA:		[\r\n]+ -> popMode;
+
+//SECTION_COMMENT_VA:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
+LINE_COMMENT_VA:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);
+
+mode FORMAT_MODE;
+
+Format_code:		'%' DECIMAL? ('s' | 'd' | '.' DECIMAL 'f' | '+'? 'e');
+
+SPACE_FO:		[ \t]+ -> skip;
+RETURN_FO:		[\r\n]+ -> popMode;
+
+//SECTION_COMMENT_FO:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ' '* [\r\n]+ -> channel(HIDDEN);
+LINE_COMMENT_FO:	('#' | '!' | ';' | '\\' | '&' | '/' '/'+ | '*' '*'+ | '-' '-'+ | '+' '+'+ | '=' '='+ | 'REMARK') ~[\r\n]* -> channel(HIDDEN);
+
+mode NULL_VALUE_MODE;
+
+Any_name_NV:		~[ \t\r\n]+;
+
+SPACE_NV:		[ \t]+ -> skip;
+RETURN_NV:		[\r\n]+ -> popMode;
+
+mode NULL_STRING_MODE;
+
+Any_name_NS:		~[ \t\r\n]+;
+
+SPACE_NS:		[ \t]+ -> skip;
+RETURN_NS:		[\r\n]+ -> popMode;
+