Prepare data for Slavic and Romance families

cmu-llab · Apr 19, 2021 · 89208a4 · 89208a4
1 parent abae775
commit 89208a4
Show file tree

Hide file tree

Showing 5 changed files with 313 additions and 54 deletions.
diff --git a/scripts/dashboard.py b/scripts/dashboard.py
@@ -506,10 +506,19 @@ def get_pid_info(pid: int) -> Tuple[int, str]:
                             break
 
     # Job schedule.
-    lang2code = {'Gothic': 'Got', 'Old Norse': 'Non', 'Old English': 'Ang'}
+    lang2code = {'Gothic': 'Got', 'Old Norse': 'Non', 'Old English': 'Ang',
+                 'Spanish': 'Spa', 'Italian': 'Ita', 'French': 'Fra',
+                 'Russian': 'Rus', 'Ukranian': 'Ukr', 'Polish': 'Pol'}
     with st.beta_expander('Job schedule'):
-        lang = st.selectbox('language', ['Gothic', 'Old Norse', 'Old English'])
-        lang2config = {k: 'OPRLPgmc' + v for k, v in lang2code.items()}
+        lang = st.selectbox('language', ['Gothic', 'Old Norse', 'Old English',
+                                         'Spanish', 'Italian', 'French',
+                                         'Russian', 'Ukranian', 'Polish'])
+        if lang in ['Gothic', 'Old Norse', 'Old English']:
+            lang2config = {k: 'OPRLPgmc' + v for k, v in lang2code.items()}
+        elif lang in ['Spanish', 'Italian', 'French']:
+            lang2config = {k: 'OPRLLat' + v for k, v in lang2code.items()}
+        else:
+            lang2config = {k: 'OPRLSlaPro' + v for k, v in lang2code.items()}
         config = lang2config[lang]
         base_cmd = f'python sound_law/main.py --config {config} --mcts_config SmallSims --save_interval 1'
 

diff --git a/scripts/prepare_abc.py b/scripts/prepare_abc.py
@@ -282,7 +282,8 @@ def get_connected_sounds(ph, g, kept_dist_mat, kept_i2pp, kept_pp2i) -> PDF:
 
     # Add some phones to the dataset -- they might not be present in the original data.
     added_phones = ['oːː', 'eːː', 'õː', 'ĩː', 'xʷ', 'gʷ', 'hʷ', 'ay', 'iuː', 'ioː',
-                    'io', 'eːo', 'æa', 'æːa', 'eo', 'iːu', 'iu', 'ɣː', 'ðː', 'wː', 'θː', 'βː', 'øy']
+                    'io', 'eːo', 'æa', 'æːa', 'eo', 'iːu', 'iu', 'ɣː', 'ðː', 'wː', 'θː', 'βː', 'øy',
+                    'tʲː', 'dʲː']
     words_df = add_phones(raw_words_df, added_phones)
     st.write(f'{", ".join(added_phones)}')