Add checking if file exists during gathering samples, add parameter to distinguish between languages

Signed-off-by: Adam Wawrzynski <[email protected]>
adamwawrzynski · Jun 7, 2019 · 1042e68 · 1042e68
1 parent c48a4ac
commit 1042e68
Show file tree

Hide file tree

Showing 2 changed files with 50 additions and 8 deletions.
diff --git a/modules/audio_processing.py b/modules/audio_processing.py
@@ -171,10 +171,19 @@ def get_samples(path, feasible_phonemes):
 
         # otherwise process files inside directory
         else:
-            features, _ = process_audio(path + '/' + filename + ".WAV")
+            if os.path.isfile((path + '/' + filename + ".WAV")):
+                features, _ = process_audio(path + '/' + filename + ".WAV")
+            else:
+                print("File {} doesn't exist.".format(path + '/' + filename + ".WAV"))
+                exit()
 
             sample.set_features(features)
-            tmp = get_phonemes_from_file(path + '/' + filename + ".PHN")
+
+            if os.path.isfile(path + '/' + filename + ".PHN"):
+                tmp = get_phonemes_from_file(path + '/' + filename + ".PHN")
+            else:
+                print("File {} doesn't exist.".format(path + '/' + filename + ".PHN"))
+                exit()
 
             # convert phoneme from ASCII to number representing class
             tmp = convert_phonemes_to_number(tmp, feasible_phonemes)
@@ -304,10 +313,19 @@ def get_samples_clarin(path, feasible_phonemes):
                 new_sample = True
                 basename = filename.split(".")[0]
 
-                features, _ = process_audio(path + '/' + basename + ".wav")
+                if os.path.isfile((path + '/' + basename + ".wav")):
+                    features, _ = process_audio(path + '/' + basename + ".wav")
+                else:
+                    print("File {} doesn't exist.".format(path + '/' + basename + ".wav"))
+                    exit()
 
                 sample.set_features(features)
-                tmp = get_phonemes_from_file(path + '/' + basename + ".PHN")
+
+                if os.path.isfile(path + '/' + filename + ".PHN"):
+                    tmp = get_phonemes_from_file(path + '/' + filename + ".PHN")
+                else:
+                    print("File {} doesn't exist.".format(path + '/' + filename + ".PHN"))
+                    exit()
 
                 # convert phoneme from ASCII to number representing class
                 tmp = convert_phonemes_to_number(tmp, feasible_phonemes)

diff --git a/modules/train.py b/modules/train.py
@@ -88,13 +88,25 @@ def train_model(name,
                 alphabet_path,
                 dataset_path,
                 restore,
+                language,
                 tensorboard=False,
                 verbose=False):
     """Trains model and saves pretrained weights to file."""
 
     # load alphabet and dataset from given paths
-    # dataset = ap.get_dataset(alphabet_path, dataset_path)
-    dataset = ap.get_dataset_clarin(alphabet_path, dataset_path)
+    # load alphabet and dataset from given paths
+    if language == "polish":
+        dataset = ap.get_dataset_clarin(alphabet_path, dataset_path)
+    elif language == "english":
+        dataset = ap.get_dataset(alphabet_path, dataset_path)
+    else:
+        print("Lanugage {} is not supported.".format(language))
+        exit()
+
+    if not dataset:
+        print("Dataset is empty.")
+        print("Check your dataset path and selected language.")
+        exit()
 
     # load model to retrain
     if restore == True:
@@ -183,12 +195,24 @@ def evaluate_model(model,
                 test_func,
                 alphabet_path,
                 dataset_path,
+                language,
                 verbose=False):
     """Checks and prints accuracy of pretrained model on given dataset."""
 
     # load alphabet and dataset from given paths
-    # dataset = ap.get_dataset(alphabet_path, dataset_path)
-    dataset = ap.get_dataset_clarin(alphabet_path, dataset_path)
+    if language == "polish":
+        dataset = ap.get_dataset_clarin(alphabet_path, dataset_path)
+    elif language == "english":
+        dataset = ap.get_dataset(alphabet_path, dataset_path)
+    else:
+        print("Lanugage {} is not supported.".format(language))
+        exit()
+
+    if not dataset:
+        print("Dataset is empty.")
+        print("Check your dataset path and selected language.")
+        exit()
+
     phonemes = ap.get_feasible_phonemes(alphabet_path)
 
     # load model to retrain