diff --git a/hparams/hparams.yaml b/hparams/hparams.yaml index 48e6f27a..2b4d60b7 100644 --- a/hparams/hparams.yaml +++ b/hparams/hparams.yaml @@ -1,6 +1,15 @@ --- default_iu3: + train2: + data_path: 'IU/*_split/*.wav' convert: - data_path: 'korean/*.wav' - one_full_wav: True - batch_size: 10 \ No newline at end of file + one_full_wav: False + batch_size: 4 +--- +iu: + train2: + data_path: 'IU/*_split/*.wav' + batch_size: 64 + convert: + one_full_wav: False + batch_size: 4 \ No newline at end of file diff --git a/tools/convert_amp.py b/tools/convert_amp.py new file mode 100644 index 00000000..c15d88ec --- /dev/null +++ b/tools/convert_amp.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +from pydub import AudioSegment +import glob +from audio_utils import split_path +import os + +src_path = '/Users/avin/git/vc/datasets/IU/v_app_split' +target_path = '{}_amp'.format(src_path) +target_amp_in_db = -20 + + +def match_target_amplitude(sound, target_dBFS): + change_in_dBFS = target_dBFS - sound.dBFS + return sound.apply_gain(change_in_dBFS) + +if not os.path.exists(target_path): + os.mkdir(target_path) + +for filepath in glob.glob('{}/*.wav'.format(src_path)): + basepath, filename, _ = split_path(filepath) + sound = AudioSegment.from_wav(filepath) + normalized_sound = match_target_amplitude(sound, target_amp_in_db) + normalized_sound.export('{}/{}.wav'.format(target_path, filename), 'wav') \ No newline at end of file diff --git a/tools/mfcc_amplitude_normalization_test.py b/tools/mfcc_amp_normalization_test.py similarity index 97% rename from tools/mfcc_amplitude_normalization_test.py rename to tools/mfcc_amp_normalization_test.py index e3cc3d97..a6099fdf 100644 --- a/tools/mfcc_amplitude_normalization_test.py +++ b/tools/mfcc_amp_normalization_test.py @@ -21,7 +21,7 @@ n_fft=n_fft, hop_length=hop_length, win_length=win_length) -mag = np.abs(D) +mag = np.abs(D) # (1+n_fft//2, t) scaled_mag = mag * 200 # Get mel-spectrogram diff --git a/tools/plot_spectrogram.py b/tools/plot_spectrogram.py index 752c6405..ca3d5b46 100644 --- a/tools/plot_spectrogram.py +++ b/tools/plot_spectrogram.py @@ -7,11 +7,11 @@ import matplotlib.pyplot as plt from audio_utils import read, write -filename = '/Users/avin/git/vc/datasets/timit/TIMIT/TEST/DR1/FAKS0/SA1.wav' +filename = '/Users/avin/git/vc/outputs/male.wav' sr = 22050 -n_fft = 4096 +n_fft = 1024 len_hop = n_fft / 4 -plot_wav = True +plot_wav = False plot_spec = True # Waveforms @@ -37,7 +37,7 @@ if plot_spec: plt.figure(2) - librosa.display.specshow(librosa.amplitude_to_db(spec, ref=np.max), sr=sr, hop_length=len_hop, y_axis='log', x_axis='time') + librosa.display.specshow(librosa.amplitude_to_db(spec, ref=np.max), sr=sr, hop_length=len_hop, y_axis='linear', x_axis='time') plt.title('spectrogram') plt.colorbar(format='%+2.0f dB') diff --git a/tools/stat_amplitude.py b/tools/statistics_amp.py similarity index 57% rename from tools/stat_amplitude.py rename to tools/statistics_amp.py index a1172ae2..7ce70a20 100644 --- a/tools/stat_amplitude.py +++ b/tools/statistics_amp.py @@ -6,7 +6,9 @@ import librosa import numpy as np -src_path = '/Users/avin/git/vc/datasets/IU_split' +src_path = '/Users/avin/git/vc/datasets/IU/v_app_split' +# src_path = '/Users/avin/git/vc/datasets/IU/melon_radio_season1_split' +# src_path = '/Users/avin/git/vc/datasets/IU/melon_radio_season2_split' # src_path = '/Users/avin/git/vc/datasets/timit/TIMIT/TRAIN/*/*' # src_path = '/Users/avin/git/vc/datasets/kate/sense_and_sensibility_split' # src_path = '/Users/avin/git/vc/datasets/arctic/bdl' @@ -38,26 +40,25 @@ dbs = np.array(dbs) -mean = np.mean(amps) -std = np.std(amps) +mean_amps = np.mean(amps) +std_amps = np.std(amps) +max_amps = np.max(amps) +min_amps = np.min(amps) -max = np.max(amps) -min = np.min(amps) +mean_dbs = np.mean(dbs) +std_dbs = np.std(dbs) +max_dbs = np.max(dbs) +min_dbs = np.min(dbs) -# mean = np.mean(dbs) -# std = np.std(dbs) -# -# max = np.max(dbs) -# min = np.min(dbs) - -# mean = np.mean(log_amps) -# std = np.std(log_amps) -# -# max = np.max(log_amps) -# min = np.min(log_amps) +mean_log_amps = np.mean(log_amps) +std_log_amps = np.std(log_amps) +max_log_amps = np.max(log_amps) +min_log_amps = np.min(log_amps) # normalized = (values - mean) / std # normalized = (log_amps - min) / (max - min) -print("max: {}, min: {}, mean: {}, std: {}".format(max, min, mean, std)) +print("[amps] max: {}, min: {}, mean: {}, std: {}".format(max_amps, min_amps, mean_amps, std_amps)) +print("[log_amps] max: {}, min: {}, mean: {}, std: {}".format(max_log_amps, min_log_amps, mean_log_amps, std_log_amps)) +print("[decibels] max: {}, min: {}, mean: {}, std: {}".format(max_dbs, min_dbs, mean_dbs, std_dbs)) # print(normalized) \ No newline at end of file diff --git a/train2.py b/train2.py index 9445f789..b90db143 100644 --- a/train2.py +++ b/train2.py @@ -104,6 +104,8 @@ def get_arguments(): logdir2 = '{}/{}/train2'.format(logdir_path, case2) Hparam(case2).set_as_global_hparam() + print('case1: {}, case2: {}, logdir1: {}, logdir2: {}'.format(case1, case2, logdir1, logdir2)) + train(logdir1=logdir1, logdir2=logdir2) print("Done") \ No newline at end of file