Skip to content

Commit

Permalink
normalized volume
Browse files Browse the repository at this point in the history
  • Loading branch information
andabi committed Nov 22, 2017
1 parent f5f49fb commit 5d27586
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 25 deletions.
15 changes: 12 additions & 3 deletions hparams/hparams.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
---
default_iu3:
train2:
data_path: 'IU/*_split/*.wav'
convert:
data_path: 'korean/*.wav'
one_full_wav: True
batch_size: 10
one_full_wav: False
batch_size: 4
---
iu:
train2:
data_path: 'IU/*_split/*.wav'
batch_size: 64
convert:
one_full_wav: False
batch_size: 4
25 changes: 25 additions & 0 deletions tools/convert_amp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

from pydub import AudioSegment
import glob
from audio_utils import split_path
import os

# Directory holding the wav clips to process, and the sibling directory that
# receives the gain-adjusted copies (the same path with an '_amp' suffix).
src_path = '/Users/avin/git/vc/datasets/IU/v_app_split'
target_path = src_path + '_amp'

# Loudness, in dBFS, that every clip is shifted to.
target_amp_in_db = -20


def match_target_amplitude(sound, target_dBFS):
    """Shift the loudness of ``sound`` so that it reports ``target_dBFS``.

    Args:
        sound: An object exposing a ``dBFS`` attribute and an
            ``apply_gain(gain_in_db)`` method (this script passes a pydub
            ``AudioSegment``).
        target_dBFS: Desired loudness in dBFS.

    Returns:
        The result of ``sound.apply_gain(...)`` with the gain needed to reach
        the target, or ``sound`` itself when it is digitally silent.
    """
    current_dBFS = sound.dBFS
    # A silent clip reports -inf dBFS; the required gain would be +inf and no
    # finite gain can raise silence, so hand the clip back untouched.
    if current_dBFS == float('-inf'):
        return sound
    return sound.apply_gain(target_dBFS - current_dBFS)

# Make sure the output directory exists before any file is exported.
if not os.path.exists(target_path):
    os.mkdir(target_path)

# Re-export every wav found directly under src_path at the target loudness,
# keeping the original file name.
wav_pattern = '{}/*.wav'.format(src_path)
for filepath in glob.glob(wav_pattern):
    basepath, filename, _ = split_path(filepath)
    normalized_sound = match_target_amplitude(
        AudioSegment.from_wav(filepath), target_amp_in_db)
    out_path = '{}/{}.wav'.format(target_path, filename)
    normalized_sound.export(out_path, 'wav')
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
n_fft=n_fft,
hop_length=hop_length,
win_length=win_length)
mag = np.abs(D)
mag = np.abs(D) # (1+n_fft//2, t)
scaled_mag = mag * 200

# Get mel-spectrogram
Expand Down
8 changes: 4 additions & 4 deletions tools/plot_spectrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import matplotlib.pyplot as plt
from audio_utils import read, write

filename = '/Users/avin/git/vc/datasets/timit/TIMIT/TEST/DR1/FAKS0/SA1.wav'
filename = '/Users/avin/git/vc/outputs/male.wav'
sr = 22050
n_fft = 4096
n_fft = 1024
# Floor division keeps the hop length an integer number of samples on both
# Python 2 and Python 3 (true division would yield a float on Python 3).
len_hop = n_fft // 4
plot_wav = True
plot_wav = False
plot_spec = True

# Waveforms
Expand All @@ -37,7 +37,7 @@
if plot_spec:
plt.figure(2)

librosa.display.specshow(librosa.amplitude_to_db(spec, ref=np.max), sr=sr, hop_length=len_hop, y_axis='log', x_axis='time')
librosa.display.specshow(librosa.amplitude_to_db(spec, ref=np.max), sr=sr, hop_length=len_hop, y_axis='linear', x_axis='time')
plt.title('spectrogram')
plt.colorbar(format='%+2.0f dB')

Expand Down
35 changes: 18 additions & 17 deletions tools/stat_amplitude.py → tools/statistics_amp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import librosa
import numpy as np

src_path = '/Users/avin/git/vc/datasets/IU_split'
src_path = '/Users/avin/git/vc/datasets/IU/v_app_split'
# src_path = '/Users/avin/git/vc/datasets/IU/melon_radio_season1_split'
# src_path = '/Users/avin/git/vc/datasets/IU/melon_radio_season2_split'
# src_path = '/Users/avin/git/vc/datasets/timit/TIMIT/TRAIN/*/*'
# src_path = '/Users/avin/git/vc/datasets/kate/sense_and_sensibility_split'
# src_path = '/Users/avin/git/vc/datasets/arctic/bdl'
Expand Down Expand Up @@ -38,26 +40,25 @@
dbs = np.array(dbs)


mean = np.mean(amps)
std = np.std(amps)
mean_amps = np.mean(amps)
std_amps = np.std(amps)
max_amps = np.max(amps)
min_amps = np.min(amps)

max = np.max(amps)
min = np.min(amps)
mean_dbs = np.mean(dbs)
std_dbs = np.std(dbs)
max_dbs = np.max(dbs)
min_dbs = np.min(dbs)

# mean = np.mean(dbs)
# std = np.std(dbs)
#
# max = np.max(dbs)
# min = np.min(dbs)

# mean = np.mean(log_amps)
# std = np.std(log_amps)
#
# max = np.max(log_amps)
# min = np.min(log_amps)
mean_log_amps = np.mean(log_amps)
std_log_amps = np.std(log_amps)
max_log_amps = np.max(log_amps)
min_log_amps = np.min(log_amps)

# normalized = (values - mean) / std
# normalized = (log_amps - min) / (max - min)

print("max: {}, min: {}, mean: {}, std: {}".format(max, min, mean, std))
print("[amps] max: {}, min: {}, mean: {}, std: {}".format(max_amps, min_amps, mean_amps, std_amps))
print("[log_amps] max: {}, min: {}, mean: {}, std: {}".format(max_log_amps, min_log_amps, mean_log_amps, std_log_amps))
print("[decibels] max: {}, min: {}, mean: {}, std: {}".format(max_dbs, min_dbs, mean_dbs, std_dbs))
# print(normalized)
2 changes: 2 additions & 0 deletions train2.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def get_arguments():
logdir2 = '{}/{}/train2'.format(logdir_path, case2)
Hparam(case2).set_as_global_hparam()

print('case1: {}, case2: {}, logdir1: {}, logdir2: {}'.format(case1, case2, logdir1, logdir2))

train(logdir1=logdir1, logdir2=logdir2)

print("Done")

0 comments on commit 5d27586

Please sign in to comment.