Skip to content

Commit

Permalink
minor refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
andabi committed Dec 2, 2017
1 parent d67b308 commit b29f32a
Show file tree
Hide file tree
Showing 19 changed files with 27 additions and 536 deletions.
32 changes: 6 additions & 26 deletions tools/audio_utils.py → audio.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
'''
By Dabi Ahn. [email protected].
https://www.github.com/andabi
'''

from pydub import AudioSegment
import os
Expand Down Expand Up @@ -34,29 +30,7 @@ def rewrite_mp3_to_wav(source_path, target_path):
AudioSegment.from_mp3(source_path).export(target_path, format='wav')


def split_path(path):
    '''
    Split a file path into its directory, bare file name and extension.

    'a/b/c.wav' => ('a/b', 'c', '.wav')

    Note that the extension keeps its leading dot, because it comes
    straight from os.path.splitext. A path without an extension yields
    an empty string in the last position.

    :param path: filepath, e.g. 'a/b/c.wav'
    :return: (directory, filename without extension, extension with dot),
             e.g. ('a/b', 'c', '.wav')
    '''
    directory, leaf = os.path.split(path)
    stem, extension = os.path.splitext(leaf)
    return directory, stem, extension


def spectrogram2wav(mag, n_fft, win_length, hop_length, num_iters, phase_angle=None, length=None):
'''
:param mag: [f, t]
:param n_fft: n_fft
:param win_length: window length
:param hop_length: hop length
:param num_iters: num of iteration when griffin-lim reconstruction
:param phase_angle: phase angle
:param length: length of wav
:return:
'''
assert (num_iters > 0)
if phase_angle is None:
phase_angle = np.pi * np.random.rand(*mag.shape)
Expand All @@ -77,3 +51,9 @@ def preemphasis(x, coeff=0.97):

def inv_preemphasis(x, coeff=0.97):
    '''
    Run x through the recursive filter y[n] = x[n] + coeff * y[n - 1].

    :param x: input signal (array-like accepted by scipy.signal.lfilter)
    :param coeff: feedback coefficient of the filter
    :return: filtered signal, same length as x
    '''
    numerator = [1]
    denominator = [1, -coeff]
    return signal.lfilter(numerator, denominator, x)


def split(wav, top_db):
    '''
    Cut a waveform into the segments that librosa.effects.split reports.

    :param wav: audio signal passed to librosa.effects.split
                (presumably a 1-D mono waveform; confirm against callers)
    :param top_db: threshold forwarded to librosa.effects.split as top_db
    :return: list with one slice of wav per reported interval
    '''
    intervals = librosa.effects.split(wav, top_db=top_db)
    # Build a real list rather than returning map(...): on Python 3 a map
    # object is a lazy, single-use iterator, so callers could not take its
    # length, index it, or iterate over the segments more than once.
    return [wav[start:end] for start, end in intervals]
2 changes: 1 addition & 1 deletion convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data_load import get_wav_batch, get_batch
from models import Model
import numpy as np
from utils import spectrogram2wav, inv_preemphasis
from audio import spectrogram2wav, inv_preemphasis
from hparam import logdir_path
import datetime
import tensorflow as tf
Expand Down
16 changes: 15 additions & 1 deletion data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,26 @@
from tensorflow.python.platform import tf_logging as logging

from hparam import Hparam
from utils import preemphasis, wav_random_crop
from audio import preemphasis
import numpy as np
import librosa
from hparam import data_path_base


def wav_random_crop(wav, sr, duration):
    '''
    Take a random contiguous crop of sr * duration samples along the last axis.

    If the signal is shorter than the requested crop, the crop starts at 0
    and the whole (shorter) signal is returned.

    :param wav: numpy array with at most 2 dimensions; the last axis is cropped
    :param sr: sampling rate, multiplied by duration to get the crop length
    :param duration: crop length in units of 1 / sr; may be a float
    :return: cropped array, last axis of length min(crop length, wav length)
    '''
    assert (wav.ndim <= 2)

    # sr * duration is a float when duration is fractional; range() and
    # slicing both need an integer sample count.
    target_len = int(round(sr * duration))
    wav_len = wav.shape[-1]

    # Valid start offsets are 0 .. wav_len - target_len inclusive; always
    # allow at least offset 0 so short signals are returned whole.
    num_starts = max(1, wav_len - target_len + 1)
    start = np.random.randint(num_starts)
    end = start + target_len

    # Ellipsis keeps any leading axis intact, covering both 1-D and 2-D input.
    return wav[..., start:end]


def get_mfccs_and_phones(wav_file, sr, length, trim=False, random_crop=True):
hp = Hparam.get_global_hparam()

Expand Down
5 changes: 3 additions & 2 deletions hparam.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
# path
## local
data_path_base = './datasets'
logdir_path = './logdir'
# logdir_path = './logdir'

## remote
# data_path_base = '/data/private/vc/datasets'
# logdir_path = '/data/private/vc/logdir'
logdir_path = '/data/private/vc/logdir'



Expand Down Expand Up @@ -73,6 +73,7 @@ def __call__(self):

def set_as_global_hparam(self):
    """Store this instance's hparam on the Hparam class and return the stored value."""
    Hparam.global_hparam = self.hparam
    installed = Hparam.global_hparam
    return installed

@staticmethod
def get_global_hparam():
Expand Down
15 changes: 0 additions & 15 deletions hparams/hparams.yaml

This file was deleted.

2 changes: 2 additions & 0 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def _net2(self):
# CBHG1: mel-scale
pred_mel = cbhg(prenet_out, self.hp.train2.num_banks, self.hp.train2.hidden_units // 2, self.hp.train2.num_highway_blocks, self.hp.train2.norm_type, self.is_training, scope="cbhg1")
pred_mel = tf.layers.dense(pred_mel, self.y_mel.shape[-1]) # log magnitude: (N, T, n_mels)
# pred_mel = prenet_out

# CBHG2: linear-scale
pred_spec = tf.layers.dense(pred_mel, self.hp.train2.hidden_units // 2) # log magnitude: (N, T, n_mels)
Expand All @@ -119,6 +120,7 @@ def _net2(self):
def loss_net2(self):
    """Return the sum of two mean squared differences.

    One term compares pred_spec with y_spec, the other pred_mel with y_mel;
    the two terms are added unweighted.
    """
    spec_term = tf.reduce_mean(
        tf.squared_difference(self.pred_spec, self.y_spec))
    mel_term = tf.reduce_mean(
        tf.squared_difference(self.pred_mel, self.y_mel))
    return spec_term + mel_term

Expand Down
File renamed without changes.
55 changes: 0 additions & 55 deletions tools/comparison_wavs.py

This file was deleted.

25 changes: 0 additions & 25 deletions tools/convert_amp.py

This file was deleted.

31 changes: 0 additions & 31 deletions tools/griffin_lim_recon_test.py

This file was deleted.

21 changes: 0 additions & 21 deletions tools/librosa_load_test.py

This file was deleted.

14 changes: 0 additions & 14 deletions tools/librosa_split_test.py

This file was deleted.

49 changes: 0 additions & 49 deletions tools/mfcc_amp_normalization_test.py

This file was deleted.

14 changes: 0 additions & 14 deletions tools/mp3_to_wav.py

This file was deleted.

Loading

0 comments on commit b29f32a

Please sign in to comment.