Skip to content

Commit

Permalink
amplitude is normalized decibel
Browse files Browse the repository at this point in the history
  • Loading branch information
andabi committed Dec 11, 2017
1 parent c2d5200 commit f01f6b1
Show file tree
Hide file tree
Showing 6 changed files with 325 additions and 24 deletions.
8 changes: 8 additions & 0 deletions audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ def inv_preemphasis(x, coeff=0.97):
return signal.lfilter([1], [1, -coeff], x)


def amp_to_db(amp):
    """Convert amplitude values to decibels.

    Thin wrapper that forwards `amp` unchanged to
    `librosa.amplitude_to_db`, relying on librosa's default settings
    because no extra arguments are passed.
    """
    decibels = librosa.amplitude_to_db(amp)
    return decibels


def db_to_amp(db):
    """Convert decibel values back to amplitudes.

    Thin wrapper that forwards `db` unchanged to
    `librosa.db_to_amplitude`, relying on librosa's default settings
    because no extra arguments are passed.
    """
    amplitude = librosa.db_to_amplitude(db)
    return amplitude


def split(wav, top_db):
intervals = librosa.effects.split(wav, top_db=top_db)
wavs = map(lambda i: wav[i[0]: i[1]], intervals)
Expand Down
31 changes: 19 additions & 12 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
from data_load import get_wav_batch, get_batch
from models import Model
import numpy as np
from audio import spectrogram2wav, inv_preemphasis
from audio import spectrogram2wav, inv_preemphasis, db_to_amp
from hparam import logdir_path
import datetime
import tensorflow as tf
from hparam import Hparam
from utils import denormalize_0_1


def convert(logdir, step, writer, queue=False):
Expand Down Expand Up @@ -41,31 +42,37 @@ def convert(logdir, step, writer, queue=False):
epoch, gs = Model.get_epoch_and_global_step(logdir, step=step)

if queue:
pred_log_specs, y_log_spec, ppgs = sess.run([model(), model.y_spec, model.ppgs])
pred_spec, y_spec, ppgs = sess.run([model(), model.y_spec, model.ppgs])
else:
if hp.convert.one_full_wav:
mfcc, spec, mel = get_wav_batch(model.mode, model.batch_size)
else:
mfcc, spec, mel = get_batch(model.mode, model.batch_size)

pred_log_specs, y_log_spec, ppgs = sess.run([model(), model.y_spec, model.ppgs], feed_dict={model.x_mfcc: mfcc, model.y_spec: spec, model.y_mel: mel})
pred_spec, y_spec, ppgs = sess.run([model(), model.y_spec, model.ppgs], feed_dict={model.x_mfcc: mfcc, model.y_spec: spec, model.y_mel: mel})

# De-quantization
# bins = np.linspace(0, 1, hp.default.quantize_db)
# y_spec = bins[y_spec]

# Denormalization
# pred_log_specs = hp.mean_log_spec + hp.std_log_spec * pred_log_specs
# y_log_spec = hp.mean_log_spec + hp.std_log_spec * y_log_spec
# pred_log_specs = hp.min_log_spec + (hp.max_log_spec - hp.min_log_spec) * pred_log_specs
# y_log_spec = hp.min_log_spec + (hp.max_log_spec - hp.min_log_spec) * y_log_spec
pred_spec = denormalize_0_1(pred_spec, hp.default.max_db, hp.default.min_db)
y_spec = denormalize_0_1(y_spec, hp.default.max_db, hp.default.min_db)

# Db to amp
pred_spec = db_to_amp(pred_spec)
y_spec = db_to_amp(y_spec)

# Convert log of magnitude to magnitude
pred_specs, y_specs = np.e ** pred_log_specs, np.e ** y_log_spec
# pred_specs, y_specs = np.e ** pred_specs, np.e ** y_spec

# Emphasize the magnitude
pred_specs = np.power(pred_specs, hp.convert.emphasis_magnitude)
y_specs = np.power(y_specs, hp.convert.emphasis_magnitude)
pred_spec = np.power(pred_spec, hp.convert.emphasis_magnitude)
y_spec = np.power(y_spec, hp.convert.emphasis_magnitude)

# Spectrogram to waveform
audio = np.array(map(lambda spec: spectrogram2wav(spec.T, hp.default.n_fft, hp.default.win_length, hp.default.hop_length, hp.default.n_iter), pred_specs))
y_audio = np.array(map(lambda spec: spectrogram2wav(spec.T, hp.default.n_fft, hp.default.win_length, hp.default.hop_length, hp.default.n_iter), y_specs))
audio = np.array(map(lambda spec: spectrogram2wav(spec.T, hp.default.n_fft, hp.default.win_length, hp.default.hop_length, hp.default.n_iter), pred_spec))
y_audio = np.array(map(lambda spec: spectrogram2wav(spec.T, hp.default.n_fft, hp.default.win_length, hp.default.hop_length, hp.default.n_iter), y_spec))

# Apply inverse pre-emphasis
audio = inv_preemphasis(audio, coeff=hp.default.preemphasis)
Expand Down
26 changes: 14 additions & 12 deletions data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# /usr/bin/python2

import glob
import sys
import threading
from functools import wraps
from random import sample
Expand All @@ -11,10 +10,11 @@
from tensorflow.python.platform import tf_logging as logging

from hparam import Hparam
from audio import preemphasis
from audio import preemphasis, amp_to_db
import numpy as np
import librosa
from hparam import data_path_base
from utils import normalize_0_1


def wav_random_crop(wav, sr, duration):
Expand Down Expand Up @@ -119,19 +119,21 @@ def _get_mfcc_log_spec_and_log_mel_spec(wav, preemphasis_coeff, n_fft, win_lengt
mel_basis = librosa.filters.mel(hp.default.sr, hp.default.n_fft, hp.default.n_mels) # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag) # (n_mels, t) # mel spectrogram

# Get mfccs
db = librosa.amplitude_to_db(mel)
mfccs = np.dot(librosa.filters.dct(hp.default.n_mfcc, db.shape[0]), db)
# Get mfccs, amp to db
mag_db = amp_to_db(mag)
mel_db = amp_to_db(mel)
mfccs = np.dot(librosa.filters.dct(hp.default.n_mfcc, mel_db.shape[0]), mel_db)

# Log
mag = np.log(mag + sys.float_info.epsilon)
mel = np.log(mel + sys.float_info.epsilon)
# Normalization (0 ~ 1)
mag_db = normalize_0_1(mag_db, hp.default.max_db, hp.default.min_db)
mel_db = normalize_0_1(mel_db, hp.default.max_db, hp.default.min_db)

# Normalization
# self.y_log_spec = (y_log_spec - hp.mean_log_spec) / hp.std_log_spec
# self.y_log_spec = (y_log_spec - hp.min_log_spec) / (hp.max_log_spec - hp.min_log_spec)
# Quantization
# bins = np.linspace(0, 1, hp.default.quantize_db)
# mag_db = np.digitize(mag_db, bins)
# mel_db = np.digitize(mel_db, bins)

return mfccs.T, mag.T, mel.T # (t, n_mfccs), (t, 1+n_fft/2), (t, n_mels)
return mfccs.T, mag_db.T, mel_db.T # (t, n_mfccs), (t, 1+n_fft/2), (t, n_mels)


# Adapted from the `sugartensor` code.
Expand Down
3 changes: 3 additions & 0 deletions hparams/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ default:
n_iter: 60 # Number of inversion iterations
n_mels: 80
duration: 2
max_db: 35
min_db: -55
quantize_db: 101

# model
hidden_units: 256 # alias: E
Expand Down
270 changes: 270 additions & 0 deletions quantize.ipynb

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import os
import glob
import numpy as np


def split_path(path):
Expand All @@ -20,3 +21,13 @@ def remove_all_files(prefix):
files = glob.glob(prefix + '*')
for f in files:
os.remove(f)


def normalize_0_1(values, max, min):
    """Linearly rescale `values` from [min, max] to [0, 1], clipping outliers.

    Args:
        values: Scalar, sequence, or numpy array of values to rescale.
        max: Value that maps to 1. (The name shadows the builtin; it is kept
            for backward compatibility with existing callers.)
        min: Value that maps to 0. (Same note as for `max`.)

    Returns:
        numpy array (or numpy scalar) with every element in [0, 1]; inputs
        outside [min, max] are clipped to the nearest bound.

    Raises:
        ValueError: If `max == min`. The range would have zero width and the
            division below would otherwise silently yield NaN/inf values.
    """
    span = max - min
    if span == 0:
        raise ValueError(
            "normalize_0_1 requires max != min (got max=%r, min=%r)" % (max, min))
    # np.asarray lets plain Python sequences be normalized too; arrays pass
    # through without a copy and keep their dtype.
    scaled = (np.asarray(values) - min) / span
    return np.clip(scaled, 0, 1)


def denormalize_0_1(normalized, max, min):
    """Map values in [0, 1] back onto the [min, max] range.

    The input is clipped to [0, 1] first, so the result always lies between
    `min` and `max`.

    Args:
        normalized: Scalar or array-like of normalized values.
        max: Value that 1 maps to.
        min: Value that 0 maps to.

    Returns:
        The rescaled values as produced by numpy.
    """
    span = max - min
    unit = np.clip(normalized, 0, 1)
    return unit * span + min

0 comments on commit f01f6b1

Please sign in to comment.