-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1d02f74
Showing
38 changed files
with
2,113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
*.h5 | ||
*.hdf5 | ||
*.pyc | ||
cifar/data/quality* | ||
cifar/data/cifar* | ||
audio/data/Samples | ||
audio/data/bitrate* | ||
audio/data/Preproc* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
## The Helmholtz Method: *Using Perceptual Compression to Reduce Machine Learning Complexity* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import os | ||
import glob | ||
import subprocess | ||
|
||
def quantitize(input_dir, bitrate=128):
    """Decode every .mp3 under ``input_dir + str(bitrate)`` back to .wav with ffmpeg.

    Output files are written to ``Bitrate_<bitrate>/<subdir>/<name>.wav``,
    mirroring the input directory layout (directories created as needed).

    :param input_dir: prefix of the input directory; ``bitrate`` is appended.
    :param bitrate: tag used to locate the input dir and name the output dir.
    """
    input_dir = input_dir + str(bitrate)
    train_dir = glob.glob(os.path.join(input_dir, "*"))
    # BUG FIX: was a Python-2 print statement (SyntaxError under Python 3).
    print(train_dir)
    for path in train_dir:
        for mp3_name in glob.glob(os.path.join(path, "*.mp3")):
            # BUG FIX: basename/splitext instead of manual rindex/split(".")[0],
            # which broke on names containing extra dots.
            name = os.path.basename(mp3_name)
            out_dir = os.path.join("Bitrate_" + str(bitrate),
                                   mp3_name[mp3_name.index("/")+1:mp3_name.rindex("/")])
            out_name = os.path.splitext(name)[0] + ".wav"
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            # NOTE(review): paths are interpolated into the shell unquoted;
            # names with spaces or metacharacters will break — confirm inputs.
            os.system('ffmpeg -i %s %s' % (mp3_name, os.path.join(out_dir, out_name)))
|
||
# Bitrates (kb/s) whose bitrate_<N> directories get decoded back to .wav.
bitrate_list = [160]


if __name__ == "__main__":
    for bitrate in bitrate_list:
        quantitize("bitrate_", bitrate)
        # Echo the input directory just processed.
        print ("bitrate_" + str(bitrate))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Launcher for preprocessing the sample set at several MP3 bitrates.
# Each line runs prepare.py on one bitrate_<N> input dir, writing Preproc_<N>,
# in the background. All lines are intentionally commented out — uncomment to use.
# (python prepare.py -i bitrate_8 -o Preproc_8)&
# (python prepare.py -i bitrate_16 -o Preproc_16)&
# (python prepare.py -i bitrate_32 -o Preproc_32)&
# (python prepare.py -i bitrate_64 -o Preproc_64)&
# (python prepare.py -i bitrate_96 -o Preproc_96)&
# (python prepare.py -i bitrate_128 -o Preproc_128)&
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
#! /usr/bin/env python3 | ||
|
||
from __future__ import print_function | ||
import numpy as np | ||
from panotti.datautils import * | ||
import librosa | ||
from audioread import NoBackendError | ||
import os | ||
from PIL import Image | ||
from functools import partial | ||
from imageio import imwrite | ||
import multiprocessing as mp | ||
from utils.resolve_osx_aliases import resolve_osx_alias | ||
|
||
# this is either just the regular shape, or it returns a leading 1 for mono | ||
def get_canonical_shape(signal):
    """Return ``signal.shape``, prefixing a channel count of 1 for mono (1-D) input."""
    shape = signal.shape
    return shape if len(shape) != 1 else (1, shape[0])
|
||
|
||
def find_max_shape(path, mono=False, sr=None, dur=None, clean=False):
    """Scan all audio files under ``path`` and return the largest (channels, samples) shape.

    :param path: root directory, walked recursively; hidden files are skipped.
    :param mono: load audio as mono (reported as a single channel).
    :param sr: target sample rate passed to librosa.load (None = native rate).
    :param dur: max clip duration in seconds; only used by the shortcut below.
    :param clean: assume all files share one shape — return the first file's shape.
    :returns: (max channels, max samples) tuple, or a [1, sr*dur] list via the shortcut
              (callers copy it with list(...), so both forms work).
    :raises ValueError: if no audio files are found under ``path``.
    :raises NoBackendError: re-raised when a file cannot be decoded.
    """
    if (mono) and (sr is not None) and (dur is not None):
        # Speedy special case: the padded shape is fully determined up front.
        return [1, int(sr*dur)]
    shapes = []
    for dirname, dirnames, filenames in os.walk(path):
        for filename in filenames:
            if filename.startswith('.'):  # ignore hidden files
                continue
            filepath = os.path.join(dirname, filename)
            try:
                signal, sr = librosa.load(filepath, mono=mono, sr=sr)
            except NoBackendError as e:
                print("Could not open audio file {}".format(filepath))
                raise e
            if (clean):  # just take the first file and exit
                return get_canonical_shape(signal)
            shapes.append(get_canonical_shape(signal))

    if not shapes:
        # BUG FIX: previously fell through to max() on an empty sequence, which
        # raised a cryptic ValueError; fail with an actionable message instead.
        raise ValueError("find_max_shape: no audio files found under '{}'".format(path))
    return (max(s[0] for s in shapes), max(s[1] for s in shapes))
|
||
|
||
def convert_one_file(printevery, class_index, class_files, nb_classes, classname, n_load, dirname, resample, mono,
        already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase, file_index):
    """Load one audio file, pad it to ``max_shape``, convert to a layered melgram, and save it.

    Shaped for multiprocessing: every argument except the trailing ``file_index``
    is frozen via functools.partial by the caller, and ``file_index`` selects the
    file from ``class_files``.
    """
    infilename = class_files[file_index]
    audio_path = dirname + '/' + infilename

    # Throttled progress line: every `printevery` files, plus the final file.
    if (0 == file_index % printevery) or (file_index+1 == len(class_files)):
        print("\r Processing class ",class_index+1,"/",nb_classes,": \'",classname,
            "\', File ",file_index+1,"/", n_load,": ",audio_path," ",
            sep="",end="\r")
    sr = None
    if (resample is not None):
        sr = resample

    signal, sr = load_audio(audio_path, mono=mono, sr=sr)

    # Reshape / pad so all output files have same shape
    shape = get_canonical_shape(signal)  # either the signal shape or a leading one
    if (shape != signal.shape):          # this only evals to true for mono
        signal = np.reshape(signal, shape)
    padded_signal = np.zeros(max_shape)  # (previously found max_shape) allocate a long signal of zeros
    # Clip the copy region in case this file exceeds max_shape in either axis.
    use_shape = list(max_shape[:])
    use_shape[0] = min( shape[0], max_shape[0] )
    use_shape[1] = min( shape[1], max_shape[1] )
    padded_signal[:use_shape[0], :use_shape[1]] = signal[:use_shape[0], :use_shape[1]]

    layers = make_layered_melgram(padded_signal, sr, mels=mels, phase=phase)

    # Impose the train/test split by index unless the input was pre-split;
    # the first n_train (shuffled) files of the class go to Train/.
    if not already_split:
        if (file_index >= n_train):
            outsub = "Test/"
        else:
            outsub = "Train/"
    else:
        outsub = subdir

    outfile = outpath + outsub + classname + '/' + infilename+'.'+out_format
    save_melgram(outfile, layers, out_format=out_format)
    return
|
||
|
||
def preprocess_dataset(inpath="Samples/", outpath="Preproc/", train_percentage=0.8, resample=None, already_split=False,
        sequential=False, mono=False, dur=None, clean=False, out_format='npy', mels=96, phase=False):
    """Convert a directory tree of audio samples into saved spectrogram files.

    Expects ``inpath`` to hold one subdirectory per class (optionally already
    split into Train/ and Test/); mirrors that layout under ``outpath`` with
    each audio file replaced by a melgram, imposing a train/test split when
    the input is not pre-split.

    NOTE(review): out_format defaults to 'npy' here but the CLI entry point
    defaults to 'npz' — confirm which is intended.
    """

    if (resample is not None):
        print(" Will be resampling at",resample,"Hz")

    if (True == already_split):
        print(" Data is already split into Train & Test")
        class_names = get_class_names(path=inpath+"Train/")   # get the names of the subdirectories
        sampleset_subdirs = ["Train/","Test/"]
    else:
        print(" Will be imposing 80-20 (Train-Test) split")
        class_names = get_class_names(path=inpath)   # get the names of the subdirectories
        sampleset_subdirs = ["./"]

    if (True == sequential):
        print(" Sequential ordering")
    else:
        print(" Shuffling ordering")

    # All outputs are zero-padded to one common shape, found by scanning the inputs.
    print(" Finding max shape...")
    max_shape = find_max_shape(inpath, mono=mono, sr=resample, dur=dur, clean=clean)
    print('''    Padding all files with silence to fit shape:
          Channels : {}
          Samples  : {}
          '''.format(max_shape[0], max_shape[1]))
    nb_classes = len(class_names)
    print("",len(class_names),"classes. class_names = ",class_names)

    train_outpath = outpath+"Train/"
    test_outpath = outpath+"Test/"
    # NOTE(review): Train/ and Test/ are only created when outpath itself is new;
    # a pre-existing outpath without them would make the later mkdirs fail — confirm.
    if not os.path.exists(outpath):
        os.mkdir( outpath );   # make a new directory for preproc'd files
        os.mkdir( train_outpath );
        os.mkdir( test_outpath );

    parallel = False  # set to false for debugging. when parallel jobs crash, usually no error messages are given, the system just hangs
    if (parallel):
        cpu_count = os.cpu_count()
        print("",cpu_count,"CPUs detected: Parallel execution across",cpu_count,"CPUs")
    else:
        cpu_count = 1
        print("Serial execution")

    for subdir in sampleset_subdirs:   # non-class subdirs of Samples (in case already split into Test/ Train; see above)

        for class_index, classname in enumerate(class_names):   # go through the classes
            print("")  # at the start of each new class, newline

            # make new Preproc/ subdirectories for class
            if not os.path.exists(train_outpath+classname):
                os.mkdir( train_outpath+classname );
                os.mkdir( test_outpath+classname );
            dirname = inpath+subdir+classname
            class_files = list(listdir_nohidden(dirname))   # all filenames for this class, skip hidden files
            class_files.sort()
            if (not sequential):   # shuffle directory listing (e.g. to avoid alphabetic order)
                np.random.shuffle(class_files)

            n_files = len(class_files)
            n_load = n_files   # sometimes we may multiple by a small # for debugging
            n_train = int( n_load * train_percentage)

            printevery = 20   # how often to output status messages when processing lots of files

            file_indices = tuple( range(len(class_files)) )

            if (not parallel):
                for file_index in file_indices:   # loop over all files
                    convert_one_file(printevery, class_index, class_files, nb_classes, classname, n_load, dirname,
                        resample, mono, already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase, file_index)
            else:
                # Fan the per-file conversions out across the pool; every argument
                # except file_index is frozen into the partial.
                pool = mp.Pool(cpu_count)
                pool.map(partial(convert_one_file, printevery, class_index, class_files, nb_classes, classname, n_load, dirname,
                    resample, mono, already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase), file_indices)
                pool.close()   # shut down the pool

    print("")  # at the very end, newline
    return
|
||
if __name__ == '__main__':
    import platform
    import argparse
    # FIX: typo in the description ("sames" -> "samples").
    parser = argparse.ArgumentParser(description="preprocess_data: convert samples to python-friendly data format for faster loading")
    # FIX: unbalanced parenthesis in the -a help text.
    parser.add_argument("-a", "--already", help="data is already split into Test & Train (default is to add 80-20 split)",action="store_true")
    parser.add_argument("-s", "--sequential", help="don't randomly shuffle data for train/test split",action="store_true")
    parser.add_argument("-m", "--mono", help="convert input audio to mono",action="store_true")
    # FIX: help text was copy-pasted from --mono; --resample actually sets the sample rate.
    parser.add_argument("-r", "--resample", type=int, default=44100, help="resample input audio at this rate in Hz (default: 44100)")
    parser.add_argument('-d', "--dur", type=float, default=None, help='Max duration (in seconds) of each clip')
    parser.add_argument('-c', "--clean", help="Assume 'clean data'; Do not check to find max shape (faster)", action='store_true')
    parser.add_argument('-f','--format', help="format of output file (npz, jpeg, png, etc). Default = npz", type=str, default='npz')
    parser.add_argument('-i','--inpath', help="input directory for audio samples (default='Samples')", type=str, default='Samples')
    parser.add_argument('-o','--outpath', help="output directory for spectrograms (default='Preproc')", type=str, default='Preproc')
    parser.add_argument("--mels", help="number of mel coefficients to use in spectrograms", type=int, default=96)
    parser.add_argument("--phase", help="Include phase information as extra channels", action='store_true')

    args = parser.parse_args()
    if (('Darwin' == platform.system()) and (not args.mono)):
        # bug/feature in OS X that causes np.dot() to sometimes hang if multiprocessing is running
        mp.set_start_method('forkserver', force=True)  # hopefully this here makes it never hang
        print(" WARNING: Using stereo files w/ multiprocessing on OSX may cause the program to hang.")
        print("   This is because of a mismatch between the way Python multiprocessing works and some Apple libraries")
        print("   If it hangs, try running with mono only (-m) or the --clean option, or turn off parallelism")
        print("   See https://github.com/numpy/numpy/issues/5752 for more on this.")
        print("")

    preprocess_dataset(inpath=args.inpath+'/', outpath=args.outpath+'/', resample=args.resample, already_split=args.already,
                       sequential=args.sequential, mono=args.mono, dur=args.dur, clean=args.clean,
                       out_format=args.format, mels=args.mels, phase=args.phase)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import os | ||
import glob | ||
|
||
def quantitize(input_dir, bitrate=128):
    """Encode every .wav file one level below ``input_dir`` as an MP3 at
    ``bitrate`` kb/s using the external ``lame`` encoder.

    Output mirrors the input layout under ``bitrate_<bitrate>/`` (directories
    created as needed); each ``foo.wav`` becomes ``foo.mp3``.

    :param input_dir: directory whose subdirectories hold the .wav samples.
    :param bitrate: MP3 bitrate in kb/s, passed to ``lame -b``.
    """
    import shlex  # local import: keeps the file's top-level import block untouched

    for class_dir in glob.glob(os.path.join(input_dir, "*")):
        for wav_name in glob.glob(os.path.join(class_dir, "*.wav")):
            # BUG FIX: basename/splitext instead of manual rindex/split(".")[0],
            # which truncated file names containing extra dots.
            name = os.path.basename(wav_name)
            out_dir = os.path.join("bitrate_" + str(bitrate),
                                   wav_name[wav_name.index("/")+1:wav_name.rindex("/")])
            out_name = os.path.splitext(name)[0] + ".mp3"
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            # BUG FIX: paths were interpolated into the shell unquoted, which
            # breaks (and is unsafe) for names with spaces or metacharacters.
            os.system("lame -b %s %s %s" % (str(bitrate),
                                            shlex.quote(wav_name),
                                            shlex.quote(os.path.join(out_dir, out_name))))
|
||
# MP3 bitrates (kb/s) at which to encode the Samples/ set.
bitrate_list = [16, 32, 64, 96, 128, 160]


if __name__ == "__main__":
    for bitrate in bitrate_list:
        quantitize("Samples", bitrate)
        # Report the output directory that was just produced.
        print ("bitrate_" + str(bitrate))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import os
import argparse
parser = argparse.ArgumentParser()

# CLI for selecting which training run(s) to summarize.
parser.add_argument('--bitrate', type=int, default=128, help='Audio bitrate [default: 128kb/s]')
parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]')
# NOTE(review): type=bool does not parse "False" — any non-empty string is
# truthy; consider action='store_true' if the CLI may change.
parser.add_argument('--all', type=bool, default=False, help='Collect all models [default: False]')

# Parsed at import time: importing this module consumes sys.argv.
FLAGS = parser.parse_args()

bitrate = FLAGS.bitrate
setting = FLAGS.setting
ALL = FLAGS.all

# Model architectures, indexed by --setting.
model_list = ["cnn_x4", "cnn_x3", "cnn_x2", "cnn_x3_mlp_0", "cnn_x3_mlp_64_128", "cnn_x3_mlp_128x2"]
|
||
def collect(bitrate=128, setting=0):
    """Print the converged validation metric for one model/bitrate run.

    Shells out to a pipeline over <model>/bitrate_<bitrate>/history.csv:
    extract the 4th CSV column, sort it, keep the rows from 11th-from-last
    through 2nd-from-last (tail -n 11 | head -n 10), and print their mean.
    NOTE(review): `sort` here is lexical, so this only approximates a numeric
    top-10 — confirm history.csv values share a fixed numeric format.
    """
    model = model_list[setting]
    history_path = os.path.join(model, "bitrate_" + str(bitrate) + "/history.csv")
    os.system("""cat %s | awk -F"," '{print $4}' | sort | tail -n 11 | head -n 10 | awk '{ total += $1 } END { print total/NR }'""" % history_path)
|
||
|
||
if __name__ == "__main__":
    # Sweep every bitrate when --all was given, else just the requested one.
    targets = [8, 16, 32, 64, 96, 128] if ALL else [bitrate]
    for br in targets:
        collect(br, setting)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import os
import argparse
parser = argparse.ArgumentParser()

# CLI for selecting which training run(s) to summarize.
parser.add_argument('--bitrate', type=int, default=128, help='Audio bitrate [default: 128kb/s]')
parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]')
# NOTE(review): type=bool does not parse "False" — any non-empty string is
# truthy; consider action='store_true' if the CLI may change.
parser.add_argument('--all', type=bool, default=False, help='Collect all models [default: False]')

# Parsed at import time: importing this module consumes sys.argv.
FLAGS = parser.parse_args()

bitrate = FLAGS.bitrate
setting = FLAGS.setting
ALL = FLAGS.all
|
||
# Model architectures, indexed by --setting.
model_list = ["cnn_x4", "cnn_x3", "cnn_x2", "cnn_x3_mlp_0", "cnn_x3_mlp_64_128", "cnn_x3_mlp_128x2"]


def collect(bitrate=128, setting=0):
    """Report peak validation accuracy and the epoch where the 10-epoch
    moving average first reaches 99% of that peak.

    Reads ``<model>/bitrate_<bitrate>/history.csv`` (header row skipped),
    taking the accuracy from the 4th CSV column.

    :param bitrate: audio bitrate identifying the run directory.
    :param setting: index into ``model_list`` selecting the model.
    """
    model = model_list[setting]
    history_path = os.path.join(model, "bitrate_" + str(bitrate) + "/history.csv")
    # BUG FIX: the file handle was never closed, and the prints below used
    # Python-2 statement syntax (a SyntaxError under Python 3).
    with open(history_path, "r") as f:
        acc_list = [float(line.split(",")[3]) for line in f.readlines()[1:]]
    max_acc = max(acc_list)
    print(max_acc, max_acc * 0.985)
    for i, acc in enumerate(acc_list):
        # NOTE(review): windows near the end hold <10 entries but still divide
        # by 10.0, underestimating the mean — confirm this is intended.
        if sum(acc_list[i:i+10]) / 10.0 > 0.99 * max_acc:
            print(i)
            break
|
||
if __name__ == "__main__":
    # Sweep every bitrate when --all was given, else just the requested one.
    targets = [8, 16, 32, 64, 96, 128] if ALL else [bitrate]
    for br in targets:
        collect(br, setting)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Restrict `from <package> import *` to the names listed here.
__all__ = ["models"]  # NOTE(review): presumably a sibling "models" module — confirm package layout
Oops, something went wrong.