From 1d02f74e4a9f7cb6860434826ed0c59292f00920 Mon Sep 17 00:00:00 2001
From: wangjksjtu
Date: Wed, 11 Apr 2018 20:41:26 +0800
Subject: [PATCH] Helmholtz-DL (ACMMM 2018)

---
 .gitignore                                         |   8 +
 README.md                                          |   1 +
 audio/data/convert.py                              |  27 ++
 audio/data/prepare-all.sh                          |   6 +
 audio/data/prepare.py                              | 196 ++++++++++++++
 audio/data/quantize.py                             |  23 ++
 audio/logs/collect.py                              |  29 +++
 audio/logs/collect_epoch.py                        |  40 +++
 audio/model/__init__.py                            |   1 +
 audio/model/datautils.py                           | 245 ++++++++++++++++++
 audio/model/models.py                              | 193 ++++++++++++++
 .../parameters/model_101412_cnn_x3_mlp_0.txt       |  43 +++
 .../model_170436_cnn_x4_mlp_128.txt                |  57 ++++
 .../model_3715460_cnn_x2_mlp_128.txt               |  41 +++
 .../model_427492_cnn_x3_mlp_64_128.txt             |  55 ++++
 .../model_808356_cnn_x3_mlp_128.txt                |  49 ++++
 .../model_824868_cnn_x3_mlp_128x2.txt              |  55 ++++
 audio/scripts/train-all-cnns.sh                    |   6 +
 audio/scripts/train-all-settings.sh                |   5 +
 audio/train.py                                     | 108 ++++++++
 cifar/data/__init__.py                             |   0
 cifar/data/convert_cifar10.py                      | 108 ++++++++
 cifar/data/prepare.sh                              |   1 +
 cifar/data/prepare_cifar.py                        |  80 ++++++
 cifar/data/restore_images.py                       |  36 +++
 cifar/data/validate.py                             |  40 +++
 cifar/logs/README.md                               |   1 +
 cifar/models.py                                    | 164 ++++++++++++
 .../model_1079562_cnns-dense-64.txt                |  45 ++++
 cifar/parameters/model_1144138_all-cnnsx2.txt      |  57 ++++
 .../model_1276618_cnns-x2-dense-128.txt            |  57 ++++
 .../model_1620298_cnns-dense-128.txt               |  45 ++++
 .../model_1686090_cnns-dense-128-256.txt           |  49 ++++
 cifar/parameters/model_701386_all-cnns.txt         |  45 ++++
 cifar/provider.py                                  |  33 +++
 cifar/scripts/train-all-cnns.sh                    |   9 +
 cifar/scripts/train-all-settings.sh                |   5 +
 cifar/train.py                                     | 150 +++++++++++
 38 files changed, 2113 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 audio/data/convert.py
 create mode 100644 audio/data/prepare-all.sh
 create mode 100755 audio/data/prepare.py
 create mode 100644 audio/data/quantize.py
 create mode 100644 audio/logs/collect.py
 create mode 100644 audio/logs/collect_epoch.py
 create mode 100644 audio/model/__init__.py
 create mode 100644 audio/model/datautils.py
 create mode 100644 audio/model/models.py
 create mode 100644 audio/model/parameters/model_101412_cnn_x3_mlp_0.txt
 create mode 100644 audio/model/parameters/model_170436_cnn_x4_mlp_128.txt
 create mode 100644 audio/model/parameters/model_3715460_cnn_x2_mlp_128.txt
 create mode 100644 audio/model/parameters/model_427492_cnn_x3_mlp_64_128.txt
 create mode 100644 audio/model/parameters/model_808356_cnn_x3_mlp_128.txt
 create mode 100644 audio/model/parameters/model_824868_cnn_x3_mlp_128x2.txt
 create mode 100644 audio/scripts/train-all-cnns.sh
 create mode 100644 audio/scripts/train-all-settings.sh
 create mode 100755 audio/train.py
 create mode 100644 cifar/data/__init__.py
 create mode 100644 cifar/data/convert_cifar10.py
 create mode 100644 cifar/data/prepare.sh
 create mode 100644 cifar/data/prepare_cifar.py
 create mode 100644 cifar/data/restore_images.py
 create mode 100644 cifar/data/validate.py
 create mode 100644 cifar/logs/README.md
 create mode 100644 cifar/models.py
 create mode 100644 cifar/parameters/model_1079562_cnns-dense-64.txt
 create mode 100644 cifar/parameters/model_1144138_all-cnnsx2.txt
 create mode 100644 cifar/parameters/model_1276618_cnns-x2-dense-128.txt
 create mode 100644 cifar/parameters/model_1620298_cnns-dense-128.txt
 create mode 100644 cifar/parameters/model_1686090_cnns-dense-128-256.txt
 create mode 100644 cifar/parameters/model_701386_all-cnns.txt
 create mode 100644 cifar/provider.py
 create mode 100644 cifar/scripts/train-all-cnns.sh
 create mode 100644 cifar/scripts/train-all-settings.sh
 create mode 100644 cifar/train.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3df399e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+*.h5
+*.hdf5
+*.pyc
+cifar/data/quality*
+cifar/data/cifar*
+audio/data/Samples
+audio/data/bitrate*
+audio/data/Preproc*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dd9cf53
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+## The Helmholtz Method: *Using Perceptual Compression to Reduce Machine Learning Complexity*
diff --git a/audio/data/convert.py b/audio/data/convert.py
new file mode 100644
index 0000000..4c94984
--- /dev/null
+++ b/audio/data/convert.py
@@ -0,0 +1,27 @@
+# Decode the MP3s produced by quantize.py back to WAV with ffmpeg.
+import os
+import glob
+
+def quantitize(input_dir, bitrate=128):
+    input_dir = input_dir + str(bitrate)
+    train_dir = glob.glob(os.path.join(input_dir, "*"))
+    print(train_dir)
+    for path in train_dir:
+        files = glob.glob(os.path.join(path, "*.mp3"))
+        for wav_name in files:
+            name = wav_name[wav_name.rindex("/")+1:]
+            out_dir = os.path.join("Bitrate_" + str(bitrate),
+                                   wav_name[wav_name.index("/")+1:wav_name.rindex("/")])
+            out_name = name.split(".")[0] + ".wav"
+            if not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            # print("lame -b %s %s %s" % (str(bitrate), wav_name, os.path.join(out_dir, out_name)))
+            # os.system("lame -b %s %s %s" % (str(bitrate), wav_name, os.path.join(out_dir, out_name)))
+            os.system('ffmpeg -i %s %s' % (wav_name, os.path.join(out_dir, out_name)))
+
+bitrate_list = [160]
+
+if __name__ == "__main__":
+    for bitrate in bitrate_list:
+        quantitize("bitrate_", bitrate)
+        print("bitrate_" + str(bitrate))
diff --git a/audio/data/prepare-all.sh b/audio/data/prepare-all.sh
new file mode 100644
index 0000000..e147e2f
--- /dev/null
+++ b/audio/data/prepare-all.sh
@@ -0,0 +1,6 @@
+# (python prepare.py -i bitrate_8 -o Preproc_8)&
+# (python prepare.py -i bitrate_16 -o Preproc_16)&
+# (python prepare.py -i bitrate_32 -o Preproc_32)&
+# (python prepare.py -i bitrate_64 -o Preproc_64)&
+# (python prepare.py -i bitrate_96 -o Preproc_96)&
+# (python prepare.py -i bitrate_128 -o Preproc_128)&
diff --git a/audio/data/prepare.py b/audio/data/prepare.py
new file mode 100755
index 0000000..6e6b3d5
--- /dev/null
+++ b/audio/data/prepare.py
@@ -0,0 +1,196 @@
+#!
/usr/bin/env python3 + +from __future__ import print_function +import numpy as np +from panotti.datautils import * +import librosa +from audioread import NoBackendError +import os +from PIL import Image +from functools import partial +from imageio import imwrite +import multiprocessing as mp +from utils.resolve_osx_aliases import resolve_osx_alias + +# this is either just the regular shape, or it returns a leading 1 for mono +def get_canonical_shape(signal): + if len(signal.shape) == 1: + return (1, signal.shape[0]) + else: + return signal.shape + + +def find_max_shape(path, mono=False, sr=None, dur=None, clean=False): + if (mono) and (sr is not None) and (dur is not None): # special case for speedy testing + return [1, int(sr*dur)] + shapes = [] + for dirname, dirnames, filenames in os.walk(path): + for filename in filenames: + if not filename.startswith('.'): # ignore hidden files + filepath = os.path.join(dirname, filename) + try: + signal, sr = librosa.load(filepath, mono=mono, sr=sr) + except NoBackendError as e: + print("Could not open audio file {}".format(filepath)) + raise e + if (clean): # Just take the first file and exit + return get_canonical_shape(signal) + shapes.append(get_canonical_shape(signal)) + + return (max(s[0] for s in shapes), max(s[1] for s in shapes)) + + +def convert_one_file(printevery, class_index, class_files, nb_classes, classname, n_load, dirname, resample, mono, + already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase, file_index): + infilename = class_files[file_index] + audio_path = dirname + '/' + infilename + + if (0 == file_index % printevery) or (file_index+1 == len(class_files)): + print("\r Processing class ",class_index+1,"/",nb_classes,": \'",classname, + "\', File ",file_index+1,"/", n_load,": ",audio_path," ", + sep="",end="\r") + sr = None + if (resample is not None): + sr = resample + + signal, sr = load_audio(audio_path, mono=mono, sr=sr) + + # Reshape / pad so all output files have same shape + shape = get_canonical_shape(signal) # either the signal shape or a leading one + if (shape != signal.shape): # this only evals to true for mono + signal = np.reshape(signal, shape) + #print("...reshaped mono so new shape = ",signal.shape, end="") + #print(", max_shape = ",max_shape,end="") + padded_signal = np.zeros(max_shape) # (previously found max_shape) allocate a long signal of zeros + use_shape = list(max_shape[:]) + use_shape[0] = min( shape[0], max_shape[0] ) + use_shape[1] = min( shape[1], max_shape[1] ) + #print(", use_shape = ",use_shape) + padded_signal[:use_shape[0], :use_shape[1]] = signal[:use_shape[0], :use_shape[1]] + + layers = make_layered_melgram(padded_signal, sr, mels=mels, phase=phase) + + if not already_split: + if (file_index >= n_train): + outsub = "Test/" + else: + outsub = "Train/" + else: + outsub = subdir + + outfile = outpath + outsub + classname + '/' + infilename+'.'+out_format + save_melgram(outfile, layers, out_format=out_format) + return + + +def preprocess_dataset(inpath="Samples/", outpath="Preproc/", train_percentage=0.8, resample=None, already_split=False, + sequential=False, mono=False, dur=None, clean=False, out_format='npy', mels=96, phase=False): + + if (resample is not None): + print(" Will be resampling at",resample,"Hz") + + if (True == already_split): + print(" Data is already split into Train & Test") + class_names = get_class_names(path=inpath+"Train/") # get the names of the subdirectories + sampleset_subdirs = ["Train/","Test/"] + else: + print(" Will be imposing 80-20 
(Train-Test) split") + class_names = get_class_names(path=inpath) # get the names of the subdirectories + sampleset_subdirs = ["./"] + + if (True == sequential): + print(" Sequential ordering") + else: + print(" Shuffling ordering") + + print(" Finding max shape...") + max_shape = find_max_shape(inpath, mono=mono, sr=resample, dur=dur, clean=clean) + print(''' Padding all files with silence to fit shape: + Channels : {} + Samples : {} + '''.format(max_shape[0], max_shape[1])) + nb_classes = len(class_names) + print("",len(class_names),"classes. class_names = ",class_names) + + train_outpath = outpath+"Train/" + test_outpath = outpath+"Test/" + if not os.path.exists(outpath): + os.mkdir( outpath ); # make a new directory for preproc'd files + os.mkdir( train_outpath ); + os.mkdir( test_outpath ); + + parallel = False # set to false for debugging. when parallel jobs crash, usually no error messages are given, the system just hangs + if (parallel): + cpu_count = os.cpu_count() + print("",cpu_count,"CPUs detected: Parallel execution across",cpu_count,"CPUs") + else: + cpu_count = 1 + print("Serial execution") + + + for subdir in sampleset_subdirs: #non-class subdirs of Samples (in case already split into Test/ Train; see above) + + + for class_index, classname in enumerate(class_names): # go through the classes + print("") # at the start of each new class, newline + + # make new Preproc/ subdirectories for class + if not os.path.exists(train_outpath+classname): + os.mkdir( train_outpath+classname ); + os.mkdir( test_outpath+classname ); + dirname = inpath+subdir+classname + class_files = list(listdir_nohidden(dirname)) # all filenames for this class, skip hidden files + class_files.sort() + if (not sequential): # shuffle directory listing (e.g. to avoid alphabetic order) + np.random.shuffle(class_files) # shuffle directory listing (e.g. 
to avoid alphabetic order)
+
+            n_files = len(class_files)
+            n_load = n_files    # sometimes we may multiply by a small number for debugging
+            n_train = int( n_load * train_percentage)
+
+            printevery = 20     # how often to output status messages when processing lots of files
+
+            file_indices = tuple( range(len(class_files)) )
+
+            if (not parallel):
+                for file_index in file_indices:    # loop over all files
+                    convert_one_file(printevery, class_index, class_files, nb_classes, classname, n_load, dirname,
+                        resample, mono, already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase, file_index)
+            else:
+                pool = mp.Pool(cpu_count)
+                pool.map(partial(convert_one_file, printevery, class_index, class_files, nb_classes, classname, n_load, dirname,
+                    resample, mono, already_split, n_train, outpath, subdir, max_shape, clean, out_format, mels, phase), file_indices)
+                pool.close()    # shut down the pool
+
+    print("")    # at the very end, newline
+    return
+
+if __name__ == '__main__':
+    import platform
+    import argparse
+    parser = argparse.ArgumentParser(description="preprocess_data: convert samples to a Python-friendly data format for faster loading")
+    parser.add_argument("-a", "--already", help="data is already split into Test & Train (default is to add an 80-20 split)", action="store_true")
+    parser.add_argument("-s", "--sequential", help="don't randomly shuffle data for train/test split", action="store_true")
+    parser.add_argument("-m", "--mono", help="convert input audio to mono", action="store_true")
+    parser.add_argument("-r", "--resample", type=int, default=44100, help="resample input audio to this rate in Hz")
+    parser.add_argument('-d', "--dur", type=float, default=None, help='Max duration (in seconds) of each clip')
+    parser.add_argument('-c', "--clean", help="Assume 'clean data'; do not check to find max shape (faster)", action='store_true')
+    parser.add_argument('-f','--format', help="format of output file (npz, jpeg, png, etc). 
Default = npz", type=str, default='npz') + parser.add_argument('-i','--inpath', help="input directory for audio samples (default='Samples')", type=str, default='Samples') + parser.add_argument('-o','--outpath', help="output directory for spectrograms (default='Preproc')", type=str, default='Preproc') + parser.add_argument("--mels", help="number of mel coefficients to use in spectrograms", type=int, default=96) + parser.add_argument("--phase", help="Include phase information as extra channels", action='store_true') + + args = parser.parse_args() + if (('Darwin' == platform.system()) and (not args.mono)): + # bug/feature in OS X that causes np.dot() to sometimes hang if multiprocessing is running + mp.set_start_method('forkserver', force=True) # hopefully this here makes it never hang + print(" WARNING: Using stereo files w/ multiprocessing on OSX may cause the program to hang.") + print(" This is because of a mismatch between the way Python multiprocessing works and some Apple libraries") + print(" If it hangs, try running with mono only (-m) or the --clean option, or turn off parallelism") + print(" See https://github.com/numpy/numpy/issues/5752 for more on this.") + print("") + + preprocess_dataset(inpath=args.inpath+'/', outpath=args.outpath+'/', resample=args.resample, already_split=args.already, sequential=args.sequential, mono=args.mono, + dur=args.dur, clean=args.clean, out_format=args.format, mels=args.mels, phase=args.phase) diff --git a/audio/data/quantize.py b/audio/data/quantize.py new file mode 100644 index 0000000..1bb4adc --- /dev/null +++ b/audio/data/quantize.py @@ -0,0 +1,23 @@ +import os +import glob + +def quantitize(input_dir, bitrate=128): + train_dir = glob.glob(os.path.join(input_dir, "*")) + for path in train_dir: + files = glob.glob(os.path.join(path, "*.wav")) + for wav_name in files: + name = wav_name[wav_name.rindex("/")+1:] + out_dir = os.path.join("bitrate_" + str(bitrate), + wav_name[wav_name.index("/")+1:wav_name.rindex("/")]) + out_name = name.split(".")[0] + ".mp3" + if not os.path.exists(out_dir): + os.makedirs(out_dir) + # print "lame -b %s %s %s" % (str(bitrate), wav_name, os.path.join(out_dir, out_name)) + os.system("lame -b %s %s %s" % (str(bitrate), wav_name, os.path.join(out_dir, out_name))) + +bitrate_list = [16, 32, 64, 96, 128, 160] + +if __name__ == "__main__": + for bitrate in bitrate_list: + quantitize("Samples", bitrate) + print ("bitrate_" + str(bitrate)) diff --git a/audio/logs/collect.py b/audio/logs/collect.py new file mode 100644 index 0000000..f4fac7f --- /dev/null +++ b/audio/logs/collect.py @@ -0,0 +1,29 @@ +import os +import argparse +parser = argparse.ArgumentParser() + +parser.add_argument('--bitrate', type=int, default=128, help='Audio bitrate [default: 128kb/s]') +parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]') +parser.add_argument('--all', type=bool, default=False, help='Collect all models [default: False]') + +FLAGS = parser.parse_args() + +bitrate = FLAGS.bitrate +setting = FLAGS.setting +ALL = FLAGS.all + +model_list = ["cnn_x4", "cnn_x3", "cnn_x2", "cnn_x3_mlp_0", "cnn_x3_mlp_64_128", "cnn_x3_mlp_128x2"] + +def collect(bitrate=128, setting=0): + model = model_list[setting] + history_path = os.path.join(model, "bitrate_" + str(bitrate) + "/history.csv") + os.system("""cat %s | awk -F"," '{print $4}' | sort | tail -n 11 | head -n 10 | awk '{ total += $1 } END { print total/NR }'""" % history_path) + + +if __name__ == "__main__": + if not ALL: + collect(bitrate, setting) + 
else:
+        for bitrate in [8, 16, 32, 64, 96, 128]:
+            collect(bitrate, setting)
+
diff --git a/audio/logs/collect_epoch.py b/audio/logs/collect_epoch.py
new file mode 100644
index 0000000..e902cf9
--- /dev/null
+++ b/audio/logs/collect_epoch.py
@@ -0,0 +1,40 @@
+import os
+import argparse
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--bitrate', type=int, default=128, help='Audio bitrate [default: 128kb/s]')
+parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]')
+parser.add_argument('--all', type=bool, default=False, help='Collect all models [default: False]')  # NB: argparse's type=bool parses any non-empty string as True
+
+FLAGS = parser.parse_args()
+
+bitrate = FLAGS.bitrate
+setting = FLAGS.setting
+ALL = FLAGS.all
+
+model_list = ["cnn_x4", "cnn_x3", "cnn_x2", "cnn_x3_mlp_0", "cnn_x3_mlp_64_128", "cnn_x3_mlp_128x2"]
+
+def collect(bitrate=128, setting=0):
+    model = model_list[setting]
+    history_path = os.path.join(model, "bitrate_" + str(bitrate) + "/history.csv")
+    f = open(history_path, "r")
+    acc_list = []
+    for i, line in enumerate(f.readlines()):
+        if i == 0:    # skip the CSV header row
+            continue
+        else:
+            acc_list.append(float(line.split(",")[3]))
+    max_acc = max(acc_list)
+    print(max_acc, max_acc * 0.985)
+    for i, acc in enumerate(acc_list):    # find the first epoch whose 10-epoch mean reaches 99% of max
+        if sum(acc_list[i:i+10]) / 10.0 > 0.99 * max_acc:
+            print(i)
+            break
+
+if __name__ == "__main__":
+    if not ALL:
+        collect(bitrate, setting)
+    else:
+        for bitrate in [8, 16, 32, 64, 96, 128]:
+            collect(bitrate, setting)
diff --git a/audio/model/__init__.py b/audio/model/__init__.py
new file mode 100644
index 0000000..3c6e61b
--- /dev/null
+++ b/audio/model/__init__.py
@@ -0,0 +1 @@
+__all__ = ["models"]
diff --git a/audio/model/datautils.py b/audio/model/datautils.py
new file mode 100644
index 0000000..50c5aa3
--- /dev/null
+++ b/audio/model/datautils.py
@@ -0,0 +1,245 @@
+from __future__ import print_function
+import numpy as np
+import librosa
+import platform
+import os
+from os.path import isfile, splitext
+from imageio import imread, imwrite
+import glob
+from audioread import NoBackendError                      # needed by load_audio below
+from utils.resolve_osx_aliases import resolve_osx_alias   # as in prepare.py; adjust the path if utils/ lives elsewhere
+
+def listdir_nohidden(path, subdirs_only=False, skip_csv=True):
+    '''
+    ignore hidden files. call should be inside list(). subdirs_only means it ignores regular files
+    '''
+    for f in os.listdir(path):
+        if not f.startswith('.'):    # this skips the hidden files
+            if ((False == subdirs_only) or (os.path.isdir(path + "/" + f))):
+                if ('.csv' == os.path.splitext(f)[1]) and (skip_csv):
+                    pass
+                else:
+                    yield f
+
+
+# class names are subdirectory names in Preproc/ directory
+def get_class_names(path="Preproc/Train/", sort=True):
+    if (sort):
+        class_names = sorted(list(listdir_nohidden(path, subdirs_only=True)))    # sorted alphabetically for consistency with "ls" command
+    else:
+        class_names = listdir_nohidden(path)    # not in the same order as "ls", because Python
+    return class_names
+
+
+def get_total_files(class_names, path="Preproc/Train/"):
+    sum_total = 0
+    for subdir in class_names:
+        files = os.listdir(path + subdir)
+        n_files = len(files)
+        sum_total += n_files
+    return sum_total
+
+def save_melgram(outfile, melgram, out_format='npz'):
+    channels = melgram.shape[1]
+    melgram = melgram.astype(np.float16)
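+    # (float16 halves disk usage versus float32; dB-scaled mel values fit its range comfortably)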
+    if (('jpeg' == out_format) or ('png' == out_format)) and (channels <= 4):
+        # this array is 'channels_first'; image files want channels last, so move
+        # axis 1 to the end; .squeeze() then removes unit-size axes
+        melgram = np.moveaxis(melgram, 1, 3).squeeze()
+        melgram = np.flip(melgram, 0)    # flip spectrogram image right-side-up before saving, for viewing
+        #print("first melgram.shape = ",melgram.shape,end="")
+        if (2 == channels):    # special case: 1=greyscale, 3=RGB, 4=RGBA -- there is no 2-channel image format
+            # pad a third (blue) channel of zeros, so channels becomes 3
+            # TODO: this is SLOW
+            b = np.zeros((melgram.shape[0], melgram.shape[1], 3))    # 3-channel array of zeros
+            b[:,:,:-1] = melgram    # fill the first two channels
+            imwrite(outfile, b, format=out_format)
+        else:
+            imwrite(outfile, melgram, format=out_format)
+    elif ('npy' == out_format):
+        np.save(outfile, melgram)    # positional argument: np.save() accepts no 'melgram=' keyword
+    else:
+        np.savez_compressed(outfile, melgram=melgram)    # default is a compressed npz file
+    return
+
+
+def load_audio(audio_path, mono=None, sr=None, convertOSXaliases=True):    # wrapper for librosa.load
+    try:
+        signal, sr = librosa.load(audio_path, mono=mono, sr=sr)
+    except NoBackendError as e:
+        if ('Darwin' == platform.system()):    # handle OS X alias files gracefully
+            source = resolve_osx_alias(audio_path, convert=convertOSXaliases, already_checked_os=True)    # convert to symlinks for next time
+            try:
+                signal, sr = librosa.load(source, mono=mono, sr=sr)
+            except NoBackendError as e:
+                print("\n*** ERROR: Could not open audio file {}".format(audio_path), "\n", flush=True)
+                raise e
+        else:
+            print("\n*** ERROR: Could not open audio file {}".format(audio_path), "\n", flush=True)
+            raise e
+    return signal, sr
+
+
+def load_melgram(file_path):
+    # auto-detect the load method based on the filename extension
+    name, extension = os.path.splitext(file_path)
+    if ('.npy' == extension):
+        melgram = np.load(file_path)
+    elif ('.npz' == extension):    # compressed npz file (preferred)
+        with np.load(file_path) as data:
+            melgram = data['melgram']
+    elif ('.png' == extension) or ('.jpeg' == extension):
+        arr = imread(file_path)
+        melgram = np.reshape(arr, (1, 1, arr.shape[0], arr.shape[1]))    # convert 2-d image
+        melgram = np.flip(melgram, 0)    # images are saved 'right-side up' but librosa presents them 'upside down'
+    else:
+        print("load_melgram: Error: unrecognized file extension '", extension, "' for file ", file_path, sep="")
+    return melgram
+
+
+def get_sample_dimensions(class_names, path='Preproc/Train/'):
+    classname = class_names[0]
+    audio_path = path + classname + '/'
+    infilename = os.listdir(audio_path)[0]
+    melgram = load_melgram(audio_path + infilename)
+    print(" get_sample_dimensions: " + infilename + ": melgram.shape = ", melgram.shape)
+    return melgram.shape
+
+
+def encode_class(class_name, class_names):    # makes a "one-hot" vector for the given class name
+    try:
+        idx = class_names.index(class_name)
+        vec = np.zeros(len(class_names))
+        vec[idx] = 1
+        return vec
+    except ValueError:
+        return None
+
+
+def decode_class(vec, class_names):    # recovers the class index from a one-hot vector
+    return int(np.argmax(vec))
+
+
+def shuffle_XY_paths(X, Y, paths):    # generates a randomized order, keeping X & Y (& paths) together
+    assert (X.shape[0] == Y.shape[0])
+    #print("shuffle_XY_paths: Y.shape[0], len(paths) = ", Y.shape[0], len(paths))
+    idx = np.array(range(Y.shape[0]))
+    np.random.shuffle(idx)
+    newX = np.copy(X)
+    newY = np.copy(Y)
+    newpaths = paths[:]
+    for i in range(len(idx)):
+        newX[i] = X[idx[i],:,:]
+        newY[i] = Y[idx[i],:]
+        newpaths[i] = paths[idx[i]]
+    return newX, newY, newpaths
+
+def make_melgram(mono_sig, sr, n_mels=128):    # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
+    #melgram = 
librosa.logamplitude(librosa.feature.melspectrogram(mono_sig, # latest librosa deprecated logamplitude in favor of amplitude_to_db + # sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:] + + melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig, + sr=sr, n_mels=n_mels))[np.newaxis,:,:,np.newaxis] # last newaxis is b/c tensorflow wants 'channels_last' order + + ''' + # librosa docs also include a perceptual CQT example: + CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1')) + freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1')) + perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max) + melgram = perceptual_CQT[np.newaxis,np.newaxis,:,:] + ''' + return melgram + +def make_phase_gram(mono_sig, sr, n_bins=128): + stft = librosa.stft(mono_sig)#, n_fft = (2*n_bins)-1) + magnitude, phase = librosa.magphase(stft) # we don't need magnitude + + # resample the phase array to match n_bins + phase = np.resize(phase, (n_bins, phase.shape[1]))[np.newaxis,:,:,np.newaxis] + return phase + + + +# turn multichannel audio as multiple melgram layers +def make_layered_melgram(signal, sr, mels=128, phase=False): + if (signal.ndim == 1): # given the way the preprocessing code is now, this may not get called + signal = np.reshape( signal, (1,signal.shape[0])) + + # get mel-spectrogram for each channel, and layer them into multi-dim array + for channel in range(signal.shape[0]): + melgram = make_melgram(signal[channel],sr, n_mels=mels) + + if (0 == channel): + layers = melgram + else: + layers = np.append(layers,melgram,axis=3) # we keep axis=0 free for keras batches, axis=3 means 'channels_last' + + if (phase): + phasegram = make_phase_gram(signal[channel],sr, n_bins=mels) + layers = np.append(layers,phasegram,axis=3) + return layers + + +def nearest_multiple( a, b ): # returns number smaller than a, which is the nearest multiple of b + return int(a/b) * b + + +# can be used for test dataset as well +def build_dataset(path="Preproc/Train/", load_frac=1.0, batch_size=None, tile=False): + + class_names = get_class_names(path=path) + print("class_names = ",class_names) + nb_classes = len(class_names) + + total_files = get_total_files(class_names, path=path) + total_load = int(total_files * load_frac) + if (batch_size is not None): # keras gets particular: dataset size must be mult. 
of batch_size + total_load = nearest_multiple( total_load, batch_size) + print(" total files = ",total_files,", going to load total_load = ",total_load) + + print("total files = ",total_files,", going to load total_load = ",total_load) + + # pre-allocate memory for speed (old method used np.concatenate, slow) + mel_dims = get_sample_dimensions(class_names,path=path) # get dims of sample data file + if (tile): + ldims = list(mel_dims) + ldims[3] = 3 + mel_dims = tuple(ldims) + print(" melgram dimensions: ",mel_dims) + X = np.zeros((total_load, mel_dims[1], mel_dims[2], mel_dims[3])) + Y = np.zeros((total_load, nb_classes)) + paths = [] + + load_count = 0 + for idx, classname in enumerate(class_names): + print("") + this_Y = np.array(encode_class(classname,class_names) ) + this_Y = this_Y[np.newaxis,:] + class_files = os.listdir(path+classname) + n_files = len(class_files) + n_load = int(n_files * load_frac) # n_load is how many files of THIS CLASS are expected to be loaded + printevery = 100 + + file_list = class_files[0:n_load] + for idx2, infilename in enumerate(file_list): + audio_path = path + classname + '/' + infilename + if (0 == idx2 % printevery) or (idx2+1 == len(class_files)): + print("\r Loading class ",idx+1,"/",nb_classes,": \'",classname, + "\', File ",idx2+1,"/", n_load,": ",audio_path," ", + sep="",end="") + + #auto-detect load method based on filename extension + melgram = load_melgram(audio_path) + if (tile) and (melgram.shape != mel_dims): + melgram = np.tile(melgram, 3) + elif (melgram.shape != mel_dims): + print("\n\n ERROR: mel_dims = ",mel_dims,", melgram.shape = ",melgram.shape) + X[load_count,:,:] = melgram + Y[load_count,:] = this_Y + paths.append(audio_path) + load_count += 1 + if (load_count >= total_load): # Abort loading files after last even multiple of batch size + break + + print("") + if ( load_count != total_load ): # check to make sure we loaded everything we thought we would + raise Exception("Loaded "+str(load_count)+" files but was expecting "+str(total_load) ) + + X, Y, paths = shuffle_XY_paths(X,Y,paths) # mix up classes, & files within classes + + return X, Y, paths, class_names diff --git a/audio/model/models.py b/audio/model/models.py new file mode 100644 index 0000000..a7aa7ca --- /dev/null +++ b/audio/model/models.py @@ -0,0 +1,193 @@ +from __future__ import print_function + +import keras +import tensorflow as tf +from keras.models import Sequential, Model, load_model, save_model +from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation +from keras.layers import Convolution2D, MaxPooling2D, Flatten, Conv2D +from keras.layers.normalization import BatchNormalization +from keras.layers.advanced_activations import ELU +from keras.optimizers import SGD, Adam + +from os.path import isfile +from tensorflow.python.client import device_lib +import h5py +import numpy as np + +def save_summary(model, header, suffix): + assert(suffix.split(".")[0] == "") + with open(header + suffix, 'w') as fh: + # Pass the file handle in as a lambda functions to make it callable + model.summary(print_fn=lambda x: fh.write(x + '\n')) + +# I have not attempted much optimization, however it *is* fairly understandable +def build_model(X_shape, nb_classes, cnn_layers=3, mlps=[128]): + # Inputs: + # X_shape = [ # spectrograms per batch, # audio channels, # spectrogram freq bins, # spectrogram time bins ] + # nb_classes = number of output n_classes + # nb_layers = number of conv-pooling sets in the CNN + from keras import backend as K + 
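+    # 'channels_last' must match datautils.make_layered_melgram, which stacks audio
+    # channels on axis 3, i.e. batches shaped (N, mels, time, channels)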
K.set_image_data_format('channels_last') # SHH changed on 3/1/2018 b/c tensorflow prefers channels_last + + nb_filters = 32 # number of convolutional filters = "feature maps" + kernel_size = (3, 3) # convolution kernel size + pool_size = (2, 2) # size of pooling area for max pooling + cl_dropout = 0.5 # conv. layer dropout + dl_dropout = 0.6 # dense layer dropout + + input_shape = (X_shape[1], X_shape[2], X_shape[3]) + model = Sequential() + model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape)) + model.add(BatchNormalization(axis=1)) + model.add(Activation('relu')) # Leave this relu & BN here. ELU is not good here (my experience) + + for layer in range(cnn_layers): # add more layers than just the first + model.add(Conv2D(nb_filters, kernel_size)) + #model.add(BatchNormalization(axis=1)) # ELU authors reccommend no BatchNorm. I confirm. + model.add(Activation('elu')) + model.add(MaxPooling2D(pool_size=pool_size)) + model.add(Dropout(cl_dropout)) + + model.add(Flatten()) + for units in mlps: + model.add(Dense(units)) # 128 is 'arbitrary' for now + #model.add(Activation('relu')) # relu (no BN) works ok here, however ELU works a bit better... + model.add(Activation('elu')) + model.add(Dropout(dl_dropout)) + model.add(Dense(nb_classes)) + model.add(Activation("softmax")) + return model + +# Used for when you want to use weights from a previously-trained model, +# with a different set/number of output classes +def attach_new_weights(model, new_nb_classes, n_pop = 2, n_p_dense = None, last_dropout = 0.6): + + # "penultimate" dense layer was originally 64 or 128. can change it here + if (n_p_dense is not None): + n_pop = 5 + + # pop off the last n_pop layers. We definitely want the last 2: Activation() and Dense(nb_classes) + for i in range(n_pop): + model.pop() + + if (n_p_dense is not None): + model.add(Dense(n_p_dense)) + model.add(Activation('elu')) + model.add(Dropout(last_dropout)) + + # attach final output layers + model.add(Dense(new_nb_classes)) # new_nb_classes = new number of output classes + model.add(Activation("softmax")) + return model + + +# Next two routines are for attaching class names inside the saved model .hdf5 weights file +# From https://stackoverflow.com/questions/44310448/attaching-class-labels-to-a-keras-model +def load_model_ext(filepath, custom_objects=None): + model = load_model(filepath, custom_objects=custom_objects) # load the model normally + + #--- Now load it again and look for additional useful metadata + f = h5py.File(filepath, mode='r') + + # initialize class_names with numbers (strings) in case hdf5 file doesn't have any + output_length = model.layers[-1].output_shape[1] + class_names = [str(x) for x in range(output_length)] + if 'class_names' in f.attrs: + class_names = f.attrs.get('class_names').tolist() + class_names = [x.decode() for x in class_names] + f.close() + return model, class_names + +def save_model_ext(model, filepath, overwrite=True, class_names=None): + save_model(model, filepath, overwrite) + if class_names is not None: + f = h5py.File(filepath, mode='a') + f.attrs['class_names'] = np.array(class_names, dtype='S') # have to encode it somehow + f.close() + + +# Freezing speeds up training by only declaring all but the last leave_last +# layers as non-trainable; but likely results in lower accuracy +# NOTE: In practice this achieves so little that I don't even use this: +# Most of the model parameters are in the last few layers anyway +def freeze_layers(model, train_last=3): + num_layers = len(model.layers) + 
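+    # e.g. a 20-layer model with train_last=3 freezes the first 17 layers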
freeze_layers = min( num_layers - train_last, num_layers ) # any train_last too big, freezes whole model + if (train_last < 0): # special flag to disable freezing + freeze_layers = 0 + print("Freezing ",freeze_layers,"/",num_layers," layers of model") + for i in range(freeze_layers): + model.layers[i].trainable = False + return model + + +# This is the main routine for setting up a model +def setup_model(X, class_names, try_checkpoint=True, + weights_file='weights.hdf5', quiet=True, missing_weights_fatal=False, multi_tag=False, setting=0): + ''' In the following, the reason we hang on to & return serial_model, + is because Keras can't save parallel models, but according to fchollet + the serial & parallel versions will always share the same weights + (Strange but true!) + ''' + + # Here's where one might 'swap out' different neural network 'model' choices + if setting == 0: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=3, mlps=[]) + elif setting == 1: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=4) + elif setting == 2: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=3, mlps=[64, 128]) + elif setting == 3: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=3) + elif setting == 4: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=3, mlps=[128, 128]) + elif setting == 5: + serial_model = build_model(X.shape, nb_classes=len(class_names), cnn_layers=2) + else: + raise NotImplementedError("The architecture is not implemented!") + + #serial_model = old_model(X.shape, nb_classes=len(class_names), nb_layers=nb_layers) + #serial_model = imageModels(X, nb_classes=len(class_names)) + + # don't bother with freezing layers, at least with the hope of trianing on a laptop. doesn't speed up by more than a factor of 2. + # serial_model = freeze_layers(serial_model, train_last = 3) + + # Initialize weights using checkpoint if it exists. + if (try_checkpoint): + print("Looking for previous weights...") + if ( isfile(weights_file) ): + print ('Weights file detected. Loading from ',weights_file) + loaded_model = load_model(weights_file) # strip any previous parallel part, to be added back in later + serial_model.set_weights( loaded_model.get_weights() ) # assign weights based on checkpoint + else: + if (missing_weights_fatal): + print("Need weights file to continue. 
Aborting") + assert(not missing_weights_fatal) + else: + print('No weights file detected, so starting from scratch.') + + + opt = 'adadelta' # Adam(lr = 0.00001) # So far, adadelta seems to work the best of things I've tried + metrics = ['accuracy'] + + if (multi_tag): # multi_tag means more than one class can be 'chosen' at a time; default is 'only one' + loss = 'binary_crossentropy' + else: + loss = 'categorical_crossentropy' + + serial_model.compile(loss=loss, optimizer=opt, metrics=metrics) + + if (not quiet): + serial_model.summary() # print out the model layers + + return serial_model # fchollet says to hang on to the serial model for checkpointing + +if __name__ == "__main__": + model_list = ["cnn_x3_mlp_0", "cnn_x4_mlp_128", "cnn_x3_mlp_64_128", "cnn_x3_mlp_128", "cnn_x3_mlp_128x2", "cnn_x2_mlp_128"] + parameters = [] + for setting in range(6): + model = setup_model(np.zeros((1, 96, 173, 1)),[0]*12, try_checkpoint=False, setting=setting) + save_summary(model, "parameters/model_" + str(model.count_params()) + "_" + model_list[setting], ".txt") + print (model_list[setting] + ": " + str(model.count_params())) + parameters.append(model.count_params()) + diff --git a/audio/model/parameters/model_101412_cnn_x3_mlp_0.txt b/audio/model/parameters/model_101412_cnn_x3_mlp_0.txt new file mode 100644 index 0000000..0dca071 --- /dev/null +++ b/audio/model/parameters/model_101412_cnn_x3_mlp_0.txt @@ -0,0 +1,43 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_1 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_1 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_1 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_2 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_2 (Activation) (None, 92, 169, 32) 0 +_________________________________________________________________ +max_pooling2d_1 (MaxPooling2 (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_1 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_3 (Conv2D) (None, 44, 82, 32) 9248 +_________________________________________________________________ +activation_3 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_2 (MaxPooling2 (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_2 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +conv2d_4 (Conv2D) (None, 20, 39, 32) 9248 +_________________________________________________________________ +activation_4 (Activation) (None, 20, 39, 32) 0 +_________________________________________________________________ +max_pooling2d_3 (MaxPooling2 (None, 10, 19, 32) 0 +_________________________________________________________________ +dropout_3 (Dropout) (None, 10, 19, 32) 0 +_________________________________________________________________ +flatten_1 (Flatten) (None, 6080) 0 +_________________________________________________________________ +dense_1 (Dense) (None, 12) 72972 +_________________________________________________________________ +activation_5 (Activation) (None, 12) 0 
+================================================================= +Total params: 101,412 +Trainable params: 101,224 +Non-trainable params: 188 +_________________________________________________________________ diff --git a/audio/model/parameters/model_170436_cnn_x4_mlp_128.txt b/audio/model/parameters/model_170436_cnn_x4_mlp_128.txt new file mode 100644 index 0000000..3b1f55c --- /dev/null +++ b/audio/model/parameters/model_170436_cnn_x4_mlp_128.txt @@ -0,0 +1,57 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_5 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_2 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_6 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_6 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_7 (Activation) (None, 92, 169, 32) 0 +_________________________________________________________________ +max_pooling2d_4 (MaxPooling2 (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_4 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_7 (Conv2D) (None, 44, 82, 32) 9248 +_________________________________________________________________ +activation_8 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_5 (MaxPooling2 (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_5 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +conv2d_8 (Conv2D) (None, 20, 39, 32) 9248 +_________________________________________________________________ +activation_9 (Activation) (None, 20, 39, 32) 0 +_________________________________________________________________ +max_pooling2d_6 (MaxPooling2 (None, 10, 19, 32) 0 +_________________________________________________________________ +dropout_6 (Dropout) (None, 10, 19, 32) 0 +_________________________________________________________________ +conv2d_9 (Conv2D) (None, 8, 17, 32) 9248 +_________________________________________________________________ +activation_10 (Activation) (None, 8, 17, 32) 0 +_________________________________________________________________ +max_pooling2d_7 (MaxPooling2 (None, 4, 8, 32) 0 +_________________________________________________________________ +dropout_7 (Dropout) (None, 4, 8, 32) 0 +_________________________________________________________________ +flatten_2 (Flatten) (None, 1024) 0 +_________________________________________________________________ +dense_2 (Dense) (None, 128) 131200 +_________________________________________________________________ +activation_11 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_8 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_3 (Dense) (None, 12) 1548 +_________________________________________________________________ +activation_12 (Activation) (None, 12) 0 +================================================================= +Total params: 170,436 +Trainable params: 170,248 +Non-trainable params: 188 
+_________________________________________________________________ diff --git a/audio/model/parameters/model_3715460_cnn_x2_mlp_128.txt b/audio/model/parameters/model_3715460_cnn_x2_mlp_128.txt new file mode 100644 index 0000000..d76b2db --- /dev/null +++ b/audio/model/parameters/model_3715460_cnn_x2_mlp_128.txt @@ -0,0 +1,41 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_22 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_6 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_33 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_23 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_34 (Activation) (None, 92, 169, 32) 0 +_________________________________________________________________ +max_pooling2d_17 (MaxPooling (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_23 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_24 (Conv2D) (None, 44, 82, 32) 9248 +_________________________________________________________________ +activation_35 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_18 (MaxPooling (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_24 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +flatten_6 (Flatten) (None, 28864) 0 +_________________________________________________________________ +dense_12 (Dense) (None, 128) 3694720 +_________________________________________________________________ +activation_36 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_25 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_13 (Dense) (None, 12) 1548 +_________________________________________________________________ +activation_37 (Activation) (None, 12) 0 +================================================================= +Total params: 3,715,460 +Trainable params: 3,715,272 +Non-trainable params: 188 +_________________________________________________________________ diff --git a/audio/model/parameters/model_427492_cnn_x3_mlp_64_128.txt b/audio/model/parameters/model_427492_cnn_x3_mlp_64_128.txt new file mode 100644 index 0000000..6e6b91d --- /dev/null +++ b/audio/model/parameters/model_427492_cnn_x3_mlp_64_128.txt @@ -0,0 +1,55 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_10 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_3 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_13 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_11 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_14 (Activation) (None, 92, 169, 32) 0 
+_________________________________________________________________ +max_pooling2d_8 (MaxPooling2 (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_9 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_12 (Conv2D) (None, 44, 82, 32) 9248 +_________________________________________________________________ +activation_15 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_9 (MaxPooling2 (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_10 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +conv2d_13 (Conv2D) (None, 20, 39, 32) 9248 +_________________________________________________________________ +activation_16 (Activation) (None, 20, 39, 32) 0 +_________________________________________________________________ +max_pooling2d_10 (MaxPooling (None, 10, 19, 32) 0 +_________________________________________________________________ +dropout_11 (Dropout) (None, 10, 19, 32) 0 +_________________________________________________________________ +flatten_3 (Flatten) (None, 6080) 0 +_________________________________________________________________ +dense_4 (Dense) (None, 64) 389184 +_________________________________________________________________ +activation_17 (Activation) (None, 64) 0 +_________________________________________________________________ +dropout_12 (Dropout) (None, 64) 0 +_________________________________________________________________ +dense_5 (Dense) (None, 128) 8320 +_________________________________________________________________ +activation_18 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_13 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_6 (Dense) (None, 12) 1548 +_________________________________________________________________ +activation_19 (Activation) (None, 12) 0 +================================================================= +Total params: 427,492 +Trainable params: 427,304 +Non-trainable params: 188 +_________________________________________________________________ diff --git a/audio/model/parameters/model_808356_cnn_x3_mlp_128.txt b/audio/model/parameters/model_808356_cnn_x3_mlp_128.txt new file mode 100644 index 0000000..bd1d510 --- /dev/null +++ b/audio/model/parameters/model_808356_cnn_x3_mlp_128.txt @@ -0,0 +1,49 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_14 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_4 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_20 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_15 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_21 (Activation) (None, 92, 169, 32) 0 +_________________________________________________________________ +max_pooling2d_11 (MaxPooling (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_14 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_16 (Conv2D) (None, 44, 82, 
32) 9248 +_________________________________________________________________ +activation_22 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_12 (MaxPooling (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_15 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +conv2d_17 (Conv2D) (None, 20, 39, 32) 9248 +_________________________________________________________________ +activation_23 (Activation) (None, 20, 39, 32) 0 +_________________________________________________________________ +max_pooling2d_13 (MaxPooling (None, 10, 19, 32) 0 +_________________________________________________________________ +dropout_16 (Dropout) (None, 10, 19, 32) 0 +_________________________________________________________________ +flatten_4 (Flatten) (None, 6080) 0 +_________________________________________________________________ +dense_7 (Dense) (None, 128) 778368 +_________________________________________________________________ +activation_24 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_17 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_8 (Dense) (None, 12) 1548 +_________________________________________________________________ +activation_25 (Activation) (None, 12) 0 +================================================================= +Total params: 808,356 +Trainable params: 808,168 +Non-trainable params: 188 +_________________________________________________________________ diff --git a/audio/model/parameters/model_824868_cnn_x3_mlp_128x2.txt b/audio/model/parameters/model_824868_cnn_x3_mlp_128x2.txt new file mode 100644 index 0000000..435009c --- /dev/null +++ b/audio/model/parameters/model_824868_cnn_x3_mlp_128x2.txt @@ -0,0 +1,55 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_18 (Conv2D) (None, 94, 171, 32) 320 +_________________________________________________________________ +batch_normalization_5 (Batch (None, 94, 171, 32) 376 +_________________________________________________________________ +activation_26 (Activation) (None, 94, 171, 32) 0 +_________________________________________________________________ +conv2d_19 (Conv2D) (None, 92, 169, 32) 9248 +_________________________________________________________________ +activation_27 (Activation) (None, 92, 169, 32) 0 +_________________________________________________________________ +max_pooling2d_14 (MaxPooling (None, 46, 84, 32) 0 +_________________________________________________________________ +dropout_18 (Dropout) (None, 46, 84, 32) 0 +_________________________________________________________________ +conv2d_20 (Conv2D) (None, 44, 82, 32) 9248 +_________________________________________________________________ +activation_28 (Activation) (None, 44, 82, 32) 0 +_________________________________________________________________ +max_pooling2d_15 (MaxPooling (None, 22, 41, 32) 0 +_________________________________________________________________ +dropout_19 (Dropout) (None, 22, 41, 32) 0 +_________________________________________________________________ +conv2d_21 (Conv2D) (None, 20, 39, 32) 9248 +_________________________________________________________________ +activation_29 (Activation) (None, 20, 39, 32) 0 
+_________________________________________________________________ +max_pooling2d_16 (MaxPooling (None, 10, 19, 32) 0 +_________________________________________________________________ +dropout_20 (Dropout) (None, 10, 19, 32) 0 +_________________________________________________________________ +flatten_5 (Flatten) (None, 6080) 0 +_________________________________________________________________ +dense_9 (Dense) (None, 128) 778368 +_________________________________________________________________ +activation_30 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_21 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_10 (Dense) (None, 128) 16512 +_________________________________________________________________ +activation_31 (Activation) (None, 128) 0 +_________________________________________________________________ +dropout_22 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_11 (Dense) (None, 12) 1548 +_________________________________________________________________ +activation_32 (Activation) (None, 12) 0 +================================================================= +Total params: 824,868 +Trainable params: 824,680 +Non-trainable params: 188 +_________________________________________________________________ diff --git a/audio/scripts/train-all-cnns.sh b/audio/scripts/train-all-cnns.sh new file mode 100644 index 0000000..3e9a02c --- /dev/null +++ b/audio/scripts/train-all-cnns.sh @@ -0,0 +1,6 @@ +(export CUDA_VISIBLE_DEVICES=0 && python train.py --bitrate 8 --setting 0 > logs/bitrate_0_8.log)& +(export CUDA_VISIBLE_DEVICES=1 && python train.py --bitrate 16 --setting 0 > logs/bitrate_0_16.log)& +(export CUDA_VISIBLE_DEVICES=2 && python train.py --bitrate 32 --setting 0 > logs/bitrate_0_32.log)& +(export CUDA_VISIBLE_DEVICES=3 && python train.py --bitrate 64 --setting 0 > logs/bitrate_0_64.log)& +(export CUDA_VISIBLE_DEVICES=4 && python train.py --bitrate 96 --setting 0 > logs/bitrate_0_96.log)& +(export CUDA_VISIBLE_DEVICES=5 && python train.py --bitrate 128 --setting 0 > logs/bitrate_0_128.log)& diff --git a/audio/scripts/train-all-settings.sh b/audio/scripts/train-all-settings.sh new file mode 100644 index 0000000..93c69c1 --- /dev/null +++ b/audio/scripts/train-all-settings.sh @@ -0,0 +1,5 @@ +(export CUDA_VISIBLE_DEVICES=1 && python train.py --setting 1 --bitrate 32 > logs/setting_1.log)& +(export CUDA_VISIBLE_DEVICES=2 && python train.py --setting 2 --bitrate 32 > logs/setting_2.log)& +(export CUDA_VISIBLE_DEVICES=3 && python train.py --setting 3 --bitrate 32 > logs/setting_3.log)& +(export CUDA_VISIBLE_DEVICES=4 && python train.py --setting 4 --bitrate 32 > logs/setting_4.log)& +(export CUDA_VISIBLE_DEVICES=5 && python train.py --setting 5 --bitrate 32 > logs/setting_5.log)& diff --git a/audio/train.py b/audio/train.py new file mode 100755 index 0000000..27194db --- /dev/null +++ b/audio/train.py @@ -0,0 +1,108 @@ +#! 
/usr/bin/env python3 + +from __future__ import print_function +import numpy as np +import librosa +from model.models import * +from model.datautils import * +import os +from os.path import isfile +from timeit import default_timer as timer +from keras.callbacks import ModelCheckpoint +import pandas +import argparse + +import tensorflow as tf +from keras.backend.tensorflow_backend import set_session +config = tf.ConfigProto() +config.gpu_options.per_process_gpu_memory_fraction = 0.3 +set_session(tf.Session(config=config)) + +parser = argparse.ArgumentParser(description="trains network using training dataset") +parser.add_argument('-w', '--weights', #nargs=1, type=argparse.FileType('r'), + help='weights file in hdf5 format', default="weights.hdf5") +parser.add_argument('-c', '--classpath', #type=argparse.string, + help='Train dataset directory with list of classes', default="") +parser.add_argument('--num_epoch', default=200, type=int, help="Number of iterations to train for") +parser.add_argument('--batch_size', default=50, type=int, help="Number of clips to send to GPU at once") +parser.add_argument('--val', default=0.25, type=float, help="Fraction of train to split off for validation") +parser.add_argument("--tile", help="tile mono spectrograms 3 times for use with imagenet models",action="store_true") + +parser.add_argument('--bitrate', type=int, default=128, help='Audio bitrate [default: 128kb/s]') +parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]') +parser.add_argument('--model', type=str, default="", help='Model architecture description (0-5) [default: ""]') +parser.add_argument('--log_dir', type=str, default="", help="The path of training log (saving directory)") +parser.add_argument('--train_dir', type=str, default="", help="The path of training data (loading directory)") + +FLAGS = parser.parse_args() + +BITRATE = FLAGS.bitrate +SETTING = FLAGS.setting +MODEL = FLAGS.model +batch_size = FLAGS.batch_size +num_epoch = FLAGS.num_epoch +train_dir = FLAGS.train_dir +log_dir = FLAGS.log_dir +classpath = FLAGS.classpath +RATIO = FLAGS.val +TILE = FLAGS.tile + +if classpath == "": + classpath = "data/Preproc_" + str(BITRATE) + "/Train/" + +model_list = ["cnn_x3_mlp_0", + "cnn_x4_mlp_128", + "cnn_x3_mlp_64_128", + "cnn_x3_mlp_128", + "cnn_x3_mlp_128x2", + "cnn_x2_mlp_128"] + +if MODEL == "": + SETTING = model_list[SETTING] +else: + SETTING = MODEL + +print ("model: " + SETTING) + +if train_dir == "": + train_dir = "data/bitrate_" + str(BITRATE) + "/" +if log_dir == "": + log_dir = "logs/" + str(SETTING) + "/bitrate_" + str(BITRATE) + "/" + +print ("train_dir: " + train_dir) +print ("log_dir: " + log_dir) + +assert (os.path.exists(train_dir)) +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +os.system('cp %s %s' % ("train.py", log_dir)) # bkp of train procedure + +FLAGS.train_dir = train_dir +FLAGS.log_dir = log_dir +FLAGS.classpath = classpath +FLAGS.model = SETTING +print (FLAGS) + +def train_network(weights_file="weights.hdf5", classpath="Preproc/Train/", epochs=50, batch_size=20, val_split=0.25,tile=False, setting=0): + np.random.seed(2337) + + # Get the data + X_train, Y_train, paths_train, class_names = build_dataset(path=classpath, batch_size=batch_size, tile=tile) + + # Instantiate the model + model = setup_model(X_train, class_names, weights_file=os.path.join(log_dir, weights_file), setting=setting) + + save_best_only = (val_split > 1e-6) + checkpoint = ModelCheckpoint(filepath=os.path.join(log_dir, weights_file), + 
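+                                 # NB: the save_best_only flag computed above is shadowed by the
+                                 # hard-coded True here, and monitor='val_acc' requires val_split > 0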
monitor='val_acc', verbose=1, save_best_only=save_best_only,  # only track the best checkpoint when a validation split exists
+                                 save_weights_only=False, mode='max')
+
+    history_callback = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, shuffle=True,
+              verbose=0, callbacks=[checkpoint], validation_split=val_split)   # validation_data=(X_val, Y_val),
+    pandas.DataFrame(history_callback.history).to_csv(os.path.join(log_dir, "history.csv"))
+    model.save(os.path.join(log_dir, 'model.h5'))
+
+if __name__ == '__main__':
+    train_network(weights_file=FLAGS.weights, classpath=classpath, epochs=num_epoch, batch_size=batch_size,
+                  val_split=RATIO, tile=TILE, setting=model_list.index(SETTING))
diff --git a/cifar/data/__init__.py b/cifar/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cifar/data/convert_cifar10.py b/cifar/data/convert_cifar10.py
new file mode 100644
index 0000000..7ad6b51
--- /dev/null
+++ b/cifar/data/convert_cifar10.py
@@ -0,0 +1,108 @@
+import copy
+import os
+from subprocess import call
+
+import numpy as np
+import sklearn
+import sklearn.cross_validation
+import sklearn.linear_model
+
+import h5py
+
+print("")
+
+print("Downloading...")
+if not os.path.exists("cifar-10-python.tar.gz"):
+    call(
+        "wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
+        shell=True
+    )
+    print("Downloading done.\n")
+else:
+    print("Dataset already downloaded. Did not download twice.\n")
+
+
+print("Extracting...")
+cifar_python_directory = os.path.abspath("cifar-10-batches-py")
+if not os.path.exists(cifar_python_directory):
+    call(
+        "tar -zxvf cifar-10-python.tar.gz",
+        shell=True
+    )
+    print("Extracting successfully done to {}.".format(cifar_python_directory))
+else:
+    print("Dataset already extracted. Did not extract twice.\n")
+
+
+print("Converting...")
+cifar_caffe_directory = os.path.abspath('cifar_10_h5/')
+if not os.path.exists(cifar_caffe_directory):
+
+    def unpickle(file):
+        import cPickle
+        fo = open(file, 'rb')
+        dict = cPickle.load(fo)
+        fo.close()
+        return dict
+
+    def shuffle_data(data, labels):
+        data, _, labels, _ = sklearn.cross_validation.train_test_split(
+            data, labels, test_size=0.0, random_state=42
+        )
+        return data, labels
+
+    def load_data(train_batches):
+        data = []
+        labels = []
+        for data_batch_i in train_batches:
+            d = unpickle(
+                os.path.join(cifar_python_directory, data_batch_i)
+            )
+            data.append(d['data'])
+            labels.append(d['labels'])
+        # Merge training batches on their first dimension
+        data = np.concatenate(data)
+        labels = np.concatenate(labels)
+        length = len(labels)
+
+        data, labels = shuffle_data(data, labels)
+        data_reshaped = data.reshape(length, 3, 32, 32)
+        data_transposed = np.transpose(data_reshaped, (0, 3, 2, 1))
+        print data_transposed.dtype, labels.dtype
+        return data_transposed, labels
+
+    X, y = load_data(
+        ["data_batch_{}".format(i) for i in range(1, 6)]
+    )
+
+    Xt, yt = load_data(["test_batch"])
+
+    print("INFO: each dataset's elements are of shape 3*32*32:")
+    print('"print(X.shape)" --> "{}"\n'.format(X.shape))
+    # print("From the Caffe documentation: ")
+    print("The conventional blob dimensions for batches of image data "
+          "are number N x height H x width W x channel C.\n")
+
+    print("Data is fully loaded, now truly converting.")
+
+    os.makedirs(cifar_caffe_directory)
+    train_filename = os.path.join(cifar_caffe_directory, 'train.h5')
+    test_filename = os.path.join(cifar_caffe_directory, 'test.h5')
+
+    comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
+    # Train
+    with h5py.File(train_filename, 'w') as f:
+        f.create_dataset('data', data=X, **comp_kwargs)
+        f.create_dataset('label', data=y.astype(np.int_), **comp_kwargs)
+    with open(os.path.join(cifar_caffe_directory, 'train.txt'), 'w') as f:
+        f.write(train_filename + '\n')
+    # Test
+    with h5py.File(test_filename, 'w') as f:
+        f.create_dataset('data', data=Xt, **comp_kwargs)
+        f.create_dataset('label', data=yt.astype(np.int_), **comp_kwargs)
+    with open(os.path.join(cifar_caffe_directory, 'test.txt'), 'w') as f:
+        f.write(test_filename + '\n')
+
+    print('Conversion successfully done to "{}".\n'.format(cifar_caffe_directory))
+else:
+    print("Conversion was already done. Did not convert twice.\n")
diff --git a/cifar/data/prepare.sh b/cifar/data/prepare.sh
new file mode 100644
index 0000000..8c3abfa
--- /dev/null
+++ b/cifar/data/prepare.sh
@@ -0,0 +1 @@
+python convert_cifar10.py && python restore_images.py && python prepare_cifar.py
diff --git a/cifar/data/prepare_cifar.py b/cifar/data/prepare_cifar.py
new file mode 100644
index 0000000..1975328
--- /dev/null
+++ b/cifar/data/prepare_cifar.py
@@ -0,0 +1,66 @@
+import numpy as np
+import h5py
+from scipy import ndimage, misc
+import glob
+import os
+
+quality_list = [1, 5, 10, 15, 20, 25, 50, 75, 100]
+
+img_dirs = ["quality_" + str(i) for i in quality_list]
+
+label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
+               'dog', 'frog', 'horse', 'ship', 'truck']
+
+def load_data(img_dirs, image_W=None, image_H=None):
+    images = []
+    labels = []
+
+    files = glob.glob(os.path.join(img_dirs, "*.jpg"))
+
+    for i, filepath in enumerate(files):
+        name = filepath.split('/')[-1].split(".")[0]
+        label = label_names.index(name.split('_')[0])
+        labels.append(label)
+        image = ndimage.imread(filepath, mode="RGB")
+        # Resize only when both target dimensions are given
+        if image_W is not None and image_H is not None:
+            image_resized = misc.imresize(image, (image_W, image_H)) / 255.0
+        else:
+            image_resized = image / 255.0
+        image_float32 = image_resized.astype('float32')
+        images.append(image_float32)
+        # if i == 999:
+        #     break
+
+    images = np.stack(images, axis = 0)
+    labels = np.asarray(labels)
+    print images.shape, labels.shape
+    return images, labels
+
+
+def save_h5(data, label, filename):
+    comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
+    with h5py.File(filename, 'w') as f:
+        f.create_dataset('data', data=data, **comp_kwargs)
+        f.create_dataset('label', data=label, **comp_kwargs)
+
+def read_h5(filename):
+    f = h5py.File(filename,'r')
+    data, label = f['data'], f['label']
+    return data, label
+
+
+if __name__ == "__main__":
+    print img_dirs
+
+    for img_dir in img_dirs:
+        train_dir, test_dir = os.path.join(img_dir, "train"), os.path.join(img_dir, "test")
+        train_data, train_label = load_data(train_dir)
+        test_data, test_label = load_data(test_dir)
+        save_h5(train_data, train_label, os.path.join(img_dir, "train.h5"))
+        save_h5(test_data, test_label, os.path.join(img_dir, "test.h5"))
+        # data, label = read_h5(os.path.join(img_dir, "data.h5"))
+        # print label.value
+        print img_dir
+        #
break + # print data[0,0,:], label[0] diff --git a/cifar/data/restore_images.py b/cifar/data/restore_images.py new file mode 100644 index 0000000..4c670e2 --- /dev/null +++ b/cifar/data/restore_images.py @@ -0,0 +1,36 @@ +import h5py +from PIL import Image +import os + +quality_list = [1, 5, 10, 15, 20, 25, 50, 75, 100] + +label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', + 'dog', 'frog', 'horse', 'ship', 'truck'] +num_counts = [0] * 10 + +def restore_images(h5_filename, out_dirs, quality = 15): + if not os.path.exists(out_dirs): + os.makedirs(out_dirs) + h5_file = h5py.File(h5_filename, "r") + images = h5_file['data'].value + labels = h5_file['label'].value + print images.shape[0] + counts = [0] * 10 + for i in range(images.shape[0]): + img = Image.fromarray(images[i, ...]) + label = labels[i] + num_counts[label] += 1 + counts[label] += 1 + outname = label_names[label] + "_" + str(counts[label]) + ".jpg" + img.save(os.path.join(out_dirs, outname), quality=quality) + # if i == 100: + # break + +if __name__ == "__main__": + print quality_list + for quality in quality_list: + restore_images("cifar_10_h5/train.h5", "quality_" + str(quality) + "/train", quality) + restore_images("cifar_10_h5/test.h5", "quality_" + str(quality) + "/test", quality) + print quality + + print num_counts diff --git a/cifar/data/validate.py b/cifar/data/validate.py new file mode 100644 index 0000000..098d4b1 --- /dev/null +++ b/cifar/data/validate.py @@ -0,0 +1,40 @@ +import h5py +from PIL import Image +import os +import numpy as np + +quality_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 75] + +label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', + 'dog', 'frog', 'horse', 'ship', 'truck'] +num_counts = [0] * 10 + +def restore_images(h5_filename, out_dirs, quality = 15): + if not os.path.exists(out_dirs): + os.makedirs(out_dirs) + h5_file = h5py.File(h5_filename, "r") + images = h5_file['data'].value + labels = h5_file['label'].value + # print images.shape[0] + for i in range(images.shape[0]): + b = images[i, ...] + img = Image.fromarray(images[i, ...]) + label = labels[i] + num_counts[label] += 1 + outname = label_names[label] + "_" + str(num_counts[label]) + ".jpg" + img.save(os.path.join(out_dirs, outname), quality=quality) + new = Image.open(os.path.join(out_dirs, outname), mode='r') + a = np.asarray(new) + print (b == a).shape + print np.sum(b == a) + if i == 5: + break + +if __name__ == "__main__": + print quality_list + for quality in [1, 5, 10, 25, 50, 75, 100]: + restore_images("cifar_10_h5/train.h5", "test/quality_" + str(quality) + "/train", quality) + # restore_images("cifar_10_h5/test.h5", "quality_" + str(quality) + "/test") + # print quality + + print num_counts diff --git a/cifar/logs/README.md b/cifar/logs/README.md new file mode 100644 index 0000000..ace72f9 --- /dev/null +++ b/cifar/logs/README.md @@ -0,0 +1 @@ +This is the place where the training logs are generated. 
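For reference, a quick sanity check of the HDF5 files written by prepare.sh can catch path or scaling mistakes before any GPU time is spent. A minimal sketch, assuming the default quality_*/train.h5 layout produced above (the quality-25 path and the 50,000-image CIFAR-10 training split are assumptions about a correct run, not guarantees):

import h5py

with h5py.File("quality_25/train.h5", "r") as f:
    # dataset names match save_h5 in prepare_cifar.py
    data, label = f["data"][:], f["label"][:]

print(data.shape, label.shape)              # expect (50000, 32, 32, 3) and (50000,)
print(data.dtype, data.min(), data.max())   # float32, values scaled into [0, 1]
assert data.shape[0] == label.shape[0]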
diff --git a/cifar/models.py b/cifar/models.py new file mode 100644 index 0000000..dbe7109 --- /dev/null +++ b/cifar/models.py @@ -0,0 +1,164 @@ +from keras.models import Sequential +from keras.layers import Dropout, Dense, Flatten, Activation, Convolution2D, GlobalAveragePooling2D +from keras.utils import np_utils, plot_model +from keras.optimizers import SGD + +model_list = ["all-cnns", + "cnns-dense-64", + "all-cnnsx2", + "cnns-x2-dense-128", + "cnns-dense-128", + "cnns-dense-128-256", + ] + +def save_summary(model, header, suffix): + assert(suffix.split(".")[0] == "") + with open(header + suffix, 'w') as fh: + # Pass the file handle in as a lambda functions to make it callable + model.summary(print_fn=lambda x: fh.write(x + '\n')) + +def build_model(lr, decay, setting=0): + model = Sequential() + + model.add(Convolution2D(32, (3, 3), padding = 'same', input_shape=(32, 32, 3))) + model.add(Activation('relu')) + model.add(Convolution2D(64, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + if setting == 0: + # all-cnns + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (1, 1), padding='valid')) + model.add(Activation('relu')) + model.add(Convolution2D(10, (1, 1), padding='valid')) + + model.add(GlobalAveragePooling2D()) + + if setting == 1: + # cnns-dense-64 + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(64, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(256, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(10, activation='relu')) + + if setting == 2: + # all-cnnsx2 + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (1, 1), padding='valid')) + model.add(Activation('relu')) + model.add(Convolution2D(10, (1, 1), padding='valid')) + + model.add(GlobalAveragePooling2D()) + + if setting == 3: + # cnns-x2-dense-128 + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), 
padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(128, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(256, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(10, activation='relu')) + + if setting == 4: + # cnns-dense-128 + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(128, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(256, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(10, activation='relu')) + + if setting == 5: + # cnns-dense-128-256 + model.add(Convolution2D(128, (3, 3), padding = 'same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same')) + model.add(Activation('relu')) + model.add(Convolution2D(128, (3, 3), padding='same', strides = (2,2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(128, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(256, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(256, activation='relu')) + model.add(Dropout(0.5)) + model.add(Dense(10, activation='relu')) + + model.add(Activation('softmax')) + sgd = SGD(lr=lr, decay=decay, momentum=0.9, nesterov=True) + model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) + + # print (model.count_params()) + # model.summary() + return model + +settings = [i for i in range(6)] +if __name__ == "__main__": + models = [] + parameters = [] + for setting in settings: + model = build_model(0.01, 1e-6, setting) + save_summary(model, "parameters/model_" + str(model.count_params()) + "_" + model_list[setting], ".txt") + print model_list[setting] + ": " + str(model.count_params()) + parameters.append(model.count_params()) + models.append(model_list[setting]) + # model.summary() + print models + print parameters diff --git a/cifar/parameters/model_1079562_cnns-dense-64.txt b/cifar/parameters/model_1079562_cnns-dense-64.txt new file mode 100644 index 0000000..7bb8d40 --- /dev/null +++ b/cifar/parameters/model_1079562_cnns-dense-64.txt @@ -0,0 +1,45 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_10 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_8 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ +conv2d_11 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_9 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_12 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_3 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_13 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_10 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_14 (Conv2D) (None, 16, 16, 128) 
147584 +_________________________________________________________________ +activation_11 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_15 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_4 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +flatten_1 (Flatten) (None, 8192) 0 +_________________________________________________________________ +dense_1 (Dense) (None, 64) 524352 +_________________________________________________________________ +dropout_5 (Dropout) (None, 64) 0 +_________________________________________________________________ +dense_2 (Dense) (None, 256) 16640 +_________________________________________________________________ +dropout_6 (Dropout) (None, 256) 0 +_________________________________________________________________ +dense_3 (Dense) (None, 10) 2570 +_________________________________________________________________ +activation_12 (Activation) (None, 10) 0 +================================================================= +Total params: 1,079,562 +Trainable params: 1,079,562 +Non-trainable params: 0 +_________________________________________________________________ diff --git a/cifar/parameters/model_1144138_all-cnnsx2.txt b/cifar/parameters/model_1144138_all-cnnsx2.txt new file mode 100644 index 0000000..dc01327 --- /dev/null +++ b/cifar/parameters/model_1144138_all-cnnsx2.txt @@ -0,0 +1,57 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_16 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_13 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ +conv2d_17 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_14 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_18 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_7 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_19 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_15 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_20 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_16 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_21 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_8 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_22 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +activation_17 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_23 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +activation_18 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_24 (Conv2D) (None, 4, 4, 128) 147584 
+_________________________________________________________________ +dropout_9 (Dropout) (None, 4, 4, 128) 0 +_________________________________________________________________ +conv2d_25 (Conv2D) (None, 4, 4, 128) 147584 +_________________________________________________________________ +activation_19 (Activation) (None, 4, 4, 128) 0 +_________________________________________________________________ +conv2d_26 (Conv2D) (None, 4, 4, 128) 16512 +_________________________________________________________________ +activation_20 (Activation) (None, 4, 4, 128) 0 +_________________________________________________________________ +conv2d_27 (Conv2D) (None, 4, 4, 10) 1290 +_________________________________________________________________ +global_average_pooling2d_2 ( (None, 10) 0 +_________________________________________________________________ +activation_21 (Activation) (None, 10) 0 +================================================================= +Total params: 1,144,138 +Trainable params: 1,144,138 +Non-trainable params: 0 +_________________________________________________________________ diff --git a/cifar/parameters/model_1276618_cnns-x2-dense-128.txt b/cifar/parameters/model_1276618_cnns-x2-dense-128.txt new file mode 100644 index 0000000..4a1e282 --- /dev/null +++ b/cifar/parameters/model_1276618_cnns-x2-dense-128.txt @@ -0,0 +1,57 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_28 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_22 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ +conv2d_29 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_23 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_30 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_10 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_31 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_24 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_32 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_25 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_33 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_11 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_34 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +activation_26 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_35 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +activation_27 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_36 (Conv2D) (None, 4, 4, 128) 147584 +_________________________________________________________________ +dropout_12 (Dropout) (None, 4, 4, 128) 0 
+_________________________________________________________________ +flatten_2 (Flatten) (None, 2048) 0 +_________________________________________________________________ +dense_4 (Dense) (None, 128) 262272 +_________________________________________________________________ +dropout_13 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_5 (Dense) (None, 256) 33024 +_________________________________________________________________ +dropout_14 (Dropout) (None, 256) 0 +_________________________________________________________________ +dense_6 (Dense) (None, 10) 2570 +_________________________________________________________________ +activation_28 (Activation) (None, 10) 0 +================================================================= +Total params: 1,276,618 +Trainable params: 1,276,618 +Non-trainable params: 0 +_________________________________________________________________ diff --git a/cifar/parameters/model_1620298_cnns-dense-128.txt b/cifar/parameters/model_1620298_cnns-dense-128.txt new file mode 100644 index 0000000..41c65eb --- /dev/null +++ b/cifar/parameters/model_1620298_cnns-dense-128.txt @@ -0,0 +1,45 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_37 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_29 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ +conv2d_38 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_30 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_39 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_15 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_40 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_31 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_41 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_32 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_42 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_16 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +flatten_3 (Flatten) (None, 8192) 0 +_________________________________________________________________ +dense_7 (Dense) (None, 128) 1048704 +_________________________________________________________________ +dropout_17 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_8 (Dense) (None, 256) 33024 +_________________________________________________________________ +dropout_18 (Dropout) (None, 256) 0 +_________________________________________________________________ +dense_9 (Dense) (None, 10) 2570 +_________________________________________________________________ +activation_33 (Activation) (None, 10) 0 +================================================================= +Total params: 1,620,298 +Trainable params: 1,620,298 +Non-trainable params: 0 
+_________________________________________________________________ diff --git a/cifar/parameters/model_1686090_cnns-dense-128-256.txt b/cifar/parameters/model_1686090_cnns-dense-128-256.txt new file mode 100644 index 0000000..be46b87 --- /dev/null +++ b/cifar/parameters/model_1686090_cnns-dense-128-256.txt @@ -0,0 +1,49 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_43 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_34 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ +conv2d_44 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_35 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_45 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_19 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_46 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_36 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_47 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_37 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_48 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_20 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +flatten_4 (Flatten) (None, 8192) 0 +_________________________________________________________________ +dense_10 (Dense) (None, 128) 1048704 +_________________________________________________________________ +dropout_21 (Dropout) (None, 128) 0 +_________________________________________________________________ +dense_11 (Dense) (None, 256) 33024 +_________________________________________________________________ +dropout_22 (Dropout) (None, 256) 0 +_________________________________________________________________ +dense_12 (Dense) (None, 256) 65792 +_________________________________________________________________ +dropout_23 (Dropout) (None, 256) 0 +_________________________________________________________________ +dense_13 (Dense) (None, 10) 2570 +_________________________________________________________________ +activation_38 (Activation) (None, 10) 0 +================================================================= +Total params: 1,686,090 +Trainable params: 1,686,090 +Non-trainable params: 0 +_________________________________________________________________ diff --git a/cifar/parameters/model_701386_all-cnns.txt b/cifar/parameters/model_701386_all-cnns.txt new file mode 100644 index 0000000..3f1f462 --- /dev/null +++ b/cifar/parameters/model_701386_all-cnns.txt @@ -0,0 +1,45 @@ +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_1 (Conv2D) (None, 32, 32, 32) 896 +_________________________________________________________________ +activation_1 (Activation) (None, 32, 32, 32) 0 +_________________________________________________________________ 
+conv2d_2 (Conv2D) (None, 32, 32, 64) 18496 +_________________________________________________________________ +activation_2 (Activation) (None, 32, 32, 64) 0 +_________________________________________________________________ +conv2d_3 (Conv2D) (None, 16, 16, 128) 73856 +_________________________________________________________________ +dropout_1 (Dropout) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_4 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_3 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_5 (Conv2D) (None, 16, 16, 128) 147584 +_________________________________________________________________ +activation_4 (Activation) (None, 16, 16, 128) 0 +_________________________________________________________________ +conv2d_6 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +dropout_2 (Dropout) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_7 (Conv2D) (None, 8, 8, 128) 147584 +_________________________________________________________________ +activation_5 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_8 (Conv2D) (None, 8, 8, 128) 16512 +_________________________________________________________________ +activation_6 (Activation) (None, 8, 8, 128) 0 +_________________________________________________________________ +conv2d_9 (Conv2D) (None, 8, 8, 10) 1290 +_________________________________________________________________ +global_average_pooling2d_1 ( (None, 10) 0 +_________________________________________________________________ +activation_7 (Activation) (None, 10) 0 +================================================================= +Total params: 701,386 +Trainable params: 701,386 +Non-trainable params: 0 +_________________________________________________________________ diff --git a/cifar/provider.py b/cifar/provider.py new file mode 100644 index 0000000..441cf05 --- /dev/null +++ b/cifar/provider.py @@ -0,0 +1,33 @@ +import tensorflow as tf +import numpy as np +import os +import math +import glob +from scipy import ndimage, misc +from data.prepare_cifar import read_h5 + +# you need to change this to your data directory +train_dir = 'data/train/' + +def load_data(img_dirs, h5_filename="data.h5"): + f = os.path.join(img_dirs, h5_filename) + data, label = read_h5(f) + return data.value, label.value + +def shuffle_data(data, labels): + """ Shuffle data and labels. + Input: + data: B,... 
numpy array
+        label: B, numpy array
+    Return:
+        shuffled data, label and shuffle indices
+    """
+    idx = np.arange(len(labels))
+    np.random.shuffle(idx)
+    return data[idx, ...], labels[idx], idx
+
+if __name__ == "__main__":
+    f = os.path.join("data/quality_0", "train.h5")
+    data, label = read_h5(f)
+    print (data.value.shape, label.value.shape)
+
diff --git a/cifar/scripts/train-all-cnns.sh b/cifar/scripts/train-all-cnns.sh
new file mode 100644
index 0000000..c35e967
--- /dev/null
+++ b/cifar/scripts/train-all-cnns.sh
@@ -0,0 +1,9 @@
+(export CUDA_VISIBLE_DEVICES=0 && python train.py --quality 1 > logs/quality_1.log)&
+(export CUDA_VISIBLE_DEVICES=1 && python train.py --quality 5 > logs/quality_5.log)&
+(export CUDA_VISIBLE_DEVICES=2 && python train.py --quality 10 > logs/quality_10.log)&
+(export CUDA_VISIBLE_DEVICES=3 && python train.py --quality 15 > logs/quality_15.log)&
+(export CUDA_VISIBLE_DEVICES=4 && python train.py --quality 20 > logs/quality_20.log)&
+(export CUDA_VISIBLE_DEVICES=5 && python train.py --quality 25 > logs/quality_25.log)&
+(export CUDA_VISIBLE_DEVICES=6 && python train.py --quality 50 > logs/quality_50.log)&
+(export CUDA_VISIBLE_DEVICES=7 && python train.py --quality 75 > logs/quality_75.log)&
+(export CUDA_VISIBLE_DEVICES=7 && python train.py --quality 100 > logs/quality_100.log)&
diff --git a/cifar/scripts/train-all-settings.sh b/cifar/scripts/train-all-settings.sh
new file mode 100644
index 0000000..acaf90f
--- /dev/null
+++ b/cifar/scripts/train-all-settings.sh
@@ -0,0 +1,5 @@
+(export CUDA_VISIBLE_DEVICES=1 && python train.py --setting 1 --quality 25 > logs/setting_1.log)&
+(export CUDA_VISIBLE_DEVICES=2 && python train.py --setting 2 --quality 25 > logs/setting_2.log)&
+(export CUDA_VISIBLE_DEVICES=3 && python train.py --setting 3 --quality 25 > logs/setting_3.log)&
+(export CUDA_VISIBLE_DEVICES=4 && python train.py --setting 4 --quality 25 > logs/setting_4.log)&
+(export CUDA_VISIBLE_DEVICES=5 && python train.py --setting 5 --quality 25 > logs/setting_5.log)&
diff --git a/cifar/train.py b/cifar/train.py
new file mode 100644
index 0000000..b30eaad
--- /dev/null
+++ b/cifar/train.py
@@ -0,0 +1,151 @@
+from __future__ import print_function
+import tensorflow as tf
+from keras.datasets import cifar10
+from keras.preprocessing.image import ImageDataGenerator
+from keras.utils import np_utils, plot_model
+from keras import backend as K
+from keras.models import Model
+from keras.layers.core import Lambda
+from keras.callbacks import ModelCheckpoint
+from models import build_model
+import provider
+import pandas
+import cv2
+import numpy as np
+import argparse
+import os
+
+from keras.backend.tensorflow_backend import set_session
+config = tf.ConfigProto()
+config.gpu_options.per_process_gpu_memory_fraction = 0.3
+set_session(tf.Session(config=config))
+
+K.set_image_dim_ordering('tf')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--quality', type=int, default=100, help='Image quality [default: 100]')
+parser.add_argument('--setting', type=int, default=0, help='Model architecture (0-5) [default: 0]')
+parser.add_argument('--model', type=str, default="", help='Model architecture description (0-5) [default: ""]')
+parser.add_argument('--batch_size', type=int, default=128, help='Batch size during training [default: 128]')
+parser.add_argument('--num_epoch', type=int, default=200, help='Number of epochs to train [default: 200]')
+parser.add_argument('--learning_rate', type=float, default=0.01, help='Initial learning rate [default: 0.01]')
+parser.add_argument('--decay_rate', type=float, default=1e-6, help='Decay rate [default: 1e-6]')
+parser.add_argument('--log_dir', type=str, default="", help="The path of training log (saving directory)")
+parser.add_argument('--train_dir', type=str, default="", help="The path of training data (loading directory)")
+
+FLAGS = parser.parse_args()
+
+QUALITY = FLAGS.quality
+SETTING = FLAGS.setting
+MODEL = FLAGS.model
+
+
+model_list = ["all-cnns",
+              "cnns-dense-64",
+              "all-cnnsx2",
+              "cnns-x2-dense-128",
+              "cnns-dense-128",
+              "cnns-dense-128-256",
+              ]
+
+if MODEL == "":
+    SETTING = model_list[SETTING]
+else:
+    SETTING = MODEL
+
+print ("model: " + SETTING)
+
+
+nb_classes = 10
+batch_size = FLAGS.batch_size
+nb_epoch = FLAGS.num_epoch
+learning_rate = FLAGS.learning_rate
+decay_rate = FLAGS.decay_rate
+rows, cols = 32, 32
+
+channels = 3
+
+train_dir = FLAGS.train_dir
+log_dir = FLAGS.log_dir
+if train_dir == "":
+    train_dir = "data/quality_" + str(QUALITY) + "/"
+if log_dir == "":
+    log_dir = "logs/" + str(SETTING) + "/quality_" + str(QUALITY) + "/"
+
+print ("train_dir: " + train_dir)
+print ("log_dir: " + log_dir)
+
+assert (os.path.exists(train_dir))
+if not os.path.exists(log_dir):
+    os.makedirs(log_dir)
+
+os.system('cp %s %s' % ("train.py", log_dir))  # bkp of train procedure
+
+FLAGS.train_dir = train_dir
+FLAGS.log_dir = log_dir
+FLAGS.model = SETTING
+LOG_FOUT = open(os.path.join(log_dir, 'setting.txt'), 'w')
+LOG_FOUT.write(str(FLAGS)+'\n')
+LOG_FOUT.close()
+
+def load_data(train_dir, test_dir):
+    X_train, y_train = provider.load_data(train_dir, "train.h5")
+    X_test, y_test = provider.load_data(test_dir, "test.h5")
+
+    Y_train = np_utils.to_categorical(y_train, nb_classes)
+    Y_test = np_utils.to_categorical(y_test, nb_classes)
+    print (X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
+    return (X_train, Y_train), (X_test, Y_test)
+
+def save_summary(model, header, suffix):
+    assert(suffix.split(".")[0] == "")
+    with open(header + suffix, 'w') as fh:
+        # Pass the file handle in as a lambda function to make it callable
+        model.summary(print_fn=lambda x: fh.write(x + '\n'))
+
+def data_generator(wshift=0.1, hshift=0.1, horizontal_flip=True):
+    data_gen = ImageDataGenerator(
+        featurewise_center=False,  # set input mean to 0 over the dataset
+        samplewise_center=False,  # set each sample mean to 0
+        featurewise_std_normalization=False,  # divide inputs by std of the dataset
+        samplewise_std_normalization=False,  # divide each input by its std
+        zca_whitening=False,  # apply ZCA whitening
+        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
+        width_shift_range=wshift,  # randomly shift images horizontally (fraction of total width)
+        height_shift_range=hshift,  # randomly shift images vertically (fraction of total height)
+        horizontal_flip=horizontal_flip,  # randomly flip images
+        vertical_flip=False)
+    return data_gen
+
+def train():
+    (X_train, Y_train), (X_test, Y_test) = load_data(train_dir, train_dir)
+    model = build_model(learning_rate, decay_rate, model_list.index(SETTING))
+    # save_summary(model, "parameters/model", ".txt")
+    # plot_model(model, to_file="parameters/model" + ".pdf", show_shapes=True)
+    data_gen = data_generator()
+    data_gen.fit(X_train)
+    filepath = os.path.join(log_dir, "weights.hdf5")
+    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='max')
+
+    callbacks_list = [checkpoint]
+    # Fit the model on the batches generated by datagen.flow().
+    history_callback = model.fit_generator(data_gen.flow(X_train, Y_train,
+                                           batch_size=batch_size),
+                                           steps_per_epoch=X_train.shape[0] // batch_size,
+                                           epochs=nb_epoch, validation_data=(X_test, Y_test),
+                                           callbacks=callbacks_list, verbose=0)
+
+    pandas.DataFrame(history_callback.history).to_csv(os.path.join(log_dir, "history.csv"))
+    model.save(os.path.join(log_dir, 'model.h5'))
+
+
+def predict(model, filepath):
+    # The models above expect a batch of 32x32 RGB images scaled to [0, 1]
+    im = cv2.resize(cv2.imread(filepath), (32, 32)).astype(np.float32)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) / 255.0
+    out = model.predict(im[np.newaxis, ...])
+    print (np.argmax(out))
+
+if __name__ == "__main__":
+    train()
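Once a run finishes, the saved model can be scored against the quality-matched test split. A minimal evaluation sketch, assuming the default log_dir layout that train.py creates (the all-cnns/quality-25 paths are illustrative, not fixed names):

import os
from keras.models import load_model
from keras.utils import np_utils
import provider

log_dir = "logs/all-cnns/quality_25/"                  # written by train.py above
X_test, y_test = provider.load_data("data/quality_25/", "test.h5")
Y_test = np_utils.to_categorical(y_test, 10)

model = load_model(os.path.join(log_dir, "model.h5"))  # compiled with SGD and an accuracy metric
loss, acc = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)
print("test loss: %.4f, test accuracy: %.4f" % (loss, acc))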