From 6fcad9636a34d4f32ff2225d45c90fe882520a5d Mon Sep 17 00:00:00 2001
From: Francesco Mosconi
Date: Fri, 1 Sep 2017 12:55:34 -0700
Subject: [PATCH] max gpus, gitignore, pep8

---
 .gitignore         | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 utils/multi_gpu.py | 55 ++++++++++++++++++----------
 2 files changed, 126 insertions(+), 20 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9eafa33
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,91 @@
+.DS_Store
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
diff --git a/utils/multi_gpu.py b/utils/multi_gpu.py
index 6eea8b9..74f35e2 100644
--- a/utils/multi_gpu.py
+++ b/utils/multi_gpu.py
@@ -1,14 +1,21 @@
-from keras.layers import merge
+from __future__ import print_function
+from keras.layers import concatenate
 from keras.layers.core import Lambda
 from keras.models import Model
-
+from tensorflow.python.client import device_lib
 import tensorflow as tf
 
-def make_parallel(model, gpu_count):
+
+def get_available_gpus():
+    local_device_protos = device_lib.list_local_devices()
+    return [x.name for x in local_device_protos if x.device_type == 'GPU']
+
+
+def make_parallel(model, gpu_count=None):
     def get_slice(data, idx, parts):
         shape = tf.shape(data)
-        size = tf.concat([ shape[:1] // parts, shape[1:] ],axis=0)
-        stride = tf.concat([ shape[:1] // parts, shape[1:]*0 ],axis=0)
+        size = tf.concat([shape[:1] // parts, shape[1:]], axis=0)
+        stride = tf.concat([shape[:1] // parts, shape[1:] * 0], axis=0)
         start = stride * idx
         return tf.slice(data, start, size)
 
@@ -16,24 +23,34 @@ def get_slice(data, idx, parts):
     for i in range(len(model.outputs)):
         outputs_all.append([])
 
-    #Place a copy of the model on each GPU, each getting a slice of the batch
-    for i in range(gpu_count):
-        with tf.device('/gpu:%d' % i):
-            with tf.name_scope('tower_%d' % i) as scope:
-
+    # if gpu_count provided, use it
+    # otherwise use all available gpus
+    if gpu_count:
+        gpus = ['/gpu:%d' % i for i in range(gpu_count)]
+    else:
+        gpus = get_available_gpus()
+        gpu_count = len(gpus)
+
+    # Place a copy of the model on each GPU, each getting a slice of the batch
+    for i, device in enumerate(gpus):
+        with tf.device(device):
+            with tf.name_scope('tower_%d' % i):
                 inputs = []
-                #Slice each input into a piece for processing on this GPU
+                # Slice each input into a piece for processing on this GPU
                 for x in model.inputs:
                     input_shape = tuple(x.get_shape().as_list())[1:]
-                    slice_n = Lambda(get_slice, output_shape=input_shape, arguments={'idx':i,'parts':gpu_count})(x)
-                    inputs.append(slice_n)
+                    slice_n = Lambda(get_slice,
+                                     output_shape=input_shape,
+                                     arguments={'idx': i,
+                                                'parts': gpu_count})(x)
+                    inputs.append(slice_n)
 
                 outputs = model(inputs)
-                
+
                 if not isinstance(outputs, list):
                     outputs = [outputs]
-                
-                #Save all the outputs for merging back together later
+
+                # Save all the outputs for merging back together later
                 for l in range(len(outputs)):
                     outputs_all[l].append(outputs[l])
 
@@ -41,7 +58,5 @@ def get_slice(data, idx, parts):
     with tf.device('/cpu:0'):
         merged = []
         for outputs in outputs_all:
-            merged.append(merge(outputs, mode='concat', concat_axis=0))
-
-    return Model(input=model.inputs, output=merged)
-
+            merged.append(concatenate(outputs, axis=0))
+    return Model(inputs=model.inputs, outputs=merged)
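
Usage sketch (not part of the commit): a minimal example of how the revised
make_parallel is meant to be called, assuming the TensorFlow backend and at
least one visible GPU (with no GPUs, get_available_gpus() returns an empty
list and no towers are built). The toy model, shapes, and hyperparameters
below are hypothetical.

    import numpy as np
    from keras.models import Sequential
    from keras.layers import Dense
    from utils.multi_gpu import make_parallel

    # Build an ordinary single-device Keras model as usual.
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(100,)))
    model.add(Dense(1))

    # With the new default gpu_count=None, make_parallel replicates the
    # model across every GPU reported by get_available_gpus(); pass an
    # integer to cap the number of towers instead.
    parallel_model = make_parallel(model)

    # Compile and train the returned parallel model. Each tower receives a
    # 1/gpu_count slice of the batch (see get_slice), so batch_size should
    # be divisible by the number of GPUs.
    parallel_model.compile(optimizer='adam', loss='mse')
    X = np.random.random((256, 100))
    y = np.random.random((256, 1))
    parallel_model.fit(X, y, batch_size=256, epochs=1)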