From 6fcad9636a34d4f32ff2225d45c90fe882520a5d Mon Sep 17 00:00:00 2001
From: Francesco Mosconi
Date: Fri, 1 Sep 2017 12:55:34 -0700
Subject: [PATCH] max gpus, gitignore, pep8

---
 .gitignore         | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 utils/multi_gpu.py | 55 ++++++++++++++++++----------
 2 files changed, 126 insertions(+), 20 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9eafa33
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,91 @@
+.DS_Store
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
diff --git a/utils/multi_gpu.py b/utils/multi_gpu.py
index 6eea8b9..74f35e2 100644
--- a/utils/multi_gpu.py
+++ b/utils/multi_gpu.py
@@ -1,14 +1,21 @@
-from keras.layers import merge
+from __future__ import print_function
+from keras.layers import concatenate
 from keras.layers.core import Lambda
 from keras.models import Model
-
+from tensorflow.python.client import device_lib
 import tensorflow as tf
 
-def make_parallel(model, gpu_count):
+
+def get_available_gpus():
+    local_device_protos = device_lib.list_local_devices()
+    return [x.name for x in local_device_protos if x.device_type == 'GPU']
+
+
+def make_parallel(model, gpu_count=None):
     def get_slice(data, idx, parts):
         shape = tf.shape(data)
-        size = tf.concat([ shape[:1] // parts, shape[1:] ],axis=0)
-        stride = tf.concat([ shape[:1] // parts, shape[1:]*0 ],axis=0)
+        size = tf.concat([shape[:1] // parts, shape[1:]], axis=0)
+        stride = tf.concat([shape[:1] // parts, shape[1:] * 0], axis=0)
         start = stride * idx
         return tf.slice(data, start, size)
 
@@ -16,24 +23,34 @@ def get_slice(data, idx, parts):
     for i in range(len(model.outputs)):
         outputs_all.append([])
 
-    #Place a copy of the model on each GPU, each getting a slice of the batch
-    for i in range(gpu_count):
-        with tf.device('/gpu:%d' % i):
-            with tf.name_scope('tower_%d' % i) as scope:
-
+    # if gpu_count provided, use it
+    # otherwise use all available gpus
+    if gpu_count:
+        gpus = ['/gpu:%d' % i for i in range(gpu_count)]
+    else:
+        gpus = get_available_gpus()
+        gpu_count = len(gpus)
+
+    # Place a copy of the model on each GPU, each getting a slice of the batch
+    for i, device in enumerate(gpus):
+        with tf.device(device):
+            with tf.name_scope('tower_%d' % i):
                 inputs = []
-                #Slice each input into a piece for processing on this GPU
+                # Slice each input into a piece for processing on this GPU
                 for x in model.inputs:
                     input_shape = tuple(x.get_shape().as_list())[1:]
-                    slice_n = Lambda(get_slice, output_shape=input_shape, arguments={'idx':i,'parts':gpu_count})(x)
-                    inputs.append(slice_n)
+                    slice_n = Lambda(get_slice,
+                                     output_shape=input_shape,
+                                     arguments={'idx': i,
+                                                'parts': gpu_count})(x)
+                    inputs.append(slice_n)
 
                 outputs = model(inputs)
-                
+
                 if not isinstance(outputs, list):
                     outputs = [outputs]
-                
-                #Save all the outputs for merging back together later
+
+                # Save all the outputs for merging back together later
                 for l in range(len(outputs)):
                     outputs_all[l].append(outputs[l])
 
@@ -41,7 +58,5 @@ def get_slice(data, idx, parts):
     with tf.device('/cpu:0'):
         merged = []
         for outputs in outputs_all:
-            merged.append(merge(outputs, mode='concat', concat_axis=0))
-
-    return Model(input=model.inputs, output=merged)
-
+            merged.append(concatenate(outputs, axis=0))
+    return Model(inputs=model.inputs, outputs=merged)
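
Usage sketch (not part of the commit): a minimal example of how the revised
make_parallel is meant to be called, assuming the TensorFlow backend and at
least one visible GPU (with no GPUs, get_available_gpus() returns an empty
list and no towers are built). The toy model, shapes, and hyperparameters
below are hypothetical.

    import numpy as np
    from keras.models import Sequential
    from keras.layers import Dense
    from utils.multi_gpu import make_parallel

    # Build an ordinary single-device Keras model as usual.
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(100,)))
    model.add(Dense(1))

    # With the new default gpu_count=None, make_parallel replicates the
    # model across every GPU reported by get_available_gpus(); pass an
    # integer to cap the number of towers instead.
    parallel_model = make_parallel(model)

    # Compile and train the returned parallel model. Each tower receives a
    # 1/gpu_count slice of the batch (see get_slice), so batch_size should
    # be divisible by the number of GPUs.
    parallel_model.compile(optimizer='adam', loss='mse')
    X = np.random.random((256, 100))
    y = np.random.random((256, 1))
    parallel_model.fit(X, y, batch_size=256, epochs=1)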