From 3b4e9ced662239176b56955620461836c116c8e4 Mon Sep 17 00:00:00 2001
From: Esh
Date: Mon, 12 Feb 2018 17:10:05 -0800
Subject: [PATCH] [containerization] CPU based containerization to support all
 environments that don't use observations

---
 .gitignore                                 |  5 +-
 Dockerfile                                 | 12 +++
 docs/Using-Docker.md                       | 44 +++++++++++
 docs/installation.md                       |  4 +
 python/learn.py                            | 18 ++++-
 python/trainer_config.yaml                 |  2 +-
 python/unityagents/environment.py          |  3 +
 python/unitytrainers/trainer_controller.py | 90 +++++++++++++++-------
 unity-volume/.gitignore                    |  3 +
 9 files changed, 149 insertions(+), 32 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docs/Using-Docker.md
 create mode 100644 unity-volume/.gitignore

diff --git a/.gitignore b/.gitignore
index 2cf2d4ead7..dc6e6f4708 100755
--- a/.gitignore
+++ b/.gitignore
@@ -8,8 +8,8 @@
 /unity-environment/Assets/AssetStoreTools*
 
 # Tensorflow Model Info
-/python/models
-/python/summaries
+/models
+/summaries
 
 # Environemnt logfile
 *unity-environment.log
@@ -69,4 +69,5 @@
 *.eggs*
 *.gitignore.swp
+
 .DS_Store
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000..1fc9986b08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+# Use an official Python runtime as a parent image
+FROM python:3.6-slim
+
+RUN apt-get update && apt-get -y upgrade
+
+ADD python/requirements.txt .
+RUN pip install --trusted-host pypi.python.org -r requirements.txt
+
+WORKDIR /execute
+COPY python /execute/python
+
+ENTRYPOINT ["python", "python/learn.py"]
diff --git a/docs/Using-Docker.md b/docs/Using-Docker.md
new file mode 100644
index 0000000000..0410189163
--- /dev/null
+++ b/docs/Using-Docker.md
@@ -0,0 +1,44 @@
+# Using Docker For ML Agents (Experimental)
+
+We currently offer an experimental solution for people who would like to train or run inference using Docker. This setup forces both Python and Unity to rely on _only_ the CPU for computation, so we do not support environments such as [GridWorld](Example-Environments.md#gridworld) that use visual observations for training.
+
+## Setup
+
+- Install [Docker](https://www.docker.com/community-edition#/download) if it is not already set up on your machine.
+
+- Since Docker runs a container in an environment that is isolated from the host machine, we use a mounted directory on your host machine, e.g. `unity-volume`, to share data such as the Unity executable, curriculum files and the TensorFlow graph.
+
+## Usage
+
+- Docker typically runs a container sharing a (Linux) kernel with the host machine. This means that the
+Unity environment **has** to be built for the **Linux platform**. Please select `x86_64` as the architecture and choose a `headless` build (_this is important because we are running it in a container that does not have graphics drivers installed_).
+Save the generated environment in the directory to be mounted (we have conveniently created an empty directory called `unity-volume` at the top level for this purpose). Ensure that
+`unity-volume/<environment-name>.x86_64` and `unity-volume/<environment-name>_Data` are both present. For example, `<environment-name>` might be `3Dball`, in which case `unity-volume/3Dball.x86_64` and `unity-volume/3Dball_Data` should both be present in the directory `unity-volume`.
+
+- Make sure the Docker engine is running on your machine, then build the Docker image by running `docker build -t <image-name> .` in the top level of the source directory. Replace `<image-name>` with the name you want to give the image, e.g. `balance.ball.v0.1`.
+
+- Run the container:
+
+```
+docker run --mount type=bind,source="$(pwd)"/unity-volume,target=/unity-volume \
+           <image-name>:latest <environment-name> \
+           --docker-target-name=unity-volume \
+           --train --run-id=<run-id>
+```
+
+For our Balance Ball example, this would be:
+
+```
+docker run --mount type=bind,source="$(pwd)"/unity-volume,target=/unity-volume \
+           balance.ball.v0.1:latest 3Dball \
+           --docker-target-name=unity-volume \
+           --train --run-id=<run-id>
+```
+
+**Note**: The Docker target volume name, `unity-volume`, must be passed to ML-Agents as an argument using the `--docker-target-name` option. The output will be stored in the mounted directory.
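
The guide above asks you to verify that both `<environment-name>.x86_64` and `<environment-name>_Data` are present in the mounted directory before starting the container. A check along those lines could be scripted; the helper below is only a minimal sketch (the file and function names are made up for illustration and are not part of this patch).

```
import os
import sys


def check_unity_volume(volume_dir, env_name):
    """Return True when the headless Linux build and its data folder are both present."""
    expected = [os.path.join(volume_dir, env_name + '.x86_64'),
                os.path.join(volume_dir, env_name + '_Data')]
    missing = [path for path in expected if not os.path.exists(path)]
    for path in missing:
        print('Missing: {}'.format(path))
    return not missing


if __name__ == '__main__':
    # Example: python check_volume.py unity-volume 3Dball
    sys.exit(0 if check_unity_volume(sys.argv[1], sys.argv[2]) else 1)
```
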
diff --git a/docs/installation.md b/docs/installation.md
index 7294511873..55fde6885d 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -24,6 +24,10 @@ If you are a Windows user who is new to Python/TensorFlow, follow [this guide](h
 * docopt (Training)
 * TensorFlow (1.0+) (Training)
 
+## Docker-based Installation (experimental)
+
+If you’d like to use Docker for ML Agents, please follow [this guide](Using-Docker.md).
+
 ### Installing Dependencies
 To install dependencies, go into the `python` sub-directory of the repositroy, and run (depending on your python version) from the command line:
diff --git a/python/learn.py b/python/learn.py
index 7d699698e3..92b39e564f 100755
--- a/python/learn.py
+++ b/python/learn.py
@@ -3,6 +3,7 @@
 
 import logging
+import os
 
 from docopt import docopt
 
 from unitytrainers.trainer_controller import TrainerController
@@ -25,10 +26,16 @@
     --slow                     Whether to run the game at training speed [default: False].
     --train                    Whether to train model, or only run inference [default: False].
     --worker-id=<n>            Number to add to communication port (5005). Used for multi-environment [default: 0].
+    --docker-target-name=<dt>  Docker volume to store curriculum, executable and model files [default: Empty].
 '''
 
 options = docopt(_USAGE)
 logger.info(options)
+# Docker Parameters
+if options['--docker-target-name'] == 'Empty':
+    docker_target_name = ''
+else:
+    docker_target_name = options['--docker-target-name']
 
 # General parameters
 run_id = options['--run-id']
@@ -36,7 +43,7 @@
 load_model = options['--load']
 train_model = options['--train']
 save_freq = int(options['--save-freq'])
-env_name = options['<env>']
+env_path = options['<env>']
 keep_checkpoints = int(options['--keep-checkpoints'])
 worker_id = int(options['--worker-id'])
 curriculum_file = str(options['--curriculum'])
@@ -45,6 +52,11 @@
 lesson = int(options['--lesson'])
 fast_simulation = not bool(options['--slow'])
 
-tc = TrainerController(env_name, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
-                       worker_id, keep_checkpoints, lesson, seed)
+# Constants
+# Assumes trainer_config.yaml is present in the same directory as this file
+base_path = os.path.dirname(__file__)
+TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))
+
+tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
+                       worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH)
 tc.start_learning()
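
To make the added constants concrete: assuming the image built from the Dockerfile above, the entrypoint runs `python python/learn.py` from the working directory `/execute`, so the trainer configuration is resolved next to `learn.py` rather than relative to wherever the process was started. The sketch below only traces that path arithmetic under those assumptions; it is illustrative and not part of the patch.

```
import os

# Inside the container the entrypoint is `python python/learn.py ...`
# with WORKDIR /execute, so __file__ would be 'python/learn.py'.
file_in_container = 'python/learn.py'

base_path = os.path.dirname(file_in_container)                  # -> 'python'
trainer_config = os.path.join(base_path, 'trainer_config.yaml')

# os.path.abspath() prepends the current working directory, which is
# /execute in the image, giving /execute/python/trainer_config.yaml.
print(os.path.abspath(trainer_config))
```
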
diff --git a/python/trainer_config.yaml b/python/trainer_config.yaml
index fd24fa044c..0f20e3b77c 100644
--- a/python/trainer_config.yaml
+++ b/python/trainer_config.yaml
@@ -21,7 +21,7 @@ Ball3DBrain:
     summary_freq: 1000
     normalize: true
     batch_size: 1000
-    buffer_size: 10000
+    buffer_size: 10000
     hidden_units: 64
     max_steps: 1.0e4
diff --git a/python/unityagents/environment.py b/python/unityagents/environment.py
index e560982b41..c1efcd99b5 100755
--- a/python/unityagents/environment.py
+++ b/python/unityagents/environment.py
@@ -33,6 +33,7 @@ def __init__(self, file_name, worker_id=0,
         :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
         :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
         """
+        atexit.register(self.close)
 
         self.port = base_port + worker_id
         self._buffer_size = 12000
@@ -57,6 +58,7 @@ def __init__(self, file_name, worker_id=0,
         file_name = (file_name.strip()
                      .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86', ''))
         true_filename = os.path.basename(os.path.normpath(file_name))
+        logger.info('The true file name is {}'.format(true_filename))
         launch_string = None
         if platform == "linux" or platform == "linux2":
             candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
@@ -87,6 +89,7 @@ def __init__(self, file_name, worker_id=0,
                                                 "Provided filename does not match any environments."
                                                 .format(true_filename))
         else:
+            logger.info("This is the launch string {}".format(launch_string))
             # Launch Unity environment
             proc1 = subprocess.Popen(
                 [launch_string,
diff --git a/python/unitytrainers/trainer_controller.py b/python/unitytrainers/trainer_controller.py
index 383749f89a..5bc981e99d 100644
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
@@ -16,13 +16,51 @@
 
 
 class TrainerController(object):
-    def __init__(self, env_name, run_id, save_freq, curriculum_file, fast_simulation, load, train,
-                 worker_id, keep_checkpoints, lesson, seed):
-        self.model_path = './models/{}'.format(run_id)
+    def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
+                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path):
+        """
+        :param env_path: Location of the environment executable to be loaded.
+        :param run_id: The sub-directory name for model and summary statistics.
+        :param save_freq: Frequency at which to save the model.
+        :param curriculum_file: Curriculum json file for the environment.
+        :param fast_simulation: Whether to run the game at training speed.
+        :param load: Whether to load the model or randomly initialize it.
+        :param train: Whether to train the model or only run inference.
+        :param worker_id: Number to add to the communication port (5005). Used for multi-environment runs.
+        :param keep_checkpoints: How many model checkpoints to keep.
+        :param lesson: Start learning from this lesson.
+        :param seed: Random seed used for training.
+        :param docker_target_name: Name of the docker volume that will contain all data.
+        :param trainer_config_path: Fully qualified path to the trainer configuration file.
+        """
+        self.trainer_config_path = trainer_config_path
+        env_path = (env_path.strip()
+                    .replace('.app', '')
+                    .replace('.exe', '')
+                    .replace('.x86_64', '')
+                    .replace('.x86', ''))  # Strip out executable extensions if passed
+        # Recognize and use docker volume if one is passed as an argument
+        if docker_target_name == '':
+            self.model_path = './models/{run_id}'.format(run_id=run_id)
+            self.curriculum_file = curriculum_file
+            self.summaries_dir = './summaries'
+        else:
+            self.model_path = '/{docker_target_name}/models/{run_id}'.format(
+                docker_target_name=docker_target_name,
+                run_id=run_id)
+            env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
+                                                                 env_name=env_path)
+            if curriculum_file is None:
+                self.curriculum_file = None
+            else:
+                self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
+                    docker_target_name=docker_target_name,
+                    curriculum_file=curriculum_file)
+            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
         self.logger = logging.getLogger("unityagents")
         self.run_id = run_id
         self.save_freq = save_freq
-        self.curriculum_file = curriculum_file
         self.lesson = lesson
         self.fast_simulation = fast_simulation
         self.load_model = load
@@ -35,12 +73,9 @@ def __init__(self, env_name, run_id, save_freq, curriculum_file, fast_simulation
         self.seed = seed
         np.random.seed(self.seed)
         tf.set_random_seed(self.seed)
-        self.env = UnityEnvironment(file_name=env_name, worker_id=self.worker_id,
+        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
                                     curriculum=self.curriculum_file, seed=self.seed)
-        self.env_name = (env_name.strip().replace('.app', '').replace('.exe', '').replace('.x86_64', '')
-                         .replace('.x86', ''))
-        self.env_name = os.path.basename(os.path.normpath(self.env_name))
-
         self.logger.info(str(self.env))
+        self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment
 
     def _get_progress(self):
         if self.curriculum_file is not None:
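
The branch above is the core of the containerization change: when a docker target name is supplied, every path the controller touches is re-rooted under the mounted volume; otherwise the previous relative layout is kept. Below is a minimal, illustrative recap of that mapping with values taken from the usage guide (volume `unity-volume`, environment `3Dball`, run id `first-run`); it is a sketch, not code from the patch.

```
# Illustrative recap of the path mapping performed in __init__ above.
def resolve_paths(docker_target_name, env_path, run_id, curriculum_file=None):
    if docker_target_name == '':
        # No docker volume: keep the old relative layout.
        return {'model_path': './models/{}'.format(run_id),
                'summaries_dir': './summaries',
                'env_path': env_path,
                'curriculum_file': curriculum_file}
    # Docker volume: everything lives under /<volume-name>/.
    return {'model_path': '/{}/models/{}'.format(docker_target_name, run_id),
            'summaries_dir': '/{}/summaries'.format(docker_target_name),
            'env_path': '/{}/{}'.format(docker_target_name, env_path),
            'curriculum_file': None if curriculum_file is None
            else '/{}/{}'.format(docker_target_name, curriculum_file)}


print(resolve_paths('unity-volume', '3Dball', 'first-run'))
# {'model_path': '/unity-volume/models/first-run',
#  'summaries_dir': '/unity-volume/summaries',
#  'env_path': '/unity-volume/3Dball',
#  'curriculum_file': None}
```
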
@@ -82,17 +117,16 @@ def _process_graph(self):
                 self.logger.info("\t" + n)
         return nodes
 
-    def _save_model(self, sess, saver, model_path="./", steps=0):
+    def _save_model(self, sess, saver, steps=0):
         """
         Saves current model to checkpoint folder.
         :param sess: Current Tensorflow session.
-        :param model_path: Designated model path.
         :param steps: Current number of steps in training process.
         :param saver: Tensorflow saver for session.
         """
-        last_checkpoint = model_path + '/model-' + str(steps) + '.cptk'
+        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
         saver.save(sess, last_checkpoint)
-        tf.train.write_graph(sess.graph_def, model_path, 'raw_graph_def.pb', as_text=False)
+        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
         self.logger.info("Saved Model")
 
     def _export_graph(self):
@@ -117,11 +151,14 @@ def _initialize_trainers(self, trainer_config, sess):
             if len(self.env.external_brain_names) > 1:
                 graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                 trainer_parameters['graph_scope'] = graph_scope
-                trainer_parameters['summary_path'] = './summaries/{}'.format(
-                    str(self.run_id)) + '_' + graph_scope
+                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
+                    basedir=self.summaries_dir,
+                    name=str(self.run_id) + '_' + graph_scope)
             else:
                 trainer_parameters['graph_scope'] = ''
-                trainer_parameters['summary_path'] = './summaries/{}'.format(self.run_id)
+                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
+                    basedir=self.summaries_dir,
+                    name=str(self.run_id))
             if brain_name in trainer_config:
                 _brain_key = brain_name
                 while not isinstance(trainer_config[_brain_key], dict):
@@ -141,17 +178,18 @@ def _initialize_trainers(self, trainer_config, sess):
                 raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                 .format(brain_name))
 
-    @staticmethod
-    def _load_config(config_filename):
+    def _load_config(self):
         try:
-            with open(config_filename) as data_file:
+            with open(self.trainer_config_path) as data_file:
                 trainer_config = yaml.load(data_file)
                 return trainer_config
         except IOError:
-            raise UnityEnvironmentException("The file {} could not be found. Will use default Hyperparameters"
-                                            .format("trainer_config.yaml"))
+            raise UnityEnvironmentException("Parameter file could not be found at {}. "
+                                            "Will use default hyperparameters."
+                                            .format(self.trainer_config_path))
         except UnicodeDecodeError:
-            raise UnityEnvironmentException("There was an error decoding {}".format("trainer_config.yaml"))
+            raise UnityEnvironmentException("There was an error decoding the trainer config file at {}"
+                                            .format(self.trainer_config_path))
 
     @staticmethod
     def _create_model_path(model_path):
@@ -165,7 +203,7 @@ def _create_model_path(model_path):
 
     def start_learning(self):
         self.env.curriculum.set_lesson_number(self.lesson)
-        trainer_config = self._load_config("trainer_config.yaml")
+        trainer_config = self._load_config()
         self._create_model_path(self.model_path)
 
         tf.reset_default_graph()
@@ -225,15 +263,15 @@ def start_learning(self):
                     global_step += 1
                     if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                         # Save Tensorflow model
-                        self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                        self._save_model(sess, steps=global_step, saver=saver)
 
                 # Final save Tensorflow model
                 if global_step != 0 and self.train_model:
-                    self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                    self._save_model(sess, steps=global_step, saver=saver)
             except KeyboardInterrupt:
                 if self.train_model:
                     self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
-                    self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                    self._save_model(sess, steps=global_step, saver=saver)
                 pass
         self.env.close()
         if self.train_model:
diff --git a/unity-volume/.gitignore b/unity-volume/.gitignore
new file mode 100644
index 0000000000..927b40d9b8
--- /dev/null
+++ b/unity-volume/.gitignore
@@ -0,0 +1,3 @@
+*
+# Ignore everything in this directory except for .gitignore. This directory is for illustrative purposes.
+!.gitignore
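
For completeness, this is roughly how `learn.py` now wires the two new trailing arguments into `TrainerController`. The literal values below are illustrative stand-ins for what docopt would parse from the command line in the Balance Ball example; this is a usage sketch, not code from the patch.

```
# Illustrative usage of the extended TrainerController signature.
import os
from unitytrainers.trainer_controller import TrainerController

base_path = os.path.dirname(__file__)
TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, 'trainer_config.yaml'))

tc = TrainerController('3Dball',          # <env>
                       'first-run',       # --run-id
                       50000,             # --save-freq (illustrative value)
                       None,              # --curriculum
                       True,              # fast_simulation (i.e. not --slow)
                       False,             # --load
                       True,              # --train
                       0,                 # --worker-id
                       5,                 # --keep-checkpoints (illustrative value)
                       0,                 # --lesson
                       -1,                # --seed (illustrative value)
                       'unity-volume',    # --docker-target-name
                       TRAINER_CONFIG_PATH)
tc.start_learning()
# With a docker target of 'unity-volume', checkpoints are written to
# /unity-volume/models/first-run and TensorBoard summaries to
# /unity-volume/summaries, i.e. into the mounted host directory.
```
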