From 3b4e9ced662239176b56955620461836c116c8e4 Mon Sep 17 00:00:00 2001
From: Esh
Date: Mon, 12 Feb 2018 17:10:05 -0800
Subject: [PATCH] [containerization] CPU based containerization to support all
 environments that don't use observations

---
 .gitignore                                 |  5 +-
 Dockerfile                                 | 12 +++
 docs/Using-Docker.md                       | 44 +++++++++++
 docs/installation.md                       |  4 +
 python/learn.py                            | 18 ++++-
 python/trainer_config.yaml                 |  2 +-
 python/unityagents/environment.py          |  3 +
 python/unitytrainers/trainer_controller.py | 90 +++++++++++++++-------
 unity-volume/.gitignore                    |  3 +
 9 files changed, 149 insertions(+), 32 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docs/Using-Docker.md
 create mode 100644 unity-volume/.gitignore

diff --git a/.gitignore b/.gitignore
index 2cf2d4ead7..dc6e6f4708 100755
--- a/.gitignore
+++ b/.gitignore
@@ -8,8 +8,8 @@
 /unity-environment/Assets/AssetStoreTools*
 
 # Tensorflow Model Info
-/python/models
-/python/summaries
+/models
+/summaries
 
 # Environemnt logfile
 *unity-environment.log
@@ -69,4 +69,5 @@
 *.eggs*
 *.gitignore.swp
+
 .DS_Store
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000..1fc9986b08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+# Use an official Python runtime as a parent image
+FROM python:3.6-slim
+
+RUN apt-get update && apt-get -y upgrade
+
+ADD python/requirements.txt .
+RUN pip install --trusted-host pypi.python.org -r requirements.txt
+
+WORKDIR /execute
+COPY python /execute/python
+
+ENTRYPOINT ["python", "python/learn.py"]
diff --git a/docs/Using-Docker.md b/docs/Using-Docker.md
new file mode 100644
index 0000000000..0410189163
--- /dev/null
+++ b/docs/Using-Docker.md
@@ -0,0 +1,44 @@
+# Using Docker For ML Agents (Experimental)
+
+We currently offer an experimental solution for people who would like to train or run inference using Docker. This setup forces both Python and Unity to rely on _only_ the CPU for computation, so we do not support environments such as [GridWorld](Example-Environments.md#gridworld) that use visual observations for training.
+
+## Setup
+
+- Install [Docker](https://www.docker.com/community-edition#/download) if it is not already set up on your machine.
+
+- Since Docker runs a container in an environment that is isolated from the host machine, we use a mounted directory on your host machine, e.g. `unity-volume`, to share data such as the Unity executable, curriculum files and the TensorFlow graph.
+
+## Usage
+
+- Docker typically runs a container sharing a (Linux) kernel with the host machine. This means that the
+Unity environment **has** to be built for the **Linux platform**. Please select `x86_64` as the architecture and choose a `headless` build (_this is important because we are running it in a container that does not have graphics drivers installed_).
+Save the generated environment in the directory to be mounted (we have conveniently created an empty directory called `unity-volume` at the top level for this purpose). Ensure that
+`unity-volume/<environment-name>.x86_64` and `unity-volume/<environment-name>_Data` are both present. For example, `<environment-name>` might be `3Dball`, in which case `unity-volume/3Dball.x86_64` and `unity-volume/3Dball_Data` should both be present in the directory `unity-volume`.
+
+- Make sure the Docker engine is running on your machine, then build the Docker image by running `docker build -t <image-name> .` in the top level of the source directory. Replace `<image-name>` with the name you want to give the image, e.g. `balance.ball.v0.1`.
+
+- Run the container:
+
+```
+docker run --mount type=bind,source="$(pwd)"/unity-volume,target=/unity-volume \
+           <image-name>:latest <environment-name> \
+           --docker-target-name=unity-volume \
+           --train --run-id=<run-id>
+```
+
+For our Balance Ball example, this would be:
+
+```
+docker run --mount type=bind,source="$(pwd)"/unity-volume,target=/unity-volume \
+           balance.ball.v0.1:latest 3Dball \
+           --docker-target-name=unity-volume \
+           --train --run-id=<run-id>
+```
+
+**Note**: The Docker target volume name, `unity-volume`, must be passed to ML-Agents as an argument using the `--docker-target-name` option. The output will be stored in the mounted directory.
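
The guide above asks you to verify that both `<environment-name>.x86_64` and `<environment-name>_Data` are present in the mounted directory before starting the container. A check along those lines could be scripted; the helper below is only a minimal sketch (the file and function names are made up for illustration and are not part of this patch).

```
import os
import sys


def check_unity_volume(volume_dir, env_name):
    """Return True when the headless Linux build and its data folder are both present."""
    expected = [os.path.join(volume_dir, env_name + '.x86_64'),
                os.path.join(volume_dir, env_name + '_Data')]
    missing = [path for path in expected if not os.path.exists(path)]
    for path in missing:
        print('Missing: {}'.format(path))
    return not missing


if __name__ == '__main__':
    # Example: python check_volume.py unity-volume 3Dball
    sys.exit(0 if check_unity_volume(sys.argv[1], sys.argv[2]) else 1)
```
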
diff --git a/docs/installation.md b/docs/installation.md
index 7294511873..55fde6885d 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -24,6 +24,10 @@ If you are a Windows user who is new to Python/TensorFlow, follow [this guide](h
 * docopt (Training)
 * TensorFlow (1.0+) (Training)
 
+## Docker-based Installation (experimental)
+
+If you’d like to use Docker for ML Agents, please follow [this guide](Using-Docker.md).
+
 ### Installing Dependencies
 To install dependencies, go into the `python` sub-directory of the repositroy, and run (depending on your python version) from the command line:
diff --git a/python/learn.py b/python/learn.py
index 7d699698e3..92b39e564f 100755
--- a/python/learn.py
+++ b/python/learn.py
@@ -3,6 +3,7 @@
 
 import logging
+import os
 
 from docopt import docopt
 
 from unitytrainers.trainer_controller import TrainerController
@@ -25,10 +26,16 @@
     --slow                     Whether to run the game at training speed [default: False].
     --train                    Whether to train model, or only run inference [default: False].
     --worker-id=<n>            Number to add to communication port (5005). Used for multi-environment [default: 0].
+    --docker-target-name=<dt>  Docker volume to store curriculum, executable and model files [default: Empty].
 '''
 
 options = docopt(_USAGE)
 logger.info(options)
+# Docker Parameters
+if options['--docker-target-name'] == 'Empty':
+    docker_target_name = ''
+else:
+    docker_target_name = options['--docker-target-name']
 
 # General parameters
 run_id = options['--run-id']
@@ -36,7 +43,7 @@
 load_model = options['--load']
 train_model = options['--train']
 save_freq = int(options['--save-freq'])
-env_name = options['<env>']
+env_path = options['<env>']
 keep_checkpoints = int(options['--keep-checkpoints'])
 worker_id = int(options['--worker-id'])
 curriculum_file = str(options['--curriculum'])
@@ -45,6 +52,11 @@
 lesson = int(options['--lesson'])
 fast_simulation = not bool(options['--slow'])
 
-tc = TrainerController(env_name, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
-                       worker_id, keep_checkpoints, lesson, seed)
+# Constants
+# Assumes trainer_config.yaml is present in the same directory as this file
+base_path = os.path.dirname(__file__)
+TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))
+
+tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
+                       worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH)
 tc.start_learning()
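
To make the added constants concrete: assuming the image built from the Dockerfile above, the entrypoint runs `python python/learn.py` from the working directory `/execute`, so the trainer configuration is resolved next to `learn.py` rather than relative to wherever the process was started. The sketch below only traces that path arithmetic under those assumptions; it is illustrative and not part of the patch.

```
import os

# Inside the container the entrypoint is `python python/learn.py ...`
# with WORKDIR /execute, so __file__ would be 'python/learn.py'.
file_in_container = 'python/learn.py'

base_path = os.path.dirname(file_in_container)                  # -> 'python'
trainer_config = os.path.join(base_path, 'trainer_config.yaml')

# os.path.abspath() prepends the current working directory, which is
# /execute in the image, giving /execute/python/trainer_config.yaml.
print(os.path.abspath(trainer_config))
```
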
diff --git a/python/trainer_config.yaml b/python/trainer_config.yaml
index fd24fa044c..0f20e3b77c 100644
--- a/python/trainer_config.yaml
+++ b/python/trainer_config.yaml
@@ -21,7 +21,7 @@ Ball3DBrain:
     summary_freq: 1000
     normalize: true
     batch_size: 1000
-    buffer_size: 10000
+    buffer_size: 10000
     hidden_units: 64
     max_steps: 1.0e4
diff --git a/python/unityagents/environment.py b/python/unityagents/environment.py
index e560982b41..c1efcd99b5 100755
--- a/python/unityagents/environment.py
+++ b/python/unityagents/environment.py
@@ -33,6 +33,7 @@ def __init__(self, file_name, worker_id=0,
         :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
         :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
         """
+        atexit.register(self.close)
 
         self.port = base_port + worker_id
         self._buffer_size = 12000
@@ -57,6 +58,7 @@ def __init__(self, file_name, worker_id=0,
         file_name = (file_name.strip()
                      .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86', ''))
         true_filename = os.path.basename(os.path.normpath(file_name))
+        logger.info('The true file name is {}'.format(true_filename))
         launch_string = None
         if platform == "linux" or platform == "linux2":
             candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
@@ -87,6 +89,7 @@ def __init__(self, file_name, worker_id=0,
                                                 "Provided filename does not match any environments."
                                                 .format(true_filename))
         else:
+            logger.info("This is the launch string {}".format(launch_string))
             # Launch Unity environment
             proc1 = subprocess.Popen(
                 [launch_string,
diff --git a/python/unitytrainers/trainer_controller.py b/python/unitytrainers/trainer_controller.py
index 383749f89a..5bc981e99d 100644
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
@@ -16,13 +16,51 @@
 
 
 class TrainerController(object):
-    def __init__(self, env_name, run_id, save_freq, curriculum_file, fast_simulation, load, train,
-                 worker_id, keep_checkpoints, lesson, seed):
-        self.model_path = './models/{}'.format(run_id)
+    def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
+                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path):
+        """
+        :param env_path: Location of the environment executable to be loaded.
+        :param run_id: The sub-directory name for model and summary statistics.
+        :param save_freq: Frequency at which to save the model.
+        :param curriculum_file: Curriculum json file for the environment.
+        :param fast_simulation: Whether to run the game at training speed.
+        :param load: Whether to load the model or randomly initialize it.
+        :param train: Whether to train the model or only run inference.
+        :param worker_id: Number to add to the communication port (5005). Used for multi-environment runs.
+        :param keep_checkpoints: How many model checkpoints to keep.
+        :param lesson: Start learning from this lesson.
+        :param seed: Random seed used for training.
+        :param docker_target_name: Name of the docker volume that will contain all data.
+        :param trainer_config_path: Fully qualified path to the trainer configuration file.
+        """
+        self.trainer_config_path = trainer_config_path
+        env_path = (env_path.strip()
+                    .replace('.app', '')
+                    .replace('.exe', '')
+                    .replace('.x86_64', '')
+                    .replace('.x86', ''))  # Strip out executable extensions if passed
+        # Recognize and use docker volume if one is passed as an argument
+        if docker_target_name == '':
+            self.model_path = './models/{run_id}'.format(run_id=run_id)
+            self.curriculum_file = curriculum_file
+            self.summaries_dir = './summaries'
+        else:
+            self.model_path = '/{docker_target_name}/models/{run_id}'.format(
+                docker_target_name=docker_target_name,
+                run_id=run_id)
+            env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
+                                                                 env_name=env_path)
+            if curriculum_file is None:
+                self.curriculum_file = None
+            else:
+                self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
+                    docker_target_name=docker_target_name,
+                    curriculum_file=curriculum_file)
+            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
         self.logger = logging.getLogger("unityagents")
         self.run_id = run_id
         self.save_freq = save_freq
-        self.curriculum_file = curriculum_file
         self.lesson = lesson
         self.fast_simulation = fast_simulation
         self.load_model = load
@@ -35,12 +73,9 @@ def __init__(self, env_name, run_id, save_freq, curriculum_file, fast_simulation
         self.seed = seed
         np.random.seed(self.seed)
         tf.set_random_seed(self.seed)
-        self.env = UnityEnvironment(file_name=env_name, worker_id=self.worker_id,
+        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
                                     curriculum=self.curriculum_file, seed=self.seed)
-        self.env_name = (env_name.strip().replace('.app', '').replace('.exe', '').replace('.x86_64', '')
-                         .replace('.x86', ''))
-        self.env_name = os.path.basename(os.path.normpath(self.env_name))
-
         self.logger.info(str(self.env))
+        self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment
 
     def _get_progress(self):
         if self.curriculum_file is not None:
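
The branch above is the core of the containerization change: when a docker target name is supplied, every path the controller touches is re-rooted under the mounted volume; otherwise the previous relative layout is kept. Below is a minimal, illustrative recap of that mapping with values taken from the usage guide (volume `unity-volume`, environment `3Dball`, run id `first-run`); it is a sketch, not code from the patch.

```
# Illustrative recap of the path mapping performed in __init__ above.
def resolve_paths(docker_target_name, env_path, run_id, curriculum_file=None):
    if docker_target_name == '':
        # No docker volume: keep the old relative layout.
        return {'model_path': './models/{}'.format(run_id),
                'summaries_dir': './summaries',
                'env_path': env_path,
                'curriculum_file': curriculum_file}
    # Docker volume: everything lives under /<volume-name>/.
    return {'model_path': '/{}/models/{}'.format(docker_target_name, run_id),
            'summaries_dir': '/{}/summaries'.format(docker_target_name),
            'env_path': '/{}/{}'.format(docker_target_name, env_path),
            'curriculum_file': None if curriculum_file is None
            else '/{}/{}'.format(docker_target_name, curriculum_file)}


print(resolve_paths('unity-volume', '3Dball', 'first-run'))
# {'model_path': '/unity-volume/models/first-run',
#  'summaries_dir': '/unity-volume/summaries',
#  'env_path': '/unity-volume/3Dball',
#  'curriculum_file': None}
```
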
@@ -82,17 +117,16 @@ def _process_graph(self):
                 self.logger.info("\t" + n)
         return nodes
 
-    def _save_model(self, sess, saver, model_path="./", steps=0):
+    def _save_model(self, sess, saver, steps=0):
         """
         Saves current model to checkpoint folder.
         :param sess: Current Tensorflow session.
-        :param model_path: Designated model path.
         :param steps: Current number of steps in training process.
         :param saver: Tensorflow saver for session.
         """
-        last_checkpoint = model_path + '/model-' + str(steps) + '.cptk'
+        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
         saver.save(sess, last_checkpoint)
-        tf.train.write_graph(sess.graph_def, model_path, 'raw_graph_def.pb', as_text=False)
+        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
         self.logger.info("Saved Model")
 
     def _export_graph(self):
@@ -117,11 +151,14 @@ def _initialize_trainers(self, trainer_config, sess):
             if len(self.env.external_brain_names) > 1:
                 graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                 trainer_parameters['graph_scope'] = graph_scope
-                trainer_parameters['summary_path'] = './summaries/{}'.format(
-                    str(self.run_id)) + '_' + graph_scope
+                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
+                    basedir=self.summaries_dir,
+                    name=str(self.run_id) + '_' + graph_scope)
             else:
                 trainer_parameters['graph_scope'] = ''
-                trainer_parameters['summary_path'] = './summaries/{}'.format(self.run_id)
+                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
+                    basedir=self.summaries_dir,
+                    name=str(self.run_id))
             if brain_name in trainer_config:
                 _brain_key = brain_name
                 while not isinstance(trainer_config[_brain_key], dict):
@@ -141,17 +178,18 @@ def _initialize_trainers(self, trainer_config, sess):
                 raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                 .format(brain_name))
 
-    @staticmethod
-    def _load_config(config_filename):
+    def _load_config(self):
         try:
-            with open(config_filename) as data_file:
+            with open(self.trainer_config_path) as data_file:
                 trainer_config = yaml.load(data_file)
                 return trainer_config
         except IOError:
-            raise UnityEnvironmentException("The file {} could not be found. Will use default Hyperparameters"
-                                            .format("trainer_config.yaml"))
+            raise UnityEnvironmentException("Parameter file could not be found at {}. "
+                                            "Will use default hyperparameters."
+                                            .format(self.trainer_config_path))
         except UnicodeDecodeError:
-            raise UnityEnvironmentException("There was an error decoding {}".format("trainer_config.yaml"))
+            raise UnityEnvironmentException("There was an error decoding the trainer config file at {}"
+                                            .format(self.trainer_config_path))
 
     @staticmethod
     def _create_model_path(model_path):
@@ -165,7 +203,7 @@ def _create_model_path(model_path):
 
     def start_learning(self):
         self.env.curriculum.set_lesson_number(self.lesson)
-        trainer_config = self._load_config("trainer_config.yaml")
+        trainer_config = self._load_config()
         self._create_model_path(self.model_path)
 
         tf.reset_default_graph()
@@ -225,15 +263,15 @@ def start_learning(self):
                     global_step += 1
                     if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                         # Save Tensorflow model
-                        self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                        self._save_model(sess, steps=global_step, saver=saver)
 
                 # Final save Tensorflow model
                 if global_step != 0 and self.train_model:
-                    self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                    self._save_model(sess, steps=global_step, saver=saver)
             except KeyboardInterrupt:
                 if self.train_model:
                     self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
-                    self._save_model(sess, model_path=self.model_path, steps=global_step, saver=saver)
+                    self._save_model(sess, steps=global_step, saver=saver)
                 pass
         self.env.close()
         if self.train_model:
diff --git a/unity-volume/.gitignore b/unity-volume/.gitignore
new file mode 100644
index 0000000000..927b40d9b8
--- /dev/null
+++ b/unity-volume/.gitignore
@@ -0,0 +1,3 @@
+*
+# Ignore everything in this directory except for .gitignore. This directory is for illustrative purposes.
+!.gitignore
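
For completeness, this is roughly how `learn.py` now wires the two new trailing arguments into `TrainerController`. The literal values below are illustrative stand-ins for what docopt would parse from the command line in the Balance Ball example; this is a usage sketch, not code from the patch.

```
# Illustrative usage of the extended TrainerController signature.
import os
from unitytrainers.trainer_controller import TrainerController

base_path = os.path.dirname(__file__)
TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, 'trainer_config.yaml'))

tc = TrainerController('3Dball',          # <env>
                       'first-run',       # --run-id
                       50000,             # --save-freq (illustrative value)
                       None,              # --curriculum
                       True,              # fast_simulation (i.e. not --slow)
                       False,             # --load
                       True,              # --train
                       0,                 # --worker-id
                       5,                 # --keep-checkpoints (illustrative value)
                       0,                 # --lesson
                       -1,                # --seed (illustrative value)
                       'unity-volume',    # --docker-target-name
                       TRAINER_CONFIG_PATH)
tc.start_learning()
# With a docker target of 'unity-volume', checkpoints are written to
# /unity-volume/models/first-run and TensorBoard summaries to
# /unity-volume/summaries, i.e. into the mounted host directory.
```
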