Pybind env #5

Open · wants to merge 14 commits into base: master
Changes from all commits
37 changes: 11 additions & 26 deletions README.md
@@ -9,12 +9,12 @@ Dependencies:
* [dm_env](https://github.com/deepmind/dm_env)
* modified version of [hanabi-learning-environment](https://github.com/braintimeException/hanabi-learning-environment)

For convenience we provide a docker file where most of the dependencies (including those for running rlax-based RL agents, see [hanabi-agents](https://github.com/braintimeException/hanabi-agents)) are installed.
For convenience we provide a docker file where most of the dependencies (including those for running compatible agents, see [hanabi-agents](https://github.com/braintimeException/hanabi-agents)) are installed.

For example, to build a docker container with GPU support and rlax installed, run
```
# clone this repo if you haven't yet
$ git clone https://github.com/braintimeException/hanabi-multiagent-framework/
$ git clone https://github.com/braintimeException/hanabi-multiagent-framework/ --branch pybind-env

# build the container
$ cd docker/images
@@ -23,32 +23,17 @@ $ docker build -t hanabi-framework:gpu-rlax -f Dockerfile-gpu-rlax .

# Running

The docker container above is a development version, meaning that it does not contain code from this repo, from `hanabi-learning-environment`, or from `hanabi-agents`. Therefore, to run the code you would need to clone these repos and mount them, like so:
The docker container contains all necessary dependencies along with the
`hanabi-agents` repo. In order to try it out, run the following commands:
```
# clone the repos
$ git clone https://github.com/braintimeException/hanabi-learning-environment
$ git clone https://github.com/braintimeException/hanabi-agents

# run the container with a bash session
$ docker run -it --gpus=all \
--volume /path_to_repo/hanabi-multiagent-framework:/hanabi-framework:ro \
--volume /path_to_repo/hanabi-learning-environment:/hanabi-le:ro \
--volume /path_to_repo/hanabi-agents/:/hanabi-agents:ro \
$ docker run -it --rm --gpus=all \
--volume /path_to_repo/hanabi-multiagent-framework/examples:/hanabi-examples:ro \
hanabi-framework:gpu-rlax bash
```

This inconvenience is due to the active development stage of the package. We are going to provide a version of the container with all dependencies later.

From within the container you should install the repos:

```
$ pip install /hanabi-framework/
$ pip install /hanabi-learning-environment/
$ pip install /hanabi-agents/
```

There are some examples showing how to run the framework. For instance, rlax_agent_session.py shows how to run the framework with an rlax-based DQN agent available in the hanabi_agents repo. You can launch it like so:

```
$ python /hanabi-framework/examples/rlax_agent_session.py
# try out examples
# e.g. train an rlax rainbow agent using a provided config
python hanabi-examples/rlax_agent_session.py --agent_config_path=hanabi-framework/rlax_agent.gin
# or let a rule-based agent play with itself
python hanabi-examples/rulebased_agent_session.py
```
41 changes: 41 additions & 0 deletions docker/images/Dockerfile-cpu-rlax
@@ -0,0 +1,41 @@
FROM alpine/git as hanabi-fetch

RUN mkdir /dm-tools && \
# git clone https://github.com/deepmind/hanabi-learning-environment.git /dm-tools/hanabi-learning-environment && \
git clone https://github.com/deepmind/dm_env.git --depth 1 /dm-tools/dm-env && \
git clone https://github.com/deepmind/dm-haiku.git --depth 1 /dm-tools/dm-haiku && \
git clone https://github.com/deepmind/rlax.git --depth 1 /dm-tools/rlax && \
mkdir /hanabi && \
git clone https://github.com/braintimeException/hanabi-learning-environment.git --recurse-submodules --branch pybind11 --depth 1 /hanabi/le && \
git clone https://github.com/braintimeException/hanabi-multiagent-framework.git --branch pybind-env --depth 1 /hanabi/framework && \
git clone https://github.com/braintimeException/hanabi-agents.git --recurse-submodules --depth 1 /hanabi/agents && \
wget -O /dm-tools/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

FROM ubuntu:18.04

RUN apt-get update && apt-get install -y build-essential cmake git && rm -rf /var/lib/apt/lists/*

COPY --from=hanabi-fetch /dm-tools /dm-tools/
COPY --from=hanabi-fetch /hanabi /hanabi/

# conda
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH

RUN /bin/bash /dm-tools/miniconda.sh -b -p /opt/conda && \
/opt/conda/bin/conda clean -tipsy && \
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc && \
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
/opt/conda/bin/conda clean -afy && \
pip install --upgrade pip && \
pip install pyyaml gin-config && \
# install deepmind dependencies
pip install /dm-tools/dm-env dm-tools/dm-haiku dm-tools/rlax && \
# install hanabi
pip install /hanabi/le hanabi/framework hanabi/agents && \
rm -rf dm-tools hanabi

CMD [ "/bin/bash" ]
36 changes: 21 additions & 15 deletions docker/images/Dockerfile-gpu-rlax
@@ -1,14 +1,22 @@
FROM alpine/git as hanabi-fetch

RUN mkdir /dm-tools && \
git clone https://github.com/deepmind/hanabi-learning-environment.git /dm-tools/hanabi-learning-environment && \
git clone https://github.com/deepmind/dm_env /dm-tools/dm-env && \
git clone https://github.com/deepmind/dm-haiku /dm-tools/dm-haiku && \
git clone https://github.com/deepmind/rlax.git /dm-tools/rlax && \
# git clone https://github.com/deepmind/hanabi-learning-environment.git /dm-tools/hanabi-learning-environment && \
git clone https://github.com/deepmind/dm_env.git --depth 1 /dm-tools/dm-env && \
git clone https://github.com/deepmind/dm-haiku.git --depth 1 /dm-tools/dm-haiku && \
git clone https://github.com/deepmind/rlax.git --depth 1 /dm-tools/rlax && \
mkdir /hanabi && \
git clone https://github.com/braintimeException/hanabi-learning-environment.git --recurse-submodules --branch pybind11 --depth 1 /hanabi/le && \
git clone https://github.com/braintimeException/hanabi-multiagent-framework.git --branch pybind-env --depth 1 /hanabi/framework && \
git clone https://github.com/braintimeException/hanabi-agents.git --recurse-submodules --depth 1 /hanabi/agents && \
wget -O /dm-tools/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

FROM nvidia/cuda:10.1-cudnn7-devel

RUN apt-get update && apt-get install -y cmake git && rm -rf /var/lib/apt/lists/*

COPY --from=hanabi-fetch /dm-tools /dm-tools/
COPY --from=hanabi-fetch /hanabi /hanabi/

# conda
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
@@ -22,16 +30,14 @@ RUN /bin/bash /dm-tools/miniconda.sh -b -p /opt/conda && \
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
/opt/conda/bin/conda clean -afy && \
pip install --upgrade pip

RUN cd /dm-tools && ls -lah && \
cd hanabi-learning-environment && pip install . && cd .. && \
cd dm-env && pip install . && cd .. && \
cd dm-haiku && pip install . && cd .. && \
cd rlax && pip install . && cd ..

ADD install_jax.sh /
RUN sh /install_jax.sh && \
rm -rf /dm-tools
pip install --upgrade pip && \
pip install pyyaml gin-config && \
# install cuda version of jaxlib
pip install --upgrade https://storage.googleapis.com/jax-releases/cuda101/jaxlib-$(pip search jaxlib | sed "s/.*(//" | sed "s/).*//")-$(python -V | awk '{split($2,a,"."); print "cp" a[1] a[2]}')-none-manylinux2010_x86_64.whl && \
# install deepmind dependencies
pip install /dm-tools/dm-env dm-tools/dm-haiku dm-tools/rlax && \
# install hanabi
pip install /hanabi/le hanabi/framework hanabi/agents && \
rm -rf dm-tools hanabi

CMD [ "/bin/bash" ]
52 changes: 34 additions & 18 deletions examples/mock_agent_session.py
@@ -1,14 +1,16 @@
"""
An example with a mock agent on how to operate the framework.
"""
import hanabi_multiagent_framework as hmf
from hanabi_multiagent_framework.utils import make_hanabi_env_config
import random
import numpy as np
from numpy import ndarray
from collections import namedtuple
import hanabi_multiagent_framework as hmf
from hanabi_learning_environment import pyhanabi_pybind as pyhanabi
from hanabi_multiagent_framework.utils import make_hanabi_env_config

n_players = 2
n_parallel = 10
n_parallel = 100_000
# n_parallel = 10
env_conf = make_hanabi_env_config('Hanabi-Full-Oracle', n_players)

env = hmf.HanabiParallelEnvironment(env_conf, n_parallel)
@@ -17,22 +19,29 @@ class MockAgent(hmf.agent.HanabiAgent):
"""A mock agent which always selects the first legal move.
"""

def explore(self, observations: ndarray, legal_moves: ndarray) -> ndarray:
action = np.argmax(legal_moves, axis=1)
return action
def __init__(self, action_spec):
self.n_actions = action_spec.num_values

def exploit(self, observations: ndarray, legal_moves: ndarray) -> ndarray:
return self.explore(None, legal_moves)
def explore(self, observations: ndarray) -> ndarray:
# return np.random.randint(0, self.n_actions + 1, size=len(observations))
# action = np.argmax(legal_moves, axis=1)
# return action
# return [random.choice(o.legal_moves) for o in observations]
actions = pyhanabi.HanabiMoveVector()
for o in observations:
actions.append(o.legal_moves[0])
return actions

def exploit(self, observations) -> ndarray:
return self.explore(observations)

def add_experience_first(self,
observations: ndarray,
legal_moves: ndarray,
step_types: ndarray) -> None:
pass

def add_experience(self,
observations: ndarray,
legal_moves: ndarray,
actions: ndarray,
rewards: ndarray,
step_types: ndarray) -> None:
@@ -41,14 +50,21 @@ def add_experience(self,
def update(self):
pass

def requires_vectorized_observation(self):
return False

def requires_vectorized_legal_moves(self):
return True

agents = [MockAgent() for _ in range(n_players)]

agents = [MockAgent(env.action_spec_vec()) for _ in range(n_players)]

parallel_session = hmf.HanabiParallelSession(env, agents)
parallel_session.reset()

parallel_session.train(n_iter=30000,
n_sim_steps=n_players,
n_train_steps=1,
n_warmup=0,
train_batch_size=256)
parallel_session.run_eval()

# parallel_session.reset()
# parallel_session.train(n_iter=30000,
# n_sim_steps=n_players,
# n_train_steps=1,
# n_warmup=0)
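
With the examples directory mounted as /hanabi-examples (per the README), the mock-agent example can be smoke-tested directly inside the container:

```
# run the first-legal-move mock agent on the parallel environment
$ python hanabi-examples/mock_agent_session.py
```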
132 changes: 132 additions & 0 deletions examples/neuroevo_session.py
@@ -0,0 +1,132 @@
import numpy as np
import gin
import hanabi_multiagent_framework as hmf
from hanabi_multiagent_framework.utils import make_hanabi_env_config
from hanabi_agents.neuroevo import NeuroEvoPopulation, NeuroEvoParams, Mutation, Crossover

def main(
agent_config_path=None,
hanabi_game_type="Hanabi-Small-Oracle",
n_players=2,
max_life_tokens=None,
n_parallel=10000,
self_play=True,
n_train_steps=1,
n_sim_steps=20,
epochs=1_000_000,
eval_n_parallel=1_000,
eval_freq=500,
):


env_conf = make_hanabi_env_config(hanabi_game_type, n_players)
if max_life_tokens is not None:
env_conf["max_life_tokens"] = str(max_life_tokens)

env = hmf.HanabiParallelEnvironment(env_conf, n_parallel)
eval_env = hmf.HanabiParallelEnvironment(env_conf, eval_n_parallel)

if agent_config_path is not None:
gin.parse_config_file(agent_config_path)
agent_params = NeuroEvoParams(
population_size=100,
chromosome_init_layers=[16, 16],
chromosome_n_seeds=1000,
crossover_attempts=1,
extinction_period=1,
n_survivors=10
)

def fitness_func(observations, actions, rewards):
return rewards

mutation = Mutation(
seed_mutation_proba=0.001,
layer_size_mutation_proba=0.1,
layer_number_mutation_proba=0.01
)

crossover = Crossover()

if self_play:
self_play_agent = NeuroEvoPopulation(
env.observation_spec_vec_batch()[0],
env.action_spec_vec(),
fitness_func,
mutation,
crossover,
agent_params)

agents = [self_play_agent for _ in range(n_players)]
else:
agents = [
NeuroEvoPopulation(
env.observation_spec_vec()[0],
env.action_spec(),
fitness_func,
mutation,
crossover,
agent_params)
for _ in range(n_players)
]

parallel_session = hmf.HanabiParallelSession(env, agents)
parallel_session.reset()

parallel_eval_session = hmf.HanabiParallelSession(eval_env, agents)

print("Game config", parallel_session.parallel_env.game_config)

# eval before
parallel_eval_session.run_eval()

for i in range(epochs):
parallel_session.train(
n_iter=eval_freq,
n_sim_steps=n_sim_steps,
n_train_steps=n_train_steps,
n_warmup=0)
print("step", (i + 1) * eval_freq * n_train_steps)
# eval after
parallel_eval_session.run_eval()

if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Train a neuroevolutionary population of agents.")

parser.add_argument(
"--hanabi_game_type", type=str, default="Hanabi-Small-Oracle",
help='Can be "Hanabi-{VerySmall,Small,Full}-{Oracle,CardKnowledge}"')
parser.add_argument("--n_players", type=int, default=2, help="Number of players.")
parser.add_argument(
"--max_life_tokens", type=int, default=None,
help="Set a different number of life tokens.")
parser.add_argument(
"--n_parallel", type=int, default=10000,
help="Number of games run in parallel during training.")
parser.add_argument(
"--self_play", type=bool, default=False,
help="Whether the agent should play with itself, or an independent agent instance should be created for each player.")
parser.add_argument(
"--n_train_steps", type=int, default=1,
help="Number of training steps made in each iteration. One iteration consists of n_sim_steps followed by n_train_steps.")
parser.add_argument(
"--n_sim_steps", type=int, default=2,
help="Number of environment steps made in each iteration.")
parser.add_argument(
"--epochs", type=int, default=1_000_000,
help="Total number of rotations = epochs * eval_freq.")
parser.add_argument(
"--eval_n_parallel", type=int, default=1_000,
help="Number of parallel games to use for evaluation.")
parser.add_argument(
"--eval_freq", type=int, default=500,
help="Number of iterations to perform between evaluations.")

parser.add_argument(
"--agent_config_path", type=str, default=None,
help="Path to gin config file for neuroevolutionary population.")

args = parser.parse_args()

main(**vars(args))
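
Under the same mount layout, the neuroevolution example can be launched with its command-line flags; a quick trial with smaller settings than the defaults (the flag values below are only an example) might be:

```
$ python hanabi-examples/neuroevo_session.py \
    --hanabi_game_type=Hanabi-Small-Oracle \
    --n_parallel=1000 \
    --eval_n_parallel=100 \
    --eval_freq=50
```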
13 changes: 13 additions & 0 deletions examples/rlax_agent.gin
@@ -0,0 +1,13 @@
RlaxRainbowParams.train_batch_size = 32
RlaxRainbowParams.target_update_period = 500
RlaxRainbowParams.discount = 0.99
RlaxRainbowParams.epsilon = 0.0
RlaxRainbowParams.learning_rate = 2.5e-5
RlaxRainbowParams.layers = [512, 512]
RlaxRainbowParams.use_double_q = True
RlaxRainbowParams.use_priority = True
RlaxRainbowParams.experience_buffer_size = 65536
RlaxRainbowParams.seed = 42
RlaxRainbowParams.n_atoms = 51
RlaxRainbowParams.atom_vmax = 25
RlaxRainbowParams.beta_is = 0.2