Run pytest on GPU (Unity-Technologies#4865)

* make tests device-friendly
* mark all tests in test_simple_rl as check_environment_trains
hubbardp · Jan 22, 2021 · 2f71f72 · 2f71f72
1 parent fd0e092
commit 2f71f72
Show file tree

Hide file tree

Showing 10 changed files with 59 additions and 16 deletions.
diff --git a/.yamato/pytest-gpu.yml b/.yamato/pytest-gpu.yml
@@ -0,0 +1,24 @@
+pytest_gpu:
+  name: Pytest GPU
+  agent:
+    type: Unity::VM::GPU
+    image: package-ci/ubuntu:stable
+    flavor: b1.large
+  commands:
+    - |
+      sudo apt-get update && sudo apt-get install -y python3-venv
+      python3 -m venv venv && source venv/bin/activate
+      python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+      python3 -u -m ml-agents.tests.yamato.setup_venv
+      python3 -m pip install --progress-bar=off -r test_requirements.txt --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+      python3 -m pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+      python3 -m pytest -m "not check_environment_trains" --junitxml=junit/test-results.xml -p no:warnings
+  triggers:
+    cancel_old_ci: true
+    recurring:
+      - branch: master
+        frequency: daily
+  artifacts:
+    logs:
+      paths:
+        - "artifacts/standalone_build.txt"
diff --git a/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py b/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
@@ -3,7 +3,7 @@
 import os
 
 import numpy as np
-from mlagents.torch_utils import torch
+from mlagents.torch_utils import torch, default_device
 from mlagents.trainers.policy.torch_policy import TorchPolicy
 from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
 from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
@@ -69,6 +69,9 @@ def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
     """
     Make sure two policies have the same output for the same input.
     """
+    policy1.actor_critic = policy1.actor_critic.to(default_device())
+    policy2.actor_critic = policy2.actor_critic.to(default_device())
+
     decision_step, _ = mb.create_steps_from_behavior_spec(
         policy1.behavior_spec, num_agents=1
     )
@@ -87,7 +90,8 @@ def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
             tensor_obs, masks=masks, memories=memories
         )
     np.testing.assert_array_equal(
-        log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
+        ModelUtils.to_numpy(log_probs1.all_discrete_tensor),
+        ModelUtils.to_numpy(log_probs2.all_discrete_tensor),
     )
 
 

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_action_model.py b/ml-agents/mlagents/trainers/tests/torch/test_action_model.py
@@ -71,11 +71,11 @@ def test_get_probs_and_entropy():
     for _disc in log_probs.all_discrete_list:
         assert _disc.shape == (1, 2)
 
-    for clp in log_probs.continuous_tensor[0]:
+    for clp in log_probs.continuous_tensor[0].tolist():
         # Log prob of standard normal at 0
         assert clp == pytest.approx(-0.919, abs=0.01)
 
     assert log_probs.discrete_list[0] > log_probs.discrete_list[1]
 
-    for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
+    for ent, val in zip(entropies[0].tolist(), [1.4189, 0.6191, 0.6191]):
         assert ent == pytest.approx(val, abs=0.01)
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
@@ -39,7 +39,7 @@ def test_gaussian_distribution(conditional_sigma, tanh_squash):
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-    for prob in log_prob.flatten():
+    for prob in log_prob.flatten().tolist():
         assert prob == pytest.approx(-2, abs=0.1)
 
 
@@ -89,7 +89,7 @@ def create_test_prob(size: int) -> torch.Tensor:
     dist_insts = gauss_dist(sample_embedding, masks=masks)
     for dist_inst in dist_insts:
         log_prob = dist_inst.all_log_prob()
-        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
+        assert log_prob.flatten()[-1].tolist() == pytest.approx(0, abs=0.001)
 
 
 def test_gaussian_dist_instance():
@@ -100,11 +100,13 @@ def test_gaussian_dist_instance():
     )
     action = dist_instance.sample()
     assert action.shape == (1, act_size)
-    for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
+    for log_prob in (
+        dist_instance.log_prob(torch.zeros((1, act_size))).flatten().tolist()
+    ):
         # Log prob of standard normal at 0
         assert log_prob == pytest.approx(-0.919, abs=0.01)
 
-    for ent in dist_instance.entropy().flatten():
+    for ent in dist_instance.entropy().flatten().tolist():
         # entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
         assert ent == pytest.approx(1.42, abs=0.01)
 

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
@@ -36,15 +36,15 @@ def test_normalizer():
     norm.update(vec_input3)
 
     # Test normalization
-    for val in norm(vec_input1)[0]:
+    for val in norm(vec_input1)[0].tolist():
         assert val == pytest.approx(0.707, abs=0.001)
 
     # Test copy normalization
     norm2 = Normalizer(input_size)
     assert not compare_models(norm, norm2)
     norm2.copy_from(norm)
     assert compare_models(norm, norm2)
-    for val in norm2(vec_input1)[0]:
+    for val in norm2(vec_input1)[0].tolist():
         assert val == pytest.approx(0.707, abs=0.001)
 
 

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_hybrid.py b/ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
@@ -18,6 +18,7 @@
 SAC_TORCH_CONFIG = sac_dummy_config()
 
 
+@pytest.mark.check_environment_trains
 @pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
 def test_hybrid_ppo(action_size):
     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
@@ -37,6 +38,7 @@ def test_hybrid_ppo(action_size):
     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
 
 
+@pytest.mark.check_environment_trains
 @pytest.mark.parametrize("num_visual", [1, 2])
 def test_hybrid_visual_ppo(num_visual):
     env = SimpleEnvironment(
@@ -49,6 +51,7 @@ def test_hybrid_visual_ppo(num_visual):
     check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
 
 
+@pytest.mark.check_environment_trains
 def test_hybrid_recurrent_ppo():
     env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
     new_network_settings = attr.evolve(
@@ -70,6 +73,7 @@ def test_hybrid_recurrent_ppo():
     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
 
 
+@pytest.mark.check_environment_trains
 @pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
 def test_hybrid_sac(action_size):
     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
@@ -88,6 +92,7 @@ def test_hybrid_sac(action_size):
     )
 
 
+@pytest.mark.check_environment_trains
 @pytest.mark.parametrize("num_visual", [1, 2])
 def test_hybrid_visual_sac(num_visual):
     env = SimpleEnvironment(
@@ -105,6 +110,7 @@ def test_hybrid_visual_sac(num_visual):
     check_environment_trains(env, {BRAIN_NAME: config})
 
 
+@pytest.mark.check_environment_trains
 def test_hybrid_recurrent_sac():
     env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
     new_networksettings = attr.evolve(

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_networks.py b/ml-agents/mlagents/trainers/tests/torch/test_networks.py
@@ -36,7 +36,7 @@ def test_networkbody_vector():
         loss.backward()
         optimizer.step()
     # In the last step, values should be close to 1
-    for _enc in encoded.flatten():
+    for _enc in encoded.flatten().tolist():
         assert _enc == pytest.approx(1.0, abs=0.1)
 
 
@@ -63,7 +63,7 @@ def test_networkbody_lstm():
         loss.backward()
         optimizer.step()
     # In the last step, values should be close to 1
-    for _enc in encoded.flatten():
+    for _enc in encoded.flatten().tolist():
         assert _enc == pytest.approx(1.0, abs=0.1)
 
 
@@ -91,7 +91,7 @@ def test_networkbody_visual():
         loss.backward()
         optimizer.step()
     # In the last step, values should be close to 1
-    for _enc in encoded.flatten():
+    for _enc in encoded.flatten().tolist():
         assert _enc == pytest.approx(1.0, abs=0.1)
 
 
@@ -124,7 +124,7 @@ def test_valuenetwork():
         optimizer.step()
     # In the last step, values should be close to 1
     for value in values.values():
-        for _out in value:
+        for _out in value.tolist():
             assert _out[0] == pytest.approx(1.0, abs=0.1)
 
 

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
@@ -38,6 +38,9 @@
 PPO_TORCH_CONFIG = ppo_dummy_config()
 SAC_TORCH_CONFIG = sac_dummy_config()
 
+# All tests in this file are slow training tests; the GPU CI job deselects them via this marker.
+pytestmark = pytest.mark.check_environment_trains
+
 
 @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
 def test_simple_ppo(action_sizes):

diff --git a/ml-agents/mlagents/trainers/torch/encoders.py b/ml-agents/mlagents/trainers/torch/encoders.py
@@ -143,7 +143,7 @@ def forward(self, visual_obs: torch.Tensor) -> torch.Tensor:
         if not exporting_to_onnx.is_exporting():
             visual_obs = visual_obs.permute([0, 3, 1, 2])
         hidden = self.conv_layers(visual_obs)
-        hidden = torch.reshape(hidden, (-1, self.final_flat))
+        hidden = hidden.reshape(-1, self.final_flat)
         return self.dense(hidden)
 
 
@@ -177,7 +177,7 @@ def forward(self, visual_obs: torch.Tensor) -> torch.Tensor:
         if not exporting_to_onnx.is_exporting():
             visual_obs = visual_obs.permute([0, 3, 1, 2])
         hidden = self.conv_layers(visual_obs)
-        hidden = torch.reshape(hidden, (-1, self.final_flat))
+        hidden = hidden.reshape(-1, self.final_flat)
         return self.dense(hidden)
 
 

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+addopts = --strict-markers
+markers =
+    check_environment_trains: Slow training tests, do not run on yamato