From 597d6d9dbb6b9e0163c0e4628fc2812b6904a938 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 4 Dec 2020 10:05:52 -0800 Subject: [PATCH] removing tensorflow testing for pytest and yamato --- .github/workflows/pytest.yml | 13 ++--- ml-agents/mlagents/trainers/ppo/trainer.py | 30 +++-------- ml-agents/mlagents/trainers/sac/trainer.py | 53 +++++-------------- ml-agents/mlagents/trainers/tests/__init__.py | 2 +- .../trainers/tests/test_trainer_controller.py | 4 +- .../mlagents/trainers/trainer_controller.py | 2 +- ml-agents/setup.py | 1 - ml-agents/tests/yamato/yamato_utils.py | 3 -- test_constraints_max_tf1_version.txt | 7 --- test_constraints_max_tf2_version.txt | 6 --- test_constraints_min_version.txt | 8 --- test_requirements.txt | 4 -- 12 files changed, 30 insertions(+), 103 deletions(-) delete mode 100644 test_constraints_max_tf1_version.txt delete mode 100644 test_constraints_max_tf2_version.txt delete mode 100644 test_constraints_min_version.txt diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 89144f4053..350c2bfc9e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -20,11 +20,8 @@ jobs: python-version: [3.6.x, 3.7.x, 3.8.x] include: - python-version: 3.6.x - pip_constraints: test_constraints_min_version.txt - python-version: 3.7.x - pip_constraints: test_constraints_max_tf1_version.txt - python-version: 3.8.x - pip_constraints: test_constraints_max_tf2_version.txt steps: - uses: actions/checkout@v2 - name: Set up Python @@ -37,7 +34,7 @@ jobs: # This path is specific to Ubuntu path: ~/.cache/pip # Look to see if there is a cache hit for the corresponding requirements file - key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt', matrix.pip_constraints) }} + key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- @@ -48,10 +45,10 @@ jobs: # pin pip to workaround https://github.com/pypa/pip/issues/9180 python -m pip install pip==20.2 python -m pip install --upgrade setuptools - python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }} + python -m pip install --progress-bar=off -e ./ml-agents-envs + python -m pip install --progress-bar=off -e ./ml-agents + python -m pip install --progress-bar=off -r test_requirements.txt + python -m pip install --progress-bar=off -e ./gym-unity - name: Save python dependencies run: | pip freeze > pip_versions-${{ matrix.python-version }}.txt diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index f74202a213..04f5f5c1e3 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -16,9 +16,6 @@ from mlagents.trainers.trajectory import Trajectory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, PPOSettings -from mlagents.trainers.torch.components.reward_providers.base_reward_provider import ( - BaseRewardProvider, -) logger = get_logger(__name__) @@ -83,31 +80,20 @@ def 
_process_trajectory(self, trajectory: Trajectory) -> None: for name, v in value_estimates.items(): agent_buffer_trajectory[f"{name}_value_estimates"].extend(v) - if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider): - self._stats_reporter.add_stat( - f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate", - np.mean(v), - ) - else: - self._stats_reporter.add_stat( - self.optimizer.reward_signals[name].value_name, np.mean(v) - ) + self._stats_reporter.add_stat( + f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate", + np.mean(v), + ) # Evaluate all reward functions self.collected_rewards["environment"][agent_id] += np.sum( agent_buffer_trajectory["environment_rewards"] ) for name, reward_signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(reward_signal, BaseRewardProvider): - evaluate_result = ( - reward_signal.evaluate(agent_buffer_trajectory) - * reward_signal.strength - ) - else: # reward_signal is a TensorFlow-based RewardSignal class - evaluate_result = reward_signal.evaluate_batch( - agent_buffer_trajectory - ).scaled_reward + evaluate_result = ( + reward_signal.evaluate(agent_buffer_trajectory) + * reward_signal.strength + ) agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result) # Report the reward signals self.collected_rewards[name][agent_id] += np.sum(evaluate_result) diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 3382c93f06..bdfd694dbd 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -19,7 +19,6 @@ from mlagents.trainers.trajectory import Trajectory, SplitObservations from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, SACSettings -from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider logger = get_logger(__name__) @@ -141,16 +140,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: agent_buffer_trajectory["environment_rewards"] ) for name, reward_signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(reward_signal, BaseRewardProvider): - evaluate_result = ( - reward_signal.evaluate(agent_buffer_trajectory) - * reward_signal.strength - ) - else: # reward_signal uses TensorFlow - evaluate_result = reward_signal.evaluate_batch( - agent_buffer_trajectory - ).scaled_reward + evaluate_result = ( + reward_signal.evaluate(agent_buffer_trajectory) + * reward_signal.strength + ) # Report the reward signals self.collected_rewards[name][agent_id] += np.sum(evaluate_result) @@ -160,16 +153,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached ) for name, v in value_estimates.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider): self._stats_reporter.add_stat( f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value", np.mean(v), ) - else: # TensorFlow reward signal - self._stats_reporter.add_stat( - self.optimizer.reward_signals[name].value_name, np.mean(v) - ) # Bootstrap using the last step rather than the bootstrap step if max step is reached. # Set last element to duplicate obs and remove dones. 
@@ -272,15 +259,9 @@ def _update_sac_policy(self) -> bool: ) # Get rewards for each reward for name, signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(signal, BaseRewardProvider): - sampled_minibatch[f"{name}_rewards"] = ( - signal.evaluate(sampled_minibatch) * signal.strength - ) - else: # reward_signal is a TensorFlow-based RewardSignal class - sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch( - sampled_minibatch - ).scaled_reward + sampled_minibatch[f"{name}_rewards"] = ( + signal.evaluate(sampled_minibatch) * signal.strength + ) update_stats = self.optimizer.update(sampled_minibatch, n_sequences) for stat_name, value in update_stats.items(): @@ -327,20 +308,12 @@ def _update_reward_signals(self) -> None: reward_signal_minibatches = {} for name, signal in self.optimizer.reward_signals.items(): logger.debug(f"Updating {name} at step {self.step}") - # BaseRewardProvider is a PyTorch-based reward signal - if not isinstance(signal, BaseRewardProvider): - # Some signals don't need a minibatch to be sampled - so we don't! - if signal.update_dict: - reward_signal_minibatches[name] = buffer.sample_mini_batch( - self.hyperparameters.batch_size, - sequence_length=self.policy.sequence_length, - ) - else: # TensorFlow reward signal - if name != "extrinsic": - reward_signal_minibatches[name] = buffer.sample_mini_batch( - self.hyperparameters.batch_size, - sequence_length=self.policy.sequence_length, - ) + # Some signals don't need a minibatch to be sampled - so we don't! + if signal.update_dict: + reward_signal_minibatches[name] = buffer.sample_mini_batch( + self.hyperparameters.batch_size, + sequence_length=self.policy.sequence_length, + ) update_stats = self.optimizer.update_reward_signals( reward_signal_minibatches, n_sequences ) diff --git a/ml-agents/mlagents/trainers/tests/__init__.py b/ml-agents/mlagents/trainers/tests/__init__.py index 85482cb137..19fd7ccfa7 100644 --- a/ml-agents/mlagents/trainers/tests/__init__.py +++ b/ml-agents/mlagents/trainers/tests/__init__.py @@ -19,7 +19,7 @@ def _check_no_float64(arr, kwargs_dtype): # tb[-2] is the wrapper function, e.g. 
np_array_no_float64 # we want the calling function, so use tb[-3] filename = tb[-3].filename - # Only raise if this came from mlagents code, not tensorflow + # Only raise if this came from mlagents code if ( "ml-agents/mlagents" in filename or "ml-agents-envs/mlagents" in filename diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index c80e878d52..578b852f59 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -23,7 +23,7 @@ def basic_trainer_controller(): @patch("numpy.random.seed") @patch.object(torch, "manual_seed") -def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): +def test_initialization_seed(numpy_random_seed, torch_set_seed): seed = 27 trainer_factory_mock = MagicMock() trainer_factory_mock.ghost_controller = GhostController() @@ -36,7 +36,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): training_seed=seed, ) numpy_random_seed.assert_called_with(seed) - tensorflow_set_seed.assert_called_with(seed) + torch_set_seed.assert_called_with(seed) @pytest.fixture diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 5841bb51d7..7f9808f5dd 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -48,7 +48,7 @@ def __init__( :param param_manager: EnvironmentParameterManager object which stores information about all environment parameters. :param train: Whether to train model, or only run inference. - :param training_seed: Seed to use for Numpy and Tensorflow random number generation. + :param training_seed: Seed to use for Numpy and Torch random number generation. :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. 
""" self.trainers: Dict[str, Trainer] = {} diff --git a/ml-agents/setup.py b/ml-agents/setup.py index e9338ca45e..55fcc94467 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -80,5 +80,4 @@ def run(self): ] }, cmdclass={"verify": VerifyVersionCommand}, - extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]}, ) diff --git a/ml-agents/tests/yamato/yamato_utils.py b/ml-agents/tests/yamato/yamato_utils.py index ad61223eb4..a37c3a84e5 100644 --- a/ml-agents/tests/yamato/yamato_utils.py +++ b/ml-agents/tests/yamato/yamato_utils.py @@ -115,9 +115,6 @@ def init_venv( pip_commands = [ "--upgrade pip", "--upgrade setuptools", - # TODO build these and publish to internal pypi - "~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", - "tf2onnx==1.6.1", ] if mlagents_python_version: # install from pypi diff --git a/test_constraints_max_tf1_version.txt b/test_constraints_max_tf1_version.txt deleted file mode 100644 index d14c5fe4cc..0000000000 --- a/test_constraints_max_tf1_version.txt +++ /dev/null @@ -1,7 +0,0 @@ -# pip constraints to use the *highest* versions allowed in ml-agents/setup.py -# with the exception of tensorflow, which is constrained to <2 -# For projects with upper bounds, we should periodically update this list to the latest release version -grpcio>=1.23.0 -numpy>=1.17.2 -tensorflow>=1.15.2,<2.0.0 -h5py>=2.10.0 diff --git a/test_constraints_max_tf2_version.txt b/test_constraints_max_tf2_version.txt deleted file mode 100644 index 74dca2a3c7..0000000000 --- a/test_constraints_max_tf2_version.txt +++ /dev/null @@ -1,6 +0,0 @@ -# pip constraints to use the *highest* versions allowed in ml-agents/setup.py -# For projects with upper bounds, we should periodically update this list to the latest release version -grpcio>=1.23.0 -numpy>=1.17.2 -tensorflow==2.3.0 -h5py>=2.10.0 diff --git a/test_constraints_min_version.txt b/test_constraints_min_version.txt deleted file mode 100644 index a6dac3fcf9..0000000000 --- a/test_constraints_min_version.txt +++ /dev/null @@ -1,8 +0,0 @@ -# pip constraints to use the *lowest* versions allowed in ml-agents/setup.py -grpcio==1.11.0 -numpy==1.14.1 -Pillow==4.2.1 -protobuf==3.6 -tensorflow==1.14.0 -h5py==2.9.0 -tensorboard==1.15.0 diff --git a/test_requirements.txt b/test_requirements.txt index b08f286fa7..1a3d424ec2 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,3 @@ pytest>4.0.0,<6.0.0 pytest-cov==2.6.1 pytest-xdist==1.34.0 -# Tensorflow tests are here for the time being, before they are used in the codebase. -tensorflow>=1.14,<3.0 - -tf2onnx>=1.5.5