From 597d6d9dbb6b9e0163c0e4628fc2812b6904a938 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 4 Dec 2020 10:05:52 -0800 Subject: [PATCH] removing tensorflow testing for pytest and yamato --- .github/workflows/pytest.yml | 13 ++--- ml-agents/mlagents/trainers/ppo/trainer.py | 30 +++-------- ml-agents/mlagents/trainers/sac/trainer.py | 53 +++++-------------- ml-agents/mlagents/trainers/tests/__init__.py | 2 +- .../trainers/tests/test_trainer_controller.py | 4 +- .../mlagents/trainers/trainer_controller.py | 2 +- ml-agents/setup.py | 1 - ml-agents/tests/yamato/yamato_utils.py | 3 -- test_constraints_max_tf1_version.txt | 7 --- test_constraints_max_tf2_version.txt | 6 --- test_constraints_min_version.txt | 8 --- test_requirements.txt | 4 -- 12 files changed, 30 insertions(+), 103 deletions(-) delete mode 100644 test_constraints_max_tf1_version.txt delete mode 100644 test_constraints_max_tf2_version.txt delete mode 100644 test_constraints_min_version.txt diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 89144f4053..350c2bfc9e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -20,11 +20,8 @@ jobs: python-version: [3.6.x, 3.7.x, 3.8.x] include: - python-version: 3.6.x - pip_constraints: test_constraints_min_version.txt - python-version: 3.7.x - pip_constraints: test_constraints_max_tf1_version.txt - python-version: 3.8.x - pip_constraints: test_constraints_max_tf2_version.txt steps: - uses: actions/checkout@v2 - name: Set up Python @@ -37,7 +34,7 @@ jobs: # This path is specific to Ubuntu path: ~/.cache/pip # Look to see if there is a cache hit for the corresponding requirements file - key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt', matrix.pip_constraints) }} + key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- @@ -48,10 +45,10 @@ jobs: # pin pip to workaround https://github.com/pypa/pip/issues/9180 python -m pip install pip==20.2 python -m pip install --upgrade setuptools - python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }} - python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }} + python -m pip install --progress-bar=off -e ./ml-agents-envs + python -m pip install --progress-bar=off -e ./ml-agents + python -m pip install --progress-bar=off -r test_requirements.txt + python -m pip install --progress-bar=off -e ./gym-unity - name: Save python dependencies run: | pip freeze > pip_versions-${{ matrix.python-version }}.txt diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index f74202a213..04f5f5c1e3 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -16,9 +16,6 @@ from mlagents.trainers.trajectory import Trajectory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, PPOSettings -from mlagents.trainers.torch.components.reward_providers.base_reward_provider import ( - BaseRewardProvider, -) logger = get_logger(__name__) @@ -83,31 +80,20 @@ def 
_process_trajectory(self, trajectory: Trajectory) -> None: for name, v in value_estimates.items(): agent_buffer_trajectory[f"{name}_value_estimates"].extend(v) - if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider): - self._stats_reporter.add_stat( - f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate", - np.mean(v), - ) - else: - self._stats_reporter.add_stat( - self.optimizer.reward_signals[name].value_name, np.mean(v) - ) + self._stats_reporter.add_stat( + f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate", + np.mean(v), + ) # Evaluate all reward functions self.collected_rewards["environment"][agent_id] += np.sum( agent_buffer_trajectory["environment_rewards"] ) for name, reward_signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(reward_signal, BaseRewardProvider): - evaluate_result = ( - reward_signal.evaluate(agent_buffer_trajectory) - * reward_signal.strength - ) - else: # reward_signal is a TensorFlow-based RewardSignal class - evaluate_result = reward_signal.evaluate_batch( - agent_buffer_trajectory - ).scaled_reward + evaluate_result = ( + reward_signal.evaluate(agent_buffer_trajectory) + * reward_signal.strength + ) agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result) # Report the reward signals self.collected_rewards[name][agent_id] += np.sum(evaluate_result) diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 3382c93f06..bdfd694dbd 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -19,7 +19,6 @@ from mlagents.trainers.trajectory import Trajectory, SplitObservations from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, SACSettings -from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider logger = get_logger(__name__) @@ -141,16 +140,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: agent_buffer_trajectory["environment_rewards"] ) for name, reward_signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(reward_signal, BaseRewardProvider): - evaluate_result = ( - reward_signal.evaluate(agent_buffer_trajectory) - * reward_signal.strength - ) - else: # reward_signal uses TensorFlow - evaluate_result = reward_signal.evaluate_batch( - agent_buffer_trajectory - ).scaled_reward + evaluate_result = ( + reward_signal.evaluate(agent_buffer_trajectory) + * reward_signal.strength + ) # Report the reward signals self.collected_rewards[name][agent_id] += np.sum(evaluate_result) @@ -160,16 +153,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached ) for name, v in value_estimates.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider): self._stats_reporter.add_stat( f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value", np.mean(v), ) - else: # TensorFlow reward signal - self._stats_reporter.add_stat( - self.optimizer.reward_signals[name].value_name, np.mean(v) - ) # Bootstrap using the last step rather than the bootstrap step if max step is reached. # Set last element to duplicate obs and remove dones. 
@@ -272,15 +259,9 @@ def _update_sac_policy(self) -> bool: ) # Get rewards for each reward for name, signal in self.optimizer.reward_signals.items(): - # BaseRewardProvider is a PyTorch-based reward signal - if isinstance(signal, BaseRewardProvider): - sampled_minibatch[f"{name}_rewards"] = ( - signal.evaluate(sampled_minibatch) * signal.strength - ) - else: # reward_signal is a TensorFlow-based RewardSignal class - sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch( - sampled_minibatch - ).scaled_reward + sampled_minibatch[f"{name}_rewards"] = ( + signal.evaluate(sampled_minibatch) * signal.strength + ) update_stats = self.optimizer.update(sampled_minibatch, n_sequences) for stat_name, value in update_stats.items(): @@ -327,20 +308,12 @@ def _update_reward_signals(self) -> None: reward_signal_minibatches = {} for name, signal in self.optimizer.reward_signals.items(): logger.debug(f"Updating {name} at step {self.step}") - # BaseRewardProvider is a PyTorch-based reward signal - if not isinstance(signal, BaseRewardProvider): - # Some signals don't need a minibatch to be sampled - so we don't! - if signal.update_dict: - reward_signal_minibatches[name] = buffer.sample_mini_batch( - self.hyperparameters.batch_size, - sequence_length=self.policy.sequence_length, - ) - else: # TensorFlow reward signal - if name != "extrinsic": - reward_signal_minibatches[name] = buffer.sample_mini_batch( - self.hyperparameters.batch_size, - sequence_length=self.policy.sequence_length, - ) + # Some signals don't need a minibatch to be sampled - so we don't! + if signal.update_dict: + reward_signal_minibatches[name] = buffer.sample_mini_batch( + self.hyperparameters.batch_size, + sequence_length=self.policy.sequence_length, + ) update_stats = self.optimizer.update_reward_signals( reward_signal_minibatches, n_sequences ) diff --git a/ml-agents/mlagents/trainers/tests/__init__.py b/ml-agents/mlagents/trainers/tests/__init__.py index 85482cb137..19fd7ccfa7 100644 --- a/ml-agents/mlagents/trainers/tests/__init__.py +++ b/ml-agents/mlagents/trainers/tests/__init__.py @@ -19,7 +19,7 @@ def _check_no_float64(arr, kwargs_dtype): # tb[-2] is the wrapper function, e.g. 
np_array_no_float64 # we want the calling function, so use tb[-3] filename = tb[-3].filename - # Only raise if this came from mlagents code, not tensorflow + # Only raise if this came from mlagents code if ( "ml-agents/mlagents" in filename or "ml-agents-envs/mlagents" in filename diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index c80e878d52..578b852f59 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -23,7 +23,7 @@ def basic_trainer_controller(): @patch("numpy.random.seed") @patch.object(torch, "manual_seed") -def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): +def test_initialization_seed(numpy_random_seed, torch_set_seed): seed = 27 trainer_factory_mock = MagicMock() trainer_factory_mock.ghost_controller = GhostController() @@ -36,7 +36,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): training_seed=seed, ) numpy_random_seed.assert_called_with(seed) - tensorflow_set_seed.assert_called_with(seed) + torch_set_seed.assert_called_with(seed) @pytest.fixture diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 5841bb51d7..7f9808f5dd 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -48,7 +48,7 @@ def __init__( :param param_manager: EnvironmentParameterManager object which stores information about all environment parameters. :param train: Whether to train model, or only run inference. - :param training_seed: Seed to use for Numpy and Tensorflow random number generation. + :param training_seed: Seed to use for Numpy and Torch random number generation. :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. 
""" self.trainers: Dict[str, Trainer] = {} diff --git a/ml-agents/setup.py b/ml-agents/setup.py index e9338ca45e..55fcc94467 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -80,5 +80,4 @@ def run(self): ] }, cmdclass={"verify": VerifyVersionCommand}, - extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]}, ) diff --git a/ml-agents/tests/yamato/yamato_utils.py b/ml-agents/tests/yamato/yamato_utils.py index ad61223eb4..a37c3a84e5 100644 --- a/ml-agents/tests/yamato/yamato_utils.py +++ b/ml-agents/tests/yamato/yamato_utils.py @@ -115,9 +115,6 @@ def init_venv( pip_commands = [ "--upgrade pip", "--upgrade setuptools", - # TODO build these and publish to internal pypi - "~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", - "tf2onnx==1.6.1", ] if mlagents_python_version: # install from pypi diff --git a/test_constraints_max_tf1_version.txt b/test_constraints_max_tf1_version.txt deleted file mode 100644 index d14c5fe4cc..0000000000 --- a/test_constraints_max_tf1_version.txt +++ /dev/null @@ -1,7 +0,0 @@ -# pip constraints to use the *highest* versions allowed in ml-agents/setup.py -# with the exception of tensorflow, which is constrained to <2 -# For projects with upper bounds, we should periodically update this list to the latest release version -grpcio>=1.23.0 -numpy>=1.17.2 -tensorflow>=1.15.2,<2.0.0 -h5py>=2.10.0 diff --git a/test_constraints_max_tf2_version.txt b/test_constraints_max_tf2_version.txt deleted file mode 100644 index 74dca2a3c7..0000000000 --- a/test_constraints_max_tf2_version.txt +++ /dev/null @@ -1,6 +0,0 @@ -# pip constraints to use the *highest* versions allowed in ml-agents/setup.py -# For projects with upper bounds, we should periodically update this list to the latest release version -grpcio>=1.23.0 -numpy>=1.17.2 -tensorflow==2.3.0 -h5py>=2.10.0 diff --git a/test_constraints_min_version.txt b/test_constraints_min_version.txt deleted file mode 100644 index a6dac3fcf9..0000000000 --- a/test_constraints_min_version.txt +++ /dev/null @@ -1,8 +0,0 @@ -# pip constraints to use the *lowest* versions allowed in ml-agents/setup.py -grpcio==1.11.0 -numpy==1.14.1 -Pillow==4.2.1 -protobuf==3.6 -tensorflow==1.14.0 -h5py==2.9.0 -tensorboard==1.15.0 diff --git a/test_requirements.txt b/test_requirements.txt index b08f286fa7..1a3d424ec2 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,3 @@ pytest>4.0.0,<6.0.0 pytest-cov==2.6.1 pytest-xdist==1.34.0 -# Tensorflow tests are here for the time being, before they are used in the codebase. -tensorflow>=1.14,<3.0 - -tf2onnx>=1.5.5