Removing TensorFlow testing for pytest and Yamato
vincentpierre committed Dec 4, 2020
1 parent 54a2c8c commit 597d6d9
Showing 12 changed files with 30 additions and 103 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/pytest.yml
@@ -20,11 +20,8 @@ jobs:
         python-version: [3.6.x, 3.7.x, 3.8.x]
         include:
           - python-version: 3.6.x
-            pip_constraints: test_constraints_min_version.txt
           - python-version: 3.7.x
-            pip_constraints: test_constraints_max_tf1_version.txt
           - python-version: 3.8.x
-            pip_constraints: test_constraints_max_tf2_version.txt
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
@@ -37,7 +34,7 @@ jobs:
           # This path is specific to Ubuntu
           path: ~/.cache/pip
           # Look to see if there is a cache hit for the corresponding requirements file
-          key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt', matrix.pip_constraints) }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt') }}
           restore-keys: |
             ${{ runner.os }}-pip-
             ${{ runner.os }}-
@@ -48,10 +45,10 @@
           # pin pip to workaround https://github.com/pypa/pip/issues/9180
           python -m pip install pip==20.2
           python -m pip install --upgrade setuptools
-          python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }}
-          python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }}
-          python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }}
-          python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }}
+          python -m pip install --progress-bar=off -e ./ml-agents-envs
+          python -m pip install --progress-bar=off -e ./ml-agents
+          python -m pip install --progress-bar=off -r test_requirements.txt
+          python -m pip install --progress-bar=off -e ./gym-unity
       - name: Save python dependencies
         run: |
           pip freeze > pip_versions-${{ matrix.python-version }}.txt
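
Note on the dropped `-c` flags: pip constraints files pin package versions without installing anything themselves, which is how this matrix previously forced minimum and maximum TensorFlow versions per Python version. A minimal sketch of the mechanism (the file name and pins below are illustrative, not the contents of the deleted files):

    # constraints.txt: pins versions but never adds requirements on its own
    tensorflow==1.14.0
    numpy==1.16.4

    # The pins apply only to packages pip is already installing:
    python -m pip install -e ./ml-agents-envs -c constraints.txt
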
30 changes: 8 additions & 22 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -16,9 +16,6 @@
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings
-from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
-    BaseRewardProvider,
-)
 
 logger = get_logger(__name__)
 
@@ -83,31 +80,20 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
 
         for name, v in value_estimates.items():
             agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
-            if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
-                self._stats_reporter.add_stat(
-                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
-                    np.mean(v),
-                )
-            else:
-                self._stats_reporter.add_stat(
-                    self.optimizer.reward_signals[name].value_name, np.mean(v)
-                )
+            self._stats_reporter.add_stat(
+                f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
+                np.mean(v),
+            )
 
         # Evaluate all reward functions
         self.collected_rewards["environment"][agent_id] += np.sum(
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
-            # BaseRewardProvider is a PyTorch-based reward signal
-            if isinstance(reward_signal, BaseRewardProvider):
-                evaluate_result = (
-                    reward_signal.evaluate(agent_buffer_trajectory)
-                    * reward_signal.strength
-                )
-            else:  # reward_signal is a TensorFlow-based RewardSignal class
-                evaluate_result = reward_signal.evaluate_batch(
-                    agent_buffer_trajectory
-                ).scaled_reward
+            evaluate_result = (
+                reward_signal.evaluate(agent_buffer_trajectory)
+                * reward_signal.strength
+            )
             agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
             # Report the reward signals
             self.collected_rewards[name][agent_id] += np.sum(evaluate_result)
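
With the TensorFlow branch gone, the trainer assumes every reward signal exposes the PyTorch-style provider interface. A minimal sketch of that duck-typed contract, assuming `evaluate`, `strength`, and `name` (all visible in the diff) are everything the trainer relies on here; the class itself is invented for illustration:

    import numpy as np

    class ConstantRewardProvider:
        """Toy stand-in for a reward provider; not a class from this repo."""

        def __init__(self, strength: float = 1.0):
            self.name = "constant"    # used to build the stats-reporter key
            self.strength = strength  # scale factor applied by the trainer

        def evaluate(self, agent_buffer_trajectory) -> np.ndarray:
            # One unscaled reward per step of the trajectory
            n_steps = len(agent_buffer_trajectory["environment_rewards"])
            return np.ones(n_steps, dtype=np.float32)

    # The per-signal loop then reduces to one unconditional expression:
    # evaluate_result = provider.evaluate(trajectory) * provider.strength
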
53 changes: 13 additions & 40 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -19,7 +19,6 @@
 from mlagents.trainers.trajectory import Trajectory, SplitObservations
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, SACSettings
-from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider
 
 logger = get_logger(__name__)
 
@@ -141,16 +140,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
-            # BaseRewardProvider is a PyTorch-based reward signal
-            if isinstance(reward_signal, BaseRewardProvider):
-                evaluate_result = (
-                    reward_signal.evaluate(agent_buffer_trajectory)
-                    * reward_signal.strength
-                )
-            else:  # reward_signal uses TensorFlow
-                evaluate_result = reward_signal.evaluate_batch(
-                    agent_buffer_trajectory
-                ).scaled_reward
+            evaluate_result = (
+                reward_signal.evaluate(agent_buffer_trajectory)
+                * reward_signal.strength
+            )
 
             # Report the reward signals
             self.collected_rewards[name][agent_id] += np.sum(evaluate_result)
@@ -160,16 +153,10 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached
         )
         for name, v in value_estimates.items():
-            # BaseRewardProvider is a PyTorch-based reward signal
-            if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
-                self._stats_reporter.add_stat(
-                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
-                    np.mean(v),
-                )
-            else:  # TensorFlow reward signal
-                self._stats_reporter.add_stat(
-                    self.optimizer.reward_signals[name].value_name, np.mean(v)
-                )
+            self._stats_reporter.add_stat(
+                f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
+                np.mean(v),
+            )
 
         # Bootstrap using the last step rather than the bootstrap step if max step is reached.
         # Set last element to duplicate obs and remove dones.
@@ -272,15 +259,9 @@ def _update_sac_policy(self) -> bool:
             )
             # Get rewards for each reward
             for name, signal in self.optimizer.reward_signals.items():
-                # BaseRewardProvider is a PyTorch-based reward signal
-                if isinstance(signal, BaseRewardProvider):
-                    sampled_minibatch[f"{name}_rewards"] = (
-                        signal.evaluate(sampled_minibatch) * signal.strength
-                    )
-                else:  # reward_signal is a TensorFlow-based RewardSignal class
-                    sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
-                        sampled_minibatch
-                    ).scaled_reward
+                sampled_minibatch[f"{name}_rewards"] = (
+                    signal.evaluate(sampled_minibatch) * signal.strength
+                )
 
             update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
             for stat_name, value in update_stats.items():
@@ -327,20 +308,12 @@ def _update_reward_signals(self) -> None:
         reward_signal_minibatches = {}
         for name, signal in self.optimizer.reward_signals.items():
             logger.debug(f"Updating {name} at step {self.step}")
-            # BaseRewardProvider is a PyTorch-based reward signal
-            if not isinstance(signal, BaseRewardProvider):
-                # Some signals don't need a minibatch to be sampled - so we don't!
-                if signal.update_dict:
-                    reward_signal_minibatches[name] = buffer.sample_mini_batch(
-                        self.hyperparameters.batch_size,
-                        sequence_length=self.policy.sequence_length,
-                    )
-            else:  # TensorFlow reward signal
-                if name != "extrinsic":
-                    reward_signal_minibatches[name] = buffer.sample_mini_batch(
-                        self.hyperparameters.batch_size,
-                        sequence_length=self.policy.sequence_length,
-                    )
+            # Some signals don't need a minibatch to be sampled - so we don't!
+            if signal.update_dict:
+                reward_signal_minibatches[name] = buffer.sample_mini_batch(
+                    self.hyperparameters.batch_size,
+                    sequence_length=self.policy.sequence_length,
+                )
         update_stats = self.optimizer.update_reward_signals(
             reward_signal_minibatches, n_sequences
         )
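
The surviving branch gates minibatch sampling on the truthiness of `signal.update_dict`, so providers with nothing to train (such as extrinsic rewards) are skipped without the old name check. A self-contained sketch of that gating; both provider classes are hypothetical:

    class ExtrinsicProvider:
        update_dict = {}  # nothing trainable; falsy, so no minibatch is sampled

    class CuriosityProvider:
        update_dict = {"forward_loss": object()}  # trainable parts; truthy

    providers = {"extrinsic": ExtrinsicProvider(), "curiosity": CuriosityProvider()}
    to_sample = [name for name, p in providers.items() if p.update_dict]
    assert to_sample == ["curiosity"]
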
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/tests/__init__.py
@@ -19,7 +19,7 @@ def _check_no_float64(arr, kwargs_dtype):
     # tb[-2] is the wrapper function, e.g. np_array_no_float64
     # we want the calling function, so use tb[-3]
     filename = tb[-3].filename
-    # Only raise if this came from mlagents code, not tensorflow
+    # Only raise if this came from mlagents code
     if (
         "ml-agents/mlagents" in filename
         or "ml-agents-envs/mlagents" in filename
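
For context on this helper: the tests package wraps NumPy constructors so that float64 arrays accidentally created inside mlagents code raise an error. A rough reconstruction of that pattern from the comments visible above; everything beyond the identifiers shown in the diff is an assumption:

    import traceback
    import numpy as np

    def np_array_no_float64(*args, **kwargs):
        # Drop-in replacement for np.array while the tests run
        arr = np.array(*args, **kwargs)
        _check_no_float64(arr, kwargs.get("dtype"))
        return arr

    def _check_no_float64(arr, kwargs_dtype):
        if arr.dtype == np.float64 and kwargs_dtype is None:
            tb = traceback.extract_stack()
            # tb[-2] is the wrapper function, e.g. np_array_no_float64
            # we want the calling function, so use tb[-3]
            filename = tb[-3].filename
            # Only raise if this came from mlagents code
            if (
                "ml-agents/mlagents" in filename
                or "ml-agents-envs/mlagents" in filename
            ):
                raise ValueError(f"float64 array created at {filename}")
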
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/tests/test_trainer_controller.py
@@ -23,7 +23,7 @@ def basic_trainer_controller():
 
 @patch("numpy.random.seed")
 @patch.object(torch, "manual_seed")
-def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
+def test_initialization_seed(numpy_random_seed, torch_set_seed):
     seed = 27
     trainer_factory_mock = MagicMock()
     trainer_factory_mock.ghost_controller = GhostController()
@@ -36,7 +36,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
         training_seed=seed,
     )
     numpy_random_seed.assert_called_with(seed)
-    tensorflow_set_seed.assert_called_with(seed)
+    torch_set_seed.assert_called_with(seed)
 
 
 @pytest.fixture
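
One nuance the rename does not change: stacked `@patch` decorators inject their mocks bottom-up, so the first test parameter actually receives the `torch.manual_seed` mock and the second the `numpy.random.seed` mock. The assertions still hold because both are called with the same seed. A self-contained illustration:

    from unittest.mock import patch
    import numpy
    import torch

    @patch("numpy.random.seed")          # applied second, so second argument
    @patch.object(torch, "manual_seed")  # bottom decorator, so first argument
    def demo(manual_seed_mock, numpy_seed_mock):
        numpy.random.seed(27)
        torch.manual_seed(27)
        manual_seed_mock.assert_called_with(27)
        numpy_seed_mock.assert_called_with(27)

    demo()
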
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/trainer_controller.py
@@ -48,7 +48,7 @@ def __init__(
         :param param_manager: EnvironmentParameterManager object which stores information about all
         environment parameters.
         :param train: Whether to train model, or only run inference.
-        :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
+        :param training_seed: Seed to use for Numpy and Torch random number generation.
         :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging.
         """
         self.trainers: Dict[str, Trainer] = {}
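
The updated docstring corresponds to seeding both libraries from a single value; a minimal sketch of what that seeding looks like (this helper is illustrative, not the repo's code):

    import numpy as np
    import torch

    def set_training_seed(training_seed: int) -> None:
        np.random.seed(training_seed)     # NumPy RNG
        torch.manual_seed(training_seed)  # PyTorch RNG
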
1 change: 0 additions & 1 deletion ml-agents/setup.py
@@ -80,5 +80,4 @@ def run(self):
         ]
     },
     cmdclass={"verify": VerifyVersionCommand},
-    extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
 )
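
The deleted `extras_require` entry was the setuptools hook that let `pip install mlagents[tensorflow]` pull in TensorFlow; with it removed, that extra no longer exists. A generic illustration of the mechanism (the package name is invented):

    from setuptools import setup

    setup(
        name="example-pkg",
        version="0.1.0",
        # Optional dependency group, installed via: pip install example-pkg[tf]
        extras_require={"tf": ["tensorflow>=1.14,<3.0"]},
    )
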
3 changes: 0 additions & 3 deletions ml-agents/tests/yamato/yamato_utils.py
@@ -115,9 +115,6 @@ def init_venv(
     pip_commands = [
         "--upgrade pip",
         "--upgrade setuptools",
-        # TODO build these and publish to internal pypi
-        "~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl",
-        "tf2onnx==1.6.1",
     ]
     if mlagents_python_version:
         # install from pypi
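
Judging by its name, each `pip_commands` entry completes a `pip install` invocation run while the venv is initialized, so the TensorFlow wheel and tf2onnx simply stop being installed. A hedged sketch of how such a list is typically consumed (the loop is an assumption, not the repo's code):

    import subprocess

    pip_commands = ["--upgrade pip", "--upgrade setuptools"]
    for args in pip_commands:
        # Each entry is the tail of a "python -m pip install ..." command line
        subprocess.check_call(f"python -m pip install {args}", shell=True)
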
7 changes: 0 additions & 7 deletions test_constraints_max_tf1_version.txt

This file was deleted.

6 changes: 0 additions & 6 deletions test_constraints_max_tf2_version.txt

This file was deleted.

8 changes: 0 additions & 8 deletions test_constraints_min_version.txt

This file was deleted.

4 changes: 0 additions & 4 deletions test_requirements.txt
@@ -3,7 +3,3 @@ pytest>4.0.0,<6.0.0
 pytest-cov==2.6.1
 pytest-xdist==1.34.0
 
-# Tensorflow tests are here for the time being, before they are used in the codebase.
-tensorflow>=1.14,<3.0
-
-tf2onnx>=1.5.5