diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index ddad8b1238158..857b56081fd1d 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -901,6 +901,7 @@ def _log_episode_metrics(self, length, ret, sec, agents=None, modules=None): EPISODE_RETURN_MIN: ret, }, reduce="min", + window=self.config.metrics_num_episodes_for_smoothing, ) self.metrics.log_dict( { @@ -908,4 +909,5 @@ def _log_episode_metrics(self, length, ret, sec, agents=None, modules=None): EPISODE_RETURN_MAX: ret, }, reduce="max", + window=self.config.metrics_num_episodes_for_smoothing, ) diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 065e70781ae55..e18e32d7010a2 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -818,7 +818,7 @@ def _log_episode_metrics(self, length, ret, sec): ) # For some metrics, log min/max as well. - self.metrics.log_value(EPISODE_LEN_MIN, length, reduce="min") - self.metrics.log_value(EPISODE_RETURN_MIN, ret, reduce="min") - self.metrics.log_value(EPISODE_LEN_MAX, length, reduce="max") - self.metrics.log_value(EPISODE_RETURN_MAX, ret, reduce="max") + self.metrics.log_value(EPISODE_LEN_MIN, length, reduce="min", window=win) + self.metrics.log_value(EPISODE_RETURN_MIN, ret, reduce="min", window=win) + self.metrics.log_value(EPISODE_LEN_MAX, length, reduce="max", window=win) + self.metrics.log_value(EPISODE_RETURN_MAX, ret, reduce="max", window=win)