diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 1ccfbb5a1..ddf9b0625 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -39,6 +39,7 @@ Documentation:
 - Added Decisions and Dragons to resources. (@jmacglashan)
 - Updated PyBullet example, now compatible with Gymnasium
 - Added link to policies for ``policy_kwargs`` parameter (@kplers)
+- Added FRASA to the project page (@MarcDcls)
 
 Release 2.4.0 (2024-11-18)
 --------------------------
@@ -1739,4 +1740,4 @@ And all the contributors:
 @DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto @lutogniew
 @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
 @marekm4 @stagoverflow @rushitnshah @markscsmith @NickLucche @cschindlbeck @peteole @jak3122 @will-maclean
-@brn-dev @jmacglashan @kplers
+@brn-dev @jmacglashan @kplers @MarcDcls
diff --git a/docs/misc/projects.rst b/docs/misc/projects.rst
index 5f0c69710..0048ee4c4 100644
--- a/docs/misc/projects.rst
+++ b/docs/misc/projects.rst
@@ -250,3 +250,19 @@ It enables solving environments involving partial observability or locomotion (e
 | Authors: Corentin Léger, Gautier Hamon, Eleni Nisioti, Xavier Hinaut, Clément Moulin-Frier
 | Github: https://github.com/corentinlger/ER-MRL
 | Paper: https://arxiv.org/abs/2312.06695
+
+
+FRASA: Fall Recovery And Stand up agent
+---------------------------------------
+
+A Deep Reinforcement Learning agent for a humanoid robot that learns to recover from falls and stand up.
+
+The agent is trained using the MuJoCo physics engine. Real-world experiments are conducted on the
+Sigmaban humanoid robot, a small-sized humanoid designed by the *Rhoban Team* to compete in the RoboCup Kidsize League.
+The results, detailed in the paper and the video, show that the agent is able to recover from
+various external disturbances and stand up in a few seconds.
+
+| Authors: Marc Duclusaud, Clément Gaspard, Grégoire Passault, Mélodie Daniel, Olivier Ly
+| Github: https://github.com/Rhoban/frasa
+| Paper: https://arxiv.org/abs/2410.08655
+| Video: https://www.youtube.com/watch?v=NL65XW0O0mk
\ No newline at end of file
diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py
index 31c3a24a7..0e7387911 100644
--- a/stable_baselines3/common/callbacks.py
+++ b/stable_baselines3/common/callbacks.py
@@ -490,7 +490,7 @@ def _on_step(self) -> bool:
                     timesteps=self.evaluations_timesteps,
                     results=self.evaluations_results,
                     ep_lengths=self.evaluations_length,
-                    **kwargs,
+                    **kwargs,  # type: ignore[arg-type]
                 )
 
             mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
diff --git a/stable_baselines3/common/envs/bit_flipping_env.py b/stable_baselines3/common/envs/bit_flipping_env.py
index 4d99313ea..9f56e6950 100644
--- a/stable_baselines3/common/envs/bit_flipping_env.py
+++ b/stable_baselines3/common/envs/bit_flipping_env.py
@@ -103,7 +103,7 @@ def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int)
             # Convert to binary representation
             bit_vector = ((bit_vector[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
         elif self.image_obs_space:
-            bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
+            bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255  # type: ignore[assignment]
         else:
             bit_vector = np.array(state).reshape(batch_size, -1)
         return bit_vector
diff --git a/stable_baselines3/common/off_policy_algorithm.py b/stable_baselines3/common/off_policy_algorithm.py
index 6a043e7ac..c3e1c6662 100644
--- a/stable_baselines3/common/off_policy_algorithm.py
+++ b/stable_baselines3/common/off_policy_algorithm.py
@@ -487,7 +487,7 @@ def _store_transition(
                     next_obs[i] = infos[i]["terminal_observation"]
                     # VecNormalize normalizes the terminal observation
                     if self._vec_normalize_env is not None:
-                        next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :])
+                        next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :])  # type: ignore[assignment]
 
         replay_buffer.add(
             self._last_original_obs,  # type: ignore[arg-type]
diff --git a/stable_baselines3/common/vec_env/base_vec_env.py b/stable_baselines3/common/vec_env/base_vec_env.py
index b85c1cf88..71ee15e61 100644
--- a/stable_baselines3/common/vec_env/base_vec_env.py
+++ b/stable_baselines3/common/vec_env/base_vec_env.py
@@ -43,7 +43,7 @@ def tile_images(images_nhwc: Sequence[np.ndarray]) -> np.ndarray:  # pragma: no
     # img_HhWwc
     out_image = out_image.transpose(0, 2, 1, 3, 4)
     # img_Hh_Ww_c
-    out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
+    out_image = out_image.reshape((new_height * height, new_width * width, n_channels))  # type: ignore[assignment]
     return out_image
 
 
diff --git a/stable_baselines3/common/vec_env/dummy_vec_env.py b/stable_baselines3/common/vec_env/dummy_vec_env.py
index 4069356d2..267f23390 100644
--- a/stable_baselines3/common/vec_env/dummy_vec_env.py
+++ b/stable_baselines3/common/vec_env/dummy_vec_env.py
@@ -56,7 +56,7 @@ def step_async(self, actions: np.ndarray) -> None:
     def step_wait(self) -> VecEnvStepReturn:
         # Avoid circular imports
         for env_idx in range(self.num_envs):
-            obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
+            obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(  # type: ignore[assignment]
                 self.actions[env_idx]
             )
             # convert to SB3 VecEnv api
diff --git a/stable_baselines3/her/her_replay_buffer.py b/stable_baselines3/her/her_replay_buffer.py
index 956aabc92..e914c7ec0 100644
--- a/stable_baselines3/her/her_replay_buffer.py
+++ b/stable_baselines3/her/her_replay_buffer.py
@@ -157,7 +157,7 @@ def add(  # type: ignore[override]
         self.ep_start[self.pos] = self._current_ep_start.copy()
 
         if self.copy_info_dict:
-            self.infos[self.pos] = infos
+            self.infos[self.pos] = infos  # type: ignore[assignment]
         # Store the transition
         super().add(obs, next_obs, action, reward, done, infos)