Implement Search & Rescue Multi-Agent Environment #259

Open

wants to merge 30 commits into main

Changes from 1 commit

Commits (30)
3597d9e
feat: Implement predator prey env (#1)
zombie-einstein Nov 4, 2024
c955320
Merge branch 'instadeepai:main' into main
zombie-einstein Nov 4, 2024
6b34657
Merge branch 'main' into main
sash-a Nov 4, 2024
988339b
fix: PR fixes (#2)
zombie-einstein Nov 5, 2024
a0fe7a5
Merge branch 'instadeepai:main' into main
zombie-einstein Nov 5, 2024
b4cce01
style: Run updated pre-commit
zombie-einstein Nov 6, 2024
cb6d88d
refactor: Consolidate predator prey type
zombie-einstein Nov 7, 2024
06de3a0
feat: Implement search and rescue (#3)
zombie-einstein Nov 11, 2024
34beab6
fix: PR fixes (#4)
zombie-einstein Nov 14, 2024
f5fa659
Merge branch 'instadeepai:main' into main
zombie-einstein Nov 15, 2024
072db18
refactor: PR fixes (#5)
zombie-einstein Nov 19, 2024
162a74d
feat: Allow variable environment dimensions (#6)
zombie-einstein Nov 19, 2024
4996869
Merge branch 'main' into main
zombie-einstein Nov 22, 2024
6322f61
fix: Locate targets in single pass (#8)
zombie-einstein Nov 23, 2024
4ba7688
Merge branch 'instadeepai:main' into main
zombie-einstein Nov 28, 2024
9a654b9
feat: training and customisable observations (#7)
zombie-einstein Dec 7, 2024
5021e20
feat: view all targets (#9)
zombie-einstein Dec 9, 2024
c5c7b85
Merge branch 'instadeepai:main' into main
zombie-einstein Dec 9, 2024
13ffb84
Merge branch 'instadeepai:main' into main
zombie-einstein Dec 9, 2024
9e8ac5c
feat: Scaled rewards and target velocities (#10)
zombie-einstein Dec 11, 2024
5c509c7
Pass shape information to timesteps (#11)
zombie-einstein Dec 11, 2024
8acf242
test: extend tests and docs (#12)
zombie-einstein Dec 11, 2024
1792aa6
fix: unpin jax requirement
zombie-einstein Dec 12, 2024
1e66e78
Include agent positions in observation (#13)
zombie-einstein Dec 12, 2024
407ff79
Upgrade Esquilax and remove unused random keys (#14)
zombie-einstein Dec 27, 2024
04fe710
docs: Review docstrings and docs (#15)
zombie-einstein Jan 12, 2025
ac3f811
fix: Remove enum annotations
zombie-einstein Jan 12, 2025
943a51b
refactor: address pr comments (#16)
zombie-einstein Jan 17, 2025
6a3fdb1
Parameter tweaks
zombie-einstein Jan 17, 2025
ac8838f
refactor: Observation tweaks (#17)
zombie-einstein Jan 20, 2025
docs: Review docstrings and docs (#15)
* Review docstrings

* Add scaled individual reward function

* Doc tweaks

* Fix upper-case pi symbols
zombie-einstein authored Jan 12, 2025
commit 04fe710a4d88f1adc87ded49c636ddd64976662a
21 changes: 13 additions & 8 deletions docs/environments/search_and_rescue.md
@@ -12,17 +12,18 @@ is updated in the following sequence:

- The velocity of searching agents are updated, and consequently their positions.
- The positions of targets are updated.
-- Targets within detection range and an agents view cone are marked as found.
+- Targets within detection range, and within an agent's view cone, are marked as found.
- Agents are rewarded for locating previously unfound targets.
-- Local views of the environment are generated for each search agent.
+- Local views of the environment are generated for each searching agent.

The agents are allotted a fixed number of steps to locate the targets. The search
space is a uniform square space, wrapped at the boundaries.

Many aspects of the environment can be customised:

- Agent observations can include targets as well as other searcher agents.
-- Rewards can be shared by agents, or can be treated completely individually for individual agents.
+- Rewards can be shared by agents, or can be treated completely individually for individual
+  agents and can be scaled by time-step.
- Target dynamics can be customised to model various search scenarios.

## Observations
@@ -38,13 +39,14 @@ Many aspects of the environment can be customised:
[-1.0, -1.0, 0.5, -1.0, -1.0]
```

-where `-1.0` indicates there is no agents along that ray, and `0.5` is the normalised
+where `-1.0` indicates there are no agents along that ray, and `0.5` is the normalised
distance to the other agent. Channels in the segmented view are used to differentiate
between different agents/targets and can be customised. By default, the view has three
-channels representing other agents, found targets, and unfound targets.
+channels representing other agents, found targets, and unlocated targets.
- `targets_remaining`: float in the range `[0, 1]`. The normalised number of targets
remaining to be detected (i.e. 1.0 when no targets have been found).
-- `Step`: int in the range `[0, time_limit]`. The current simulation step.
+- `step`: int in the range `[0, time_limit]`. The current simulation step.
+- `positions`: jax array (float) of shape `(num_searchers, 2)`. Agent coordinates.
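As a concrete illustration of the segmented view described above, here is a minimal, hypothetical sketch of how a single searcher's three-channel observation might be laid out and queried (the values and the 5-ray resolution are made up for illustration):

```python
import jax.numpy as jnp

# Hypothetical 3-channel view for one searcher with 5 vision rays:
# channel 0 = other agents, channel 1 = found targets, channel 2 = unfound targets.
searcher_view = jnp.array(
    [
        [-1.0, -1.0, 0.5, -1.0, -1.0],   # another agent at normalised distance 0.5
        [-1.0, -1.0, -1.0, -1.0, -1.0],  # no found targets in range
        [-1.0, 0.2, -1.0, -1.0, -1.0],   # an unfound target at normalised distance 0.2
    ]
)

# -1.0 means nothing is detected along that ray.
agent_rays = searcher_view[0] > -1.0
nearest_agent = jnp.where(agent_rays, searcher_view[0], jnp.inf).min()
```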

## Actions

@@ -65,11 +67,14 @@ and speed
speed = speed + max_acceleration * action[1]
```

-Once applied, agent speeds are clipped to velocities within a fixed range of speeds.
+Once applied, agent speeds are clipped to velocities within a fixed range of speeds given
+by the `min_speed` and `max_speed` parameters.
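A rough sketch of the steering update described here, assuming `max_rotate` is a fraction of π radians and speeds are clipped to `[min_speed, max_speed]`; the exact implementation lives in `jumanji/environments/swarms/common/updates.py` and may differ in detail:

```python
import jax.numpy as jnp

def update_velocity(heading, speed, action, max_rotate, max_accelerate, min_speed, max_speed):
    # action[0] rotates the agent, action[1] accelerates/decelerates it; both lie in [-1, 1].
    heading = (heading + max_rotate * jnp.pi * action[0]) % (2.0 * jnp.pi)
    speed = jnp.clip(speed + max_accelerate * action[1], min_speed, max_speed)
    return heading, speed
```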

## Rewards

Jax array (float) of `(num_searchers,)`. Rewards are generated for each agent individually.

Agents are rewarded +1 for locating a target that has not already been detected. It is possible
for multiple agents to detect a target inside a step, as such rewards can either be shared
-by the locating agents, or each agent can get the full reward.
+by the locating agents, or each individual agent can get the full reward. Rewards provided can
+also be scaled by simulation step to encourage agents to develop efficient search patterns.
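To make the shared/scaled reward options concrete, a hypothetical sketch follows. This is not the PR's `RewardFn` API; `found` is an assumed boolean array of shape `(num_searchers, num_targets)` marking targets newly located by each agent in the current step:

```python
import jax.numpy as jnp

def individual_scaled_rewards(found, step, time_limit):
    # +1 per newly found target, decayed linearly over the episode.
    rewards = jnp.sum(found, axis=1).astype(float)
    return rewards * (1.0 - step / time_limit)

def shared_rewards(found):
    # Split the +1 reward equally between all agents that found the same target.
    finders = jnp.maximum(jnp.sum(found, axis=0), 1)
    return jnp.sum(found.astype(float) / finders, axis=1)
```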
4 changes: 2 additions & 2 deletions jumanji/environments/swarms/common/updates.py
@@ -63,7 +63,7 @@ def move(pos: chex.Array, heading: chex.Array, speed: chex.Array, env_size: floa
env_size: Size of the environment.

Returns:
-jax array (float32): Updated agent position.
+jax array (float32): Updated agent positions.
"""
d_pos = jnp.array([speed * jnp.cos(heading), speed * jnp.sin(heading)])
return (pos + d_pos) % env_size
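For illustration, a small usage sketch of this update with a unit-sized environment, showing the wrap-around behaviour (values are made up):

```python
import jax.numpy as jnp

pos = jnp.array([0.95, 0.5])
heading = jnp.array(0.0)  # moving along the x-axis
speed = jnp.array(0.1)

d_pos = jnp.array([speed * jnp.cos(heading), speed * jnp.sin(heading)])
new_pos = (pos + d_pos) % 1.0  # env_size = 1.0, so the position wraps to [0.05, 0.5]
```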
@@ -170,7 +170,7 @@ def view(

Simple view model where the agents view angle is subdivided
into an array of values representing the distance from
-the agent along a rays from the agent, with rays evenly distributed
+the agent along rays from the agent, with rays evenly distributed
across the agents field of view. The limit of vision is set at 1.0.
The default value if no object is within range is -1.0.
Currently, this model assumes the viewed agent/objects are circular.
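A hypothetical helper showing how rays could be spread evenly across the view cone described here (not the function in this module; `view_angle` is assumed to be a fraction of π, as elsewhere in the PR):

```python
import jax.numpy as jnp

def ray_headings(agent_heading, view_angle, num_vision):
    # Rays are spread evenly over [-view_angle * pi, +view_angle * pi] around the heading.
    offsets = jnp.linspace(-view_angle * jnp.pi, view_angle * jnp.pi, num_vision)
    return agent_heading + offsets
```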
7 changes: 4 additions & 3 deletions jumanji/environments/swarms/search_and_rescue/dynamics.py
@@ -25,8 +25,9 @@ class TargetDynamics(abc.ABC):
def __call__(self, key: chex.PRNGKey, targets: TargetState, env_size: float) -> TargetState:
"""Interface for target state update function.

-NOTE: Target positions should be bound to environment
-area (generally wrapped around at the boundaries).
+NOTE: Target positions should be inside the bounds
+of the environment. Out-of-bound co-ordinates can
+lead to unexpected behaviour.

Args:
key: Random key.
@@ -55,7 +56,7 @@ def __call__(self, key: chex.PRNGKey, targets: TargetState, env_size: float) ->
"""Update target state.

Args:
-key: random key.
+key: Random key.
targets: Current target states.
env_size: Environment size.

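Since `TargetDynamics` is the extension point for custom target behaviour, a hedged sketch of an implementation follows. The `TargetState` import path and its `pos` field / `replace` method are assumptions based on the interface shown above, not confirmed by this diff:

```python
import chex
import jax
import jax.numpy as jnp

from jumanji.environments.swarms.search_and_rescue.dynamics import TargetDynamics
from jumanji.environments.swarms.search_and_rescue.types import TargetState  # assumed module path


class DriftingWalk(TargetDynamics):
    """Hypothetical dynamics: a random walk plus a constant drift, wrapped at the boundaries."""

    def __init__(self, step_size: float, drift: chex.Array) -> None:
        self.step_size = step_size
        self.drift = drift

    def __call__(self, key: chex.PRNGKey, targets: TargetState, env_size: float) -> TargetState:
        noise = jax.random.uniform(key, targets.pos.shape, minval=-1.0, maxval=1.0)
        new_pos = (targets.pos + self.step_size * noise + self.drift) % env_size
        # Assumes TargetState is a chex/flax dataclass exposing `pos` and `replace`.
        return targets.replace(pos=new_pos)
```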
32 changes: 19 additions & 13 deletions jumanji/environments/swarms/search_and_rescue/env.py
@@ -61,17 +61,19 @@ class SearchAndRescue(Environment):
Each entry in the view indicates the distant to another agent/target
along a ray from the agent, and is -1.0 if nothing is in range along the ray.
The view model can be customised using an `ObservationFn` implementation, e.g.
-the view can include all agents and targets, or just other agents.
+the view can include agents and all targets, agents and found targets, or
+just other agents.
targets_remaining: (float) Number of targets remaining to be found from
the total scaled to the range [0, 1] (i.e. a value of 1.0 indicates
all the targets are still to be found).
step: (int) current simulation step.
positions: jax array (float) of shape (num_searchers, 2) search agent positions.

- action: jax array (float) of shape (num_searchers, 2)
Array of individual agent actions. Each agents actions rotate and
accelerate/decelerate the agent as [rotation, acceleration] on the range
[-1, 1]. These values are then scaled to update agent velocities within
-given parameters.
+given parameters (i.e. a value of ±1 is the maximum acceleration/rotation).

- reward: jax array (float) of shape (num_searchers,)
Arrays of individual agent rewards. A reward of +1 is granted when an agent
@@ -84,7 +86,7 @@ class SearchAndRescue(Environment):
- searchers: `AgentState`
- pos: jax array (float) of shape (num_searchers, 2) in the range [0, env_size].
- heading: jax array (float) of shape (num_searcher,) in
-the range [0, 2pi].
+the range [0, 2π].
- speed: jax array (float) of shape (num_searchers,) in the
range [min_speed, max_speed].
- targets: `TargetState`
@@ -115,7 +117,7 @@ def __init__(
searcher_max_accelerate: float = 0.005,
searcher_min_speed: float = 0.01,
searcher_max_speed: float = 0.02,
-searcher_view_angle: float = 0.75,
+searcher_view_angle: float = 0.5,
time_limit: int = 400,
viewer: Optional[Viewer[State]] = None,
target_dynamics: Optional[TargetDynamics] = None,
@@ -129,19 +131,18 @@ def __init__(
target_contact_range: Range at which a searchers will 'find' a target.
searcher_max_rotate: Maximum rotation searcher agents can
turn within a step. Should be a value from [0,1]
-representing a fraction of pi radians.
+representing a fraction of π-radians.
searcher_max_accelerate: Magnitude of the maximum
acceleration/deceleration a searcher agent can apply within a step.
searcher_min_speed: Minimum speed a searcher agent can move at.
searcher_max_speed: Maximum speed a searcher agent can move at.
searcher_view_angle: Searcher agent local view angle. Should be
-a value from [0,1] representing a fraction of pi radians.
+a value from [0,1] representing a fraction of π-radians.
The view cone of an agent goes from +- of the view angle
relative to its heading, e.g. 0.5 would mean searchers have a
90° view angle in total.
time_limit: Maximum number of environment steps allowed for search.
viewer: `Viewer` used for rendering. Defaults to `SearchAndRescueViewer`.
-target_dynamics:
+target_dynamics: Target object dynamics model, implemented as a
+`TargetDynamics` interface. Defaults to `RandomWalk`.
generator: Initial state `Generator` instance. Defaults to `RandomGenerator`
@@ -150,7 +151,7 @@ def __init__(
agents share rewards if they locate a target simultaneously.
observation: Agent observation view generation function. Defaults to
`AgentAndAllTargetObservationFn` where all targets (found and unfound)
-and other ogents are included in the generated view.
+and other searching agents are included in the generated view.
"""

self.target_contact_range = target_contact_range
@@ -164,14 +165,14 @@ def __init__(
)
self.time_limit = time_limit
self._target_dynamics = target_dynamics or RandomWalk(0.001)
-self.generator = generator or RandomGenerator(num_targets=100, num_searchers=2)
+self.generator = generator or RandomGenerator(num_targets=50, num_searchers=2)
self._viewer = viewer or SearchAndRescueViewer()
self._reward_fn = reward_fn or SharedRewardFn()
self._observation = observation or AgentAndAllTargetObservationFn(
num_vision=64,
-vision_range=0.1,
+vision_range=0.25,
view_angle=searcher_view_angle,
-agent_radius=0.01,
+agent_radius=0.02,
env_size=self.generator.env_size,
)
super().__init__()
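A minimal usage sketch of the constructor shown above together with the standard Jumanji reset/step loop (arguments other than those shown are left at their defaults; only the import path given by this diff's file layout is used):

```python
import jax

from jumanji.environments.swarms.search_and_rescue.env import SearchAndRescue

env = SearchAndRescue(searcher_view_angle=0.5, time_limit=400)

key = jax.random.PRNGKey(0)
state, timestep = env.reset(key)

# Random steering actions in [-1, 1] for each searcher: [rotation, acceleration].
actions = jax.random.uniform(key, (env.generator.num_searchers, 2), minval=-1.0, maxval=1.0)
state, timestep = env.step(state, actions)
```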
@@ -182,7 +183,12 @@ def __repr__(self) -> str:
"Search & rescue multi-agent environment:",
f" - num searchers: {self.generator.num_searchers}",
f" - num targets: {self.generator.num_targets}",
f" - max searcher rotation: {self.searcher_params.max_rotate}",
f" - max searcher acceleration: {self.searcher_params.max_accelerate}",
f" - searcher min speed: {self.searcher_params.min_speed}",
f" - searcher max speed: {self.searcher_params.max_speed}",
f" - search vision range: {self._observation.vision_range}",
f" - search view angle: {self._observation.view_angle}",
f" - target contact range: {self.target_contact_range}",
f" - num vision: {self._observation.num_vision}",
f" - agent radius: {self._observation.agent_radius}",
@@ -217,7 +223,7 @@ def step(self, state: State, actions: chex.Array) -> Tuple[State, TimeStep[Obser

Args:
state: Environment state.
-actions: Arrays of searcher steering actions.
+actions: 2d array of searcher steering actions.

Returns:
state: Updated searcher and target positions and velocities.
@@ -360,7 +366,7 @@ def render(self, state: State) -> None:
"""Render a frame of the environment for a given state using matplotlib.

Args:
-state: State object containing the current state of the environment.
+state: State object.
"""
self._viewer.render(state)

34 changes: 17 additions & 17 deletions jumanji/environments/swarms/search_and_rescue/observations.py
@@ -53,7 +53,7 @@ def __init__(
num_channels: Number of channels in agent view.
num_vision: Size of vision array.
vision_range: Vision range.
-view_angle: Agent view angle (as a fraction of pi).
+view_angle: Agent view angle (as a fraction of π).
agent_radius: Agent/target visual radius.
env_size: Environment size.
"""
@@ -92,7 +92,7 @@ def __init__(
Args:
num_vision: Size of vision array.
vision_range: Vision range.
-view_angle: Agent view angle (as a fraction of pi).
+view_angle: Agent view angle (as a fraction of π).
agent_radius: Agent/target visual radius.
env_size: Environment size.
"""
@@ -146,14 +146,14 @@ def found_target_view(
"""
Return view of a target, dependent on target status.

-This function is intended to be mapped over agents-targets
-by Esquilax.
+This function is intended to be mapped over agent-target
+pairs by Esquilax.

Args:
params: View angle and target visual radius.
searcher: Searcher agent state
target: Target state
-n_view: Number of value sin view array.
+n_view: Number of values in view array.
i_range: Vision range
env_size: Environment size

@@ -190,15 +190,15 @@ def __init__(
env_size: float,
) -> None:
"""
-Vision model that contains other agents, and found targets.
+Vision model that contains other agents and found targets.

Searchers and targets are visualised as individual channels.
Targets are only included if they have been located already.

Args:
num_vision: Size of vision array.
vision_range: Vision range.
-view_angle: Agent view angle (as a fraction of pi).
+view_angle: Agent view angle (as a fraction of π).
agent_radius: Agent/target visual radius.
env_size: Environment size.
"""
@@ -225,7 +225,7 @@ def __call__(self, state: State) -> chex.Array:
Returns:
Array of individual agent views of shape
(n-agents, 2, n-vision). Other agents are shown
-in channel 0, and located targets 1.
+in channel 0, and located targets in channel 1.
"""
searcher_views = esquilax.transforms.spatial(
view,
@@ -273,16 +273,16 @@ def all_target_view(
"""
Return view of a target, dependent on target status.

-This function is intended to be mapped over agents-targets
-by Esquilax.
+This function is intended to be mapped over agent-target
+pairs by Esquilax.

Args:
params: View angle and target visual radius.
-searcher: Searcher agent state
-target: Target state
+searcher: Searcher agent state.
+target: Target state.
n_view: Number of value sin view array.
-i_range: Vision range
-env_size: Environment size
+i_range: Vision range.
+env_size: Environment size.

Returns:
Segmented agent view of target.
Expand Down Expand Up @@ -328,7 +328,7 @@ def __init__(
Args:
num_vision: Size of vision array.
vision_range: Vision range.
-view_angle: Agent view angle (as a fraction of pi).
+view_angle: Agent view angle (as a fraction of π).
agent_radius: Agent/target visual radius.
env_size: Environment size.
"""
@@ -355,8 +355,8 @@ def __call__(self, state: State) -> chex.Array:
Returns:
Array of individual agent views of shape
(n-agents, 3, n-vision). Other agents are shown
-in channel 0, located targets 1, and un-located
-targets at index 2.
+in channel 0, located targets in channel 1, and un-located
+targets in channel 2.
"""
searcher_views = esquilax.transforms.spatial(
view,
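For reference, a small sketch of slicing the three-channel view array produced by this observation function (shapes follow the docstring above; the data itself is made up):

```python
import jax.numpy as jnp

# (n_agents, 3, n_vision): channel 0 = other agents, 1 = found targets, 2 = unfound targets.
views = jnp.full((2, 3, 8), -1.0)

unfound = views[:, 2]                                   # distances to unfound targets per ray
any_unfound_visible = jnp.any(unfound > -1.0, axis=-1)  # whether each agent can see one
```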