
Commit 6c544a5
updated version, docstrings
kywch committed May 30, 2024
1 parent f316c11 commit 6c544a5
Showing 4 changed files with 50 additions and 157 deletions.
16 changes: 1 addition & 15 deletions nmmo/core/config.py
@@ -95,21 +95,7 @@ def validate(config):


class Config(Template):
-  '''An environment configuration object
-
-  Global constants are defined as static class variables. You can override
-  any Config variable using standard CLI syntax (e.g. --NENT=128).
-
-  The default config as of v1.5 uses 1024x1024 maps with up to 2048 agents
-  and 1024 NPCs. It is suitable to time horizons of 8192+ steps. For smaller
-  experiments, consider the SmallMaps config.
-
-  Notes:
-    We use Google Fire internally to replace standard manual argparse
-    definitions for each Config property. This means you can subclass
-    Config to add new static attributes -- CLI definitions will be
-    generated automatically.
-  '''
+  '''An environment configuration object'''
  env_initialized = False

def __init__(self):
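The removed notes still describe how configs are meant to be used: subclass Config (or Default) and override static class attributes. A minimal sketch, assuming an installed nmmo package; MAP_N is the attribute referenced by the terrain.py change below, and the value here is illustrative:

```python
import nmmo

class SmallConfig(nmmo.config.Default):
    '''Overrides a static config attribute, as the removed Notes describe.'''
    MAP_N = 4  # illustrative: generate four maps instead of the default

config = SmallConfig()
```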
187 changes: 47 additions & 140 deletions nmmo/core/env.py
@@ -27,6 +27,13 @@ class Env(ParallelEnv):
  def __init__(self,
               config: Default = nmmo.config.Default(),
               seed = None):
+    '''Initializes the Neural MMO environment.
+
+    Args:
+      config (Default, optional): Configuration object for the environment.
+        Defaults to nmmo.config.Default().
+      seed (int, optional): Random seed for the environment. Defaults to None.
+    '''
    self._np_random = None
    self._np_seed = None
    self._reset_required = True
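A minimal construction sketch matching the documented signature, assuming nmmo exposes Env at the package top level:

```python
import nmmo

# Both arguments are optional per the new docstring; passing a seed
# makes the environment's randomness reproducible.
env = nmmo.Env(config=nmmo.config.Default(), seed=42)
```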
@@ -122,12 +129,12 @@ def mask_box(length):
  def observation_space(self, agent: AgentID):
    '''Neural MMO Observation Space

-    Args:
-      agent: Agent ID
-
-    Returns:
-      observation: gym.spaces object contained the structured observation
-        for the specified agent.'''
+    Args:
+      agent (AgentID): The ID of the agent.
+
+    Returns:
+      gym.spaces.Dict: The observation space for the agent.
+    '''
    return self._obs_space

# NOTE: make sure this runs once during trainer init and does NOT change afterwards
@@ -158,15 +165,12 @@ def _str_atn_map(self):
  def action_space(self, agent: AgentID):
    '''Neural MMO Action Space

-    Args:
-      agent: Agent ID
-
-    Returns:
-      actions: gym.spaces object contained the structured actions
-        for the specified agent. Each action is parameterized by a list
-        of discrete-valued arguments. These consist of both fixed, k-way
-        choices (such as movement direction) and selections from the
-        observation space (such as targeting)'''
+    Args:
+      agent (AgentID): The ID of the agent.
+
+    Returns:
+      gym.spaces.Dict: The action space for the agent.
+    '''
    return self._atn_space

############################################################################
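Both rewritten docstrings now name the concrete return type, gym.spaces.Dict. A quick inspection sketch, assuming agent ID 1 is valid (Neural MMO agent IDs are integers):

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())

# Per the docstrings above, both calls return gym.spaces.Dict instances.
print(env.observation_space(1))
print(env.action_space(1))
```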
@@ -176,27 +180,20 @@ def reset(self, seed=None, options=None, # PettingZoo API args
            map_id=None,
            make_task_fn: Callable=None,
            game: game_api.Game=None):
-    '''OpenAI Gym API reset function
-
-    Loads a new game map and returns initial observations
-
-    Args:
-      map_id: Map index to load. Selects a random map by default
-      seed: random seed to use
-      make_task_fn: A function to make tasks
-      game: A game object
-
-    Returns:
-      observations, as documented by _compute_observations()
-
-    Notes:
-      Neural MMO simulates a persistent world. Ideally, you should reset
-      the environment only once, upon creation. In practice, this approach
-      limits the number of parallel environment simulations to the number
-      of CPU cores available. At small and medium hardware scale, we
-      therefore recommend the standard approach of resetting after a long
-      but finite horizon: ~1000 timesteps for small maps and
-      5000+ timesteps for large maps
-    '''
+    '''Resets the environment and returns the initial observations.
+
+    Args:
+      seed (int, optional): Random seed for the environment. Defaults to None.
+      options (dict, optional): Additional options for resetting the environment.
+        Defaults to None.
+      map_id (int, optional): The ID of the map to load. Defaults to None.
+      make_task_fn (callable, optional): Function to create tasks. Defaults to None.
+      game (Game, optional): The game to be played. Defaults to None.
+
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their initial observations.
+        - info (dict): Dictionary containing additional information.
+    '''
# If options are provided, override the kwargs
if options is not None:
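The rewritten docstring pins reset() to the PettingZoo-style (obs, info) return pair. A minimal sketch; map_id, make_task_fn, and game are omitted, so a random map and the defaults are used:

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())

# reset() returns (obs, info); obs maps agent IDs to initial observations.
obs, info = env.reset(seed=0)
print(f'{len(obs)} agents received initial observations')
```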
@@ -314,96 +311,20 @@ def _map_task_to_agent(self):
self.realm.players[agent_id].my_task = agent_tasks[0]

  def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
-    '''Simulates one game tick or timestep
-
-    Args:
-      actions: A dictionary of agent decisions of format::
-
-        {
-          agent_1: {
-            action_1: [arg_1, arg_2],
-            action_2: [...],
-            ...
-          },
-          agent_2: {
-            ...
-          },
-          ...
-        }
-
-      Where agent_i is the integer index of the i\'th agent
-
-      The environment only evaluates provided actions for provided
-      agents. Unprovided action types are interpreted as no-ops and
-      illegal actions are ignored
-
-      It is also possible to specify invalid combinations of valid
-      actions, such as two movements or two attacks. In this case,
-      one will be selected arbitrarily from each incompatible set.
-
-      A well-formed algorithm should do none of the above. We only
-      perform this conditional processing to make batched action
-      computation easier.
-
-    Returns:
-      (dict, dict, dict, None):
-
-      observations:
-        A dictionary of agent observations of format::
-
-          {
-            agent_1: obs_1,
-            agent_2: obs_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        obs_i is specified by the observation_space function.
-
-      rewards:
-        A dictionary of agent rewards of format::
-
-          {
-            agent_1: reward_1,
-            agent_2: reward_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        reward_i is the reward of the i\'th agent.
-
-        By default, agents receive -1 reward for dying and 0 reward for
-        all other circumstances. Override Env.reward to specify
-        custom reward functions
-
-      dones:
-        A dictionary of agent done booleans of format::
-
-          {
-            agent_1: done_1,
-            agent_2: done_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        done_i is a boolean denoting whether the i\'th agent has died.
-
-        Note that obs_i will be a garbage placeholder if done_i is true.
-        This is provided only for conformity with PettingZoo. Your
-        algorithm should not attempt to leverage observations outside of
-        trajectory bounds. You can omit garbage obs_i values by setting
-        omitDead=True.
-
-      infos:
-        A dictionary of agent infos of format:
-
-          {
-            agent_1: None,
-            agent_2: None,
-            ...
-          }
-
-        Provided for conformity with PettingZoo
-    '''
+    '''Performs one step in the environment given the provided actions.
+
+    Args:
+      actions (dict): Dictionary mapping agent IDs to their actions.
+
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their new observations.
+        - rewards (dict): Dictionary mapping agent IDs to their rewards.
+        - terminated (dict): Dictionary mapping agent IDs to whether they reached
+          a terminal state.
+        - truncated (dict): Dictionary mapping agent IDs to whether the episode was
+          truncated (e.g. reached maximum number of steps).
+        - infos (dict): Dictionary containing additional information.
+    '''
assert not self._reset_required, 'step() called before reset'
# Add in scripted agents' actions, if any
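The new docstring aligns step() with the five-tuple PettingZoo parallel API. A short rollout sketch; sending an empty dict per agent relies on the no-op behavior spelled out in the removed docstring (unprovided actions are treated as no-ops):

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())
obs, info = env.reset(seed=0)

for _ in range(8):
    # Empty per-agent action dicts are interpreted as no-ops.
    actions = {agent_id: {} for agent_id in obs}
    obs, rewards, terminated, truncated, infos = env.step(actions)

print(sum(rewards.values()))
```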
@@ -556,20 +477,6 @@ def _update_comm_obs(self):
self._comm_obs[eid] = team_obs

  def _compute_rewards(self):
-    '''Computes the reward for the specified agent
-
-    Override this method to create custom reward functions. You have full
-    access to the environment state via self.realm. Our baselines do not
-    modify this method; specify any changes when comparing to baselines
-
-    Args:
-      player: player object
-
-    Returns:
-      reward:
-        The reward for the actions on the previous timestep of the
-        entity identified by ent_id.
-    '''
# Initialization
agents = set(self._current_agents)
infos = {agent_id: {'task': {}} for agent_id in agents}
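Although the docstring is deleted, its advice still applies: override _compute_rewards in a subclass and read environment state via self.realm. A hedged sketch; the (rewards, infos) return pair is an assumption inferred from the initialization code shown above, not something this diff confirms:

```python
import nmmo

class SurvivalBonusEnv(nmmo.Env):
    def _compute_rewards(self):
        # Assumed signature: returns (rewards, infos), matching the infos
        # dict initialized in the method body above.
        rewards, infos = super()._compute_rewards()
        for agent_id in rewards:
            if agent_id in self.realm.players:  # agent is still alive
                rewards[agent_id] += 0.01  # small per-tick survival bonus
        return rewards, infos
```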
2 changes: 1 addition & 1 deletion nmmo/core/terrain.py
@@ -276,7 +276,7 @@ def load_textures(self):
self.textures = lookup

  def generate_all_maps(self, seed=None):
-    '''Generates NMAPS maps according to generate_map
+    '''Generates MAP_N maps according to generate_map
    Provides additional utilities for saving to .npy and rendering png previews'''

2 changes: 1 addition & 1 deletion nmmo/version.py
@@ -1 +1 @@
-__version__ = '2.0.0'
+__version__ = '2.1.0'
