
Commit 6c544a5
updated version, docstrings
kywch committed May 30, 2024
1 parent f316c11 commit 6c544a5
Showing 4 changed files with 50 additions and 157 deletions.
16 changes: 1 addition & 15 deletions nmmo/core/config.py
@@ -95,21 +95,7 @@ def validate(config):


class Config(Template):
-  '''An environment configuration object
-
-  Global constants are defined as static class variables. You can override
-  any Config variable using standard CLI syntax (e.g. --NENT=128).
-
-  The default config as of v1.5 uses 1024x1024 maps with up to 2048 agents
-  and 1024 NPCs. It is suitable to time horizons of 8192+ steps. For smaller
-  experiments, consider the SmallMaps config.
-
-  Notes:
-    We use Google Fire internally to replace standard manual argparse
-    definitions for each Config property. This means you can subclass
-    Config to add new static attributes -- CLI definitions will be
-    generated automatically.
-  '''
+  '''An environment configuration object'''
  env_initialized = False

def __init__(self):
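The removed notes still describe how configs are meant to be used: subclass Config (or Default) and override static class attributes. A minimal sketch, assuming an installed nmmo package; MAP_N is the attribute referenced by the terrain.py change below, and the value here is illustrative:

```python
import nmmo

class SmallConfig(nmmo.config.Default):
    '''Overrides a static config attribute, as the removed Notes describe.'''
    MAP_N = 4  # illustrative: generate four maps instead of the default

config = SmallConfig()
```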
187 changes: 47 additions & 140 deletions nmmo/core/env.py
@@ -27,6 +27,13 @@ class Env(ParallelEnv):
  def __init__(self,
               config: Default = nmmo.config.Default(),
               seed = None):
+    '''Initializes the Neural MMO environment.
+
+    Args:
+      config (Default, optional): Configuration object for the environment.
+        Defaults to nmmo.config.Default().
+      seed (int, optional): Random seed for the environment. Defaults to None.
+    '''
    self._np_random = None
    self._np_seed = None
    self._reset_required = True
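A minimal construction sketch matching the documented signature, assuming nmmo exposes Env at the package top level:

```python
import nmmo

# Both arguments are optional per the new docstring; passing a seed
# makes the environment's randomness reproducible.
env = nmmo.Env(config=nmmo.config.Default(), seed=42)
```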
@@ -122,12 +129,12 @@ def mask_box(length):
  def observation_space(self, agent: AgentID):
    '''Neural MMO Observation Space

-    Args:
-      agent: Agent ID
-
-    Returns:
-      observation: gym.spaces object contained the structured observation
-        for the specified agent.'''
+    Args:
+      agent (AgentID): The ID of the agent.
+
+    Returns:
+      gym.spaces.Dict: The observation space for the agent.
+    '''
    return self._obs_space

# NOTE: make sure this runs once during trainer init and does NOT change afterwards
@@ -158,15 +165,12 @@ def _str_atn_map(self):
  def action_space(self, agent: AgentID):
    '''Neural MMO Action Space

-    Args:
-      agent: Agent ID
-
-    Returns:
-      actions: gym.spaces object contained the structured actions
-        for the specified agent. Each action is parameterized by a list
-        of discrete-valued arguments. These consist of both fixed, k-way
-        choices (such as movement direction) and selections from the
-        observation space (such as targeting)'''
+    Args:
+      agent (AgentID): The ID of the agent.
+
+    Returns:
+      gym.spaces.Dict: The action space for the agent.
+    '''
    return self._atn_space

############################################################################
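Both rewritten docstrings now name the concrete return type, gym.spaces.Dict. A quick inspection sketch, assuming agent ID 1 is valid (Neural MMO agent IDs are integers):

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())

# Per the docstrings above, both calls return gym.spaces.Dict instances.
print(env.observation_space(1))
print(env.action_space(1))
```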
@@ -176,27 +180,20 @@ def reset(self, seed=None, options=None, # PettingZoo API args
            map_id=None,
            make_task_fn: Callable=None,
            game: game_api.Game=None):
-    '''OpenAI Gym API reset function
-
-    Loads a new game map and returns initial observations
-
-    Args:
-      map_id: Map index to load. Selects a random map by default
-      seed: random seed to use
-      make_task_fn: A function to make tasks
-      game: A game object
-
-    Returns:
-      observations, as documented by _compute_observations()
-
-    Notes:
-      Neural MMO simulates a persistent world. Ideally, you should reset
-      the environment only once, upon creation. In practice, this approach
-      limits the number of parallel environment simulations to the number
-      of CPU cores available. At small and medium hardware scale, we
-      therefore recommend the standard approach of resetting after a long
-      but finite horizon: ~1000 timesteps for small maps and
-      5000+ timesteps for large maps
-    '''
+    '''Resets the environment and returns the initial observations.
+
+    Args:
+      seed (int, optional): Random seed for the environment. Defaults to None.
+      options (dict, optional): Additional options for resetting the environment.
+        Defaults to None.
+      map_id (int, optional): The ID of the map to load. Defaults to None.
+      make_task_fn (callable, optional): Function to create tasks. Defaults to None.
+      game (Game, optional): The game to be played. Defaults to None.
+
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their initial observations.
+        - info (dict): Dictionary containing additional information.
+    '''
# If options are provided, override the kwargs
if options is not None:
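The rewritten docstring pins reset() to the PettingZoo-style (obs, info) return pair. A minimal sketch; map_id, make_task_fn, and game are omitted, so a random map and the defaults are used:

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())

# reset() returns (obs, info); obs maps agent IDs to initial observations.
obs, info = env.reset(seed=0)
print(f'{len(obs)} agents received initial observations')
```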
@@ -314,96 +311,20 @@ def _map_task_to_agent(self):
self.realm.players[agent_id].my_task = agent_tasks[0]

  def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
-    '''Simulates one game tick or timestep
-
-    Args:
-      actions: A dictionary of agent decisions of format::
-
-        {
-          agent_1: {
-            action_1: [arg_1, arg_2],
-            action_2: [...],
-            ...
-          },
-          agent_2: {
-            ...
-          },
-          ...
-        }
-
-      Where agent_i is the integer index of the i\'th agent
-
-      The environment only evaluates provided actions for provided
-      agents. Unprovided action types are interpreted as no-ops and
-      illegal actions are ignored
-
-      It is also possible to specify invalid combinations of valid
-      actions, such as two movements or two attacks. In this case,
-      one will be selected arbitrarily from each incompatible set.
-
-      A well-formed algorithm should do none of the above. We only
-      perform this conditional processing to make batched action
-      computation easier.
-
-    Returns:
-      (dict, dict, dict, None):
-
-      observations:
-        A dictionary of agent observations of format::
-
-          {
-            agent_1: obs_1,
-            agent_2: obs_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        obs_i is specified by the observation_space function.
-
-      rewards:
-        A dictionary of agent rewards of format::
-
-          {
-            agent_1: reward_1,
-            agent_2: reward_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        reward_i is the reward of the i\'th agent.
-
-        By default, agents receive -1 reward for dying and 0 reward for
-        all other circumstances. Override Env.reward to specify
-        custom reward functions
-
-      dones:
-        A dictionary of agent done booleans of format::
-
-          {
-            agent_1: done_1,
-            agent_2: done_2,
-            ...
-          }
-
-        Where agent_i is the integer index of the i\'th agent and
-        done_i is a boolean denoting whether the i\'th agent has died.
-
-        Note that obs_i will be a garbage placeholder if done_i is true.
-        This is provided only for conformity with PettingZoo. Your
-        algorithm should not attempt to leverage observations outside of
-        trajectory bounds. You can omit garbage obs_i values by setting
-        omitDead=True.
-
-      infos:
-        A dictionary of agent infos of format:
-
-          {
-            agent_1: None,
-            agent_2: None,
-            ...
-          }
-
-        Provided for conformity with PettingZoo
-    '''
+    '''Performs one step in the environment given the provided actions.
+
+    Args:
+      actions (dict): Dictionary mapping agent IDs to their actions.
+
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their new observations.
+        - rewards (dict): Dictionary mapping agent IDs to their rewards.
+        - terminated (dict): Dictionary mapping agent IDs to whether they reached
+          a terminal state.
+        - truncated (dict): Dictionary mapping agent IDs to whether the episode was
+          truncated (e.g. reached maximum number of steps).
+        - infos (dict): Dictionary containing additional information.
+    '''
assert not self._reset_required, 'step() called before reset'
# Add in scripted agents' actions, if any
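The new docstring aligns step() with the five-tuple PettingZoo parallel API. A short rollout sketch; sending an empty dict per agent relies on the no-op behavior spelled out in the removed docstring (unprovided actions are treated as no-ops):

```python
import nmmo

env = nmmo.Env(nmmo.config.Default())
obs, info = env.reset(seed=0)

for _ in range(8):
    # Empty per-agent action dicts are interpreted as no-ops.
    actions = {agent_id: {} for agent_id in obs}
    obs, rewards, terminated, truncated, infos = env.step(actions)

print(sum(rewards.values()))
```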
@@ -556,20 +477,6 @@ def _update_comm_obs(self):
self._comm_obs[eid] = team_obs

  def _compute_rewards(self):
-    '''Computes the reward for the specified agent
-
-    Override this method to create custom reward functions. You have full
-    access to the environment state via self.realm. Our baselines do not
-    modify this method; specify any changes when comparing to baselines
-
-    Args:
-      player: player object
-
-    Returns:
-      reward:
-        The reward for the actions on the previous timestep of the
-        entity identified by ent_id.
-    '''
# Initialization
agents = set(self._current_agents)
infos = {agent_id: {'task': {}} for agent_id in agents}
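Although the docstring is deleted, its advice still applies: override _compute_rewards in a subclass and read environment state via self.realm. A hedged sketch; the (rewards, infos) return pair is an assumption inferred from the initialization code shown above, not something this diff confirms:

```python
import nmmo

class SurvivalBonusEnv(nmmo.Env):
    def _compute_rewards(self):
        # Assumed signature: returns (rewards, infos), matching the infos
        # dict initialized in the method body above.
        rewards, infos = super()._compute_rewards()
        for agent_id in rewards:
            if agent_id in self.realm.players:  # agent is still alive
                rewards[agent_id] += 0.01  # small per-tick survival bonus
        return rewards, infos
```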
2 changes: 1 addition & 1 deletion nmmo/core/terrain.py
@@ -276,7 +276,7 @@ def load_textures(self):
self.textures = lookup

  def generate_all_maps(self, seed=None):
-    '''Generates NMAPS maps according to generate_map
+    '''Generates MAP_N maps according to generate_map
    Provides additional utilities for saving to .npy and rendering png previews'''

2 changes: 1 addition & 1 deletion nmmo/version.py
@@ -1 +1 @@
-__version__ = '2.0.0'
+__version__ = '2.1.0'
