
Commit

Merge pull request #122 from kywch/cython
YOLO merging the rest of meta mmo.
kywch authored May 30, 2024
2 parents 77c55fe + 6c544a5 commit a8ff404
Showing 28 changed files with 357 additions and 268 deletions.
14 changes: 10 additions & 4 deletions nmmo/core/action.py
@@ -436,8 +436,11 @@ def call(realm, entity, item, target):
 
     if not (config.ITEM_ALLOW_GIFT and
             entity.ent_id != target.ent_id and # but not self
-            target.is_player and
-            entity.pos == target.pos): # the same tile
+            target.is_player):
       return
 
+    # NOTE: allow give within the visual range
+    if utils.linf_single(entity.pos, target.pos) > config.PLAYER_VISION_RADIUS:
+      return
+
     if not target.inventory.space:
@@ -484,8 +487,11 @@ def call(realm, entity, amount, target):
 
     if not (config.ITEM_ALLOW_GIFT and
             entity.ent_id != target.ent_id and # but not self
-            target.is_player and
-            entity.pos == target.pos): # the same tile
+            target.is_player):
       return
 
+    # NOTE: allow give within the visual range
+    if utils.linf_single(entity.pos, target.pos) > config.PLAYER_VISION_RADIUS:
+      return
+
     if not isinstance(amount, int):
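The change above relaxes the Give/GiveGold range check: instead of requiring giver and receiver to stand on the same tile, a gift now succeeds for any player target within the giver's vision square. A minimal sketch of the distance test follows; the stand-alone linf_single below is an illustrative stand-in for utils.linf_single, assumed here to compute the L-infinity (Chebyshev) distance between two (row, col) positions.

def linf_single(pos1, pos2):
  # L-infinity (Chebyshev) distance between two (row, col) tiles
  return max(abs(pos1[0] - pos2[0]), abs(pos1[1] - pos2[1]))

# Hypothetical positions and radius, for illustration only
PLAYER_VISION_RADIUS = 7
giver_pos, receiver_pos = (12, 7), (14, 10)

# Old rule: only allowed when giver_pos == receiver_pos
# New rule: allowed anywhere inside the vision square
can_give = linf_single(giver_pos, receiver_pos) <= PLAYER_VISION_RADIUS
print(can_give)  # True -- the distance is max(2, 3) = 3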
21 changes: 5 additions & 16 deletions nmmo/core/config.py
@@ -95,21 +95,7 @@ def validate(config):
 
 
 class Config(Template):
-  '''An environment configuration object
-  Global constants are defined as static class variables. You can override
-  any Config variable using standard CLI syntax (e.g. --NENT=128).
-  The default config as of v1.5 uses 1024x1024 maps with up to 2048 agents
-  and 1024 NPCs. It is suitable to time horizons of 8192+ steps. For smaller
-  experiments, consider the SmallMaps config.
-  Notes:
-    We use Google Fire internally to replace standard manual argparse
-    definitions for each Config property. This means you can subclass
-    Config to add new static attributes -- CLI definitions will be
-    generated automatically.
-  '''
+  '''An environment configuration object'''
   env_initialized = False
 
   def __init__(self):
@@ -391,7 +377,7 @@ class Terrain:
   TERRAIN_DISABLE_STONE = False
   '''Disable stone (obstacle) tiles'''
 
-  TERRAIN_SCATTER_EXTRA_RESOURCES = False
+  TERRAIN_SCATTER_EXTRA_RESOURCES = True
   '''Whether to scatter extra food, water on the map.
   Only works when MAP_RESET_FROM_FRACTAL is True'''

@@ -588,6 +574,9 @@ class NPC:
   NPC_LEVEL_DAMAGE = 8
   '''Bonus NPC damage per level'''
 
+  NPC_LEVEL_MULTIPLIER = 1.0
+  '''Multiplier for NPC level damage and defense, for easier difficulty tuning'''
+
   NPC_ALLOW_ATTACK_OTHER_NPCS = False
   '''Whether NPCs can attack other NPCs'''

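Because Config attributes are plain class variables, the new NPC_LEVEL_MULTIPLIER knob and the flipped TERRAIN_SCATTER_EXTRA_RESOURCES default can be overridden from a subclass. A hedged sketch follows; the subclass name and values are illustrative, and exactly how the multiplier feeds into NPC damage/defense formulas is not visible in this diff.

import nmmo

class EasierConfig(nmmo.config.Default):
  '''Illustrative config override, not part of this commit'''
  NPC_LEVEL_MULTIPLIER = 0.5               # assumed to scale level-based NPC damage/defense down
  TERRAIN_SCATTER_EXTRA_RESOURCES = False  # restore the previous default

env = nmmo.Env(EasierConfig())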
208 changes: 57 additions & 151 deletions nmmo/core/env.py
@@ -27,6 +27,13 @@ class Env(ParallelEnv):
   def __init__(self,
                config: Default = nmmo.config.Default(),
                seed = None):
+    '''Initializes the Neural MMO environment.
+    Args:
+      config (Default, optional): Configuration object for the environment.
+        Defaults to nmmo.config.Default().
+      seed (int, optional): Random seed for the environment. Defaults to None.
+    '''
     self._np_random = None
     self._np_seed = None
     self._reset_required = True
@@ -45,7 +52,6 @@ def __init__(self,
     self.possible_agents = self.config.POSSIBLE_AGENTS
     self._alive_agents = None
     self._current_agents = None
-    self._dead_agents = set()
     self._dead_this_tick = None
     self.scripted_agents = set()

@@ -123,12 +129,12 @@ def mask_box(length):
   def observation_space(self, agent: AgentID):
     '''Neural MMO Observation Space
-    Args:
-      agent: Agent ID
-    Returns:
-      observation: gym.spaces object contained the structured observation
-        for the specified agent.'''
+    Args:
+      agent (AgentID): The ID of the agent.
+    Returns:
+      gym.spaces.Dict: The observation space for the agent.
+    '''
     return self._obs_space
 
   # NOTE: make sure this runs once during trainer init and does NOT change afterwards
@@ -159,15 +165,12 @@ def _str_atn_map(self):
   def action_space(self, agent: AgentID):
     '''Neural MMO Action Space
-    Args:
-      agent: Agent ID
-    Returns:
-      actions: gym.spaces object contained the structured actions
-        for the specified agent. Each action is parameterized by a list
-        of discrete-valued arguments. These consist of both fixed, k-way
-        choices (such as movement direction) and selections from the
-        observation space (such as targeting)'''
+    Args:
+      agent (AgentID): The ID of the agent.
+    Returns:
+      gym.spaces.Dict: The action space for the agent.
+    '''
     return self._atn_space
 
   ############################################################################
@@ -177,27 +180,20 @@ def reset(self, seed=None, options=None, # PettingZoo API args
             map_id=None,
             make_task_fn: Callable=None,
             game: game_api.Game=None):
-    '''OpenAI Gym API reset function
-    Loads a new game map and returns initial observations
-    Args:
-      map_id: Map index to load. Selects a random map by default
-      seed: random seed to use
-      make_task_fn: A function to make tasks
-      game: A game object
-    Returns:
-      observations, as documented by _compute_observations()
-    Notes:
-      Neural MMO simulates a persistent world. Ideally, you should reset
-      the environment only once, upon creation. In practice, this approach
-      limits the number of parallel environment simulations to the number
-      of CPU cores available. At small and medium hardware scale, we
-      therefore recommend the standard approach of resetting after a long
-      but finite horizon: ~1000 timesteps for small maps and
-      5000+ timesteps for large maps
-    '''
+    '''Resets the environment and returns the initial observations.
+    Args:
+      seed (int, optional): Random seed for the environment. Defaults to None.
+      options (dict, optional): Additional options for resetting the environment.
+        Defaults to None.
+      map_id (int, optional): The ID of the map to load. Defaults to None.
+      make_task_fn (callable, optional): Function to create tasks. Defaults to None.
+      game (Game, optional): The game to be played. Defaults to None.
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their initial observations.
+        - info (dict): Dictionary containing additional information.
+    '''
     # If options are provided, override the kwargs
     if options is not None:
@@ -219,7 +215,7 @@ def reset(self, seed=None, options=None, # PettingZoo API args
       self.game = game
       self.game.reset(self._np_random, map_dict)
       self.tasks = self.game.tasks
-    elif self.curriculum_file_path is not None:
+    elif self.curriculum_file_path is not None or self.game_packs is not None:
       # Assume training -- pick a random game from the game packs
       self.game = self.default_game
       if self.game_packs:
@@ -240,7 +236,6 @@ def reset(self, seed=None, options=None, # PettingZoo API args
 
     # Reset the agent vars
     self._alive_agents = self.possible_agents
-    self._dead_agents.clear()
     self._dead_this_tick = {}
     self._map_task_to_agent()
     self._current_agents = self.possible_agents # tracking alive + dead_this_tick
@@ -298,7 +293,7 @@ def _load_map_file(self, map_id: int=None):
   def _map_task_to_agent(self):
     self.agent_task_map.clear()
     for agent_id in self.agents:
-      self.realm.players[agent_id].my_tasks = None
+      self.realm.players[agent_id].my_task = None
     for task in self.tasks:
       if task.embedding is None:
         task.set_embedding(self._dummy_task_embedding)
@@ -316,96 +311,20 @@ def _map_task_to_agent(self):
       self.realm.players[agent_id].my_task = agent_tasks[0]
 
   def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
-    '''Simulates one game tick or timestep
-    Args:
-      actions: A dictionary of agent decisions of format::
-        {
-          agent_1: {
-            action_1: [arg_1, arg_2],
-            action_2: [...],
-            ...
-          },
-          agent_2: {
-            ...
-          },
-          ...
-        }
-        Where agent_i is the integer index of the i\'th agent
-        The environment only evaluates provided actions for provided
-        agents. Unprovided action types are interpreted as no-ops and
-        illegal actions are ignored
-        It is also possible to specify invalid combinations of valid
-        actions, such as two movements or two attacks. In this case,
-        one will be selected arbitrarily from each incompatible sets.
-        A well-formed algorithm should do none of the above. We only
-        Perform this conditional processing to make batched action
-        computation easier.
-    Returns:
-      (dict, dict, dict, None):
-        observations:
-          A dictionary of agent observations of format::
-            {
-              agent_1: obs_1,
-              agent_2: obs_2,
-              ...
-            }
-          Where agent_i is the integer index of the i\'th agent and
-          obs_i is specified by the observation_space function.
-        rewards:
-          A dictionary of agent rewards of format::
-            {
-              agent_1: reward_1,
-              agent_2: reward_2,
-              ...
-            }
-          Where agent_i is the integer index of the i\'th agent and
-          reward_i is the reward of the i\'th agent.
-          By default, agents receive -1 reward for dying and 0 reward for
-          all other circumstances. Override Env.reward to specify
-          custom reward functions
-        dones:
-          A dictionary of agent done booleans of format::
-            {
-              agent_1: done_1,
-              agent_2: done_2,
-              ...
-            }
-          Where agent_i is the integer index of the i\'th agent and
-          done_i is a boolean denoting whether the i\'th agent has died.
-          Note that obs_i will be a garbage placeholder if done_i is true.
-          This is provided only for conformity with PettingZoo. Your
-          algorithm should not attempt to leverage observations outside of
-          trajectory bounds. You can omit garbage obs_i values by setting
-          omitDead=True.
-        infos:
-          A dictionary of agent infos of format:
-            {
-              agent_1: None,
-              agent_2: None,
-              ...
-            }
-          Provided for conformity with PettingZoo
-    '''
+    '''Performs one step in the environment given the provided actions.
+    Args:
+      actions (dict): Dictionary mapping agent IDs to their actions.
+    Returns:
+      tuple: A tuple containing:
+        - obs (dict): Dictionary mapping agent IDs to their new observations.
+        - rewards (dict): Dictionary mapping agent IDs to their rewards.
+        - terminated (dict): Dictionary mapping agent IDs to whether they reached
+          a terminal state.
+        - truncated (dict): Dictionary mapping agent IDs to whether the episode was
+          truncated (e.g. reached maximum number of steps).
+        - infos (dict): Dictionary containing additional information.
+    '''
     assert not self._reset_required, 'step() called before reset'
     # Add in scripted agents' actions, if any
@@ ... @@
     self._current_agents = list(set(self._alive_agents + list(self._dead_this_tick.keys())))
 
     terminated = {}
-    truncated = {}
     for agent_id in self._current_agents:
       if agent_id in self._dead_this_tick:
-        self._dead_agents.add(agent_id)
+        # NOTE: Even though players can be resurrected, the time of death must be marked.
         terminated[agent_id] = True
       else:
         terminated[agent_id] = False
 
-      if self.realm.tick >= self.config.HORIZON:
-        truncated[agent_id] = agent_id not in self._dead_agents
-      else:
-        truncated[agent_id] = False
-
     if self.realm.tick >= self.config.HORIZON:
       self._alive_agents = [] # pettingzoo requires agents to be empty
 
     # Update the game stats, determine winners, etc.
     # Also, resurrect dead agents and/or spawn new npcs if the game allows it
     self.game.update(terminated, self._dead_this_tick, dead_npcs)
 
+    # Some games do additional player cull during update(), so process truncated here
+    truncated = {}
+    for agent_id in self._current_agents:
+      if self.realm.tick >= self.config.HORIZON:
+        truncated[agent_id] = agent_id in self.realm.players
+      else:
+        truncated[agent_id] = False
+
     # Store the observations, since actions reference them
     self._compute_observations()
     gym_obs = {a: self.obs[a].to_gym() for a in self._current_agents}
@@ -557,20 +477,6 @@ def _update_comm_obs(self):
       self._comm_obs[eid] = team_obs
 
   def _compute_rewards(self):
-    '''Computes the reward for the specified agent
-    Override this method to create custom reward functions. You have full
-    access to the environment state via self.realm. Our baselines do not
-    modify this method; specify any changes when comparing to baselines
-    Args:
-      player: player object
-    Returns:
-      reward:
-        The reward for the actions on the previous timestep of the
-        entity identified by ent_id.
-    '''
     # Initialization
     agents = set(self._current_agents)
     infos = {agent_id: {'task': {}} for agent_id in agents}
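Per the rewritten reset() and step() docstrings above, the environment follows the PettingZoo parallel API with separate terminated and truncated dictionaries. A minimal usage sketch under those docstrings follows; the empty-action policy and the seed value are placeholders, not part of this commit, and the old docstring notes that unprovided actions are treated as no-ops.

import nmmo

env = nmmo.Env(nmmo.config.Default())
obs, infos = env.reset(seed=42)

while env.agents:  # becomes empty once every agent is terminated or truncated
  actions = {agent_id: {} for agent_id in env.agents}  # no-op placeholder policy
  obs, rewards, terminated, truncated, infos = env.step(actions)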

