From e9887a0029a7d231bb44c2434d80720418420d47 Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Tue, 16 Apr 2024 21:22:37 -0400 Subject: [PATCH 1/5] Add Syllabus code --- reinforcement_learning/environment.py | 26 +- syllabus_task_wrapper.py | 437 ++++++++++++++++++++++++++ train.py | 95 ++++-- 3 files changed, 531 insertions(+), 27 deletions(-) create mode 100644 syllabus_task_wrapper.py diff --git a/reinforcement_learning/environment.py b/reinforcement_learning/environment.py index 5426cbc9..9f1e1575 100644 --- a/reinforcement_learning/environment.py +++ b/reinforcement_learning/environment.py @@ -1,18 +1,19 @@ from argparse import Namespace +import nmmo +import nmmo.core.config as nc +import nmmo.core.game_api as ng import pufferlib import pufferlib.emulation - from pettingzoo.utils.wrappers.base_parallel import BaseParallelWrapper - -import nmmo -import nmmo.core.config as nc -import nmmo.core.game_api as ng +from syllabus.core import PettingZooMultiProcessingSyncWrapper +from syllabus_task_wrapper import NMMOTaskWrapper def alt_combat_damage_formula(offense, defense, multiplier, minimum_proportion): return int(max(multiplier * offense - defense, offense * minimum_proportion)) + class Config(nc.Medium, nc.Terrain, nc.Resource, nc.Combat, nc.NPC, nc.Progression, nc.Item, nc.Equipment, nc.Profession, nc.Exchange): '''Configuration for Neural MMO.''' @@ -63,11 +64,24 @@ def __init__(self, env_args: Namespace): self.set("EQUIPMENT_ARMOR_LEVEL_DEFENSE", 3) # from 10 -def make_env_creator(reward_wrapper_cls: BaseParallelWrapper): +def make_env_creator(reward_wrapper_cls: BaseParallelWrapper, task_wrapper=False, curriculum=None): def env_creator(*args, **kwargs): """Create an environment.""" env = nmmo.Env(Config(kwargs['env'])) # args.env is provided as kwargs + # TODO: make nmmo conform to the newer PettingZoo API and remove below line env = reward_wrapper_cls(env, **kwargs['reward_wrapper']) + + # Add Syllabus task wrapper + if task_wrapper or curriculum is not None: + env = NMMOTaskWrapper(env) + + # Use curriculum if provided + if curriculum is not None: + # Add Syllabus Sync Wrapper + env = PettingZooMultiProcessingSyncWrapper( + env, curriculum.get_components(), update_on_step=False, task_space=env.task_space, + ) + env = pufferlib.emulation.PettingZooPufferEnv(env) return env return env_creator diff --git a/syllabus_task_wrapper.py b/syllabus_task_wrapper.py new file mode 100644 index 00000000..18430e63 --- /dev/null +++ b/syllabus_task_wrapper.py @@ -0,0 +1,437 @@ +""" Task wrapper for NMMO. 
""" +import gym +import numpy as np +from nmmo.core import realm +from nmmo.core.agent import Agent +from nmmo.core.tile import Tile +from nmmo.entity.entity import Entity +from nmmo.lib.material import Harvestable, Material +from nmmo.systems import skill as nmmo_skill +from nmmo.systems.item import Item +from nmmo.systems.skill import Skill +from nmmo.task import base_predicates as bp +from nmmo.task import constraint +from nmmo.task import constraint as c +from nmmo.task import task_api, task_spec +from nmmo.task.base_predicates import AllDead, StayAlive +from nmmo.task.game_state import GameState +from nmmo.task.group import Group +from nmmo.task.task_api import OngoingTask, Task, make_predicate, make_same_task +from syllabus.core.task_interface import PettingZooTaskWrapper +from syllabus.task_space import TaskSpace + +import nmmo +from nmmo.lib import utils +from sample_tasks import tasks + + +class NMMOTaskWrapper(PettingZooTaskWrapper): + """ + Wrapper to handle tasks for the Neural MMO environment. + """ + # task_space = TaskSpace((18, 200), [tuple(np.arange(18)), tuple(np.arange(200))]) + task_space = TaskSpace(200) + + # task_space = TaskSpace((2719, 200), [tuple(np.arange(2719)), tuple(np.arange(200))]) + + def __init__(self, env: gym.Env): + super().__init__(env) + self.env = env + + self.task_list = self.sequential_task_list() + # self.task_list, task_names = self.create_manual_task_list() + # self.task_list = self._reformat_tasks(self.task_list) + self.task_space = NMMOTaskWrapper.task_space + self.task = None + self._task_index = None + self.task_fn = None + + def _parse_tasks(self): + """ + Parse LLM generated tasks from a python list of strings. + Strings are stored in tasks (imported from sample_tasks.py). + Returns a list of task functions. + """ + # Check if tasks are already parsed + # TODO: Find a way to efficiently compare current task list to parsed task list and overwrite if different + globs = globals() + if 'parsed_tasks' in globs: + parsed_tasks = [globs[task] for task in globs['parsed_tasks']] + return parsed_tasks + + parsed_predicates = [] + previous_globals = globs.copy() + for task in tasks: + # Check for common errors in LLM defined tasks + # NOTE: If the LLM decides to produce dangerous code, it will be executed + # DO NOT let users affect the input to the LLM. 
+ try: + exec(task.strip(), globs) + except NameError as e: + print(f"\nFailed to parse task: {repr(e)}\n", task.strip(), "\n") + except SyntaxError as e: + print(f"\nFailed to parse task: {repr(e)}\n", task.strip(), "\n") + + for name, obj in globals().items(): + if name not in previous_globals: + parsed_predicates.append(obj) + + globals()['parsed_tasks'] = parsed_tasks + return parsed_tasks + + def reset(self, **kwargs): + seed = kwargs.pop("seed", None) + new_task = kwargs.pop("new_task", None) + if new_task is not None: + self.change_task(new_task) + task = new_task + self.task = new_task + new_task_specs = self.task_list[task] + print(new_task_specs.eval_fn) + self.task_fn = task_spec.make_task_from_spec( + self.env.possible_agents, [new_task_specs] * len(self.env.possible_agents) + ) + if seed is not None: + self.env.seed(int(seed)) + if seed is not None: + obs, info = self.env.reset(seed=int(seed), make_task_fn=(lambda: self.task_fn) if self.task_fn is not None else None, **kwargs) + else: + obs, info = self.env.reset(make_task_fn=(lambda: self.task_fn) if self.task_fn is not None else None, **kwargs) + + return self.observation(obs), info + + def change_task(self, new_task): + pass + + def step(self, action): + obs, rew, terms, truncs, info = self.env.step(action) + # obs[1]["Task"] = self._task_index + return self.observation(obs), rew, terms, truncs, info + + def action_space(self, agent): + """Implement Neural MMO's action_space method.""" + return self.env.action_space(agent) + + def create_original_task_list(self): + return [('agent', StayAlive, {'task_cls': OngoingTask})] + + def sequential_task_list(self): + # Stage 1 - Survival + stage1 = [] + stage1.append(task_spec.TaskSpec(bp.TickGE, {'num_tick': 500}, reward_to='agent')) + stage1.append(task_spec.TaskSpec(bp.CountEvent, {'event': "EAT_FOOD", 'N': 20}, reward_to='agent')) + stage1.append(task_spec.TaskSpec(bp.CountEvent, {'event': "DRINK_WATER", 'N': 20}, reward_to='agent')) + stage1.append(task_spec.TaskSpec(bp.CountEvent, {'event': "GO_FARTHEST", 'N': 20}, reward_to='agent')) + + # Stage 2 - Harvest Equiptment + stage2 = [] + stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': c.ammunition, 'level': 1, 'quantity': 20}, reward_to='agent')) + stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': c.weapons, 'level': 1, 'quantity': 20}, reward_to='agent')) + + # # Stage 3 - Equip Weapons + stage3 = [] + stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.weapons, 'level': 1, 'num_agent': 1}, reward_to='agent')) + # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.ammunition, 'level': 1, 'num_agent': 1}, reward_to='agent')) + stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.weapons, 'level': 1, 'num_agent': 8}, reward_to='agent')) + # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.ammunition, 'level': 1, 'num_agent': 8}, reward_to='agent')) + + # # Stage 4 - Fight + stage4 = [] + stage4.append(task_spec.TaskSpec(bp.CanSeeGroup, {'target': 'all_foes'}, reward_to='agent')) + stage4.append(task_spec.TaskSpec(bp.CountEvent, {'event': "SCORE_HIT", 'N': 20}, reward_to='agent')) + + # # Stage 5 - Kill + stage5 = [] + stage5.append(task_spec.TaskSpec(bp.DefeatEntity, {'agent_type': 'player', 'level': 1, 'num_agent': 1}, reward_to='agent')) + + return stage1 + stage2 + stage3 + stage4 + stage5 + + def create_manual_task_list(self): + STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] + # AGENT_NUM_GOAL = [1] # competition team size: 8 + task_specs = [] + task_names = [] + + # Find resource 
tiles + for resource in Harvestable: + for reward_to in ['agent']: + spec = task_spec.TaskSpec(bp.CanSeeTile, {'tile_type': resource}, reward_to=reward_to) + task_specs.append(spec) + # task_names.append("see_" + resource.name) + + # Stay alive + for reward_to in ['agent']: + for num_tick in STAY_ALIVE_GOAL: + spec = task_spec.TaskSpec(bp.TickGE, {'num_tick': num_tick}, reward_to=reward_to) + task_specs.append(spec) + # task_names.append("stay_alive_" + str(num_tick)) + + # Explore the map + for dist in [10, 20, 30, 50, 100]: # each agent + spec = task_spec.TaskSpec(bp.DistanceTraveled, {'dist': dist}, reward_to=reward_to) + task_specs.append(spec) + # task_names.append("explore_" + str(dist) + "m") + + return task_specs, task_names + + def _create_testing_task_list(self): + """ + Manually generate a list of tasks used for testing. + """ + EVENT_NUMBER_GOAL = [1, 2, 3, 4, 5, 7, 9, 12, 15, 20, 30, 50] + INFREQUENT_GOAL = list(range(1, 10)) + STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] + TEAM_NUMBER_GOAL = [10, 20, 30, 50, 70, 100] + LEVEL_GOAL = list(range(1, 10)) # TODO: get config + AGENT_NUM_GOAL = [1] # competition team size: 8 + ITEM_NUM_GOAL = AGENT_NUM_GOAL + TEAM_ITEM_GOAL = [1, 3, 5, 7, 10, 15, 20] + SKILLS = c.combat_skills + c.harvest_skills + COMBAT_STYLE = c.combat_skills + ALL_ITEM = c.armour + c.weapons + c.tools + c.ammunition + c.consumables + EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition + HARVEST_ITEM = c.weapons + c.ammunition + c.consumables + + """ task_specs is a list of tuple (reward_to, predicate class, kwargs) + + each tuple in the task_specswill create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded + + predicate class from the base predicates or custom predicates like above + + kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids + * 'task_cls' is optional. If not provided, the standard Task is used. """ + task_specs = [] + + # explore, eat, drink, attack any agent, harvest any item, level up any skill + # which can happen frequently + essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', + 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] + for event_code in essential_skills: + task_specs += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in EVENT_NUMBER_GOAL] + + # item/market skills, which happen less frequently or should not do too much + item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', + 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] + for event_code in item_skills: + task_specs += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + for cnt in INFREQUENT_GOAL] # less than 10 + + # find resource tiles + for resource in Harvestable: + for reward_to in ['agent', 'team']: + task_specs.append((reward_to, bp.CanSeeTile, {'tile_type': resource})) + + # stay alive ... like ... 
for 300 ticks + # i.e., getting incremental reward for each tick alive as an individual or a team + for reward_to in ['agent', 'team']: + for num_tick in STAY_ALIVE_GOAL: + task_specs.append((reward_to, bp.TickGE, {'num_tick': num_tick})) + + # protect the leader: get reward for each tick the leader is alive + task_specs.append(('team', bp.StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) + + # want the other team or team leader to die + for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: + task_specs.append(('team', bp.AllDead, {'target': target})) + + # occupy the center tile, assuming the Medium map size + # TODO: it'd be better to have some intermediate targets toward the center + for reward_to in ['agent', 'team']: + task_specs.append((reward_to, bp.OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + + # form a tight formation, for a certain number of ticks + def PracticeFormation(gs, subject, dist, num_tick): + return bp.AllMembersWithinRange(gs, subject, dist) * bp.TickGE(gs, subject, num_tick) + + for dist in [1, 3, 5, 10]: + task_specs += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + + # find the other team leader + for reward_to in ['agent', 'team']: + for target in ['left_team_leader', 'right_team_leader']: + task_specs.append((reward_to, bp.CanSeeAgent, {'target': target})) + + # find the other team (any agent) + for reward_to in ['agent']: # , 'team']: + for target in ['left_team', 'right_team']: + task_specs.append((reward_to, bp.CanSeeGroup, {'target': target})) + + # explore the map -- sum the l-inf distance traveled by all subjects + for dist in [10, 20, 30, 50, 100]: # each agent + task_specs.append(('agent', bp.DistanceTraveled, {'dist': dist})) + for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members + task_specs.append(('team', bp.DistanceTraveled, {'dist': dist})) + + # level up a skill + for skill in SKILLS: + for level in LEVEL_GOAL: + # since this is an agent task, num_agent must be 1 + task_specs.append(('agent', bp.AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + + # make attain skill a team task by varying the number of agents + for skill in SKILLS: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_specs.append(('team', bp.AttainSkill, + {'skill': skill, 'level': level, 'num_agent': num_agent})) + + # practice specific combat style + for style in COMBAT_STYLE: + for cnt in EVENT_NUMBER_GOAL: + task_specs.append(('agent', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + for cnt in TEAM_NUMBER_GOAL: + task_specs.append(('team', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + + # defeat agents of a certain level as a team + for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_specs.append(('team', bp.DefeatEntity, + {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) + + # hoarding gold -- evaluated on the current gold + for amount in EVENT_NUMBER_GOAL: + task_specs.append(('agent', bp.HoardGold, {'amount': amount})) + for amount in TEAM_NUMBER_GOAL: + task_specs.append(('team', bp.HoardGold, {'amount': amount})) + + # earning gold -- evaluated on the total gold earned by selling items + # does NOT include looted gold + for amount in EVENT_NUMBER_GOAL: + 
task_specs.append(('agent', bp.EarnGold, {'amount': amount})) + for amount in TEAM_NUMBER_GOAL: + task_specs.append(('team', bp.EarnGold, {'amount': amount})) + + # spending gold, by buying items + for amount in EVENT_NUMBER_GOAL: + task_specs.append(('agent', bp.SpendGold, {'amount': amount})) + for amount in TEAM_NUMBER_GOAL: + task_specs.append(('team', bp.SpendGold, {'amount': amount})) + + # making profits by trading -- only buying and selling are counted + for amount in EVENT_NUMBER_GOAL: + task_specs.append(('agent', bp.MakeProfit, {'amount': amount})) + for amount in TEAM_NUMBER_GOAL: + task_specs.append(('team', bp.MakeProfit, {'amount': amount})) + + # managing inventory space + def PracticeInventoryManagement(gs, subject, space, num_tick): + return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) + for space in [2, 4, 8]: + task_specs += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) + for num_tick in STAY_ALIVE_GOAL] + + # own item, evaluated on the current inventory + for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_specs.append(('agent', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_specs.append(('team', bp.OwnItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # equip item, evaluated on the current inventory and equipment status + for item in EQUIP_ITEM: + for level in LEVEL_GOAL: + # agent task + task_specs.append(('agent', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': 1})) + + # team task + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_specs.append(('team', bp.EquipItem, + {'item': item, 'level': level, 'num_agent': num_agent})) + + # consume items (ration, potion), evaluated based on the event log + for item in c.consumables: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_specs.append(('agent', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_specs.append(('team', bp.ConsumeItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # harvest items, evaluated based on the event log + for item in HARVEST_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_specs.append(('agent', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_specs.append(('team', bp.HarvestItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # list items, evaluated based on the event log + for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_specs.append(('agent', bp.ListItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_specs.append(('team', bp.ListItem, + 
{'item': item, 'level': level, 'quantity': quantity})) + + # buy items, evaluated based on the event log + for item in ALL_ITEM: + for level in LEVEL_GOAL: + # agent task + for quantity in ITEM_NUM_GOAL: + if level + quantity <= 6 or quantity == 1: # heuristic prune + task_specs.append(('agent', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # team task + for quantity in TEAM_ITEM_GOAL: + if level + quantity <= 10 or quantity == 1: # heuristic prune + task_specs.append(('team', bp.BuyItem, + {'item': item, 'level': level, 'quantity': quantity})) + + # fully armed, evaluated based on the current player/inventory status + for style in COMBAT_STYLE: + for level in LEVEL_GOAL: + for num_agent in AGENT_NUM_GOAL: + if level + num_agent <= 6 or num_agent == 1: # heuristic prune + task_specs.append(('team', bp.FullyArmed, + {'combat_style': style, 'level': level, 'num_agent': num_agent})) + + packaged_task_specs = [] + for spec in task_specs: + reward_to = spec[0] + eval_fn = spec[1] + eval_fn_kwargs = spec[2] + packaged_task_specs.append(task_spec.TaskSpec(eval_fn, eval_fn_kwargs, reward_to=reward_to)) + + return packaged_task_specs diff --git a/train.py b/train.py index 7ffe135c..f6aab00d 100644 --- a/train.py +++ b/train.py @@ -1,17 +1,19 @@ -from pdb import set_trace as T -import importlib import argparse +import importlib import inspect import logging -import yaml -import time import sys +import time import pufferlib import pufferlib.utils +import yaml +from syllabus.core import MultiagentSharedCurriculumWrapper, make_multiprocessing_curriculum +from syllabus.curricula import SequentialCurriculum from reinforcement_learning import environment -from train_helper import init_wandb, train, sweep, generate_replay +from syllabus_task_wrapper import NMMOTaskWrapper +from train_helper import generate_replay, init_wandb, sweep, train DEBUG = False # See curriculum_generation/manual_curriculum.py for details @@ -35,6 +37,7 @@ def load_from_config(agent, debug=False): return pufferlib.namespace(**combined_config) + def get_init_args(fn): if fn is None: return {} @@ -53,15 +56,8 @@ def get_init_args(fn): args[name] = param.default if param.default is not inspect.Parameter.empty else None return args -# Return env_creator, agent_creator -def setup_agent(module_name): - try: - agent_module = importlib.import_module(f'agent_zoo.{module_name}') - except ModuleNotFoundError: - raise ValueError(f'Agent module {module_name} not found under the agent_zoo directory.') - - env_creator = environment.make_env_creator(reward_wrapper_cls=agent_module.RewardWrapper) +def setup_agent(agent_module): def agent_creator(env, args): policy = agent_module.Policy(env, **args.policy) if not args.no_recurrence and agent_module.Recurrent is not None: @@ -70,14 +66,8 @@ def agent_creator(env, args): else: policy = pufferlib.frameworks.cleanrl.Policy(policy) return policy.to(args.train.device) + return agent_creator - init_args = { - 'policy': get_init_args(agent_module.Policy.__init__), - 'recurrent': get_init_args(agent_module.Recurrent.__init__), - 'reward_wrapper': get_init_args(agent_module.RewardWrapper.__init__), - } - - return agent_module, env_creator, agent_creator, init_args def combine_config_args(parser, args, config): clean_parser = argparse.ArgumentParser(parents=[parser]) @@ -104,6 +94,7 @@ def combine_config_args(parser, args, config): clean_parser.parse_args(sys.argv[1:]) return args + def update_args(args, mode=None): args = pufferlib.namespace(**args) @@ -150,6 +141,34 @@ def 
update_args(args, mode=None): return args + +def create_sequential_curriculum(task_space): + curricula = [] + stopping = [] + + # Stage 1 - Survival + stage1 = [0, 1, 2, 3] + stopping.append("episode_return>=0.9&episodes>=5000") + + # # Stage 2 - Harvest Equiptment + stage2 = [4, 5] + stopping.append("episode_return>=0.9&episodes>=5000") + + # # Stage 3 - Equip Weapons + stage3 = [6, 7] + stopping.append("episode_return>=0.9&episodes>=5000") + + # # Stage 4 - Fight + stage4 = [8, 9] + stopping.append("episode_return>=0.9&episodes>=5000") + + # # Stage 5 - Kill + stage5 = [10] + + curricula = [stage1, stage2, stage3, stage4, stage5] + return SequentialCurriculum(curricula, stopping, task_space, return_buffer_size=5000) + + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser(description='Parse environment argument', add_help=False) @@ -160,6 +179,8 @@ def update_args(args, mode=None): parser.add_argument('-c', '--curriculum', type=str, default=BASELINE_CURRICULUM, help='Path to curriculum file') parser.add_argument('-t', '--task-to-assign', type=int, default=None, help='The index of the task to assign in the curriculum file') + parser.add_argument('--test-curriculum', type=str, default=BASELINE_CURRICULUM, help='Path to curriculum file') + parser.add_argument('--syllabus', type=bool, default=False, help='Use Syllabus for curriculum') #parser.add_argument('--baseline', action='store_true', help='Baseline run') parser.add_argument('--vectorization', type=str, default='multiprocessing', choices='serial multiprocessing ray'.split()) parser.add_argument('--no-recurrence', action='store_true', help='Do not use recurrence') @@ -172,7 +193,39 @@ def update_args(args, mode=None): args = parser.parse_known_args()[0].__dict__ config = load_from_config(args['agent'], debug=args.get('debug', False)) - agent_module, env_creator, agent_creator, init_args = setup_agent(args['agent']) + + try: + agent_module = importlib.import_module(f'agent_zoo.{args["agent"]}') + except ModuleNotFoundError: + raise ValueError(f'Agent module {args["agent"]} not found under the agent_zoo directory.') + + init_args = { + 'policy': get_init_args(agent_module.Policy.__init__), + 'recurrent': get_init_args(agent_module.Recurrent.__init__), + 'reward_wrapper': get_init_args(agent_module.RewardWrapper.__init__), + } + sample_env_creator = environment.make_env_creator(reward_wrapper_cls=agent_module.RewardWrapper, task_wrapper=True) + + # Set up curriculum + curriculum = None + if args.syllabus: + sample_env = sample_env_creator(env=args.env, postproc=args.postproc) + task_space = NMMOTaskWrapper.task_space + curriculum = create_sequential_curriculum(task_space) + curriculum = MultiagentSharedCurriculumWrapper(curriculum, sample_env.possible_agents) + curriculum = make_multiprocessing_curriculum(curriculum) + else: + args.env.curriculum_file_path = args.curriculum + + env_creator = environment.make_env_creator( + postprocessor_cls=agent_module.Postprocessor, curriculum=curriculum + ) + eval_env_creator = environment.make_eval_env_creator( + postprocessor_cls=agent_module.Postprocessor, stat_prefix="eval", curriculum=curriculum + ) + eval_env_creator = None + + agent_creator = setup_agent(agent_module) # Update config with environment defaults config.policy = {**init_args['policy'], **config.policy} From 7ccce0b30c6d2481d2f4aa6be9cb2a3b9e64a6ec Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Wed, 17 Apr 2024 01:39:33 -0400 Subject: [PATCH 2/5] Fix integration issues so code runs --- 
reinforcement_learning/stat_wrapper.py | 3 +++ syllabus_task_wrapper.py | 32 ++++++++++++------------- train.py | 33 +++++++++++--------------- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/reinforcement_learning/stat_wrapper.py b/reinforcement_learning/stat_wrapper.py index 0ae4a475..3f8e066f 100644 --- a/reinforcement_learning/stat_wrapper.py +++ b/reinforcement_learning/stat_wrapper.py @@ -21,6 +21,9 @@ def __init__( self._reset_episode_stats() self._stat_prefix = stat_prefix + def seed(self, seed): + self.env.seed(seed) + def observation(self, agent_id, agent_obs): '''Called before observations are returned from the environment Use this to define custom featurizers. Changing the space itself requires you to diff --git a/syllabus_task_wrapper.py b/syllabus_task_wrapper.py index 18430e63..2aa18e34 100644 --- a/syllabus_task_wrapper.py +++ b/syllabus_task_wrapper.py @@ -10,8 +10,8 @@ from nmmo.systems.item import Item from nmmo.systems.skill import Skill from nmmo.task import base_predicates as bp -from nmmo.task import constraint -from nmmo.task import constraint as c +from nmmo.systems import item as i +from nmmo.entity import entity as e from nmmo.task import task_api, task_spec from nmmo.task.base_predicates import AllDead, StayAlive from nmmo.task.game_state import GameState @@ -22,7 +22,6 @@ import nmmo from nmmo.lib import utils -from sample_tasks import tasks class NMMOTaskWrapper(PettingZooTaskWrapper): @@ -87,7 +86,6 @@ def reset(self, **kwargs): task = new_task self.task = new_task new_task_specs = self.task_list[task] - print(new_task_specs.eval_fn) self.task_fn = task_spec.make_task_from_spec( self.env.possible_agents, [new_task_specs] * len(self.env.possible_agents) ) @@ -125,15 +123,15 @@ def sequential_task_list(self): # Stage 2 - Harvest Equiptment stage2 = [] - stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': c.ammunition, 'level': 1, 'quantity': 20}, reward_to='agent')) - stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': c.weapons, 'level': 1, 'quantity': 20}, reward_to='agent')) + stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': i.Ammunition, 'level': 1, 'quantity': 20}, reward_to='agent')) + stage2.append(task_spec.TaskSpec(bp.HarvestItem, {'item': i.Weapon, 'level': 1, 'quantity': 20}, reward_to='agent')) # # Stage 3 - Equip Weapons stage3 = [] - stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.weapons, 'level': 1, 'num_agent': 1}, reward_to='agent')) - # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.ammunition, 'level': 1, 'num_agent': 1}, reward_to='agent')) - stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.weapons, 'level': 1, 'num_agent': 8}, reward_to='agent')) - # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': c.ammunition, 'level': 1, 'num_agent': 8}, reward_to='agent')) + stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': i.Weapon, 'level': 1, 'num_agent': 1}, reward_to='agent')) + # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': i.ammunition, 'level': 1, 'num_agent': 1}, reward_to='agent')) + stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': i.Weapon, 'level': 1, 'num_agent': 8}, reward_to='agent')) + # stage3.append(task_spec.TaskSpec(bp.EquipItem, {'item': i.ammunition, 'level': 1, 'num_agent': 8}, reward_to='agent')) # # Stage 4 - Fight stage4 = [] @@ -186,11 +184,11 @@ def _create_testing_task_list(self): AGENT_NUM_GOAL = [1] # competition team size: 8 ITEM_NUM_GOAL = AGENT_NUM_GOAL TEAM_ITEM_GOAL = [1, 3, 5, 7, 10, 15, 20] - SKILLS = c.combat_skills + 
c.harvest_skills - COMBAT_STYLE = c.combat_skills - ALL_ITEM = c.armour + c.weapons + c.tools + c.ammunition + c.consumables - EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition - HARVEST_ITEM = c.weapons + c.ammunition + c.consumables + SKILLS = e.combat_skills + e.harvest_skills + COMBAT_STYLE = e.combat_skills + ALL_ITEM = i.armour + i.weapons + i.tools + i.ammunition + i.consumables + EQUIP_ITEM = i.armour + i.weapons + i.tools + i.ammunition + HARVEST_ITEM = i.weapons + i.ammunition + i.consumables """ task_specs is a list of tuple (reward_to, predicate class, kwargs) @@ -297,7 +295,7 @@ def PracticeFormation(gs, subject, dist, num_tick): for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune task_specs.append(('team', bp.DefeatEntity, - {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) + {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) # hoarding gold -- evaluated on the current gold for amount in EVENT_NUMBER_GOAL: @@ -360,7 +358,7 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): {'item': item, 'level': level, 'num_agent': num_agent})) # consume items (ration, potion), evaluated based on the event log - for item in c.consumables: + for item in i.consumables: for level in LEVEL_GOAL: # agent task for quantity in ITEM_NUM_GOAL: diff --git a/train.py b/train.py index f6aab00d..3f5284dd 100644 --- a/train.py +++ b/train.py @@ -204,12 +204,24 @@ def create_sequential_curriculum(task_space): 'recurrent': get_init_args(agent_module.Recurrent.__init__), 'reward_wrapper': get_init_args(agent_module.RewardWrapper.__init__), } + + # Update config with environment defaults + config.policy = {**init_args['policy'], **config.policy} + config.recurrent = {**init_args['recurrent'], **config.recurrent} + config.reward_wrapper = {**init_args['reward_wrapper'], **config.reward_wrapper} + + # Generate argparse menu from config + args = combine_config_args(parser, args, config) + + # Perform mode-specific updates + args = update_args(args, mode=args['mode']) + sample_env_creator = environment.make_env_creator(reward_wrapper_cls=agent_module.RewardWrapper, task_wrapper=True) # Set up curriculum curriculum = None if args.syllabus: - sample_env = sample_env_creator(env=args.env, postproc=args.postproc) + sample_env = sample_env_creator(env=args.env, reward_wrapper=args.reward_wrapper) task_space = NMMOTaskWrapper.task_space curriculum = create_sequential_curriculum(task_space) curriculum = MultiagentSharedCurriculumWrapper(curriculum, sample_env.possible_agents) @@ -217,27 +229,10 @@ def create_sequential_curriculum(task_space): else: args.env.curriculum_file_path = args.curriculum - env_creator = environment.make_env_creator( - postprocessor_cls=agent_module.Postprocessor, curriculum=curriculum - ) - eval_env_creator = environment.make_eval_env_creator( - postprocessor_cls=agent_module.Postprocessor, stat_prefix="eval", curriculum=curriculum - ) - eval_env_creator = None + env_creator = environment.make_env_creator(reward_wrapper_cls=agent_module.RewardWrapper, curriculum=curriculum) agent_creator = setup_agent(agent_module) - # Update config with environment defaults - config.policy = {**init_args['policy'], **config.policy} - config.recurrent = {**init_args['recurrent'], **config.recurrent} - config.reward_wrapper = {**init_args['reward_wrapper'], **config.reward_wrapper} - - # Generate argparse menu from config - args = combine_config_args(parser, args, config) - - # Perform mode-specific updates - 
args = update_args(args, mode=args['mode']) - if args.train.env_pool is True: logging.warning('Env_pool is enabled. This may increase training speed but break determinism.') From 814c0d7e9e8c20b0e4f91c8fe2082aa997e356e1 Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Wed, 17 Apr 2024 01:41:23 -0400 Subject: [PATCH 3/5] Cleanup --- reinforcement_learning/environment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reinforcement_learning/environment.py b/reinforcement_learning/environment.py index 9f1e1575..67098e86 100644 --- a/reinforcement_learning/environment.py +++ b/reinforcement_learning/environment.py @@ -68,7 +68,6 @@ def make_env_creator(reward_wrapper_cls: BaseParallelWrapper, task_wrapper=False def env_creator(*args, **kwargs): """Create an environment.""" env = nmmo.Env(Config(kwargs['env'])) # args.env is provided as kwargs - # TODO: make nmmo conform to the newer PettingZoo API and remove below line env = reward_wrapper_cls(env, **kwargs['reward_wrapper']) # Add Syllabus task wrapper From 41dfec0243c47532308d9f2b4381070c57fb0ad7 Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Wed, 17 Apr 2024 02:07:31 -0400 Subject: [PATCH 4/5] Fix merge --- train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 1cb5a613..9ee26c34 100644 --- a/train.py +++ b/train.py @@ -60,6 +60,8 @@ def get_init_args(fn): def setup_agent(agent_module): + recurrent_policy = getattr(agent_module, "Recurrent", None) + def agent_creator(env, args): policy = agent_module.Policy(env, **args.policy) if not args.no_recurrence and recurrent_policy is not None: @@ -71,7 +73,6 @@ def agent_creator(env, args): return agent_creator - def combine_config_args(parser, args, config): clean_parser = argparse.ArgumentParser(parents=[parser]) for name, sub_config in config.items(): From 1195589180c075bbb99e5dcdcaddde4dd67f1130 Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Wed, 17 Apr 2024 02:15:23 -0400 Subject: [PATCH 5/5] Remove config changes --- reinforcement_learning/environment.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/reinforcement_learning/environment.py b/reinforcement_learning/environment.py index 90897e7a..7a270dc0 100644 --- a/reinforcement_learning/environment.py +++ b/reinforcement_learning/environment.py @@ -50,32 +50,6 @@ def __init__(self, env_args: Namespace): self.set("GAME_PACKS", [(ng.AgentTraining, 1)]) self.set("CURRICULUM_FILE_PATH", env_args.curriculum_file_path) - # Game-balancing related, making the game somewhat easier - # since all agents are on their own (no team play) - self.set("TERRAIN_SCATTER_EXTRA_RESOURCES", True) # extra food/water - - self.set("PROGRESSION_COMBAT_XP_SCALE", 6) # from 3 - - self.set("COMBAT_DAMAGE_FORMULA", alt_combat_damage_formula) - - self.set("NPC_LEVEL_DEFENSE", 8) # from 15 - self.set("NPC_BASE_DAMAGE", 0) # from 15 - self.set("NPC_LEVEL_DAMAGE", 8) # from 15 - - self.set("PROGRESSION_MELEE_BASE_DAMAGE", 10) # from 20 - self.set("PROGRESSION_RANGE_BASE_DAMAGE", 10) - self.set("PROGRESSION_MAGE_BASE_DAMAGE", 10) - - self.set("EQUIPMENT_WEAPON_BASE_DAMAGE", 5) # from 15 - self.set("EQUIPMENT_WEAPON_LEVEL_DAMAGE", 5) # from 15 - - self.set("EQUIPMENT_AMMUNITION_BASE_DAMAGE", 0) # from 15 - self.set("EQUIPMENT_AMMUNITION_LEVEL_DAMAGE", 10) # from 15 - - self.set("EQUIPMENT_TOOL_BASE_DEFENSE", 15) # from 30 - - self.set("EQUIPMENT_ARMOR_LEVEL_DEFENSE", 3) # from 10 - def make_env_creator(reward_wrapper_cls: BaseParallelWrapper, task_wrapper=False, curriculum=None): def 
env_creator(*args, **kwargs):
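
The patches above integrate Syllabus in three places: NMMOTaskWrapper exposes a TaskSpace over NMMO task specs, create_sequential_curriculum() in train.py builds a SequentialCurriculum over stage indices into that task list, and make_env_creator() wraps each environment in a PettingZooMultiProcessingSyncWrapper so worker processes receive tasks from the shared curriculum. The following is a condensed sketch of that wiring, using only the Syllabus calls that appear in the diff; build_curriculum and wrap_env are illustrative helper names rather than functions from this repository.

from syllabus.core import (MultiagentSharedCurriculumWrapper,
                           PettingZooMultiProcessingSyncWrapper,
                           make_multiprocessing_curriculum)
from syllabus.curricula import SequentialCurriculum

from syllabus_task_wrapper import NMMOTaskWrapper


def build_curriculum(possible_agents):
    # Stage indices refer to entries of NMMOTaskWrapper.sequential_task_list(),
    # mirroring create_sequential_curriculum() in train.py: survival, harvesting,
    # equipping, fighting, and finally defeating another player.
    stages = [[0, 1, 2, 3], [4, 5], [6, 7], [8, 9], [10]]
    # One stopping condition per stage transition (len(stages) - 1 of them).
    stopping = ["episode_return>=0.9&episodes>=5000"] * (len(stages) - 1)
    curriculum = SequentialCurriculum(
        stages, stopping, NMMOTaskWrapper.task_space, return_buffer_size=5000
    )
    # Share a single curriculum across all agents, then move it behind a
    # multiprocessing interface so environment workers can query it.
    curriculum = MultiagentSharedCurriculumWrapper(curriculum, possible_agents)
    return make_multiprocessing_curriculum(curriculum)


def wrap_env(env, curriculum):
    # Same ordering as make_env_creator(): the task wrapper first, then the
    # sync wrapper that pulls new tasks from the curriculum process on reset.
    env = NMMOTaskWrapper(env)
    return PettingZooMultiProcessingSyncWrapper(
        env, curriculum.get_components(), update_on_step=False,
        task_space=env.task_space,
    )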