From 6e08c06cf5cd9d0e9ae8e40eb7d683089119942d Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Fri, 5 Apr 2024 23:50:30 -0700
Subject: [PATCH] added donot attack npc to takeru

---
 agent_zoo/takeru/reward_wrapper.py | 13 +
 agent_zoo/takeru/submitted_env.py | 673 +++++++++++++++++++++++++++++
 config.yaml | 1 +
 3 files changed, 687 insertions(+)
 create mode 100644 agent_zoo/takeru/submitted_env.py

diff --git a/agent_zoo/takeru/reward_wrapper.py b/agent_zoo/takeru/reward_wrapper.py
index cbd4e477..d2c42013 100644
--- a/agent_zoo/takeru/reward_wrapper.py
+++ b/agent_zoo/takeru/reward_wrapper.py
@@ -1,5 +1,11 @@
+import numpy as np
+
+from nmmo.entity.entity import EntityState
+
 from reinforcement_learning.stat_wrapper import BaseStatWrapper
 
+EntityAttr = EntityState.State.attr_name_to_col
+
 
 class RewardWrapper(BaseStatWrapper):
     def __init__(
@@ -13,12 +19,14 @@ def __init__(
         explore_bonus_weight=0,
         clip_unique_event=3,
         disable_give=True,
+        donot_attack_dangerous_npc=True,
     ):
         super().__init__(env, eval_mode, early_stop_agent_num, stat_prefix)
         self.stat_prefix = stat_prefix
         self.explore_bonus_weight = explore_bonus_weight
         self.clip_unique_event = clip_unique_event
         self.disable_give = disable_give
+        self.donot_attack_dangerous_npc = donot_attack_dangerous_npc
 
     def observation(self, agent_id, agent_obs):
         """Called before observations are returned from the environment
@@ -32,6 +40,11 @@ def observation(self, agent_id, agent_obs):
             agent_obs["ActionTargets"]["GiveGold"]["Target"][:-1] = 0
             agent_obs["ActionTargets"]["GiveGold"]["Price"][1:] = 0
 
+        if self.donot_attack_dangerous_npc is True:
+            # npc type: 1: passive, 2: neutral, 3: hostile
+            dangerous_npc_idxs = np.where(agent_obs["Entity"][:, EntityAttr["npc_type"]] > 1)
+            agent_obs["ActionTargets"]["Attack"]["Target"][dangerous_npc_idxs] = 0
+
         return agent_obs
 
     def reward_terminated_truncated_info(self, agent_id, reward, terminated, truncated, info):
diff --git a/agent_zoo/takeru/submitted_env.py b/agent_zoo/takeru/submitted_env.py
new file mode 100644
index 00000000..ed3404da
--- /dev/null
+++ b/agent_zoo/takeru/submitted_env.py
@@ -0,0 +1,673 @@
+# ruff: noqa
+# NOTE by nmmo competition organizers
+# This is the submitted env/reward file.
+# Although this file does not work with the current repo, we include it here as a reference.
+
+from typing import Dict, List, Optional
+from nmmo.task.task_spec import TaskSpec
+import numpy as np
+import dill
+import json
+from types import SimpleNamespace
+
+from argparse import Namespace
+import math
+import copy
+
+import nmmo
+from nmmo.lib.log import EventCode
+from nmmo.core.observation import Observation
+from nmmo.systems.skill import Skills
+from nmmo.entity.entity import Entity
+
+import pufferlib
+import pufferlib.emulation
+
+from leader_board import StatPostprocessor, calculate_entropy
+
+_DEBUG_TASK_REWARD = False
+# Specify a task setting here to debug the reward shaping for that task
+_DEBUG_TASK_SETTING = {}  # {"harvest": {"Fishing": 0.01}}
+
+_EVENTS = [
+    "EAT_FOOD",
+    "DRINK_WATER",
+    "GO_FARTHEST",
+    "SCORE_HIT",
+    "PLAYER_KILL",
+    "CONSUME_ITEM",
+    "GIVE_ITEM",
+    "DESTROY_ITEM",
+    "HARVEST_ITEM",
+    "EQUIP_ITEM",
+    "LOOT_ITEM",
+    "GIVE_GOLD",
+    "LIST_ITEM",
+    "EARN_GOLD",
+    "BUY_ITEM",
+    "LEVEL_UP",
+]
+EVENTCODE_TO_EVENT = {getattr(EventCode, _): _ for _ in _EVENTS}
+_COLS = [
+    "type",
+    "level",
+    "number",
+    "gold",
+    "target_ent",
+]
+
+
+class Config(nmmo.config.Default):
+    """Configuration for Neural MMO."""
+
+    def __init__(self, args: Namespace):
+        super().__init__()
+
+        self.PROVIDE_ACTION_TARGETS = True
+        self.PROVIDE_NOOP_ACTION_TARGET = True
+        self.MAP_FORCE_GENERATION = False
+        self.PLAYER_N = args.num_agents
+        self.HORIZON = args.max_episode_length
+        self.MAP_N = args.num_maps
+        self.PLAYER_DEATH_FOG = args.death_fog_tick
+        self.PATH_MAPS = f"{args.maps_path}/{args.map_size}/"
+        self.MAP_CENTER = args.map_size
+        self.NPC_N = args.num_npcs
+        self.CURRICULUM_FILE_PATH = args.tasks_path
+        self.TASK_EMBED_DIM = args.task_size
+        self.RESOURCE_RESILIENT_POPULATION = args.resilient_population
+
+        self.COMMUNICATION_SYSTEM_ENABLED = False
+
+        self.COMBAT_SPAWN_IMMUNITY = args.spawn_immunity
+
+
+class Postprocessor(StatPostprocessor):
+    def __init__(
+        self,
+        env,
+        is_multiagent,
+        agent_id,
+        eval_mode=False,
+        early_stop_agent_num=0,
+        sqrt_achievement_rewards=False,
+        heal_bonus_weight=0,
+        meander_bonus_weight=0,
+        explore_bonus_weight=0,
+        task_learning_bonus_weight=0,
+        alive_bonus_weight=0,
+        clip_unique_event=3,
+        adjust_ori_reward=False,
+        train_tasks_info=None,
+        task_reward_settings=None,
+        debug_print_events=False,
+    ):
+        super().__init__(env, agent_id, eval_mode)
+        self.early_stop_agent_num = early_stop_agent_num
+        self.sqrt_achievement_rewards = sqrt_achievement_rewards
+        self.heal_bonus_weight = heal_bonus_weight
+        self.meander_bonus_weight = meander_bonus_weight
+        self.explore_bonus_weight = explore_bonus_weight
+        self.clip_unique_event = clip_unique_event
+
+        self.adjust_ori_reward = adjust_ori_reward
+
+        self.debug_print_events = debug_print_events
+
+        self.alive_bonus_weight = alive_bonus_weight
+
+        # Task-reward bookkeeping
+        self.train_tasks_info = train_tasks_info
+        self._task_index: Optional[int] = None  # index of the current task in `train_tasks_info`
+        self.task_learning_bonus_weight = task_learning_bonus_weight
+        self.task_reward_settings = task_reward_settings  # reward settings for the various task types
+        self.task_reward_setting: Optional[Dict] = None  # reward setting of the task assigned to this agent
+
+        self.prev_done = False  # previous done state
+
+    def _reset_task_reward_state(self) -> None:
+        # tiles that have been seen (keyed by coordinate)
+        self._seen_tiles = {
+            "co": set(),  # set of coordinates
+            "last_update_tick": 0,  # tick of the last update
+        }
+
+        # tiles that have been visited (keyed by coordinate)
+        self._been_tiles = {
+            "co": set(),  # set of coordinates
+            "last_update_tick": 0,  # tick of the last update
+        }
+
+        self._last_damage_inflicted = 0  # total damage inflicted as of the last check
+
+        self._last_harvest_skill_exp = 0  # harvest-skill exp as of the last check
+
+        self._history_own = {}  # per-item record of the highest quantity owned
+
+    def reset(self, obs):
+        """Called at the start of each episode"""
+        super().reset(obs)
+
+        self.prev_done = False
+
+        if self.task_learning_bonus_weight:
+            self._update_task_index(obs["Task"])
+            self._reset_task_reward_state()
+
+            setting = self._get_task_reward_setting()
+            # # Treat earning gold as an essential skill
+            # if True:
+            #     setting = copy.deepcopy(setting)
+            #     _ = setting.setdefault("log_value", {}).setdefault("EARN_GOLD", 0.0)
+            #     setting["log_value"]["EARN_GOLD"] += 0.01
+            self.task_reward_setting = setting
+
+    @property
+    def observation_space(self):
+        """If you modify the shape of features, you need to specify the new obs space"""
+        return super().observation_space
+
+    """
+    def observation(self, obs):
+        '''Called before observations are returned from the environment
+
+        Use this to define custom featurizers. Changing the space itself requires you to
+        define the observation space again (i.e. Gym.spaces.Dict(gym.spaces....))
+        '''
+        return obs
+
+    def action(self, action):
+        '''Called before actions are passed from the model to the environment'''
+        return action
+    """
+
+    def reward_done_info(self, reward, done, info):
+        """Called on reward, done, and info before they are returned from the environment"""
+        self.env: nmmo.Env
+
+        if self.adjust_ori_reward:
+            reward = self._adjust_ori_reward(reward, done, info)
+            # if reward:
+            #     print(f"reward: {reward}, agent_id {self.agent_id}, done {done}, info: {info}")
+
+        # Stop early if there are too few agents generating the training data
+        if len(self.env.agents) <= self.early_stop_agent_num:
+            done = True
+
+        reward, done, info = super().reward_done_info(reward, done, info)
+
+        # Default reward shaper sums team rewards.
+        # Add custom reward shaping here.
+
+        # Add "Healing" score based on health increase and decrease due to food and water
+        healing_bonus = 0
+        if self.heal_bonus_weight and self.agent_id in self.env.realm.players:
+            if self.env.realm.players[self.agent_id].resources.health_restore > 0:
+                healing_bonus = self.heal_bonus_weight
+
+        # Add meandering bonus to encourage moving to various directions
+        meander_bonus = 0
+        if self.meander_bonus_weight and len(self._last_moves) > 5:
+            move_entropy = calculate_entropy(self._last_moves[-8:])  # of last 8 moves
+            meander_bonus = self.meander_bonus_weight * (move_entropy - 1)
+
+        # Unique event-based rewards, similar to exploration bonus
+        # The number of unique events is available in self._curr_unique_count, self._prev_unique_count
+        explore_bonus = 0
+        if self.explore_bonus_weight:
+            if self.sqrt_achievement_rewards:
+                explore_bonus = math.sqrt(self._curr_unique_count) - math.sqrt(
+                    self._prev_unique_count
+                )
+            else:
+                explore_bonus = min(
+                    self.clip_unique_event,
+                    self._curr_unique_count - self._prev_unique_count,
+                )
+            explore_bonus *= self.explore_bonus_weight
+
+        # Reward for simply staying alive
+        alive_bonus = 0
+        if self.alive_bonus_weight and not done:
+            alive_bonus = self._get_alive_bonus()
+            alive_bonus *= self.alive_bonus_weight
+
+        # Different hand-crafted rewards for different tasks
+        task_learning_bonus = 0
+        if self.task_learning_bonus_weight and not done:
+            task_learning_bonus = self._get_task_learning_bonus()
+            task_learning_bonus *= self.task_learning_bonus_weight
+
+        if self.debug_print_events and done:
+            self._print_agent_all_events()
+
+        reward = reward + explore_bonus + healing_bonus + meander_bonus
+        reward += alive_bonus
+        reward += task_learning_bonus
+
+        self.prev_done = done
+
+        return reward, done, info
+
+    def _adjust_ori_reward(self, reward, done, info) -> float:
+        """Adjust the magnitude of the original reward.
+        NOTE: original rewards: each step of task progress gives 1 / (progress bar size),
+        completing the task gives 1, and dying gives -1.
+        """
+        if not reward:
+            return reward
+
+        task_infos = list(info["task"].values())
+        assert len(task_infos) == 1
+        task_info = task_infos[0]
+
+        if reward == -1:
+            assert done
+            if task_info["completed"]:
+                # Dying after completing the task is acceptable
+                return -0.1
+            else:
+                # Heavier penalty for dying before completing the task
+                return -10.0
+
+        if reward == 1:
+            assert task_info["completed"]
+            # Larger reward for completing the task
+            return 10.0
+
+        return reward
+
+    @property
+    def _eval_fn_name(self):
+        return self.train_tasks_info.eval_fn_name[self._task_index]
+
+    @property
+    def _eval_fn_kwargs(self):
+        return self.train_tasks_info.eval_fn_kwargs[self._task_index]
+
+    def _update_task_index(self, task_embedding: np.ndarray) -> None:
+        """Find the task index from the task embedding"""
+        if self.eval_mode:
+            self._task_index = None
+            return
+
+        assert task_embedding.shape == (4096,)
+
+        # diff = self.train_tasks_info.embedding_mat - task_embedding
+        # diff = np.sum(diff**2, axis=-1)
+        # (indexes,) = np.where(diff == 0)
+
+        (indexes,) = np.where((self.train_tasks_info.embedding_mat == task_embedding).all(axis=1))
+
+        n_matched_task = len(indexes)
+        assert n_matched_task == 1, f"{n_matched_task} task match emb ({task_embedding})"
+
+        self._task_index = int(indexes[0])
+
+        assert self._task_index < self.train_tasks_info.n
+
+        # if _DEBUG_TASK_REWARD and self.agent_id <= 20:
+        #     print(
+        #         f"agent_id {self.agent_id}, task index {self._task_index}"
+        #         f", {self.train_tasks_info.eval_fn_name[self._task_index]}"
+        #         f", {self.train_tasks_info.eval_fn_kwargs[self._task_index]}"
+        #     )
+
+        return
+
+    def _get_task_reward_setting(self) -> Dict:
+        if _DEBUG_TASK_REWARD and _DEBUG_TASK_SETTING:
+            return _DEBUG_TASK_SETTING
+
+        # Content of the current task
+        _eval_fn_name = self._eval_fn_name
+        _eval_fn_kwargs = self._eval_fn_kwargs
+
+        if _eval_fn_name not in self.task_reward_settings:
+            # print(f"Reward of eval fn {_eval_fn_name} not set")
+            return {}
+
+        # Reward settings for this eval_fn category
+        eval_fn_setting: Dict = self.task_reward_settings[_eval_fn_name]
+        # Reward setting for the specific task content
+        try:
+            ret: Dict = eval_fn_setting[_eval_fn_kwargs[eval_fn_setting["_key"]]]
+        except:
+            ret: Dict = eval_fn_setting["_default"]
+
+        return ret
+
+    def _get_alive_bonus(self) -> float:
+        ret = 0
+
+        cur_tick = self.env.realm.tick
+        entity: Entity = self.env.realm.players.entities[self.agent_id]
+
+        # ret = cur_tick / 1024  # at most 1
+        # entity.damage.val  # damage taken from being attacked
+
+        # Negative feedback for lacking survival resources
+        # if entity.food.val == 0:
+        #     ret += -0.001
+        # if entity.water.val == 0:
+        #     ret += -0.001
+
+        # Negative feedback for low health
+        health_lost = 100 - entity.health.val
+        if health_lost > 50:
+            ret += -(health_lost - 50) / 50 * 0.001
+
+        return ret
+
+    def _get_task_learning_bonus(self) -> float:
+        ret = 0
+
+        setting = self.task_reward_setting
+
+        # Compute the reward from each reward-extraction method
+        for reward_type, args in setting.items():
+            if reward_type == "log":
+                _reward = self._task_log_bonus(args)
+            elif reward_type == "log_value":  # consider not only whether the event happened but also its value
+                _reward = self._task_log_bonus(args, use_value=True)
+            elif reward_type == "wander":
+                _reward = self._task_wander_bonus(args)
+            elif reward_type == "wander_occupy":
+                _reward = self._task_wander_occupy_bonus(args)
+            elif reward_type == "attack":
+                _reward = self._task_attack_bonus(args)
+            elif reward_type == "harvest":
+                _reward = self._task_harvest_bonus(args)
+            elif reward_type == "own":
+                _reward = self._task_own_bonus(args)
+            else:
+                raise Exception(f"Invalid reward type {reward_type}")
+
+            ret += _reward
+
+            # if _DEBUG_TASK_REWARD and _reward and self.agent_id <= 20:
+            #     print(
+            #         f"agent_id {self.agent_id}, current_tick {self.env.realm.tick}"
+            #         f", task learning bonus: type {reward_type}, setting {setting}, reward {_reward}"
+            #         f", # players remain {len(self.env.realm.players.entities)}"
+            #     )
+
+        return ret
+
+    def _task_log_bonus(self, args: Dict, use_value: bool = False) -> float:
+        """[Task reward] reward based on specific event-log entries"""
+        ret = 0
+
+        assert args
+
+        cur_tick = self.env.realm.tick
+        cur_logs = self.env.realm.event_log.get_data(agents=[self.agent_id], tick=cur_tick)
+
+        attr_to_col = self.env.realm.event_log.attr_to_col
+
+        for line in cur_logs:
+            event_name = EVENTCODE_TO_EVENT.get(line[attr_to_col["event"]], "")
+
+            if event_name in args:
+                if use_value:
+                    if event_name == "EARN_GOLD":
+                        value = line[attr_to_col["gold"]]
+                    else:
+                        raise NotImplementedError(event_name)
+                    ret += args[event_name] * value
+                else:
+                    ret += args[event_name]
+
+        return ret
+
+    def _task_wander_bonus(self, args: Dict) -> float:
+        """[Task reward] wandering reward"""
+        ret = 0
+
+        # reward per newly seen tile
+        per_tile = args["per_tile"]
+
+        obs: Observation = self.env.obs[self.agent_id]
+        current_tick = obs.current_tick
+        visible_tiles = obs.tiles
+
+        # update the set of seen tiles
+        n_new_seen_tiles = 0
+        for tile in visible_tiles:
+            x, y, t = tile
+            if (x, y) not in self._seen_tiles["co"]:
+                n_new_seen_tiles += 1
+                self._seen_tiles["co"].add((x, y))
+        self._seen_tiles["last_update_tick"] = current_tick
+
+        # do not count the first step
+        if current_tick > 1:
+            ret += n_new_seen_tiles * per_tile
+
+        # if _DEBUG_TASK_REWARD and self.agent_id == 1:
+        #     print(
+        #         f"agent_id {self.agent_id}, current_tick {current_tick}"
+        #         f", n_new_seen_tiles {n_new_seen_tiles}"
+        #     )
+
+        return ret
+
+    def _task_wander_occupy_bonus(self, args: Dict) -> float:
+        """[Task reward] wandering reward, given when the agent reaches a new coordinate"""
+        ret = 0
+
+        # reward per newly visited tile
+        per_tile = args["per_tile"]
+
+        entity: Entity = self.env.realm.players.entities[self.agent_id]
+        current_tick = self.env.realm.tick
+
+        # update the set of visited tiles
+        if entity.pos not in self._been_tiles["co"]:
+            self._been_tiles["co"].add(entity.pos)
+            # do not count the first step
+            if current_tick > 1:
+                ret += per_tile
+
+        self._been_tiles["last_update_tick"] = current_tick
+
+        # if _DEBUG_TASK_REWARD and self.agent_id == 1:
+        #     print(
+        #         f"agent_id {self.agent_id}, current_tick {current_tick}"
+        #         f", self._been_tiles {self._been_tiles}, +reward {ret}"
+        #     )
+
+        return ret
+
+    def _task_attack_bonus(self, args: Dict) -> float:
+        """[Task reward] attack reward"""
+        ret = 0
+
+        entity = self.env.realm.players.entities[self.agent_id]  # dead players are removed
+        current_tick = self.env.realm.tick
+
+        # new damage was inflicted
+        if entity.history.damage_inflicted > self._last_damage_inflicted:
+            assert isinstance(entity.history.attack, dict)
+            attack_style = entity.history.attack["style"]
+            if attack_style in args:
+                ret += args[attack_style]
+
+        self._last_damage_inflicted = entity.history.damage_inflicted
+
+        # if _DEBUG_TASK_REWARD and self.agent_id == 1:
+        #     print(
+        #         f"agent_id {self.agent_id}, current_tick {current_tick}"
+        #         f", entity.history.attack {entity.history.attack}"
+        #         f", entity.history.damage_inflicted {entity.history.damage_inflicted}"
+        #     )
+
+        return ret
+
+    def _task_harvest_bonus(self, args: Dict) -> float:
+        """[Task reward] harvest reward"""
+        ret = 0
+
+        entity = self.env.realm.players.entities[self.agent_id]  # dead players are removed
+        skills: Skills = entity.skills
+        current_tick = self.env.realm.tick
+
+        skill_names = list(args.keys())
+        assert len(skill_names) == 1, f"harvest reward requires 1 skill but got {len(skill_names)}"
+        skill_name = skill_names[0]
+
+        if skill_name == "Fishing":
+            skill = skills.fishing
+        elif skill_name == "Herbalism":
+            skill = skills.herbalism
+        elif skill_name == "Prospecting":
+            skill = skills.prospecting
+        elif skill_name == "Carving":
+            skill = skills.carving
+        elif skill_name == "Alchemy":
+            skill = skills.alchemy
+        else:
+            raise Exception(f"Invalid skill {skill_name}")
+
+        cur_skill_exp = skill.exp.val
+        exp_diff = cur_skill_exp - self._last_harvest_skill_exp
+        if exp_diff > 0:
+            ret += args[skill_name] * exp_diff
+        self._last_harvest_skill_exp = cur_skill_exp
+
+        # if _DEBUG_TASK_REWARD and self.agent_id <= 20:
+        #     print(
+        #         f"agent_id {self.agent_id}, current_tick {current_tick}"
+        #         f", skill {skill_name}, exp {cur_skill_exp}, exp_diff {exp_diff}"
+        #     )
+
+        return ret
+
+    def _task_own_bonus(self, args: Dict) -> float:
+        """[Task reward] ownership reward"""
+        ret = 0
+
+        entity: Entity = self.env.realm.players.entities[self.agent_id]
+        current_tick = self.env.realm.tick
+
+        packet = entity.inventory.packet()
+        for item in packet["items"]:
+            item_type = item["item"]
+            level = item["level"]
+            quantity = item["quantity"]
+
+            reward_coef = args.get(item_type, args.get("", 0.0))
+            if not reward_coef:
+                continue
+
+            if item_type not in self._history_own:
+                self._history_own[item_type] = {}
+            if level not in self._history_own[item_type]:
+                self._history_own[item_type][level] = 0
+
+            quantity_diff = quantity - self._history_own[item_type][level]
+
+            if quantity_diff > 0:  # when the ownership record is broken, update it and give a reward
+                self._history_own[item_type][level] = quantity
+                ret += quantity_diff * level * reward_coef
+
+        # if _DEBUG_TASK_REWARD and ret:
+        #     print(
+        #         f"agent_id {self.agent_id}, current_tick {current_tick}"
+        #         f", _history_own {self._history_own}, +reward {ret}"
+        #     )
+
+        return ret
+
+    def _print_agent_all_events(self):
+        print(f"== agent_id {self.agent_id}'s logs ==")
+        log = self.env.realm.event_log.get_data(agents=[self.agent_id])
+        self._print_events_log(log, self.env.realm.event_log.attr_to_col)
+
+    @staticmethod
+    def _print_events_log(log, attr_to_col):
+        for line in log:
+            event_name = EVENTCODE_TO_EVENT.get(line[attr_to_col["event"]], "")
+            tick = line[attr_to_col["tick"]]
+            print(
+                f"tick {tick}, event {event_name}: "
+                + ", ".join([f"{_} {line[attr_to_col[_]]}" for _ in _COLS])
+            )
+
+
+def get_tasks_info_for_reward_setting(tasks_path: str) -> SimpleNamespace:
+    with open(tasks_path, "rb") as f:
+        curriculums: List[TaskSpec] = dill.load(f)
+
+    print(f"Load {len(curriculums)} train curriculums")
+
+    ret = SimpleNamespace(
+        embedding_mat=None,  # matrix of all task embeddings stacked together
+        eval_fn_name=[],
+        eval_fn_kwargs=[],
+        n=0,
+    )
+
+    _mat = []
+
+    for curriculum in curriculums:
+        eval_fn_kwargs = {
+            key: value if isinstance(value, (str, int, float)) else value.__name__
+            for key, value in curriculum.eval_fn_kwargs.items()
+        }
+
+        _mat.append(curriculum.embedding)
+        ret.eval_fn_name.append(curriculum.eval_fn.__name__)
+        ret.eval_fn_kwargs.append(eval_fn_kwargs)
+        ret.n += 1
+
+    ret.embedding_mat = np.vstack(_mat)
+
+    return ret
+
+
+def load_task_reward_settings(path: str) -> Dict:
+    print(f"Load task reward setting {path}")
+    with open(path, "r") as f:
+        ret = json.load(f)
+    return ret
+
+
+def make_env_creator(args: Namespace):
+    # TODO: Max episode length
+
+    use_task_reward = (
+        not args.eval_mode and args.task_reward_setting_path and args.task_learning_bonus_weight
+    )
+
+    # Task info used to set the hand-crafted rewards during training
+    train_tasks_info = (
+        get_tasks_info_for_reward_setting(args.tasks_path) if use_task_reward else None
+    )
+    task_reward_settings = (
+        load_task_reward_settings(args.task_reward_setting_path) if use_task_reward else None
+    )
+
+    def env_creator():
+        """Create an environment."""
+        env = nmmo.Env(Config(args), seed=args.seed)
+        env = pufferlib.emulation.PettingZooPufferEnv(
+            env,
+            postprocessor_cls=Postprocessor,
+            postprocessor_kwargs={
+                "eval_mode": args.eval_mode,
+                "early_stop_agent_num": args.early_stop_agent_num,
+                "sqrt_achievement_rewards": args.sqrt_achievement_rewards,
+                "heal_bonus_weight": args.heal_bonus_weight,
+                "meander_bonus_weight": args.meander_bonus_weight,
+                "explore_bonus_weight": args.explore_bonus_weight,
+                "task_learning_bonus_weight": args.task_learning_bonus_weight,
+                "alive_bonus_weight": args.alive_bonus_weight,
+                "adjust_ori_reward": args.adjust_ori_reward,
+                "train_tasks_info": train_tasks_info,
+                "task_reward_settings": task_reward_settings,
+                "debug_print_events": args.debug_print_events,
+            },
+        )
+        return env
+
+    return env_creator
diff --git a/config.yaml b/config.yaml
index 3d3ce1c9..ac42d902 100644
--- a/config.yaml
+++ b/config.yaml
@@ -137,3 +137,4 @@ takeru:
   early_stop_agent_num: 0
   explore_bonus_weight: 0.01
   disable_give: True
+  donot_attack_dangerous_npc: True
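
For reference, the takeru change in reward_wrapper.py works purely by masking the action-target observation, so the policy can never select a neutral or hostile NPC as an attack target. A minimal standalone sketch of that logic follows; the helper name mask_dangerous_npc_targets is hypothetical, and it assumes the same agent_obs layout that RewardWrapper.observation receives (an "Entity" row per visible entity plus per-action "ActionTargets" masks).

import numpy as np
from nmmo.entity.entity import EntityState

EntityAttr = EntityState.State.attr_name_to_col


def mask_dangerous_npc_targets(agent_obs):
    # npc type: 1: passive, 2: neutral, 3: hostile (per the comment in the patch)
    npc_type = agent_obs["Entity"][:, EntityAttr["npc_type"]]
    # zero out the attack-target mask for neutral and hostile NPCs
    dangerous_npc_idxs = np.where(npc_type > 1)
    agent_obs["ActionTargets"]["Attack"]["Target"][dangerous_npc_idxs] = 0
    return agent_obs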
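The submitted env's task shaping is driven by a JSON file loaded via load_task_reward_settings and consumed by _get_task_reward_setting: the top-level key is the task's eval-fn name, "_key" names which eval_fn kwarg selects the concrete entry, "_default" is the fallback, and each entry maps reward types ("log", "log_value", "wander", "wander_occupy", "attack", "harvest", "own") to their coefficients. A purely illustrative sketch of that structure, with hypothetical eval-fn and kwarg names:

# Hypothetical task_reward_settings content; only the "_key"/"_default" layout and the
# reward-type names come from the code above.
task_reward_settings = {
    "CountEvent": {
        "_key": "event",
        "EAT_FOOD": {"log": {"EAT_FOOD": 0.01}},
        "_default": {"wander": {"per_tile": 0.001}},
    },
    "HarvestItem": {
        "_key": "item",
        "_default": {"harvest": {"Fishing": 0.01}},
    },
}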