switched to pyproject.toml, applied ruff
kywch committed Mar 28, 2024
1 parent 46a8d48 commit 47eccbe
Showing 24 changed files with 1,460 additions and 1,153 deletions.
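Note: the new pyproject.toml is among the 24 changed files but is not rendered in this excerpt. Judging from the rest of the commit (ruff pinned at v0.3.2 in pre-commit, CI installing '.[dev]', a workflow named tox, and reformatted lines wrapping just under 100 columns), its tool configuration plausibly looks like the sketch below; every key and value here is an inference, not the committed file.

# Hypothetical sketch of the ruff-related parts of the new pyproject.toml.
[project.optional-dependencies]
dev = ["ruff==0.3.2", "pre-commit", "tox"]  # assumed: CI installs '.[dev]'; workflow is named 'tox'

[tool.ruff]
line-length = 100  # assumed from reformatted lines wrapping just under 100 characters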
24 changes: 24 additions & 0 deletions .github/workflows/workflow.yml
@@ -0,0 +1,24 @@
name: tox
on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        py: ["3.8", "3.9", "3.10"]
    steps:
      - name: Setup python for test ${{ matrix.py }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.py }}
      - uses: actions/checkout@v3
      - name: Upgrade pip
        run: python -m pip install -U pip setuptools wheel cython
      - name: Install
        run: python -m pip install -e '.[dev]'
      - name: Check formatting
        run: ruff format --check .
      - name: Check lint
        run: ruff check .
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,9 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.3.2
    hooks:
      # Run the linter.
      - id: ruff
      # Run the formatter.
      - id: ruff-format
2 changes: 1 addition & 1 deletion agent_zoo/neurips23_start_kit/__init__.py
@@ -1,3 +1,3 @@
 from .baseline_policy import Baseline as Policy
 from .baseline_policy import Recurrent
-from .reward_wrapper import RewardWrapper
\ No newline at end of file
+from .reward_wrapper import RewardWrapper
455 changes: 231 additions & 224 deletions agent_zoo/neurips23_start_kit/baseline_policy.py

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions agent_zoo/neurips23_start_kit/reward_wrapper.py
@@ -9,7 +9,6 @@ def __init__(
         eval_mode=False,
         early_stop_agent_num=0,
         stat_prefix=None,
-
         # Custom reward wrapper args
         heal_bonus_weight=0,
         explore_bonus_weight=0,
@@ -22,15 +21,15 @@ def __init__(
         self.clip_unique_event = clip_unique_event

     def reset(self, **kwargs):
-        '''Called at the start of each episode'''
+        """Called at the start of each episode"""
         self._reset_reward_vars()
         return super().reset(**kwargs)

     def _reset_reward_vars(self):
         self._history = {
             agent_id: {
-                'prev_price': 0,
-                'prev_moves': [],
+                "prev_price": 0,
+                "prev_moves": [],
             }
             for agent_id in self.env.possible_agents
         }
@@ -43,20 +42,20 @@ def observation_space(self):
"""

def observation(self, agent_id, agent_obs):
'''Called before observations are returned from the environment
"""Called before observations are returned from the environment
Use this to define custom featurizers. Changing the space itself requires you to
define the observation space again (i.e. Gym.spaces.Dict(gym.spaces....))
'''
"""
# Mask the price of the previous action, to encourage agents to explore new prices
agent_obs['ActionTargets']['Sell']['Price'][self._history[agent_id]['prev_price']] = 0
agent_obs["ActionTargets"]["Sell"]["Price"][self._history[agent_id]["prev_price"]] = 0
return agent_obs

def action(self, agent_id, agent_atn):
'''Called before actions are passed from the model to the environment'''
"""Called before actions are passed from the model to the environment"""
# Keep track of the previous price and moves for each agent
self._history[agent_id]['prev_price'] = agent_atn['Sell']['Price']
self._history[agent_id]['prev_moves'].append(agent_atn['Move']['Direction'])
self._history[agent_id]["prev_price"] = agent_atn["Sell"]["Price"]
self._history[agent_id]["prev_moves"].append(agent_atn["Move"]["Direction"])
return agent_atn

def reward_terminated_truncated_info(self, agent_id, reward, terminated, truncated, info):
@@ -72,8 +71,8 @@ def reward_terminated_truncated_info(self, agent_id, reward, terminated, truncat
         # The number of unique events are available in self._unique_events[agent_id]
         uniq = self._unique_events[agent_id]
         explore_bonus = 0
-        if self.explore_bonus_weight > 0 and uniq['curr_count'] > uniq['prev_count']:
-            explore_bonus = min(self.clip_unique_event, uniq['curr_count'] - uniq['prev_count'])
+        if self.explore_bonus_weight > 0 and uniq["curr_count"] > uniq["prev_count"]:
+            explore_bonus = min(self.clip_unique_event, uniq["curr_count"] - uniq["prev_count"])
         explore_bonus *= self.explore_bonus_weight

         reward += healing_bonus + explore_bonus
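The wrapper above masks the previously chosen sell price so the same price cannot be picked again on the next tick. A minimal, self-contained sketch of that masking idea, using a plain NumPy array as a stand-in for the env's ActionTargets mask (the array size and names are illustrative, not the real Neural MMO spaces):

import numpy as np

# Stand-in for agent_obs["ActionTargets"]["Sell"]["Price"]:
# a 0/1 mask over discrete price levels, where 1 means "selectable".
price_mask = np.ones(100, dtype=np.int8)  # 100 levels is a made-up size

prev_price = 42  # price index this agent chose on the previous tick
price_mask[prev_price] = 0  # zero it out so the policy must try a new price

assert price_mask[prev_price] == 0 and price_mask.sum() == 99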
191 changes: 108 additions & 83 deletions analysis/proc_eval_result.py
@@ -8,145 +8,170 @@

 # Make the table output simpler
 pl.Config.set_tbl_hide_dataframe_shape(True)
-pl.Config.set_tbl_formatting('NOTHING')
+pl.Config.set_tbl_formatting("NOTHING")
 pl.Config.set_tbl_hide_column_data_types(True)

 # string matching for task names
 WEIGHT_DICT = {
-    'TickGE': ('survival', 100 / 6), # 1 survival task
-    'PLAYER_KILL': ('combat', 100 / (6*3)), # 3 combat tasks
-    'DefeatEntity': ('combat', 100 / (6*3)),
-    'GO_FARTHEST': ('exploration', 100 / (6*2)), # 2 exploration tasks
-    'OccupyTile': ('exploration', 100 / (6*2)),
-    'AttainSkill': ('skill', 100 / (6*8)), # 8 skill tasks
-    'HarvestItem': ('item', 100 / (6*44)), # 44 item tasks
-    'ConsumeItem': ('item', 100 / (6*44)),
-    'EquipItem': ('item', 100 / (6*44)),
-    'FullyArmed': ('item', 100 / (6*44)),
-    'EARN_GOLD': ('market', 100 / (6*5)), # 5 market tasks
-    'BUY_ITEM': ('market', 100 / (6*5)),
-    'EarnGold': ('market', 100 / (6*5)),
-    'HoardGold': ('market', 100 / (6*5)),
-    'MakeProfit': ('market', 100 / (6*5)),
+    "TickGE": ("survival", 100 / 6),  # 1 survival task
+    "PLAYER_KILL": ("combat", 100 / (6 * 3)),  # 3 combat tasks
+    "DefeatEntity": ("combat", 100 / (6 * 3)),
+    "GO_FARTHEST": ("exploration", 100 / (6 * 2)),  # 2 exploration tasks
+    "OccupyTile": ("exploration", 100 / (6 * 2)),
+    "AttainSkill": ("skill", 100 / (6 * 8)),  # 8 skill tasks
+    "HarvestItem": ("item", 100 / (6 * 44)),  # 44 item tasks
+    "ConsumeItem": ("item", 100 / (6 * 44)),
+    "EquipItem": ("item", 100 / (6 * 44)),
+    "FullyArmed": ("item", 100 / (6 * 44)),
+    "EARN_GOLD": ("market", 100 / (6 * 5)),  # 5 market tasks
+    "BUY_ITEM": ("market", 100 / (6 * 5)),
+    "EarnGold": ("market", 100 / (6 * 5)),
+    "HoardGold": ("market", 100 / (6 * 5)),
+    "MakeProfit": ("market", 100 / (6 * 5)),
 }

+
 def get_task_weight(task_name):
     for key, val in WEIGHT_DICT.items():
         if key in task_name:
             return val
-    logging.warning(f'Task name {task_name} not found in weight dict')
-    return 'etc', 0
+    logging.warning(f"Task name {task_name} not found in weight dict")
+    return "etc", 0

+
 def get_summary_dict(vals, key):
-    progress = vals if key == 'length' else [v[0] for v in vals]
-    summ = {
-        'count': len(progress),
-        'mean': np.mean(progress),
-        'median': np.median(progress)
-    }
-
-    if key == 'length':
+    progress = vals if key == "length" else [v[0] for v in vals]
+    summ = {"count": len(progress), "mean": np.mean(progress), "median": np.median(progress)}
+
+    if key == "length":
         progress = np.array(progress) / 1023  # full episode length

-    summ['completed'] = np.mean([1 if v >= 1 else 0 for v in progress])
-    summ['over30pcnt'] = np.mean([1 if v >= 0.3 else 0 for v in progress])
+    summ["completed"] = np.mean([1 if v >= 1 else 0 for v in progress])
+    summ["over30pcnt"] = np.mean([1 if v >= 0.3 else 0 for v in progress])
     return summ

+
 def summarize_single_eval(data, weighted_score=False):
     summary = {}

     # task-level info
     for key, vals in data.items():
-        if key.startswith('curriculum') or key == 'length':
+        if key.startswith("curriculum") or key == "length":
             summary[key] = get_summary_dict(vals, key)

-            if weighted_score and key.startswith('curriculum'):
+            if weighted_score and key.startswith("curriculum"):
                 category, weight = get_task_weight(key)
-                summary[key]['category'] = category
-                summary[key]['weight'] = weight
-                summary[key]['weighted_score'] = summary[key]['mean'] * weight
+                summary[key]["category"] = category
+                summary[key]["weight"] = weight
+                summary[key]["weighted_score"] = summary[key]["mean"] * weight

     # meta info
-    summary['avg_progress'] = np.mean([v['mean'] for k, v in summary.items()
-                                       if k.startswith('curriculum')])
+    summary["avg_progress"] = np.mean(
+        [v["mean"] for k, v in summary.items() if k.startswith("curriculum")]
+    )
     if weighted_score:
-        summary['weighted_score'] = np.sum([v['weighted_score'] for k, v in summary.items()
-                                            if k.startswith('curriculum')])
+        summary["weighted_score"] = np.sum(
+            [v["weighted_score"] for k, v in summary.items() if k.startswith("curriculum")]
+        )
     return summary

+
 def process_eval_files(policy_store_dir, eval_prefix):
     summ_policy = []
     summ_task = []

     for file in os.listdir(policy_store_dir):
         # NOTE: assumes the file naming convention is 'eval_<mode>_<seed>.json'
-        if not file.startswith(eval_prefix) or not file.endswith('.json'):
+        if not file.startswith(eval_prefix) or not file.endswith(".json"):
             continue

-        mode = file.split('_')[1]
-        random_seed = file.split('_')[2].replace('.json', '')
+        mode = file.split("_")[1]
+        random_seed = file.split("_")[2].replace(".json", "")

-        with open(os.path.join(policy_store_dir, file), 'r') as f:
+        with open(os.path.join(policy_store_dir, file), "r") as f:
             data = json.load(f)

         for pol_name, pol_data in data.items():
             if len(pol_data) == 0:
                 continue

             summary = summarize_single_eval(pol_data, weighted_score=True)
-            summ_policy.append({
-                'policy_name': pol_name,
-                'mode': mode,
-                'seed': random_seed,
-                'count': summary['length']['count'],
-                'length': summary['length']['mean'],
-                'score': summary['avg_progress'],
-                'weighted_score': summary['weighted_score']
-            })
+            summ_policy.append(
+                {
+                    "policy_name": pol_name,
+                    "mode": mode,
+                    "seed": random_seed,
+                    "count": summary["length"]["count"],
+                    "length": summary["length"]["mean"],
+                    "score": summary["avg_progress"],
+                    "weighted_score": summary["weighted_score"],
+                }
+            )

             # also gather the results across random seeds for each task, then average
             for task_name, task_data in summary.items():
-                if not task_name.startswith('curriculum'):
+                if not task_name.startswith("curriculum"):
                     continue
-                summ_task.append({
-                    'category': task_data['category'],
-                    'task_name': task_name,
-                    'weight': task_data['weight'],
-                    'policy_name': pol_name,
-                    'mode': mode,
-                    'seed': random_seed,
-                    'count': task_data['count'],
-                    'score': task_data['mean']
-                })
-
-    summ_df = pl.DataFrame(summ_policy).sort(['policy_name', 'mode', 'seed'])
-    summ_grp = summ_df.group_by(['policy_name', 'mode']).agg(
-        pl.col('score').mean(),
-        pl.col('weighted_score').mean(),
+                summ_task.append(
+                    {
+                        "category": task_data["category"],
+                        "task_name": task_name,
+                        "weight": task_data["weight"],
+                        "policy_name": pol_name,
+                        "mode": mode,
+                        "seed": random_seed,
+                        "count": task_data["count"],
+                        "score": task_data["mean"],
+                    }
+                )
+
+    summ_df = pl.DataFrame(summ_policy).sort(["policy_name", "mode", "seed"])
+    summ_grp = summ_df.group_by(["policy_name", "mode"]).agg(
+        pl.col("score").mean(),
+        pl.col("weighted_score").mean(),
     )
-    summ_grp = summ_grp.sort('weighted_score', descending=True)
-    summ_grp.write_csv(os.path.join(policy_store_dir, 'score_summary.tsv'), separator="\t", float_precision=6)
-    print('\nPolicy score summary, sorted by weighted_score:')
+    summ_grp = summ_grp.sort("weighted_score", descending=True)
+    summ_grp.write_csv(
+        os.path.join(policy_store_dir, "score_summary.tsv"), separator="\t", float_precision=6
+    )
+    print("\nPolicy score summary, sorted by weighted_score:")
     print(summ_grp)

-    task_df = pl.DataFrame(summ_task).sort(['mode', 'category', 'task_name', 'policy_name', 'seed'])
-    task_grp = task_df.group_by(['mode', 'category', 'task_name', 'policy_name']).agg(pl.col('score').mean())
-    task_grp = task_grp.sort(['mode', 'category', 'task_name', 'policy_name'])
-    task_grp.write_csv(os.path.join(policy_store_dir, 'score_task_summary.tsv'), separator="\t", float_precision=6)
-    cate_grp = task_df.group_by(['mode', 'category', 'policy_name']).agg(pl.col('score').mean())
-    cate_grp = cate_grp.sort(['mode', 'category', 'policy_name'])
-    cate_grp.write_csv(os.path.join(policy_store_dir, 'score_category_summary.tsv'), separator="\t", float_precision=6)
+    task_df = pl.DataFrame(summ_task).sort(["mode", "category", "task_name", "policy_name", "seed"])
+    task_grp = task_df.group_by(["mode", "category", "task_name", "policy_name"]).agg(
+        pl.col("score").mean()
+    )
+    task_grp = task_grp.sort(["mode", "category", "task_name", "policy_name"])
+    task_grp.write_csv(
+        os.path.join(policy_store_dir, "score_task_summary.tsv"), separator="\t", float_precision=6
+    )
+    cate_grp = task_df.group_by(["mode", "category", "policy_name"]).agg(pl.col("score").mean())
+    cate_grp = cate_grp.sort(["mode", "category", "policy_name"])
+    cate_grp.write_csv(
+        os.path.join(policy_store_dir, "score_category_summary.tsv"),
+        separator="\t",
+        float_precision=6,
+    )

-    if len(summ_df['seed'].unique()) > 1:
-        summ_df.write_csv(os.path.join(policy_store_dir, 'score_by_seed.tsv'), separator="\t", float_precision=6)
-        task_df.write_csv(os.path.join(policy_store_dir, 'score_by_task_seed.tsv'), separator="\t", float_precision=6)
+    if len(summ_df["seed"].unique()) > 1:
+        summ_df.write_csv(
+            os.path.join(policy_store_dir, "score_by_seed.tsv"), separator="\t", float_precision=6
+        )
+        task_df.write_csv(
+            os.path.join(policy_store_dir, "score_by_task_seed.tsv"),
+            separator="\t",
+            float_precision=6,
+        )

     return summ_df, summ_grp, task_df, task_grp, cate_grp

-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Process the evaluation result files')
-    parser.add_argument('policy_store_dir', type=str, help='Path to the policy directory')
-    parser.add_argument('-p', '--prefix', type=str, default='eval_', help='Prefix of the evaluation result files')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Process the evaluation result files")
+    parser.add_argument("policy_store_dir", type=str, help="Path to the policy directory")
+    parser.add_argument(
+        "-p", "--prefix", type=str, default="eval_", help="Prefix of the evaluation result files"
+    )
     args = parser.parse_args()

     process_eval_files(args.policy_store_dir, args.prefix)
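The WEIGHT_DICT above is built so that each of the six task categories (survival, combat, exploration, skill, item, market) contributes 100/6 points, split evenly across that category's task count, so a policy that fully completes every task scores exactly 100. A quick sanity check of that arithmetic (task counts taken from the comments in the dict):

# Category -> number of tasks, per the comments in WEIGHT_DICT.
category_tasks = {"survival": 1, "combat": 3, "exploration": 2, "skill": 8, "item": 44, "market": 5}

# Each task in a category is worth 100 / (6 * n_tasks); full completion totals 100.
total = sum(n * (100 / (6 * n)) for n in category_tasks.values())
assert abs(total - 100) < 1e-9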