switched to pyproject.toml, applied ruff
kywch committed Mar 28, 2024
1 parent 46a8d48 commit 47eccbe
Showing 24 changed files with 1,460 additions and 1,153 deletions.
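Note: the new pyproject.toml is among the 24 changed files but is not rendered in this excerpt. Judging from the rest of the commit (ruff pinned at v0.3.2 in pre-commit, CI installing '.[dev]', a workflow named tox, and reformatted lines wrapping just under 100 columns), its tool configuration plausibly looks like the sketch below; every key and value here is an inference, not the committed file.

# Hypothetical sketch of the ruff-related parts of the new pyproject.toml.
[project.optional-dependencies]
dev = ["ruff==0.3.2", "pre-commit", "tox"]  # assumed: CI installs '.[dev]'; workflow is named 'tox'

[tool.ruff]
line-length = 100  # assumed from reformatted lines wrapping just under 100 characters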
24 changes: 24 additions & 0 deletions .github/workflows/workflow.yml
@@ -0,0 +1,24 @@
name: tox
on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        py: ["3.8", "3.9", "3.10"]
    steps:
      - name: Setup python for test ${{ matrix.py }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.py }}
      - uses: actions/checkout@v3
      - name: Upgrade pip
        run: python -m pip install -U pip setuptools wheel cython
      - name: Install
        run: python -m pip install -e '.[dev]'
      - name: Check formatting
        run: ruff format --check .
      - name: Check lint
        run: ruff check .
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,9 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.3.2
    hooks:
      # Run the linter.
      - id: ruff
      # Run the formatter.
      - id: ruff-format
2 changes: 1 addition & 1 deletion agent_zoo/neurips23_start_kit/__init__.py
@@ -1,3 +1,3 @@
 from .baseline_policy import Baseline as Policy
 from .baseline_policy import Recurrent
-from .reward_wrapper import RewardWrapper
\ No newline at end of file
+from .reward_wrapper import RewardWrapper
455 changes: 231 additions & 224 deletions agent_zoo/neurips23_start_kit/baseline_policy.py

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions agent_zoo/neurips23_start_kit/reward_wrapper.py
@@ -9,7 +9,6 @@ def __init__(
         eval_mode=False,
         early_stop_agent_num=0,
         stat_prefix=None,
-
         # Custom reward wrapper args
         heal_bonus_weight=0,
         explore_bonus_weight=0,
@@ -22,15 +21,15 @@ def __init__(
         self.clip_unique_event = clip_unique_event

     def reset(self, **kwargs):
-        '''Called at the start of each episode'''
+        """Called at the start of each episode"""
         self._reset_reward_vars()
         return super().reset(**kwargs)

     def _reset_reward_vars(self):
         self._history = {
             agent_id: {
-                'prev_price': 0,
-                'prev_moves': [],
+                "prev_price": 0,
+                "prev_moves": [],
             }
             for agent_id in self.env.possible_agents
         }
@@ -43,20 +42,20 @@ def observation_space(self):
"""

def observation(self, agent_id, agent_obs):
'''Called before observations are returned from the environment
"""Called before observations are returned from the environment
Use this to define custom featurizers. Changing the space itself requires you to
define the observation space again (i.e. Gym.spaces.Dict(gym.spaces....))
'''
"""
# Mask the price of the previous action, to encourage agents to explore new prices
agent_obs['ActionTargets']['Sell']['Price'][self._history[agent_id]['prev_price']] = 0
agent_obs["ActionTargets"]["Sell"]["Price"][self._history[agent_id]["prev_price"]] = 0
return agent_obs

def action(self, agent_id, agent_atn):
'''Called before actions are passed from the model to the environment'''
"""Called before actions are passed from the model to the environment"""
# Keep track of the previous price and moves for each agent
self._history[agent_id]['prev_price'] = agent_atn['Sell']['Price']
self._history[agent_id]['prev_moves'].append(agent_atn['Move']['Direction'])
self._history[agent_id]["prev_price"] = agent_atn["Sell"]["Price"]
self._history[agent_id]["prev_moves"].append(agent_atn["Move"]["Direction"])
return agent_atn

def reward_terminated_truncated_info(self, agent_id, reward, terminated, truncated, info):
@@ -72,8 +71,8 @@ def reward_terminated_truncated_info(self, agent_id, reward, terminated, truncat
         # The number of unique events are available in self._unique_events[agent_id]
         uniq = self._unique_events[agent_id]
         explore_bonus = 0
-        if self.explore_bonus_weight > 0 and uniq['curr_count'] > uniq['prev_count']:
-            explore_bonus = min(self.clip_unique_event, uniq['curr_count'] - uniq['prev_count'])
+        if self.explore_bonus_weight > 0 and uniq["curr_count"] > uniq["prev_count"]:
+            explore_bonus = min(self.clip_unique_event, uniq["curr_count"] - uniq["prev_count"])
         explore_bonus *= self.explore_bonus_weight

         reward += healing_bonus + explore_bonus
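The wrapper above masks the previously chosen sell price so the same price cannot be picked again on the next tick. A minimal, self-contained sketch of that masking idea, using a plain NumPy array as a stand-in for the env's ActionTargets mask (the array size and names are illustrative, not the real Neural MMO spaces):

import numpy as np

# Stand-in for agent_obs["ActionTargets"]["Sell"]["Price"]:
# a 0/1 mask over discrete price levels, where 1 means "selectable".
price_mask = np.ones(100, dtype=np.int8)  # 100 levels is a made-up size

prev_price = 42  # price index this agent chose on the previous tick
price_mask[prev_price] = 0  # zero it out so the policy must try a new price

assert price_mask[prev_price] == 0 and price_mask.sum() == 99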
191 changes: 108 additions & 83 deletions analysis/proc_eval_result.py
@@ -8,145 +8,170 @@

 # Make the table output simpler
 pl.Config.set_tbl_hide_dataframe_shape(True)
-pl.Config.set_tbl_formatting('NOTHING')
+pl.Config.set_tbl_formatting("NOTHING")
 pl.Config.set_tbl_hide_column_data_types(True)

 # string matching for task names
 WEIGHT_DICT = {
-    'TickGE': ('survival', 100 / 6), # 1 survival task
-    'PLAYER_KILL': ('combat', 100 / (6*3)), # 3 combat tasks
-    'DefeatEntity': ('combat', 100 / (6*3)),
-    'GO_FARTHEST': ('exploration', 100 / (6*2)), # 2 exploration tasks
-    'OccupyTile': ('exploration', 100 / (6*2)),
-    'AttainSkill': ('skill', 100 / (6*8)), # 8 skill tasks
-    'HarvestItem': ('item', 100 / (6*44)), # 44 item tasks
-    'ConsumeItem': ('item', 100 / (6*44)),
-    'EquipItem': ('item', 100 / (6*44)),
-    'FullyArmed': ('item', 100 / (6*44)),
-    'EARN_GOLD': ('market', 100 / (6*5)), # 5 market tasks
-    'BUY_ITEM': ('market', 100 / (6*5)),
-    'EarnGold': ('market', 100 / (6*5)),
-    'HoardGold': ('market', 100 / (6*5)),
-    'MakeProfit': ('market', 100 / (6*5)),
+    "TickGE": ("survival", 100 / 6),  # 1 survival task
+    "PLAYER_KILL": ("combat", 100 / (6 * 3)),  # 3 combat tasks
+    "DefeatEntity": ("combat", 100 / (6 * 3)),
+    "GO_FARTHEST": ("exploration", 100 / (6 * 2)),  # 2 exploration tasks
+    "OccupyTile": ("exploration", 100 / (6 * 2)),
+    "AttainSkill": ("skill", 100 / (6 * 8)),  # 8 skill tasks
+    "HarvestItem": ("item", 100 / (6 * 44)),  # 44 item tasks
+    "ConsumeItem": ("item", 100 / (6 * 44)),
+    "EquipItem": ("item", 100 / (6 * 44)),
+    "FullyArmed": ("item", 100 / (6 * 44)),
+    "EARN_GOLD": ("market", 100 / (6 * 5)),  # 5 market tasks
+    "BUY_ITEM": ("market", 100 / (6 * 5)),
+    "EarnGold": ("market", 100 / (6 * 5)),
+    "HoardGold": ("market", 100 / (6 * 5)),
+    "MakeProfit": ("market", 100 / (6 * 5)),
 }

+
 def get_task_weight(task_name):
     for key, val in WEIGHT_DICT.items():
         if key in task_name:
             return val
-    logging.warning(f'Task name {task_name} not found in weight dict')
-    return 'etc', 0
+    logging.warning(f"Task name {task_name} not found in weight dict")
+    return "etc", 0

+
 def get_summary_dict(vals, key):
-    progress = vals if key == 'length' else [v[0] for v in vals]
-    summ = {
-        'count': len(progress),
-        'mean': np.mean(progress),
-        'median': np.median(progress)
-    }
-
-    if key == 'length':
+    progress = vals if key == "length" else [v[0] for v in vals]
+    summ = {"count": len(progress), "mean": np.mean(progress), "median": np.median(progress)}
+
+    if key == "length":
         progress = np.array(progress) / 1023  # full episode length

-    summ['completed'] = np.mean([1 if v >= 1 else 0 for v in progress])
-    summ['over30pcnt'] = np.mean([1 if v >= 0.3 else 0 for v in progress])
+    summ["completed"] = np.mean([1 if v >= 1 else 0 for v in progress])
+    summ["over30pcnt"] = np.mean([1 if v >= 0.3 else 0 for v in progress])
     return summ

+
 def summarize_single_eval(data, weighted_score=False):
     summary = {}

     # task-level info
     for key, vals in data.items():
-        if key.startswith('curriculum') or key == 'length':
+        if key.startswith("curriculum") or key == "length":
             summary[key] = get_summary_dict(vals, key)

-            if weighted_score and key.startswith('curriculum'):
+            if weighted_score and key.startswith("curriculum"):
                 category, weight = get_task_weight(key)
-                summary[key]['category'] = category
-                summary[key]['weight'] = weight
-                summary[key]['weighted_score'] = summary[key]['mean'] * weight
+                summary[key]["category"] = category
+                summary[key]["weight"] = weight
+                summary[key]["weighted_score"] = summary[key]["mean"] * weight

     # meta info
-    summary['avg_progress'] = np.mean([v['mean'] for k, v in summary.items()
-                                       if k.startswith('curriculum')])
+    summary["avg_progress"] = np.mean(
+        [v["mean"] for k, v in summary.items() if k.startswith("curriculum")]
+    )
     if weighted_score:
-        summary['weighted_score'] = np.sum([v['weighted_score'] for k, v in summary.items()
-                                            if k.startswith('curriculum')])
+        summary["weighted_score"] = np.sum(
+            [v["weighted_score"] for k, v in summary.items() if k.startswith("curriculum")]
+        )
     return summary

+
 def process_eval_files(policy_store_dir, eval_prefix):
     summ_policy = []
     summ_task = []

     for file in os.listdir(policy_store_dir):
         # NOTE: assumes the file naming convention is 'eval_<mode>_<seed>.json'
-        if not file.startswith(eval_prefix) or not file.endswith('.json'):
+        if not file.startswith(eval_prefix) or not file.endswith(".json"):
             continue

-        mode = file.split('_')[1]
-        random_seed = file.split('_')[2].replace('.json', '')
+        mode = file.split("_")[1]
+        random_seed = file.split("_")[2].replace(".json", "")

-        with open(os.path.join(policy_store_dir, file), 'r') as f:
+        with open(os.path.join(policy_store_dir, file), "r") as f:
             data = json.load(f)

         for pol_name, pol_data in data.items():
             if len(pol_data) == 0:
                 continue

             summary = summarize_single_eval(pol_data, weighted_score=True)
-            summ_policy.append({
-                'policy_name': pol_name,
-                'mode': mode,
-                'seed': random_seed,
-                'count': summary['length']['count'],
-                'length': summary['length']['mean'],
-                'score': summary['avg_progress'],
-                'weighted_score': summary['weighted_score']
-            })
+            summ_policy.append(
+                {
+                    "policy_name": pol_name,
+                    "mode": mode,
+                    "seed": random_seed,
+                    "count": summary["length"]["count"],
+                    "length": summary["length"]["mean"],
+                    "score": summary["avg_progress"],
+                    "weighted_score": summary["weighted_score"],
+                }
+            )

             # also gather the results across random seeds for each task, then average
             for task_name, task_data in summary.items():
-                if not task_name.startswith('curriculum'):
+                if not task_name.startswith("curriculum"):
                     continue
-                summ_task.append({
-                    'category': task_data['category'],
-                    'task_name': task_name,
-                    'weight': task_data['weight'],
-                    'policy_name': pol_name,
-                    'mode': mode,
-                    'seed': random_seed,
-                    'count': task_data['count'],
-                    'score': task_data['mean']
-                })
-
-    summ_df = pl.DataFrame(summ_policy).sort(['policy_name', 'mode', 'seed'])
-    summ_grp = summ_df.group_by(['policy_name', 'mode']).agg(
-        pl.col('score').mean(),
-        pl.col('weighted_score').mean(),
+                summ_task.append(
+                    {
+                        "category": task_data["category"],
+                        "task_name": task_name,
+                        "weight": task_data["weight"],
+                        "policy_name": pol_name,
+                        "mode": mode,
+                        "seed": random_seed,
+                        "count": task_data["count"],
+                        "score": task_data["mean"],
+                    }
+                )
+
+    summ_df = pl.DataFrame(summ_policy).sort(["policy_name", "mode", "seed"])
+    summ_grp = summ_df.group_by(["policy_name", "mode"]).agg(
+        pl.col("score").mean(),
+        pl.col("weighted_score").mean(),
     )
-    summ_grp = summ_grp.sort('weighted_score', descending=True)
-    summ_grp.write_csv(os.path.join(policy_store_dir, 'score_summary.tsv'), separator="\t", float_precision=6)
-    print('\nPolicy score summary, sorted by weighted_score:')
+    summ_grp = summ_grp.sort("weighted_score", descending=True)
+    summ_grp.write_csv(
+        os.path.join(policy_store_dir, "score_summary.tsv"), separator="\t", float_precision=6
+    )
+    print("\nPolicy score summary, sorted by weighted_score:")
     print(summ_grp)

-    task_df = pl.DataFrame(summ_task).sort(['mode', 'category', 'task_name', 'policy_name', 'seed'])
-    task_grp = task_df.group_by(['mode', 'category', 'task_name', 'policy_name']).agg(pl.col('score').mean())
-    task_grp = task_grp.sort(['mode', 'category', 'task_name', 'policy_name'])
-    task_grp.write_csv(os.path.join(policy_store_dir, 'score_task_summary.tsv'), separator="\t", float_precision=6)
-    cate_grp = task_df.group_by(['mode', 'category', 'policy_name']).agg(pl.col('score').mean())
-    cate_grp = cate_grp.sort(['mode', 'category', 'policy_name'])
-    cate_grp.write_csv(os.path.join(policy_store_dir, 'score_category_summary.tsv'), separator="\t", float_precision=6)
+    task_df = pl.DataFrame(summ_task).sort(["mode", "category", "task_name", "policy_name", "seed"])
+    task_grp = task_df.group_by(["mode", "category", "task_name", "policy_name"]).agg(
+        pl.col("score").mean()
+    )
+    task_grp = task_grp.sort(["mode", "category", "task_name", "policy_name"])
+    task_grp.write_csv(
+        os.path.join(policy_store_dir, "score_task_summary.tsv"), separator="\t", float_precision=6
+    )
+    cate_grp = task_df.group_by(["mode", "category", "policy_name"]).agg(pl.col("score").mean())
+    cate_grp = cate_grp.sort(["mode", "category", "policy_name"])
+    cate_grp.write_csv(
+        os.path.join(policy_store_dir, "score_category_summary.tsv"),
+        separator="\t",
+        float_precision=6,
+    )

-    if len(summ_df['seed'].unique()) > 1:
-        summ_df.write_csv(os.path.join(policy_store_dir, 'score_by_seed.tsv'), separator="\t", float_precision=6)
-        task_df.write_csv(os.path.join(policy_store_dir, 'score_by_task_seed.tsv'), separator="\t", float_precision=6)
+    if len(summ_df["seed"].unique()) > 1:
+        summ_df.write_csv(
+            os.path.join(policy_store_dir, "score_by_seed.tsv"), separator="\t", float_precision=6
+        )
+        task_df.write_csv(
+            os.path.join(policy_store_dir, "score_by_task_seed.tsv"),
+            separator="\t",
+            float_precision=6,
+        )

     return summ_df, summ_grp, task_df, task_grp, cate_grp

-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Process the evaluation result files')
-    parser.add_argument('policy_store_dir', type=str, help='Path to the policy directory')
-    parser.add_argument('-p', '--prefix', type=str, default='eval_', help='Prefix of the evaluation result files')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Process the evaluation result files")
+    parser.add_argument("policy_store_dir", type=str, help="Path to the policy directory")
+    parser.add_argument(
+        "-p", "--prefix", type=str, default="eval_", help="Prefix of the evaluation result files"
+    )
     args = parser.parse_args()

     process_eval_files(args.policy_store_dir, args.prefix)
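The WEIGHT_DICT above is built so that each of the six task categories (survival, combat, exploration, skill, item, market) contributes 100/6 points, split evenly across that category's task count, so a policy that fully completes every task scores exactly 100. A quick sanity check of that arithmetic (task counts taken from the comments in the dict):

# Category -> number of tasks, per the comments in WEIGHT_DICT.
category_tasks = {"survival": 1, "combat": 3, "exploration": 2, "skill": 8, "item": 44, "market": 5}

# Each task in a category is worth 100 / (6 * n_tasks); full completion totals 100.
total = sum(n * (100 / (6 * n)) for n in category_tasks.values())
assert abs(total - 100) < 1e-9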