Skip to content

Commit

Permalink
Merge pull request #3 from H-IAAC/gym
Browse files Browse the repository at this point in the history
Gymnasium integration
  • Loading branch information
EltonCN authored Nov 27, 2024
2 parents 25868a5 + d5a2198 commit c2e63e8
Show file tree
Hide file tree
Showing 12 changed files with 1,527 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
python3 -m pip install --upgrade pip
python3 -m pip install pytest
python3 -m pip install pytest-cov
python3 -m pip install -e .[tests]
python3 -m pip install -e .[tests, gym]
- name: Tests
run: |
Expand Down
360 changes: 360 additions & 0 deletions dev/Gym codelet.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,360 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional, Any\n",
"\n",
"import gymnasium as gym\n",
"from gymnasium.wrappers import TransformAction, TransformObservation\n",
"\n",
"import cst_python as cst\n",
"from cst_python.python.gym import GymCodelet"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"env = gym.make(\"Blackjack-v1\")\n",
"\n",
"env = TransformObservation(env, \n",
" lambda obs:{\"player_sum\":obs[0], \"dealer_card\":obs[1], \"usable_ace\":obs[2]}, \n",
" gym.spaces.Dict({\"player_sum\":env.observation_space[0], \"dealer_card\":env.observation_space[1], \"usable_ace\":env.observation_space[2]}))\n",
"\n",
"env = TransformAction(env, \n",
" lambda action:action[\"hit\"], \n",
" gym.spaces.Dict({\"hit\":env.action_space}))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mind = cst.Mind()\n",
"gym_codelet = GymCodelet(mind, env)\n",
"mind.insert_codelet(gym_codelet)\n",
"\n",
"mind.start()\n",
"gym_codelet.seed_memory.set_info(42)\n",
"gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413462, evaluation=0.0, I=2, name=dealer_card],\n",
" 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413462, evaluation=0.0, I=15, name=player_sum],\n",
" 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413462, evaluation=0.0, I=0, name=usable_ace]},\n",
" MemoryObject [idmemoryobject=6, timestamp=1732724413462, evaluation=0.0, I=False, name=terminated],\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413462, evaluation=0.0, I=0, name=reward])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Observation {'player_sum': 25, 'dealer_card': 2, 'usable_ace': 0}\n"
]
},
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.action_memories[\"hit\"].set_info(1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413492, evaluation=0.0, I=2, name=dealer_card],\n",
" 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413492, evaluation=0.0, I=25, name=player_sum],\n",
" 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413492, evaluation=0.0, I=0, name=usable_ace]},\n",
" MemoryObject [idmemoryobject=6, timestamp=1732724413492, evaluation=0.0, I=True, name=terminated],\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413492, evaluation=0.0, I=-1.0, name=reward])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.reset_memory.set_info(True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413554, evaluation=0.0, I=2, name=dealer_card],\n",
" 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413554, evaluation=0.0, I=15, name=player_sum],\n",
" 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413554, evaluation=0.0, I=0, name=usable_ace]},\n",
" MemoryObject [idmemoryobject=6, timestamp=1732724413554, evaluation=0.0, I=False, name=terminated],\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413554, evaluation=0.0, I=0, name=reward])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Observation {'player_sum': 15, 'dealer_card': 2, 'usable_ace': 0}\n"
]
},
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.action_memories[\"hit\"].set_info(0)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'dealer_card': MemoryObject [idmemoryobject=0, timestamp=1732724413580, evaluation=0.0, I=2, name=dealer_card],\n",
" 'player_sum': MemoryObject [idmemoryobject=1, timestamp=1732724413580, evaluation=0.0, I=15, name=player_sum],\n",
" 'usable_ace': MemoryObject [idmemoryobject=2, timestamp=1732724413580, evaluation=0.0, I=0, name=usable_ace]},\n",
" MemoryObject [idmemoryobject=6, timestamp=1732724413580, evaluation=0.0, I=True, name=terminated],\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413580, evaluation=0.0, I=1.0, name=reward])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env = gym.make(\"Blackjack-v1\")\n",
"mind = cst.Mind()\n",
"\n",
"gym_codelet = GymCodelet(mind, env)\n",
"mind.insert_codelet(gym_codelet)\n",
"\n",
"mind.start()\n",
"gym_codelet.seed_memory.set_info(42)\n",
"gym_codelet.reset_memory.set_info(not gym_codelet.reset_memory.get_info())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413609, evaluation=0.0, I=(15, 2, 0), name=observation]},\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413609, evaluation=0.0, I=False, name=terminated1],\n",
" MemoryObject [idmemoryobject=2, timestamp=1732724413609, evaluation=0.0, I=0, name=reward1])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Observation (25, 2, 0)\n"
]
},
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.action_memories[\"action\"].set_info(1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'observation': MemoryObject [idmemoryobject=0, timestamp=1732724413632, evaluation=0.0, I=(25, 2, 0), name=observation]},\n",
" MemoryObject [idmemoryobject=4, timestamp=1732724413632, evaluation=0.0, I=True, name=terminated1],\n",
" MemoryObject [idmemoryobject=2, timestamp=1732724413632, evaluation=0.0, I=-1.0, name=reward1])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gym_codelet.observation_memories, gym_codelet.terminated_memory, gym_codelet.reward_memory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
_examples/Implementing a Architecture
_examples/Publisher-Subscriber
_examples/Activation and Monitoring
_examples/Gymnasium Integration

.. toctree::
:maxdepth: 4
Expand Down
Loading

0 comments on commit c2e63e8

Please sign in to comment.