This repository has been archived by the owner on Aug 4, 2024. It is now read-only.

Refine learning #2

Open · wants to merge 7 commits into master

29 changes: 29 additions & 0 deletions check_walking.py
@@ -0,0 +1,29 @@
#!/usr/bin/env python3

import gym
import envs
from time import sleep

env = gym.make('GankenKun-v0')
state = env.reset()

step = 0
while True:
    # Fixed schedule of goal offsets [x, y, th] fed to the walk controller
    if 0 <= step <= 10:
        action = [1.0, 0.0, 0.0]
    if 10 <= step <= 20:
        action = [0.0, 1.0, 0.0]
    if 20 <= step <= 30:
        action = [-1.0, 0.0, 0.0]
    if 30 <= step <= 40:
        action = [0.0, -1.0, 0.0]
    if 40 <= step <= 50:
        action = [0.0, 0.0, 1.0]
    if 50 <= step <= 60:
        action = [0.0, 0.0, -1.0]
    if 60 <= step <= 70:
        action = [0.0, 0.0, 0.0]
    next_state, reward, done, info = env.step(action)
    step += 1
    sleep(1)
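
check_walking.py drives the environment open-loop with hand-written goal offsets; dqn_train.py instead samples from a small discrete action set. For reference, a minimal random-policy rollout over that set might look like the sketch below (the offsets are copied from the updated actions_list in dqn_train.py and the 250-step cap matches its max_step; everything else is an assumption for illustration).

import random
import gym
import envs  # registers GankenKun-v0, as in check_walking.py

# Discrete goal offsets [dx, dy, dth] copied from actions_list in dqn_train.py
actions_list = [[0.1, 0.0, 0.0], [-0.1, 0.0, 0.0],
                [0.0, 0.06, 0.0], [0.0, -0.06, 0.0],
                [0.0, 0.1, 1.0], [0.0, -0.1, -1.0]]

env = gym.make('GankenKun-v0')
state = env.reset()
for _ in range(250):                      # max_step in dqn_train.py
    action = random.choice(actions_list)  # uniform random policy
    state, reward, done, info = env.step(action)
    if done:
        break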

22 changes: 13 additions & 9 deletions dqn_train.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import os
import random

@@ -13,15 +15,15 @@

def train():
# setup ===========================
max_episode = 1000 # maximum number of episodes to run during training
max_episode = 10000 # maximum number of episodes to run during training
max_step = 250 # maximum number of steps per episode
n_warmup_steps = 10000 # number of warm-up steps
n_warmup_steps = 250 # number of warm-up steps
interval = 1 # interval at which the model and results are written out
actions_list = [[0.1, 0.0, 0.0], [-0.1, 0.0, 0.0], [0.0, 0.06, 0.0], [0.0, -0.06, 0.0], [0.0, 0.0, 0.4], [0.0, 0.0, -0.4]] # list of possible action values
gamma = 0.99 # discount factor
epsilon = 0.1 # ε-greedy parameter
actions_list = [[0.1, 0.0, 0.0], [-0.1, 0.0, 0.0], [0.0, 0.06, 0.0], [0.0, -0.06, 0.0], [0.0, 0.1, 1.0], [0.0, -0.1, -1.0]] # list of possible action values
gamma = 0.9 # discount factor
epsilon = 0.05 # ε-greedy parameter
memory_size = 10000
batch_size = 32
batch_size = 4
result_dir = os.path.join('./result/dqn', now_str())
x = []
x_reward = []
@@ -71,6 +73,7 @@ def train():
step = 0
if total_step > n_warmup_steps:
break
print(total_step)
memory = memory[-memory_size:]
print('warming up {:,} steps... done.'.format(n_warmup_steps))

@@ -100,14 +103,16 @@

memory.append((state, action, c_reward, next_state, done))
episode_reward_list.append(c_reward)

exps = random.sample(memory, batch_size)
loss, td_error = q_network.update_on_batch(exps)
loss_list.append(loss)
td_list.append(td_error)

q_network.sync_target_network(soft=0.01)
q_network.sync_target_network(soft=0.001)
state = next_state
memory = memory[-memory_size:]
# print(len(memory))

# end of episode
if step >= max_step or done:
@@ -116,7 +121,7 @@ def train():
reward_sum = np.sum(episode_reward_list)
loss_avg = np.mean(loss_list)
td_error_avg = np.mean(td_list)
print("{}episode reward_avg:{} loss:{} td_error:{}".format(num_episode, reward_sum, loss_avg, td_error_avg))
print("{}episode reward_avg:{} loss:{} td_error:{}".format(num_episode, reward_avg, loss_avg, td_error_avg))
if num_episode % interval == 0:
model_path = os.path.join(result_dir, 'episode_{}.h5'.format(num_episode))
q_network.main_network.save(model_path)
@@ -136,7 +141,6 @@ def train():
print("last_10_reward:{}".format(last_10_avg))
x_reward.append(num_episode)
y_last10_reward.append(last_10_avg)


if num_episode >= max_episode:
episode_loop = False
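
dqn_train.py calls q_network.update_on_batch(exps) and q_network.sync_target_network(soft=...), whose implementations are outside this diff. Below is a hedged sketch of the two mechanisms the retuned hyperparameters affect, ε-greedy exploration with epsilon = 0.05 and a soft (Polyak) target-network update with soft = 0.001; the function names and the Polyak interpretation of sync_target_network are assumptions, not the repository's actual API.

import random
import numpy as np

def select_action(q_values, epsilon, actions_list):
    # Epsilon-greedy: explore with probability epsilon, otherwise act greedily.
    if random.random() < epsilon:
        return random.randrange(len(actions_list))
    return int(np.argmax(q_values))

def soft_update(target_weights, main_weights, soft=0.001):
    # Polyak averaging: the target network slowly tracks the main network.
    return [soft * w_main + (1.0 - soft) * w_tgt
            for w_main, w_tgt in zip(main_weights, target_weights)]

With epsilon = 0.05 roughly one action in twenty is random, and with soft = 0.001 the target network trails the main network ten times more slowly than with the previous value of 0.01.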
22 changes: 11 additions & 11 deletions envs/GankenKun_pybullet/GankenKun/foot_step_planner.py
@@ -19,21 +19,21 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
step_x = abs(goal_x - current_x )/self.max_stride_x
step_y = abs(goal_y - current_y )/self.max_stride_y
step_th = abs(goal_th - current_th)/self.max_stride_th
max_step = max(step_x, step_y, step_th)
max_step = max(step_x, step_y, step_th, 1)
stride_x = (goal_x - current_x )/max_step
stride_y = (goal_y - current_y )/max_step
stride_th = (goal_th - current_th)/max_step

# first step
foot_step = []
if status == 'start':
foot_step += [[0.0, current_x, current_y, current_th, 'both']]
foot_step += [[0.0, current_x, current_y, current_th, 'both', 0]]
time += self.period * 2.0
if next_support_leg == 'right':
foot_step += [[time, current_x, -self.width+current_y, current_th, next_support_leg]]
foot_step += [[time, current_x, -self.width+current_y, current_th, next_support_leg, -self.width]]
next_support_leg = 'left'
elif next_support_leg == 'left':
foot_step += [[time, current_x, self.width+current_y, current_th, next_support_leg]]
foot_step += [[time, current_x, self.width+current_y, current_th, next_support_leg, self.width]]
next_support_leg = 'right'

# walking
@@ -50,10 +50,10 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
next_y = current_y + stride_y
next_th = current_th + stride_th
if next_support_leg == 'right':
foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg, -self.width]]
next_support_leg = 'left'
elif next_support_leg == 'left':
foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg, self.width]]
next_support_leg = 'right'
current_x, current_y, current_th = next_x, next_y, next_th

@@ -62,16 +62,16 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
if not status == 'stop':
time += self.period
if next_support_leg == 'right':
foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg, -self.width]]
elif next_support_leg == 'left':
foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg, self.width]]
time += self.period
next_support_leg = 'both'
foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]
time += 2.0
foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]
time += 100.0
foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]

return foot_step
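
The change in this file is that every foot-step entry grows from five elements [time, x, y, th, support_leg] to six, the new last element recording the lateral offset applied for that step (±self.width during single support, 0 during double support). GankenKun_env.step() below reads it as self.foot_step[0][5]. A minimal, hedged sketch of consuming the extended entry follows; the example numbers and field names are illustrative assumptions, only the field order comes from the diff.

# One extended foot-step entry; 0.044 stands in for self.width here.
foot_step = [[0.7, 0.0, -0.044, 0.0, 'right', -0.044]]

t, x, y, th, support_leg, lateral_offset = foot_step[0]
if support_leg in ('left', 'right'):
    # Subtract the stored lateral shift to recover the body-path target,
    # mirroring self.foot_step[0][2] - self.foot_step[0][5] in GankenKun_env.step().
    body_y_goal = y - lateral_offset
else:  # 'both' during double support; the stored offset is 0
    body_y_goal = y
print(body_y_goal)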

76 changes: 47 additions & 29 deletions envs/GankenKun_pybullet/GankenKun_env.py
@@ -15,7 +15,7 @@
import gym
from gym import error, spaces, utils
from gym.utils import seeding

import math

class GankenKunEnv(gym.Env):
def __init__(self):
@@ -27,17 +27,21 @@ def __init__(self):
self.y_threshold = 3.5
self.ball_x_threshold = 4.5
self.ball_y_threshold = 3.0
self.ball_not_touch_period = 0

TIME_STEP = 0.001
# physicsClient = p.connect(p.GUI)
physicsClient = p.connect(p.DIRECT)
TIME_STEP = 0.01
physicsClient = p.connect(p.GUI)
# physicsClient = p.connect(p.DIRECT)
p.setGravity(0, 0, -9.8)
p.setTimeStep(TIME_STEP)
p.setPhysicsEngineParameter(numSubSteps=10)
p.configureDebugVisualizer(p.COV_ENABLE_GUI,0)

# planeId = p.loadURDF("envs/GankenKun_pybullet/URDF/plane.urdf", [0, 0, 0])
self.FieldId = p.loadSDF("envs/GankenKun_pybullet/SDF/field/soccerfield.sdf")
self.RobotId = p.loadURDF("envs/GankenKun_pybullet/URDF/gankenkun.urdf", [0, 0, 0])
self.BallId = p.loadSDF("envs/GankenKun_pybullet/SDF/ball/ball.sdf")
self.ball_pos = [0.3, 0.0]

self.index = {p.getBodyInfo(self.RobotId)[0].decode('UTF-8'):-1,}
for id in range(p.getNumJoints(self.RobotId)):
@@ -65,37 +69,37 @@ def __init__(self):
x_goal, y_goal, th_goal = 0.1, 0.0, 0.0
self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
# print(p.getBasePositionAndOrientation(BallId[0]))
self.ball_delta_length = 0.0

def step(self, action):
j = 0
#print()
while p.isConnected():
j += 1
if j >= 10:
self.joint_angles,lf,rf,xp,n = self.walk.getNextPos()
j = 0
if n == 0:
if (len(self.foot_step) <= 6):
x_goal = self.foot_step[0][1] + action[0]
y_goal = self.foot_step[0][2] + action[1]
th_goal = self.foot_step[0][3] + action[2]

self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
else:
self.foot_step = self.walk.setGoalPos()
#if you want new goal, please send position
self.joint_angles,lf,rf,xp,n = self.walk.getNextPos()
if n == 0:
if self.foot_step[0][4] == "left":
x_goal = self.foot_step[0][1] + action[0]
y_goal = self.foot_step[0][2] - self.foot_step[0][5] + action[1]
th_goal = self.foot_step[0][3] + action[2]
self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
break
else:
self.foot_step = self.walk.setGoalPos()
for id in range(p.getNumJoints(self.RobotId)):
qIndex = p.getJointInfo(self.RobotId, id)[3]
if qIndex > -1:
p.setJointMotorControl2(self.RobotId, id, p.POSITION_CONTROL, self.joint_angles[qIndex-7])

p.stepSimulation()
# sleep(TIME_STEP) # delete -> speed up

x, y, _ = p.getBasePositionAndOrientation(self.RobotId)[0]
roll, pitch, yaw = p.getEulerFromQuaternion(p.getBasePositionAndOrientation(self.RobotId)[1])
ball_x, ball_y, _ = p.getBasePositionAndOrientation(self.BallId[0])[0]
self.state = [x, y, yaw, ball_x, ball_y]
ball_lx, ball_ly = ball_x - x, ball_y - y
goal_lx, goal_ly = self.goal_pos[0] - x, self.goal_pos[1] - y
ball_rx, ball_ry = ball_lx * math.cos(yaw) + ball_ly * math.sin(yaw), - ball_lx * math.sin(yaw) + ball_ly * math.cos(yaw)
goal_rx, goal_ry = goal_lx * math.cos(yaw) + goal_ly * math.sin(yaw), - goal_lx * math.sin(yaw) + goal_ly * math.cos(yaw)

self.state = [goal_rx, goal_ry, yaw, ball_rx, ball_ry]

done = bool(
abs(x) > self.x_threshold
@@ -108,19 +112,31 @@ def step(self, action):
if not done:
dx, dy = ball_x - x, ball_y - y
ball_distance = math.sqrt(dx**2 + dy**2)
reward += math.floor(-10.0 * ball_distance)/10
ball_goal_distance = math.sqrt((goal_x - ball_x)**2 + (goal_y - ball_y)**2)
reward += math.floor(-10.0 * ball_goal_distance)/10
reward += - ball_distance / 10
ball_goal_prev_distance = math.sqrt((goal_x + 1.0 - self.ball_pos[0])**2 + (goal_y - self.ball_pos[1])**2)
ball_goal_distance = math.sqrt((goal_x + 1.0 - ball_x)**2 + (goal_y - ball_y)**2)
if self.ball_delta_length == 0.0:
reward += max((- ball_goal_distance + ball_goal_prev_distance)*10, 0)
self.ball_delta_length = - ball_goal_distance + ball_goal_prev_distance
if ball_goal_distance == ball_goal_prev_distance:
self.ball_not_touch_period += 1
if self.ball_not_touch_period > 40:
self.ball_not_touch_period = 0
done = True
else:
self.ball_not_touch_period = 0
self.ball_pos[0] = ball_x
self.ball_pos[1] = ball_y
elif ball_x > goal_x and self.goal_leftpole < ball_y < self.goal_rightpole:
reward = 500
else:
reward = -500
reward = 10
done = True

# print("action: "+str(action)+", reward: "+str(reward))
return self.state, reward, done, {}

def reset(self):
p.resetBasePositionAndOrientation(self.RobotId, [0, 0, 0], [0, 0, 0, 1.0])
p.resetBasePositionAndOrientation(self.BallId[0], [0.2, 0, 0.1], [0, 0, 0, 1.0])
p.resetBasePositionAndOrientation(self.BallId[0], [0.3, 0, 0.1], [0, 0, 0, 1.0])
x_goal, y_goal, th_goal = 0.1, 0.0, 0.0
del self.walk, self.pc
self.pc = preview_control(0.01, 1.0, 0.27)
Expand All @@ -130,6 +146,8 @@ def reset(self):
roll, pitch, yaw = p.getEulerFromQuaternion(p.getBasePositionAndOrientation(self.RobotId)[1])
ball_x, ball_y, _ = p.getBasePositionAndOrientation(self.BallId[0])[0]
self.state = [x, y, yaw, ball_x, ball_y]
self.ball_pos[0] = 0.3
self.ball_pos[1] = 0.0
return np.array(self.state)
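
The new observation built in step() is expressed in the robot frame: the goal and ball positions are taken relative to the robot and rotated by the yaw angle to give goal_rx, goal_ry, ball_rx, ball_ry. A small standalone check of that rotation, using the same formula as the diff (the helper name is an assumption):

import math

def to_robot_frame(px, py, robot_x, robot_y, yaw):
    # Rotate a world-frame point into the robot frame (same math as GankenKun_env.step).
    lx, ly = px - robot_x, py - robot_y
    rx = lx * math.cos(yaw) + ly * math.sin(yaw)
    ry = -lx * math.sin(yaw) + ly * math.cos(yaw)
    return rx, ry

# A ball 1 m straight ahead of a robot facing +y (yaw = pi/2) lands on the robot's forward axis.
print(to_robot_frame(0.0, 1.0, 0.0, 0.0, math.pi / 2))  # ~(1.0, 0.0)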

