Skip to content

Commit

Permalink
mul proc v1
Browse files Browse the repository at this point in the history
  • Loading branch information
zdx3578 committed Jul 27, 2017
1 parent 28a3644 commit e34140b
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 153 deletions.
1 change: 1 addition & 0 deletions a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ def train(self, num_workers, load_model=False):
lambda: worker.work(
self.max_episode_length, self.gamma, sess, coord,
saver)))
sleep(np.random.random_integers(15))
t.start()
sleep(0.5)
#start_pid=os.system("ps -a|grep autostart")
Expand Down
30 changes: 19 additions & 11 deletions autostart.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
#!/bin/bash
xte 'key Return'
xte 'usleep 100000'
xte 'key Return'
xte 'usleep 100000'
xte 'key Up'
xte 'usleep 100000'
xte 'key Up'
xte 'usleep 100000'
xte 'key Return'
xte 'usleep 100000'
xte 'key Return'

set -eu

window=`xdotool search --name $1 | head -n 1`

xdotool key --window $window Return
sleep 0.1
xdotool key --window $window Return
sleep 0.1
xdotool key --window $window Up
sleep 0.1
xdotool key --window $window Up
sleep 0.1
xdotool key --window $window Return
sleep 0.1
xdotool key --window $window Return
# Uncomment for using vision as input
# sleep 0.1
# xdotool key --window $window F2
123 changes: 48 additions & 75 deletions gym_torcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,51 @@
import numpy as np
# from os import path
import snakeoil3_gym as snakeoil3
from snakeoil3_gym import send_cmd
import numpy as np
import copy
import collections as col
import os
import subprocess
import time
import requests

import signal


class TorcsEnv:
terminal_judge_start = 100 # If after 100 timestep still no progress, terminated
termination_limit_progress = 8 # [km/h], episode terminates if car is running slower than this limit
terminal_judge_start = 500 # Speed limit is applied after this step
termination_limit_progress = 5 # [km/h], episode terminates if car is running slower than this limit
default_speed = 50

initial_reset = True

def __init__(self, vision=False, throttle=False, gear_change=False , port=3201):
def start_torcs_process(self):
if self.torcs_proc is not None:
os.killpg(os.getpgid(self.torcs_proc.pid), signal.SIGKILL)
time.sleep(0.5)
self.torcs_proc = None
window_title = str(self.port)

command =['/usr/local/bin/torcs',' -nofuel -nodamage -nolaptime -title {} -p {}'.format(window_title, self.port) ]
if self.vision is True:
command += ' -vision'
self.torcs_proc = subprocess.Popen(command, shell=False, preexec_fn=os.setsid)
time.sleep(0.5)
os.system('sh autostart.sh {}'.format(window_title))
time.sleep(0.5)

def __init__(self, vision=False, throttle=False, gear_change=False, port=3101):
#print("Init")
self.vision = vision
self.throttle = throttle
self.gear_change = gear_change
self.port = port
self.port = port
self.torcs_proc = None

self.initial_run = True
self.pid=None


##print("launch torcs")
#os.system('pkill torcs')
time.sleep(0.5)

send_cmd(self.port,self.port)
# self.kill_torcs()
# if self.vision is True:
# os.system('torcs -nofuel -nodamage -nolaptime -vision -p {} &'.format(self.port))


# else:
# os.system('torcs -nofuel -nolaptime -p {} &'.format(self.port))
# self.start_torcs()


# time.sleep(0.5)
# os.system('sh autostart.sh')
# time.sleep(0.5)
self.start_torcs_process()
time.sleep(0.5)

"""
# Modify here if you use multiple tracks in the environment
Expand All @@ -72,44 +72,11 @@ def __init__(self, vision=False, throttle=False, gear_change=False , port=3201):
high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
self.observation_space = spaces.Box(low=low, high=high)
def start_torcs(self):
self.kill_torcs()
time.sleep(1.5)
while os.system("ps -a|grep autostart") == 0 :
sleep(5)

if self.vision is True:
# os.system('torcs -nofuel -nodamage -nolaptime -vision -p {} &'.format(self.port))
proc=subprocess.Popen([ '/usr/local/bin/torcs' , ' -nofuel -nodamage -nolaptime -vision -p {} &'.format(self.port)],shell=False)
self.pid=proc.pid

else:
# os.system('torcs -nofuel -nolaptime -p {} &'.format(self.port))
proc=subprocess.Popen([ '/usr/local/bin/torcs',' -nofuel -nolaptime -p {} &'.format(self.port)],shell=False)
self.pid=proc.pid
time.sleep(0.5)

print("start torcs {}".format(self.pid))
time.sleep(0.5)
while os.system("ps -a|grep autostart") == 0 :
sleep(5)
print('load game')

os.system('sh autostart.sh')
time.sleep(0.5)



def kill_torcs(self):
if self.pid :
os.system("kill -9 {}".format(self.pid))
print("kill torcs {}".format(self.pid))

def step(self, u):
#print("Step")
# convert thisAction to the actual torcs actionstr
client = self.client
# print("uuuuuu",u)

this_action = self.agent_to_torcs(u)

Expand Down Expand Up @@ -139,7 +106,7 @@ def step(self, u):
action_torcs['accel'] -= .2
else:
action_torcs['accel'] = this_action['accel']
action_torcs['brake'] = this_action['brake']
#action_torcs['brake'] = this_action['brake']

# Automatic Gear Change by Snakeoil
if self.gear_change is True:
Expand Down Expand Up @@ -176,12 +143,13 @@ def step(self, u):
# Reward setting Here #######################################
# direction-dependent positive reward
track = np.array(obs['track'])

trackPos = np.array(obs['trackPos'])
sp = np.array(obs['speedX'])
damage = np.array(obs['damage'])
rpm = np.array(obs['rpm'])

progress = sp*np.cos(obs['angle']) - np.abs(sp*np.sin(obs['angle'])) - sp*np.abs(obs['trackPos'])
progress = sp*np.cos(obs['angle']) - np.abs(sp*np.sin(obs['angle'])) - sp * np.abs(obs['trackPos'])
reward = progress

# collision detection
Expand All @@ -195,6 +163,12 @@ def step(self, u):
# episode_terminate = True
# client.R.d['meta'] = True

#if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small
# if progress < self.termination_limit_progress:
# print("No progress")
# episode_terminate = True
# client.R.d['meta'] = True

if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small
if progress < self.termination_limit_progress:
if self.time_step > 20 :
Expand All @@ -212,6 +186,11 @@ def step(self, u):



# if np.cos(obs['angle']) < 0: # Episode is terminated if the agent runs backward
# episode_terminate = True
# client.R.d['meta'] = True


if client.R.d['meta'] is True: # Send a reset signal
self.initial_run = False
client.respond_to_server()
Expand All @@ -221,7 +200,7 @@ def step(self, u):
return self.get_obs(), reward, client.R.d['meta'], {}

def reset(self, relaunch=False):
#print("Reset")
print("Reset")

self.time_step = 0

Expand All @@ -235,7 +214,7 @@ def reset(self, relaunch=False):
print("### TORCS is RELAUNCHED ###")

# Modify here if you use multiple tracks in the environment
self.client = snakeoil3.Client(p=self.port, vision=self.vision) # Open new UDP in vtorcs
self.client = snakeoil3.Client(self.start_torcs_process, p=self.port) # Open new UDP in vtorcs
self.client.MAX_STEPS = np.inf

client = self.client
Expand All @@ -250,32 +229,26 @@ def reset(self, relaunch=False):
return self.get_obs()

def end(self):
# os.system('pkill torcs')
self.kill_torcs()
os.killpg(os.getpgid(self.torcs_proc.pid), signal.SIGKILL)
time.sleep(0.5)
os.killpg(os.getpgid(self.torcs_proc.pid), signal.SIGKILL)

def get_obs(self):
return self.observation

def reset_torcs(self):
#print("relaunch torcs")
#os.system('pkill torcs')
# time.sleep(0.5)
# if self.vision is True:
# os.system('torcs -nofuel -nodamage -nolaptime -vision -p {} &'.format(self.port))
# else:
# os.system('torcs -nofuel -nolaptime -p {} &'.format(self.port))
# time.sleep(0.5)
# self.start_torcs()
send_cmd(self.port,self.port)

self.torcs_proc.terminate()
time.sleep(0.5)
self.start_torcs_process()
time.sleep(0.5)

# def agent_to_torcs(self, u):
# torcs_action = {'steer': u[0]}

# if self.throttle is True: # throttle action is enabled
# torcs_action.update({'accel': u[1]})
# torcs_action.update({'brake': u[2]})
# #torcs_action.update({'brake': u[2]})

# if self.gear_change is True: # gear change action is enabled
# torcs_action.update({'gear': int(u[3])})
Expand Down
11 changes: 6 additions & 5 deletions networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,40 +262,41 @@ def update_target_graph(from_scope, to_scope):

def _create_network(self):
with tf.variable_scope(self.scope):
initializer= tf.truncated_normal_initializer( mean=0.0, stddev=0.01)
# Input and visual encoding layers
self.inputs = tf.placeholder(
shape=[None, self.state_size], dtype=tf.float32)

s_layer1 = tf.layers.batch_normalization(
tf.layers.dense(
inputs=self.inputs, activation=tf.nn.relu,
inputs=self.inputs, activation=tf.nn.relu,kernel_initializer=initializer,
units=A3CNetwork.HIDDEN1_UNITS),
training=self.is_training, name='s_layer_1')

s_layer2 = tf.layers.batch_normalization(
tf.layers.dense(
inputs=s_layer1, activation=tf.nn.relu,
inputs=s_layer1, activation=tf.nn.relu,kernel_initializer=initializer,
units=A3CNetwork.HIDDEN2_UNITS),
training=self.is_training, name='s_layer_2')

# Output layers for policy and value estimations
self.policy_mu = tf.layers.batch_normalization(
tf.layers.dense(
inputs=s_layer2, units=2, activation=tf.nn.tanh),
inputs=s_layer2, units=2, activation=tf.nn.tanh,kernel_initializer=initializer,),
training=self.is_training, name='policy_mu')

# clip the standard deviation to avoid numerical instabilites in
# the log probabilities
self.policy_sd = tf.clip_by_value(
tf.layers.batch_normalization(
tf.layers.dense(
inputs=s_layer2, units=2, activation=tf.nn.softplus),
inputs=s_layer2, units=2, activation=tf.nn.softplus,kernel_initializer=initializer,),
training=self.is_training),
[0.05]*self.action_size, [0.25]*self.action_size,
name='policy_sd')

self.value = tf.layers.batch_normalization(
tf.layers.dense(inputs=s_layer2, units=1),
tf.layers.dense(inputs=s_layer2, units=1,kernel_initializer=initializer,),
training=self.is_training, name='value')

self.normal_dist = tf.contrib.distributions.Normal(
Expand Down
Loading

0 comments on commit e34140b

Please sign in to comment.