#14 Connection with MATLAB works.
nrontsis authored and kyr-pol committed Jan 15, 2019
1 parent 392ddb6 commit 88c0c0c
Showing 4 changed files with 87 additions and 4 deletions.
6 changes: 4 additions & 2 deletions examples/pendulum-matlab/matlab-environment/pendulum_learn.m
@@ -20,6 +20,7 @@
basename = 'pendulum_'; % filename used for saving data

% 2. Initial J random rollouts

for jj = 1:J
[xx, yy, realCost{jj}, latent{jj}] = ...
rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
@@ -30,14 +31,15 @@
end
end


mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);

% 3. Controlled learning (N iterations)
-for j = 1:N
+for j = 1:1
trainDynModel; % train (GP) dynamics model
learnPolicy; % learn policy
-applyController; % apply controller to system
+% applyController; % apply controller to system
disp(['controlled trial # ' num2str(j)]);
if plotting.verbosity > 0; % visualization of trajectory
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
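With this change pendulum_learn.m no longer drives the whole experiment: the controlled-learning loop runs a single iteration per invocation and the MATLAB-side rollout (applyController) is disabled, because rollouts are now executed in the Gym environment from Python (see runme.py below). A minimal sketch of the resulting call pattern, assuming the MATLAB Engine API for Python is installed; the called names are the repository's own MATLAB scripts:

import matlab.engine

eng = matlab.engine.start_matlab()
eng.cd("matlab-environment", nargout=0)
eng.settings_pendulum(nargout=0)  # set up plant, cost and policy structs
# ...push rollout data into the MATLAB workspace, then, per iteration:
eng.trainDynModel(nargout=0)      # train the GP dynamics model in MATLAB
eng.learnPolicy(nargout=0)        # optimise the policy in MATLAB
# the rollout that applyController used to perform now happens in Gym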
5 changes: 5 additions & 0 deletions examples/pendulum-matlab/matlab-environment/policy_wrapper.m
@@ -0,0 +1,5 @@
function u = policy_wrapper(mu,s)
global policy
u = policy.fcn(policy,mu,s);
end

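policy_wrapper exposes the learned controller as a single function that the MATLAB engine can call from Python; the policy struct is picked up from a global defined in settings_pendulum.m below. A hedged sketch of the call from the Python side, mirroring matlab_policy in runme.py and assuming settings_pendulum has already populated the global:

import numpy as np
import matlab.engine

eng = matlab.engine.start_matlab()
eng.cd("matlab-environment", nargout=0)
eng.settings_pendulum(nargout=0)               # defines the global policy
mu = matlab.double([[0.0], [0.0], [1.0]])      # state mean (column vector)
s = matlab.double(np.zeros((3, 3)).tolist())   # state covariance, here zero
u = eng.policy_wrapper(mu, s, nargout=1)       # control from policy.fcn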
10 changes: 8 additions & 2 deletions examples/pendulum-matlab/matlab-environment/settings_pendulum.m
@@ -23,7 +23,7 @@

% include some paths
try
-rd = '../../';
+rd = '../pilcoV0.9/';
addpath([rd 'base'],[rd 'util'],[rd 'gp'],[rd 'control'],[rd 'loss']);
catch
end
@@ -84,6 +84,7 @@
plant.prop = @propagated;

% 4. Set up the policy structure
global policy
policy.fcn = @(policy,m,s)conCat(@congp,@gSat,policy,m,s);% controller
% representation
policy.maxU = 2.5; % max. amplitude of
@@ -137,4 +138,9 @@
% 9. Some initializations
x = []; y = [];
fantasy.mean = cell(1,N); fantasy.std = cell(1,N);
realCost = cell(1,N); M = cell(N,1); Sigma = cell(N,1);

% Things copied from pendulum_learn
basename = 'pendulum_'; % filename used for saving data
mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
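Declaring policy as a global makes the struct visible to policy_wrapper when it is called through the engine, and the initialisations copied from pendulum_learn.m appear to be needed because settings_pendulum.m is now invoked directly from Python rather than through that script. A sketch of the resulting setup from the Python side, mirroring runme.py below (X and Y hold the rollout data):

eng.settings_pendulum(nargout=0)                # runs all of the setup above
eng.workspace['x'] = matlab.double(X.tolist())
eng.workspace['y'] = matlab.double(Y.tolist())
eng.trainDynModel(nargout=0)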
70 changes: 70 additions & 0 deletions examples/pendulum-matlab/runme.py
@@ -1,7 +1,13 @@
# Call inside python as
# exec(open("runme.py").read())
# to avoid MATLAB closing if the script ends/crashes.
import matlab.engine
import os
import urllib.request
import zipfile
import numpy as np
import gym
import time

if not os.path.isdir("pilcov0.9"):
print("Matlab implementation not found in current path.")
@@ -11,3 +17,67 @@
zip_ref.extractall("./")
zip_ref.close()
print("Done!")


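# Gym's Pendulum-v0 observes [cos(theta), sin(theta), dtheta]; the PILCO
# scripts expect the state ordered as [dtheta, theta, sin(theta), cos(theta)],
# so the helper below recovers theta with arctan2 and reorders the entries.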
def convert_to_matlab(x):
dtheta = x[2]
cos_theta = x[0]
sin_theta = x[1]

theta = np.arctan2(sin_theta, cos_theta)
return np.array([dtheta, theta, sin_theta, cos_theta])

env = gym.make('Pendulum-v0')

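# Roll a policy out in the Gym environment for a fixed number of timesteps.
# X collects rows [dtheta, theta, sin(theta), cos(theta), u] (state-action
# pairs); Y collects rows [dtheta, theta] of the successor state, the
# training targets for the GP dynamics model.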
def rollout(policy, timesteps):
X = []; Y = []
env.reset()
x = convert_to_matlab(env.step([0])[0])
for timestep in range(timesteps):
env.render()
u = policy(np.array([x[0], x[2], x[3]]))
x_new, _, done, _ = env.step(u)
x_new = convert_to_matlab(x_new) # x_new -> dtheta, theta, sin(theta), cos(theta)
if done: break
X.append(np.hstack((x, u)))
Y.append(x_new[0:2]) # Y -> dtheta, theta
x = x_new
return np.stack(X), np.stack(Y)

def random_policy(x):
return env.action_space.sample()

eng = matlab.engine.start_matlab("-desktop")
# dir_path = os.path.dirname(os.path.realpath(__file__)) + "/matlab-environment"
dir_path = "matlab-environment"
eng.cd(dir_path, nargout=0)

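# Evaluate the policy learned in MATLAB at a concrete state: the state is
# passed as the mean together with a zero covariance matrix, i.e. the policy
# is queried deterministically through policy_wrapper.m.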
def matlab_policy(x):
n = x.shape[0]
s = np.zeros((n,n))
u = eng.policy_wrapper(matlab.double(x[:, None].tolist()), matlab.double(s.tolist()), nargout=1)
return np.array([u])

# Initial random rollouts to generate a dataset
X,Y = rollout(policy=random_policy, timesteps=40)
for i in range(1,3):
X_, Y_ = rollout(policy=random_policy, timesteps=40)
X = np.vstack((X, X_))
Y = np.vstack((Y, Y_))

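# Main PILCO loop, driven from Python: each iteration pushes the dataset
# into the MATLAB workspace, trains the GP dynamics model and optimises the
# policy there, then collects a fresh rollout in Gym under the new policy.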
eng.settings_pendulum(nargout=0)
for rollouts in range(10):
print("Rollout #", rollouts + 1)
eng.workspace['j'] = rollouts + 1
eng.workspace['x'] = matlab.double(X.tolist())
eng.workspace['y'] = matlab.double(Y.tolist())
eng.trainDynModel(nargout=0)
start = time.time()
eng.learnPolicy(nargout=0)
end = time.time()
print("Learning of policy done in ", end - start, " seconds.")
if rollouts > 8:
import pdb; pdb.set_trace()
X_new, Y_new = rollout(policy=matlab_policy, timesteps=100)
# Update dataset
X = np.vstack((X, X_new)); Y = np.vstack((Y, Y_new))
