diff --git a/.gitignore b/.gitignore
index 4a920f2..81ae497 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ pilco.egg-info
 __pycache__
 *.swp
 octave-workspace
-*.egg
\ No newline at end of file
+*.egg
+examples/pendulum-matlab/pilcoV0.9/**
\ No newline at end of file
diff --git a/examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m b/examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m
new file mode 100644
index 0000000..5b5f4cd
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m
@@ -0,0 +1,81 @@
+%% draw_pendulum.m
+% *Summary:* Draw the pendulum system with reward, applied torque, 
+% and predictive uncertainty of the tips of the pendulums
+%
+%    function draw_pendulum(theta, torque, cost, text1, text2, M, S)
+%
+%
+% *Input arguments:*
+%
+%   theta1     angle of inner pendulum
+%   theta2     angle of outer pendulum
+%   f1         torque applied to inner pendulum
+%   f2         torque applied to outer pendulum
+%   cost       cost structure
+%     .fcn     function handle (it is assumed to use saturating cost)
+%     .<>      other fields that are passed to cost
+%   text1      (optional) text field 1
+%   text2      (optional) text field 2
+%   M          (optional) mean of state
+%   S          (optional) covariance of state
+%
+%
+% Copyright (C) 2008-2013 by
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2013-03-18
+
+
+function draw_pendulum(theta, torque, cost, text1, text2, M, S)
+%% Code
+
+l = 0.6;
+xmin = -1.2*l; 
+xmax = 1.2*l;    
+umax = 0.5;
+height = 0;
+
+% Draw pendulum
+pendulum = [0, 0; l*sin(theta), -l*cos(theta)];
+clf; hold on
+plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)
+
+% plot ellipses around tips of pendulum (if M, S exist)
+try
+  if max(max(S))>0
+    err = linspace(-1,1,100)*sqrt(S(2,2));
+    plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1)
+    plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2)
+    plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20)
+  end
+catch
+end
+
+% Draw useful information
+% target location
+plot(0,l,'k+','MarkerSize',20);
+plot([xmin, xmax], [-height, -height],'k','linewidth',2)
+% joint
+plot(0,0,'k.','markersize',24)
+plot(0,0,'y.','markersize',14)
+% tip of pendulum
+plot(l*sin(theta),-l*cos(theta),'k.','markersize',24)
+plot(l*sin(theta),-l*cos(theta),'y.','markersize',14)
+plot(0,-2*l,'.w','markersize',0.005)
+% applied torque
+plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10);
+% immediate reward
+reward = 1-cost.fcn(cost,[0, theta]',zeros(2));
+plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10);
+text(0,-0.5,'applied  torque')
+text(0,-0.7,'immediate reward')
+if exist('text1','var')  
+  text(0,-0.9, text1)
+end
+if exist('text2','var')  
+  text(0,-1.1, text2)
+end
+
+set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]);
+axis off;
+drawnow;
\ No newline at end of file
diff --git a/examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m b/examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m
new file mode 100644
index 0000000..4aad70f
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m
@@ -0,0 +1,29 @@
+%% draw_rollout_pendulum.m
+% *Summary:* Script to draw a trajectory of the most recent pendulum trajectory
+%
+% Copyright (C) 2008-2013 by 
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2013-03-27
+%
+%% High-Level Steps
+% # For each time step, plot the observed trajectory
+
+%% Code
+
+% Loop over states in trajectory
+for r = 1:size(xx,1)
+  cost.t = r;
+  if exist('j','var') && ~isempty(M{j})
+    draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost,  ...
+      ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
+      ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
+      ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
+  else
+     draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost,  ...
+      ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
+      ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
+      ' sec'])
+  end
+  pause(dt);
+end
\ No newline at end of file
diff --git a/examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m b/examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m
new file mode 100644
index 0000000..51ea331
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m
@@ -0,0 +1,44 @@
+%% dynamics_pendulum.m
+% *Summary:* Implements ths ODE for simulating the pendulum dynamics, where 
+% an input torque f can be applied  
+%
+%    function dz = dynamics_pendulum(t,z,u)
+%
+%
+% *Input arguments:*
+%
+%		t     current time step (called from ODE solver)
+%   z     state                                                    [2 x 1]
+%   u     (optional): torque f(t) applied to pendulum
+%
+% *Output arguments:*
+%   
+%   dz    if 3 input arguments:      state derivative wrt time
+%
+%   Note: It is assumed that the state variables are of the following order:
+%         dtheta:  [rad/s] angular velocity of pendulum
+%         theta:   [rad]   angle of pendulum
+%
+% A detailed derivation of the dynamics can be found in:
+%
+% M.P. Deisenroth: 
+% Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 
+% KIT Scientific Publishing, 2010.
+%
+%
+% Copyright (C) 2008-2013 by
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2013-03-18
+
+function dz = dynamics_pendulum(t,z,u)
+%% Code
+
+l = 1;    % [m]        length of pendulum
+m = 1;    % [kg]       mass of pendulum
+g = 9.82; % [m/s^2]    acceleration of gravity
+b = 0.01; % [s*Nm/rad] friction coefficient
+
+dz = zeros(2,1);
+dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3);
+dz(2) = z(1);
\ No newline at end of file
diff --git a/examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m b/examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m
new file mode 100644
index 0000000..6131ead
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m
@@ -0,0 +1,110 @@
+%% loss_pendulum.m
+% *Summary:* Pendulum loss function; the loss is
+% $1-\exp(-0.5*d^2*a)$,  where $a>0$ and  $d^2$ is the squared difference
+% between the actual and desired position of the tip of the pendulum.
+% The mean and the variance of the loss are computed by averaging over the
+% Gaussian distribution of the state $p(x) = \mathcal N(m,s)$ with mean $m$
+% and covariance matrix $s$.
+% Derivatives of these quantities are computed when desired.
+%
+%
+%   function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
+%
+%
+% *Input arguments:*
+%
+%   cost            cost structure
+%     .p            lengths of the pendulum                         [1 x  1 ]
+%     .width        array of widths of the cost (summed together)
+%     .expl         (optional) exploration parameter
+%     .angle        (optional) array of angle indices
+%     .target       target state                                    [D x  1 ]
+%   m               mean of state distribution                      [D x  1 ]
+%   s               covariance matrix for the state distribution    [D x  D ]
+%
+% *Output arguments:*
+%
+%   L     expected cost                                             [1 x  1 ]
+%   dLdm  derivative of expected cost wrt. state mean vector        [1 x  D ]
+%   dLds  derivative of expected cost wrt. state covariance matrix  [1 x D^2]
+%   S2    variance of cost                                          [1 x  1 ]
+%
+% Copyright (C) 2008-2014 by
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2014-01-09
+%
+%% High-Level Steps
+% # Precomputations
+% # Define static penalty as distance from target setpoint
+% # Trigonometric augmentation
+% # Calculate loss
+
+function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
+%% Code
+if isfield(cost,'width'); cw = cost.width; else cw = 1; end
+if ~isfield(cost,'expl') || isempty(cost.expl); b = 0; else b =  cost.expl; end
+
+
+% 1. Some precomputations
+D0 = size(s,2);                           % state dimension
+D1 = D0 + 2*length(cost.angle);           % state dimension (with sin/cos)
+
+M = zeros(D1,1); M(1:D0) = m; S = zeros(D1); S(1:D0,1:D0) = s;
+Mdm = [eye(D0); zeros(D1-D0,D0)]; Sdm = zeros(D1*D1,D0);
+Mds = zeros(D1,D0*D0); Sds = kron(Mdm,Mdm);
+
+% 2. Define static penalty as distance from target setpoint
+ell = cost.p;
+Q = zeros(D1); Q(D0+1:D0+2,D0+1:D0+2) = eye(2)*ell^2;
+
+% 3. Trigonometric augmentation
+if D1-D0 > 0
+  target = [cost.target(:); ...
+    gTrig(cost.target(:), zeros(numel(cost.target)), cost.angle)];
+  
+  i = 1:D0; k = D0+1:D1;
+  [M(k), S(k,k), C, mdm, sdm, Cdm, mds, sds, Cds] = ...
+    gTrig(M(i),S(i,i),cost.angle);
+  [S, Mdm, Mds, Sdm, Sds] = ...
+    fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D1);
+end
+
+% 4. Calculate loss
+L = 0; dLdm = zeros(1,D0); dLds = zeros(1,D0*D0); S2 = 0;
+for i = 1:length(cw)                    % scale mixture of immediate costs
+  cost.z = target; cost.W = Q/cw(i)^2;
+  [r, rdM, rdS, s2, s2dM, s2dS] = lossSat(cost, M, S);
+  
+  L = L + r; S2 = S2 + s2;
+  dLdm = dLdm + rdM(:)'*Mdm + rdS(:)'*Sdm;
+  dLds = dLds + rdM(:)'*Mds + rdS(:)'*Sds;
+  
+  if (b~=0 || ~isempty(b)) && abs(s2)>1e-12
+    L = L + b*sqrt(s2);
+    dLdm = dLdm + b/sqrt(s2) * ( s2dM(:)'*Mdm + s2dS(:)'*Sdm )/2;
+    dLds = dLds + b/sqrt(s2) * ( s2dM(:)'*Mds + s2dS(:)'*Sds )/2;
+  end
+end
+
+% normalize
+n = length(cw); L = L/n; dLdm = dLdm/n; dLds = dLds/n; S2 = S2/n;
+
+% Fill in covariance matrix...and derivatives ----------------------------
+function [S, Mdm, Mds, Sdm, Sds] = ...
+  fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D)
+X = reshape(1:D*D,[D D]); XT = X';                    % vectorised indices
+I=0*X; I(i,i)=1; ii=X(I==1)'; I=0*X; I(k,k)=1; kk=X(I==1)';
+I=0*X; I(i,k)=1; ik=X(I==1)'; ki=XT(I==1)';
+
+Mdm(k,:)  = mdm*Mdm(i,:) + mds*Sdm(ii,:);                      % chainrule
+Mds(k,:)  = mdm*Mds(i,:) + mds*Sds(ii,:);
+Sdm(kk,:) = sdm*Mdm(i,:) + sds*Sdm(ii,:);
+Sds(kk,:) = sdm*Mds(i,:) + sds*Sds(ii,:);
+dCdm      = Cdm*Mdm(i,:) + Cds*Sdm(ii,:);
+dCds      = Cdm*Mds(i,:) + Cds*Sds(ii,:);
+
+S(i,k) = S(i,i)*C; S(k,i) = S(i,k)';                        % off-diagonal
+SS = kron(eye(length(k)),S(i,i)); CC = kron(C',eye(length(i)));
+Sdm(ik,:) = SS*dCdm + CC*Sdm(ii,:); Sdm(ki,:) = Sdm(ik,:);
+Sds(ik,:) = SS*dCds + CC*Sds(ii,:); Sds(ki,:) = Sds(ik,:);
diff --git a/examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m b/examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m
new file mode 100644
index 0000000..e58107b
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m
@@ -0,0 +1,46 @@
+%% pendulum_learn.m
+% *Summary:* Script to learn a controller for the pendulum swingup
+%
+% Copyright (C) 2008-2013 by
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2013-03-27
+%
+%% High-Level Steps
+% # Load parameters
+% # Create J initial trajectories by applying random controls
+% # Controlled learning (train dynamics model, policy learning, policy
+% application)
+
+%% Code
+
+% 1. Initialization
+clear all; close all;
+settings_pendulum;            % load scenario-specific settings
+basename = 'pendulum_';       % filename used for saving data
+
+% 2. Initial J random rollouts
+for jj = 1:J
+  [xx, yy, realCost{jj}, latent{jj}] = ...
+    rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
+  x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
+  if plotting.verbosity > 0;      % visualization of trajectory
+    if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
+    draw_rollout_pendulum;
+  end
+end
+
+mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
+mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
+
+% 3. Controlled learning (N iterations)
+for j = 1:N
+  trainDynModel;   % train (GP) dynamics model
+  learnPolicy;     % learn policy
+  applyController; % apply controller to system
+  disp(['controlled trial # ' num2str(j)]);
+  if plotting.verbosity > 0;      % visualization of trajectory
+    if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
+    draw_rollout_pendulum;
+  end
+end
\ No newline at end of file
diff --git a/examples/pendulum-matlab/matlab-envinroment/settings_pendulum.m b/examples/pendulum-matlab/matlab-envinroment/settings_pendulum.m
new file mode 100644
index 0000000..4ce2cce
--- /dev/null
+++ b/examples/pendulum-matlab/matlab-envinroment/settings_pendulum.m
@@ -0,0 +1,140 @@
+%% settings_pendulum.m
+% *Summary:* Script set up the pendulum scenario
+%
+% Copyright (C) 2008-2013 by 
+% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
+%
+% Last modified: 2013-05-24
+%
+%% High-Level Steps
+% # Define state and important indices
+% # Set up scenario
+% # Set up the plant structure
+% # Set up the policy structure
+% # Set up the cost structure
+% # Set up the GP dynamics model structure
+% # Parameters for policy optimization
+% # Plotting verbosity
+% # Some array initializations
+
+%% Code
+
+warning('off','all'); format short; format compact; 
+
+% include some paths
+try
+  rd = '../../';
+  addpath([rd 'base'],[rd 'util'],[rd 'gp'],[rd 'control'],[rd 'loss']);
+catch
+end
+
+rand('seed',5); randn('seed',13); 
+
+
+
+% 1. Define state and important indices
+
+% 1a. Full state representation (including all augmentations)
+%  1  dtheta        angular velocity of inner pendulum
+%  2  theta2        angle inner pendulum
+%  3  sin(theta)    complex representation ...
+%  4  cos(theta)    ... of angle of pendulum
+%  5  u             torque applied to the pendulum
+
+% 1b. Important indices
+% odei  indicies for the ode solver
+% augi  indicies for variables augmented to the ode variables
+% dyno  indicies for the output from the dynamics model and indicies to loss
+% angi  indicies for variables treated as angles (using sin/cos representation)
+% dyni  indicies for inputs to the dynamics model
+% poli  indicies for variables that serve as inputs to the policy
+% difi  indicies for training targets that are differences (rather than values)
+
+odei = [1 2];               % varibles for the ODE solver
+augi = [];                  % variables to be augmented
+dyno = [1 2];               % variables to be predicted (and known to loss)
+angi = [2];                 % angle variables
+dyni = [1 3 4];             % variables that serve as inputs to the dynamics GP
+poli = [1 3 4];             % variables that serve as inputs to the policy
+difi = [1 2];               % variables that are learned via differences
+
+% 2. Set up the scenario
+dt = 0.1;                      % [s] sampling time
+T = 4;                         % [s] prediction time
+H = ceil(T/dt);                % prediction steps (optimization horizon)
+mu0 = [0 0]';                  % initial state mean
+S0 = 0.01*eye(2);              % initial state variance
+N = 10;                        % number of policy optimizations
+J = 1;                         % no. of inital training rollouts (of length H)
+K = 1;                         % number of initial states for which we optimize
+nc = 20;                       % size of controller training set
+
+% 3. Set up the plant structure
+plant.dynamics = @dynamics_pendulum;    % dynamics ODE function
+plant.noise = diag([0.1^2 0.01^2]);     % measurement noise
+plant.dt = dt;
+plant.ctrl = @zoh;                  % controler is zero-order-hold
+plant.odei = odei;                  % indices of the varibles for the ODE solver
+plant.augi = augi;                  % indices of augmented variables
+plant.angi = angi;
+plant.poli = poli;
+plant.dyno = dyno;
+plant.dyni = dyni;
+plant.difi = difi;
+plant.prop = @propagated;        
+
+% 4. Set up the policy structure
+policy.fcn = @(policy,m,s)conCat(@congp,@gSat,policy,m,s);% controller 
+                                                          % representation
+policy.maxU = 2.5;                                        % max. amplitude of 
+                                                          % torque
+[mm ss cc] = gTrig(mu0, S0, plant.angi);                  % represent angles 
+mm = [mu0; mm]; cc = S0*cc; ss = [S0 cc; cc' ss];         % in complex plane      
+policy.p.inputs = gaussian(mm(poli), ss(poli,poli), nc)'; % init. location of 
+                                                          % basis functions
+policy.p.targets = 0.1*randn(nc, length(policy.maxU));    % init. policy targets 
+                                                          % (close to zero)
+policy.p.hyp = log([1 0.7 0.7 1 0.01]');                  % initialize 
+                                                          % hyper-parameters
+
+
+
+% 5. Set up the cost structure
+cost.fcn = @loss_pendulum;                           % cost function
+cost.gamma = 1;                                      % discount factor
+cost.p = 1.0;                                        % length of pendulum
+cost.width = 0.5;                                    % cost function width
+cost.expl = 0;                                       % exploration parameter
+cost.angle = plant.angi;                             % angle variables in cost
+cost.target = [0 pi]';                               % target state
+
+
+% 6. Set up the GP dynamics model structure
+dynmodel.fcn = @gp0d;                % function for GP predictions
+dynmodel.train = @train;             % function to train dynamics model
+dynmodel.induce = zeros(300,0,1);    % shared inducing inputs (sparse GP)
+trainOpt = [300 500];                % defines the max. number of line searches
+                                     % when training the GP dynamics models
+                                     % trainOpt(1): full GP,
+                                     % trainOpt(2): sparse GP (FITC)
+                                     
+% 7. Parameters for policy optimization
+opt.length = 75;                         % max. number of line searches
+opt.MFEPLS = 30;                         % max. number of function evaluations
+                                         % per line search
+opt.verbosity = 1;                       % verbosity: specifies how much 
+                                         % information is displayed during
+                                         % policy learning. Options: 0-3
+opt.method = 'BFGS';                     % optimization algorithm. Options:
+                                         % 'BFGS' (default), 'LBFGS', 'CG'
+
+
+% 8. Plotting verbosity
+plotting.verbosity = 1;            % 0: no plots
+                                   % 1: some plots
+                                   % 2: all plots
+
+% 9. Some initializations
+x = []; y = [];
+fantasy.mean = cell(1,N); fantasy.std = cell(1,N);
+realCost = cell(1,N); M = cell(N,1); Sigma = cell(N,1);
\ No newline at end of file
diff --git a/examples/pendulum-matlab/runme.py b/examples/pendulum-matlab/runme.py
new file mode 100644
index 0000000..1d73c00
--- /dev/null
+++ b/examples/pendulum-matlab/runme.py
@@ -0,0 +1,13 @@
+import matlab.engine
+import os
+import urllib.request
+import zipfile
+
+if not os.path.isdir("pilcov0.9"):
+    print("Matlab implementation not found in current path.")
+    print("Attempting to download now")
+    urllib.request.urlretrieve("http://mlg.eng.cam.ac.uk/pilco/release/pilcoV0.9.zip", "pilcoV0.9.zip")
+    zip_ref = zipfile.ZipFile("pilcoV0.9.zip", 'r')
+    zip_ref.extractall("./")
+    zip_ref.close()
+    print("Done!")