From f982579f7170bd19e1b85d1bf0f503c4d948f4a9 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 28 Apr 2024 21:23:02 +0200 Subject: [PATCH] Update and cleanup intro notebook --- notebooks/nb_10_Introduction.ipynb | 721 +++++++---------------------- 1 file changed, 173 insertions(+), 548 deletions(-) diff --git a/notebooks/nb_10_Introduction.ipynb b/notebooks/nb_10_Introduction.ipynb index e9e4f36..16ca406 100644 --- a/notebooks/nb_10_Introduction.ipynb +++ b/notebooks/nb_10_Introduction.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "editable": true, "init_cell": true, @@ -18,16 +18,12 @@ "outputs": [], "source": [ "%%capture\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "%matplotlib inline\n", - "%load_ext training_ml_control\n", - "%set_random_seed 12" + "%load_ext training_ml_control" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "init_cell": true, "scene__Default Scene": true, @@ -39,210 +35,9 @@ "ActiveScene" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%presentation_style" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "init_cell": true, - "slideshow": { - "slide_type": "skip" - }, - "tags": [ - "remove-cell" - ] - }, "outputs": [], "source": [ - "%autoreload\n", - "import warnings\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import seaborn as sns\n", - "\n", - "from training_ml_control.environment import (\n", - " create_inverted_pendulum_environment,\n", - " simulate_environment,\n", - ")\n", - "from training_ml_control.nb_utils import show_video\n", - "\n", - "warnings.simplefilter(\"ignore\", UserWarning)\n", - "sns.set_theme()\n", - "plt.rcParams[\"figure.figsize\"] = [12, 8]" + "%presentation_style" ] }, { @@ -281,19 +76,21 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "fragment" } }, "source": [ - "Control theory is a field of control engineering and applied mathematics that deals with influencing the behavior of dynamical systems. The objective is to drive a system towards a desired state by calculating and applying system inputs, while minimizing delay, overshoot, steady-state error, and ensuring stability. The aim is often to achieve a degree of optimal and robust control performance in the presence of uncertainty. " + "Control theory is a field of control engineering and applied mathematics that deals with influencing the behavior of dynamical systems. The objective is to drive a system towards a desired state by calculating and applying system inputs, while minimizing errors and taking into considerations any additional constraints (e.g. overshoot), and ensuring stability. The aim is often to achieve a degree of optimal and robust control performance in the presence of uncertainty. " ] }, { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { - "slide_type": "subslide" + "slide_type": "fragment" } }, "source": [ @@ -302,17 +99,7 @@ "- **Desired behavior** The target behavior must be clearly defined as part of the control design problem. 
Common examples include tracking a reference trajectory, regulating around a setpoint, or optimizing some performance index.\n", "- **Feasibility** The control solution must satisfy constraints on the available inputs, actuator capabilities, safety limits, etc. The controller must be realizable with available technology.\n", "- **Uncertainty** Precise knowledge of the system is rarely possible. There will always be some uncertainty in the model parameters, unmodeled dynamics, disturbances, and measurements.\n", - "- **Action** Control action is applied through manipulated inputs that command the system actuators. The choice of manipulated inputs and pairing with actuators is a key design decision." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ + "- **Action** Control action is applied through manipulated inputs that command the system actuators. The choice of manipulated inputs and pairing with actuators is a key design decision.\n", "- **Disturbances** Real systems experience unknown external disturbances that affect the system behavior and must be accounted for. Rejecting disturbances is often a key control objective.\n", "- **Approximate behavior** Due to uncertainty, no controller can achieve perfect setpoint tracking or disturbance rejection. There will always be some approximation error. Controllers must be designed to achieve some acceptable level of performance in spite of these challenges.\n", "- **Measurements** Measuring the system outputs is essential for closing the feedback loop and allowing the controller to determine the effect of its inputs on the system behavior. Noise on measurements must also be accounted for." @@ -321,23 +108,30 @@ { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } }, "source": [ - "In this training, we will focus on classical control methods applied to linear time-invariant (LTI) systems. We will model an inverted pendulum system, estimate its parameters, analyze stability/controllability/observability, design a state observer, and synthesize PID and LQR controllers. These tools will demonstrate how to design controllers that are robust to uncertainty and reject disturbances for stable control.\n", + "In this training, we will focus on optimal control methods applied to linear and non-linear systems. Through the use of practical examples with a cart (double-integrator) system and an inverted pendulum system, you'll learn how to design controllers that achieve optimal performance.\n", "\n", "This training is structured as follows:\n", "\n", - "- We will start with a short introduction to Control Theory and its different branches.\n", - "\n", - "- We will then" + "- Introduction to Control Theory, planning and optimal control.\n", + "- Dynamic Programming.\n", + "- Linear Quadratic Regulator (LQR).\n", + "- Model Predictive Control (MPC).\n", + "- Monte-Carlo Tree Search (MCTS).\n", + "- Machine Learning in Control.\n", + "- Safe Learning Control." ] }, { "cell_type": "markdown", "metadata": { + "editable": true, + "jp-MarkdownHeadingCollapsed": true, "slideshow": { "slide_type": "slide" } @@ -382,14 +176,13 @@ " \n", "- **Modern Control**\n", "\n", - " deals with the behavior of linear or non-linear dynamical systems with inputs, and how their behavior is modified by feedback, using the state-space representation as a basic tool to model such systems. 
It can deal with multiple-input and multiple-output (**MIMO**) systems.\n", - " \n", - " Optimal, adaptive and robust control theories come under this division." + " deals with the behavior of linear or non-linear dynamical systems with inputs, and how their behavior is modified by feedback, using the state-space representation as a basic tool to model such systems. It can deal with multiple-input and multiple-output (**MIMO**) systems." ] }, { "cell_type": "markdown", "metadata": { + "editable": true, "slideshow": { "slide_type": "subslide" } @@ -416,11 +209,17 @@ } }, "source": [ - "# Control Theory and Machine Learning\n", + "# Control Systems Classification\n", "\n", - "Modern machine learning and control theory share deep theoretical connections. Framing machine learning problems as dynamical systems, which we refer to as **control theory for machine learning**, opens up new ways to analyze neural network training and design adaptive controllers.\n", + "There are two types of control loops:\n", "\n", - "Conversely, we can use machine learning to help solve large and complex control problems, which we refer to as **machine learning for control theory**." + "- **Open-loop control (feedforward)**\n", + "\n", + " An open-loop control system operates without feedback, which means that the output is not measured or compared to the desired input. They are simple and inexpensive to implement. They are often used in systems where the output does not need to be precisely controlled. For example, a washing machine may use an open-loop control system to regulate the water level.\n", + "\n", + "- **Closed-loop control (feedback)**\n", + "\n", + " A closed-loop control system, on the other hand, operates with feedback, meaning that the output is measured, and corrective action is taken to ensure it always matches the desired input. They are more complex and expensive to implement. However, they offer greater precision and accuracy in controlling the system's output. Closed-loop control systems are often used in critical applications, such as aerospace engineering or medical devices" ] }, { @@ -431,33 +230,33 @@ } }, "source": [ - "## Control Theory for Machine Learning\n", - "\n", - "Control theory provides key concepts to guide the development of machine learning algorithms.\n", - "\n", - "- Viewing neural networks like deep residual networks (ResNet) as dynamical systems allows control stability and optimality principles to ensure robust training.\n", - "- Framing learning as an optimization problem enables control techniques like differential dynamic programming to improve convergence of algorithms like stochastic gradient descent.\n", - "- The need to balance exploration and exploitation in reinforcement learning is addressed by stochastic optimal control theory.\n", + "## Types of Systems\n", "\n", - "Overall, control theory provides a rigorous mathematical framework to guarantee crucial learning properties. The system dynamics perspective further allows the training process itself to be controlled for faster convergence. Bridging machine learning with concepts from control is leading to new theories and training methods with stability and optimality guarantees." + "- Time-Invariant (TI) or Time-Variant (TV).\n", + "- Linear or Non-Linear.\n", + "- Continuous-time or Discrete-time.\n", + "- Deterministic or Stochastic." 
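,
    "\n",
    "As a small illustration of these categories (a sketch, not part of the original notebook): the cart (double-integrator) example used later in this training is deterministic, linear and time-invariant, and once discretized it becomes the discrete-time system $x_{k+1} = A x_k + B u_k$. The sampling time and unit mass below are assumptions made for the sketch:\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "\n",
    "dt = 0.1  # assumed sampling time in seconds\n",
    "A = np.array([[1.0, dt], [0.0, 1.0]])  # state: cart position and velocity\n",
    "B = np.array([0.5 * dt**2, dt])        # effect of the applied force (unit mass assumed)\n",
    "\n",
    "x = np.array([0.0, 0.0])  # start at rest at the origin\n",
    "for k in range(50):\n",
    "    u = 1.0            # constant force input, chosen arbitrarily\n",
    "    x = A @ x + B * u  # x_{k+1} = A x_k + B u_k\n",
    "print(x)  # cart position and velocity after 50 steps\n",
    "```\n",
    "\n",
    "Making $A$ and $B$ depend on the step $k$ would give a time-variant system, and adding process noise to the update would make it stochastic."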
] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "slide" } }, "source": [ - "## Machine Learning for Control Theory\n", + "# Controller Design\n", "\n", - "Modern machine learning provides useful tools and perspectives for control theory. Framing control problems as data modeling tasks enables powerful function approximation, estimation, and optimization techniques from machine learning to be applied. For example:\n", + "1. Problem Formulation\n", + "2. Modeling\n", + " 1. Define a mathematical model that represents the system.\n", + " 2. Determine properties of this system: Identifiability, Stability, Observability and Controllability.\n", "\n", - "- Neural networks can learn to approximate complex dynamics for model predictive control (MPC).\n", - "- Reinforcement learning explores optimal policies like dynamic programming to control complex robots.\n", - "- Kernel methods enable non-parametric system identification without relying on predefined model structures.\n", - "- The Koopman operator is a data-driven tool to infer properties and facilitate control of unknown nonlinear systems." + " 3. Determine model's parameters, if they're not known already.\n", + " 4. (Optional) Linearize model around operating point.\n", + " 5. (Optional) If it's a continuous-time system and we're using a digital controller,\n", + " discretize it to obtain a discrete-time system." ] }, { @@ -468,26 +267,27 @@ } }, "source": [ - "Beyond specific techniques, a machine learning viewpoint focuses on what can be learned from data about a control system's unknown dynamics. This data-driven approach is key for adaptive and nonlinear control of complex systems.\n", + "3. Control Design\n", "\n", - "If you want to learn to learn more about this specific topic, consider attending our [Machine Learning Control](https://transferlab.ai/trainings/planning-and-control/) training." + " - Design a controller to stabilize the system.\n", + "4. Evaluation\n", + " 1. Simulate the closed-loop system in order to validate the controller design.\n", + " 2. Use controller with actual system." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "slide" } }, "source": [ - "## Control Theory and Reinforcement Learning\n", - "\n", - "The predominant sub-field of Machine Learning is Supervised Learning. Its goal is make the output of the model mimic the labels y given in the training set. In that setting, the labels gave an unambiguous **right answer** for each of the inputs.\n", + "# Control Theory and Machine Learning\n", "\n", - "In Reinforcement Learning (RL), we do not have labels for the inputs and instead have to rely on a **reward function**, which indicates to the learning agent (i.e. model) when it is doing well, and when it is doing poorly.\n", + "Modern machine learning and control theory share deep theoretical connections. Framing machine learning problems as dynamical systems, which we refer to as **control theory for machine learning**, opens up new ways to analyze neural network training and design adaptive controllers.\n", "\n", - "RL studies how to use past data (experience) to enhance (learning) the future manipulation of a system, which is precisely the scope of Control Theory. Despite that, the two communities have remained disjointed and that has led to the co-development of vastly different approaches to the same problems." 
+ "Conversely, we can use machine learning to help solve large and complex control problems, which we refer to as **machine learning for control theory**." ] }, { @@ -498,113 +298,85 @@ } }, "source": [ - "The main differences between the two lie in how the system is modeled and the approaches taken to design controllers/agents:\n", + "## Control Theory for Machine Learning\n", "\n", - "- In Control Theory, we explicitly model the system using knowledge about the equations governing its behaviour, by estimating the parameters of such equations or by fitting a model on measurements from the system.\n", - "- Whereas in RL, we do not generally model the system and instead learn directly the agent that maximizes the expected reward while interacting with the environment." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ":::{figure} _static/images/10_feedback_block_diagram.svg\n", - "---\n", - "name: feedback-control-block\n", - "---\n", - "Feedback Control in Control Engineering\n", + "Control theory provides key concepts to guide the development of machine learning algorithms.\n", + "\n", + "- Viewing neural networks like deep residual networks (ResNet) as dynamical systems allows control stability and optimality principles to ensure robust training.\n", + "- Using state-space models (SSMs) in neural networks for long-range sequence modelling. Structure state-space sequence (S4) model or the more recent Mamba are examples of this.\n", + "\n", + ":::{figure} _static/images/10_state_space_model_sequence.png\n", + ":width: 60%\n", + "Sequence modeling using state-space models (SSMs) {cite}`gu_efficiently_2022`.\n", ":::\n", "\n", - ":::{figure} _static/images/10_reinforcement_learning_block_diagram.svg\n", - "---\n", - "name: rl-block\n", - "---\n", - "Feedback Control in Reinforcement Learning\n", - ":::" + "- Framing learning as an optimization problem enables control techniques like differential dynamic programming to improve convergence of algorithms like stochastic gradient descent.\n", + "- The need to balance exploration and exploitation in reinforcement learning is addressed by stochastic optimal control theory.\n", + "\n", + "Overall, control theory provides a rigorous mathematical framework to guarantee crucial learning properties. The system dynamics perspective further allows the training process itself to be controlled for faster convergence. Bridging machine learning with concepts from control is leading to new theories and training methods with stability and optimality guarantees." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "fragment" + "slide_type": "subslide" } }, "source": [ - "### Terminology\n", + "## Machine Learning for Control Theory\n", "\n", - "Here are a list of terms commonly used in Reinforcement Learning, and their control counterparts:\n", + "Modern machine learning provides useful tools and perspectives for control theory. Framing control problems as data modeling tasks enables powerful function approximation, estimation, and optimization techniques from machine learning to be applied. For example:\n", + "\n", + "- Gaussian process based model learning can be used to improve the predictions of a system's nominal model with data.\n", "\n", - "
    \n", - "
1. **Environment** = System.\n",
    "2. **Agent (Policy)** = Controller or Regulator.\n",
    "3. **Action** = Decision or Control.\n",
    "4. **Observation** = Measurement.\n",
    "5. **Reward** = (Opposite of) Cost.
" + ":::{figure} _static/images/70_learning_based_mpc_gp.png\n", + ":width: 70%\n", + "Gaussian process–based MPC for autonomous racing. (b,c) The resulting trajectories of a similar approach applied to miniature radio-controlled cars, with the initial nominal controller shown in panel b and the improved trajectories after learning shown in panel c {cite}`hewing_learningbased_2020`.\n", + ":::\n", + "\n", + "- Neural networks can learn to approximate complex dynamics for model-predictive control (MPC).\n", + "- Reinforcement learning explores optimal policies like dynamic programming to control complex robots.\n", + "- Kernel methods enable non-parametric system identification without relying on predefined model structures.\n", + "- The Koopman operator is a data-driven tool to infer properties and facilitate control of unknown nonlinear systems." ] }, { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "If you want to learn about reinforcement learning, consider attending our [Safe and efficient deep reinforcement learning](https://transferlab.ai/trainings/intro-rl/) training." + "%%html\n", + "" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "slide" + "slide_type": "subslide" } }, "source": [ - "# System" + "Beyond specific techniques, a machine learning viewpoint focuses on what can be learned from data about a control system's unknown dynamics. This data-driven approach is key for adaptive and nonlinear control of complex systems." ] }, { "cell_type": "markdown", "metadata": { - "editable": true, "slideshow": { "slide_type": "subslide" } }, "source": [ - "## Inverted Pendulum\n", + "## Control Theory and Reinforcement Learning\n", "\n", - ":::{figure} _static/images/10_inverted_pendulum_photo.png\n", - "---\n", - "name: inverted-pendulum\n", - "---\n", - "Balancing cart, a simple robotics system circa 1976 - [Wikipedia](https://en.wikipedia.org/wiki/Inverted_pendulum).\n", - ":::\n", + "The predominant sub-field of machine learning is supervised learning. Its goal is make the output of the model mimic the labels $y$ given in the training set. In that setting, the labels give an unambiguous **right answer** for each of the inputs.\n", "\n", - "An inverted pendulum is a pendulum that has its center of mass above its pivot point. It is unstable and without additional help will fall over.\n", + "In reinforcement learning (RL), we do not have labels for the inputs and instead have to rely on a **reward function**, which indicates to the learning agent (i.e. model) when it is doing well, and when it is doing poorly.\n", "\n", - "The inverted pendulum is a classic problem in dynamics and control theory and is used as a benchmark for testing control strategies. It is often implemented with the pivot point mounted on a cart that can move horizontally under control of an electronic servo system as shown in the image." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [ - "remove-input" - ] - }, - "outputs": [], - "source": [ - "%%html\n", - "" + "RL studies how to use past data (experience) to enhance (learning) the future manipulation of a system, which is precisely the scope of Control Theory. 
Despite that, the two communities have remained disjointed and that has led to the co-development of vastly different approaches to the same problems." ] }, { @@ -615,135 +387,68 @@ } }, "source": [ - "For the simulation we will use a modified version of the [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/) environment from [gymnasium](https://gymnasium.farama.org/).\n", - "\n", - "It has the following possible action and observations:\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "

| Num | Action | Control Min | Control Max |\n",
    "|-----|--------|-------------|-------------|\n",
    "| 0 | Force applied on the cart | -10 | 10 |\n",
    "\n",
    "| Num | Observation | Min | Max |\n",
    "|-----|-------------|-----|-----|\n",
    "| 0 | position of the cart along the linear surface | -3.0 | 3.0 |\n",
    "| 1 | linear velocity of the cart | -Inf | Inf |\n",
    "| 2 | vertical angle of the pole on the cart | -24 | 24 |\n",
    "| 3 | angular velocity of the pole on the cart | -Inf | Inf |
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "remove-input" - ] - }, - "outputs": [], - "source": [ - "env = create_inverted_pendulum_environment(max_steps=200, theta_threshold=np.inf)\n", - "results = simulate_environment(env)\n", - "show_video(results.frames, fps=1 / env.dt)" + "The main differences between the two lie in how the system is modeled and the approaches taken to design controllers/agents:\n", + "\n", + "- In **control theory**:\n", + "\n", + " - we explicitly model the system using knowledge about the equations governing its behaviour, by estimating the parameters of such equations or by fitting a model on measurements from the system.\n", + " - we synthesize a controller by **minimizing** a cost function, in the case of optimal control. \n", + " \n", + "- Whereas in **reinforcement learning**:\n", + "\n", + " - we do have to model the system and instead can directly learn the agent that maximizes the expected reward while interacting with the environment.\n", + " - we train an agent by **maximing** a reward function." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "# Control Systems Classification\n", - "\n", - "There are two types of control loops:\n", - "\n", - "- **Open-loop control (feedforward)**\n", - "\n", - " An open-loop control system operates without feedback, which means that the output is not measured or compared to the desired input. They are simple and inexpensive to implement. They are often used in systems where the output does not need to be precisely controlled. For example, a washing machine may use an open-loop control system to regulate the water level.\n", - "\n", - "- **Closed-loop control (feedback)**\n", + ":::{figure} _static/images/10_feedback_block_diagram.svg\n", + "---\n", + "name: feedback-control-block\n", + "---\n", + "Feedback Control in Control Engineering\n", + ":::\n", "\n", - " A closed-loop control system, on the other hand, operates with feedback, meaning that the output is measured, and corrective action is taken to ensure it always matches the desired input. They are more complex and expensive to implement. However, they offer greater precision and accuracy in controlling the system's output. Closed-loop control systems are often used in critical applications, such as aerospace engineering or medical devices" + ":::{figure} _static/images/10_reinforcement_learning_block_diagram.svg\n", + "---\n", + "name: rl-block\n", + "---\n", + "Feedback Control in Reinforcement Learning\n", + ":::" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "## Types of Systems\n", + "The two are not mutually exclusive. For example, enforcing safety constraints when using reinforcement learning can be achieved by combining it with model-predictive control (MPC).\n", "\n", - "- Time-Invariant (TI) or Time-Variant (TV).\n", - "- Linear or Non-Linear.\n", - "- Continuous-time or Discrete-time.\n", - "- Deterministic or Stochastic." 
+ ":::{figure} _static/images/70_safety_filter.svg\n", + ":width: 50%\n", + "Based on the current state $x$, a learning-based controller provides an input\n", + "$u_L = \\pi_L(x) \\in \\mathbb{R}^m$, which is processed by the safety filter $u = \\pi_S(x, u_S)$ and applied to the real system {cite}`hewing_learningbased_2020`.\n", + ":::" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "slide" + "slide_type": "fragment" } }, "source": [ - "# Controller Design\n", + "### Terminology\n", "\n", - "1. Problem Formulation\n", - "2. Modeling\n", - " 1. Define a mathematical model that represents the system.\n", - " 2. Determine properties of this system: Identifiability, Stability, Observability and Controllability.\n", + "Here are a list of terms commonly used in Reinforcement Learning, and their control counterparts:\n", "\n", - " 3. Determine model's parameters, if they're not known already.\n", - " 4. (Optional) Linearize model around operating point.\n", - " 5. (Optional) If it's a continuous-time system and we're using a digital controller,\n", - " discretize it to obtain a discrete-time system." + "1. **Environment** = System.\n", + "1. **Agent (Policy)** = Controller or Regulator.\n", + "1. **Action $a$** = Decision or Control $u$.\n", + "1. **Observation** = Measurement.\n", + "1. **Reward $r$** = (Opposite of) Cost $c$." ] }, { @@ -754,12 +459,7 @@ } }, "source": [ - "3. Control Design\n", - "\n", - " - Design a controller to stabilize the system.\n", - "4. Evaluation\n", - " 1. Simulate the closed-loop system in order to validate the controller design.\n", - " 2. Use controller with actual system." + "If you want to learn about reinforcement learning, consider attending our [Safe and efficient deep reinforcement learning](https://transferlab.ai/trainings/intro-rl/) training." ] }, { @@ -799,7 +499,7 @@ "source": [ "### State-Transition Systems\n", "\n", - "A state-transition system is a 3-tuple $\\Sigma = (S,A,\\gamma)$, where:\n", + "A state-transition system is a 3-tuple $\\Sigma = (S, U,\\gamma)$, where:\n", "\n", "- $S = \\{s_1,s_2,\\dots\\}$ is a finite or recursively enumerable set of states.\n", "- $U = \\{u_1,u_2,\\dots\\}$ is a finite or recursively enumerable set of actions.\n", @@ -876,22 +576,9 @@ "source": [ "# Optimal Control\n", "\n", - "Optimal control theory is a branch of control theory that deals with finding a control for a dynamical system over a period of time such that an objective function is optimized. The fundamental idea in optimal control is to formulate the goal of control as the long-term optimization of a scalar cost function." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "```{figure} _static/images/10_optimal_control_problem.png\n", - ":width: 80%\n", - "Deterministic N-stage optimal control problem.\n", - "```" + "Optimal control theory is a branch of control theory that deals with finding a control for a dynamical system over a period of time such that an objective function is optimized. The fundamental idea in optimal control is to formulate the goal of control as the long-term optimization of a scalar cost function as opposed to formulating the objective as direct constraints on the system's behaviour (e.g. 
overshoot) as done in classical control.\n", + "\n", + "The optimal control problem is to find a control $u^* \\in \\mathbf{U}$ which causes the system $\\dot{x}(t) = f(x(t), u(t))$ to follow a trajectory $x^* \\in \\mathbf{X}$ that minimizes the cost (performance measure)." ] }, { @@ -903,9 +590,6 @@ } }, "source": [ - "The optimal control problem is to find a control $u^* \\in \\mathbf{U}$ which causes the system $\\dot{x}(t) = f(x(t), u(t))$ to follow a trajectory $x^* \\in \\mathbf{X}$ that minimizes the cost (performance measure):\n", - "\n", - "\n", "### Continuous-time\n", "\n", "$$\n", @@ -916,18 +600,8 @@ "& x(t) \\in \\mathbf{X} , \\forall t \\in [0, T] & \\text{(state constraints)}\\\\\n", "& u(t) \\in \\mathbf{U}, \\forall t \\in [0, T] & \\text{(input constraints)}\\\\\n", "\\end{array}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ + "$$\n", + "\n", "### Discrete-time\n", "\n", "$$\n", @@ -943,65 +617,31 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "### Finite Horizon \n", - "\n", - "- Continuous-time:\n", + "### Variants\n", "\n", - "$$\n", - "J_{0}(x_0, u) = g_T(x(T)) + \\int \\limits_{0}^{T} g(x(t), u(t)) dt\n", - "$$\n", + "There are many variants of the optimal control problem:\n", "\n", - "- Discrete-time:\n", + "- Finite Horizon:\n", "\n", "$$\n", - "J_0(x_0, u) = g_N(x_N) + \\sum \\limits_{k = 0}^{N-1} g_k(x_k, u_k)\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Infinite Horizon\n", + "J_0(x_0, u) = c_N(x_N) + \\sum \\limits_{k = 0}^{N-1} c_k(x_k, u_k)\n", + "$$\n", "\n", - "- Continuous-time:\n", + "- Infinite Horizon:\n", "\n", "$$\n", - "J(x_0, u) = \\int \\limits_{0}^{\\infty} g(x(t), u(t)) dt\n", + "J(x_0, u) = \\sum \\limits_{k = 0}^{\\infty} c_k(x_k, u_k)\n", "$$\n", "\n", - "- Discrete-time:\n", + "- Stochastic finite horizon:\n", "\n", "$$\n", - "J(x_0, u) = \\sum \\limits_{k = 0}^{\\infty} g_k(x_k, u_k)\n", + "J(x_0, u) = E\\left[\\sum \\limits_{k=0}^{T} c_k(x_k, u_k)\\right]\n", "$$" ] }, - { - "cell_type": "markdown", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "```{figure} _static/images/10_transition_graph_discrete_system.png\n", - ":width: 80%\n", - "Transition graph for a deterministic discrete system.\n", - "```" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1073,7 +713,7 @@ ":width: 80%\n", ":align: center\n", "Classification of different methods to solve optimal control problems and related formulations and\n", - "solution algorithms{cite}`biral_notes_2016`\n", + "solution algorithms {cite}`biral_notes_2016`\n", "```" ] }, @@ -1098,48 +738,33 @@ }, { "cell_type": "markdown", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - ":::{exercise}\n", - "\n", - "```{figure} _static/images/10_car_cost_exercise.svg\n", - ":width: 60%\n", - ":align: center\n", - "Simplified model of a car moving at velocity $v$ on a straight road. {cite}`tedrake_underactuated_2023`\n", - "```\n", + "```{exercise} RC-Circuit Exercise\n", + ":label: rc-circuit-exercise\n", + ":width: 80%\n", "\n", - "Given an autonomous car moving at constant velocity $v$ on a straight road. 
Let $x$ be the (longitudinal) position of the car along the road, $y$ its (transversal) distance from the centerline, and $\\theta$ the angle between the centerline and the direction of motion. The only control action is the steering velocity $v$, which is constrained in the interval (where and ). We describe the car dynamics with the simple kinematic model\n", - "Let be the state vector. To optimize the car trajectory we consider a quadratic objective function\n", - "where is a constant positive-semidefinite (hence symmetric) matrix and is a constant nonnegative scalar (note that is allowed here).\n", + "Given the following RC circuit with an external voltage source:\n", "\n", - "- Suppose our only goal is to keep the distance between the car and the centerline as small as possible, as fast as possible, without worrying about anything else. What would be your choice for $Q$ and $R$?\n", - "- Suppose that we additionally want to limit the speed at which the car moves. What would be your choice for $Q$ and $R$?\n", - ":::" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - }, - "tags": [ - "remove-cell" - ] - }, - "source": [ - ":::{figure} ./_static/images/aai-institute-cover.png\n", - ":width: 90%\n", - ":align: center\n", + ":::{figure} _static/images/10_rc_circuit.svg\n", "---\n", - "name: aai-institute\n", + "name: rc-circuit\n", "---\n", - ":::" + "Schematic created using [CircuitLab](https://www.circuitlab.com)\n", + ":::\n", + "\n", + "The differential equation governing the charge of capacitor is given by:\n", + "\n", + "$$\\frac{d y(t)}{dt} + y(t) = u(t)$$\n", + "\n", + "\n", + "with $y(0) = 0$ i.e. the capacitor is uncharged at $t=0$\n", + "\n", + "### Questions:\n", + "\n", + "- Suppose our only goal is to charge the capacitor as quickly as possible without worrying about anything else. What would be your choice for a cost function?\n", + "- Suppose that we additionally want to limit the current running throught the circuit. What would then be your choice for a cost function?\n", + "```" ] } ],