HJReachability · dfridovi · Sep 5, 2021 · Sep 6, 2021 · Sep 6, 2021 · Sep 6, 2021
diff --git a/exec/one_player_reach_avoid_example/main.cpp b/exec/one_player_reach_avoid_example/main.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2021, The Regents of the University of California (Regents).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    1. Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *    3. Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Please contact the author(s) of this library if you have any questions.
+ * Authors: David Fridovich-Keil   ( [email protected] )
+ *          Jaime Fisac            ( [email protected] )
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Main GUI for OnePlayerReachAvoidExample.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <ilqgames/examples/one_player_reach_avoid_example.h>
+#include <ilqgames/gui/control_sliders.h>
+#include <ilqgames/gui/cost_inspector.h>
+#include <ilqgames/gui/top_down_renderer.h>
+#include <ilqgames/solver/augmented_lagrangian_solver.h>
+#include <ilqgames/solver/ilq_solver.h>
+#include <ilqgames/solver/problem.h>
+#include <ilqgames/utils/check_local_nash_equilibrium.h>
+#include <ilqgames/utils/solver_log.h>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <stdio.h>
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <imgui/imgui.h>
+#include <imgui/imgui_impl_glfw.h>
+#include <imgui/imgui_impl_opengl3.h>
+
+// Command-line flags (gflags). Each one is read in main() as FLAGS_<name>.
+//
+// Optional log saving and visualization.
+// --save gates the call to the solver log's Save(); --last_traj and
+// --experiment_name are forwarded to Save() and are only read when --save is
+// set. With --viz=false, main() returns right after the (optional) save and
+// never creates a window.
+DEFINE_bool(save, false,
+            "Optionally save logs to disk.");
+DEFINE_bool(viz, true, "Visualize results in a GUI.");
+DEFINE_bool(last_traj, false,
+            "Should the solver only dump the last trajectory?");
+DEFINE_string(experiment_name, "", "Name for the experiment.");
+
+// Regularization.
+// Copied into SolverParams::state_regularization and
+// SolverParams::control_regularization respectively.
+DEFINE_double(state_regularization, 1.0, "State regularization.");
+DEFINE_double(control_regularization, 2.0, "Control regularization.");
+
+// Linesearch parameters.
+// Copied into the SolverParams fields of the same name, except
+// --expected_decrease, which sets SolverParams::expected_decrease_fraction.
+DEFINE_bool(linesearch, false, "Should the solver linesearch?");
+DEFINE_double(initial_alpha_scaling, 1.0, "Initial step size in linesearch.");
+DEFINE_double(convergence_tolerance, 1e-3, "KKT squared error tolerance.");
+DEFINE_double(expected_decrease, 0.1, "KKT sq err expected decrease per iter.");
+
+// About OpenGL function loaders: modern OpenGL doesn't have a standard header
+// file and requires individual function pointers to be loaded manually. Helper
+// libraries are often used for this purpose! Here we are supporting a few
+// common ones: gl3w, glew, glad. You may use another loader/header of your
+// choice (glext, glLoadGen, etc.), or chose to manually implement your own.
+#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
+#include <GL/gl3w.h>  // Initialize with gl3wInit()
+#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
+#include <GL/glew.h>  // Initialize with glewInit()
+#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
+#include <glad/glad.h>  // Initialize with gladLoadGL()
+#else
+#include IMGUI_IMPL_OPENGL_LOADER_CUSTOM
+#endif
+
+// Include glfw3.h after our OpenGL definitions.
+#include <GLFW/glfw3.h>
+
+// Error hook registered with glfwSetErrorCallback(): prints the GLFW error
+// code and its human-readable description to stderr.
+static void glfw_error_callback(int error, const char* description) {
+  static const char kMessageFormat[] = "Glfw Error %d: %s\n";
+  fprintf(stderr, kMessageFormat, error, description);
+}
+
+// Entry point for the one-player reach-avoid example.
+//
+// Solves the problem with the augmented Lagrangian solver, optionally saves
+// the solver log (--save), and, unless --viz=false, displays the result in an
+// ImGui/GLFW window until that window is closed.
+//
+// Returns 0 on success, or 1 if GLFW, the window, or the OpenGL loader cannot
+// be initialized.
+int main(int argc, char** argv) {
+  const std::string log_file =
+      ILQGAMES_LOG_DIR + std::string("/one_player_reach_avoid_example.log");
+  google::SetLogDestination(0, log_file.c_str());
+  google::InitGoogleLogging(argv[0]);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  // Assigned after flag parsing, so this wins over any --logtostderr value
+  // given on the command line.
+  FLAGS_logtostderr = true;
+
+  // Configure the solver from the command-line flags. Note that open_loop is
+  // false: this example does not solve for an open-loop information pattern.
+  ilqgames::SolverParams params;
+  params.max_backtracking_steps = 100;
+  params.linesearch = FLAGS_linesearch;
+  params.expected_decrease_fraction = FLAGS_expected_decrease;
+  params.initial_alpha_scaling = FLAGS_initial_alpha_scaling;
+  params.convergence_tolerance = FLAGS_convergence_tolerance;
+  params.state_regularization = FLAGS_state_regularization;
+  params.control_regularization = FLAGS_control_regularization;
+  params.open_loop = false;
+
+  // Solve for feedback equilibrium. The timer starts before the problem is
+  // constructed, so the reported duration includes setup. A monotonic clock
+  // is used so that system clock adjustments cannot distort the measurement.
+  const auto start = std::chrono::steady_clock::now();
+  auto feedback_problem =
+      std::make_shared<ilqgames::OnePlayerReachAvoidExample>();
+  feedback_problem->Initialize();
+  ilqgames::AugmentedLagrangianSolver feedback_solver(feedback_problem, params);
+
+  // Solve the game.
+  LOG(INFO) << "Computing feedback solution.";
+  const auto log = feedback_solver.Solve();
+  const std::vector<std::shared_ptr<const ilqgames::SolverLog>> feedback_logs =
+      {log};
+  LOG(INFO) << "Solver completed in "
+            << std::chrono::duration<ilqgames::Time>(
+                   std::chrono::steady_clock::now() - start)
+                   .count()
+            << " seconds.";
+
+  // Store the final operating point and strategies back into the problem.
+  // The numerical local-Nash check below is currently disabled.
+  feedback_problem->OverwriteSolution(log->FinalOperatingPoint(),
+                                      log->FinalStrategies());
+  // is_local_opt = NumericalCheckLocalNashEquilibrium(
+  //     *feedback_problem, kMaxPerturbation, !kOpenLoop);
+  // if (is_local_opt)
+  //   LOG(INFO) << "Feedback solution is a local optimum.";
+  // else
+  //   LOG(INFO) << "Feedback solution is not a local optimum.";
+
+  // Dump the logs and/or exit.
+  if (FLAGS_save) {
+    if (FLAGS_experiment_name.empty()) {
+      CHECK(log->Save(FLAGS_last_traj));
+    } else {
+      CHECK(log->Save(FLAGS_last_traj, FLAGS_experiment_name));
+    }
+  }
+
+  // Create a top-down renderer, control sliders, and cost inspector.
+  if (!FLAGS_viz) return 0;
+  // Kept as an explicit `new`: the braced-list constructor argument cannot be
+  // forwarded through std::make_shared.
+  std::shared_ptr<ilqgames::ControlSliders> sliders(
+      new ilqgames::ControlSliders({feedback_logs}));
+  ilqgames::TopDownRenderer top_down_renderer(
+      sliders, {feedback_problem});
+  ilqgames::CostInspector cost_inspector(
+      sliders,
+      {feedback_problem->PlayerCosts()});
+  // std::shared_ptr<ilqgames::ControlSliders> sliders(
+  //     new ilqgames::ControlSliders({feedback_logs, feedback_logs}));
+  // ilqgames::TopDownRenderer top_down_renderer(
+  //     sliders, {feedback_problem, feedback_problem});
+  // ilqgames::CostInspector cost_inspector(
+  //     sliders, {feedback_problem->Solver().PlayerCosts(),
+  //               feedback_problem->Solver().PlayerCosts()});
+
+  // Setup window.
+  glfwSetErrorCallback(glfw_error_callback);
+  if (!glfwInit()) return 1;
+
+  // Decide GL+GLSL versions.
+#if __APPLE__
+  // GL 3.2 + GLSL 150.
+  const char* glsl_version = "#version 150";
+  glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+  glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
+  glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
+  glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);  // Required on Mac
+#else
+  // GL 3.0 + GLSL 130.
+  const char* glsl_version = "#version 130";
+  glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+  glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
+  // glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
+  // glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);            // 3.0+ only
+#endif
+
+  // Create window with graphics context. GLFW was initialized successfully
+  // above, so it must be terminated on every exit path from here on.
+  GLFWwindow* window = glfwCreateWindow(
+      1280, 720, "ILQGames: 1-Player Reachability Example", nullptr, nullptr);
+  if (window == nullptr) {
+    glfwTerminate();
+    return 1;
+  }
+  glfwMakeContextCurrent(window);
+  glfwSwapInterval(1);  // Enable vsync
+
+  // Initialize OpenGL loader
+#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
+  bool err = gl3wInit() != 0;
+#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
+  bool err = glewInit() != GLEW_OK;
+#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
+  bool err = gladLoadGL() == 0;
+#else
+  bool err = false;  // If you use IMGUI_IMPL_OPENGL_LOADER_CUSTOM, your loader
+                     // is likely to require some form of initialization.
+#endif
+  if (err) {
+    fprintf(stderr, "Failed to initialize OpenGL loader!\n");
+    // Release the window and GLFW before bailing out.
+    glfwDestroyWindow(window);
+    glfwTerminate();
+    return 1;
+  }
+
+  // Setup Dear ImGui context.
+  IMGUI_CHECKVERSION();
+  ImGui::CreateContext();
+
+  // Setup Dear ImGui style.
+  ImGui::StyleColorsDark();
+  // ImGui::StyleColorsClassic();
+
+  // Background color.
+  const ImVec4 clear_color =
+      ImVec4(213.0 / 255.0, 216.0 / 255.0, 226.0 / 255.0, 1.0f);
+
+  // Setup Platform/Renderer bindings
+  ImGui_ImplGlfw_InitForOpenGL(window, true);
+  ImGui_ImplOpenGL3_Init(glsl_version);
+
+  // Main loop: runs until the user closes the window.
+  while (!glfwWindowShouldClose(window)) {
+    // Poll and handle events (inputs, window resize, etc.).
+    glfwPollEvents();
+
+    // Start the Dear ImGui frame.
+    ImGui_ImplOpenGL3_NewFrame();
+    ImGui_ImplGlfw_NewFrame();
+    ImGui::NewFrame();
+
+    // Control sliders.
+    sliders->Render();
+
+    // Top down view.
+    top_down_renderer.Render();
+
+    // Cost inspector.
+    cost_inspector.Render();
+
+    // Rendering
+    ImGui::Render();
+    int display_w = 0;
+    int display_h = 0;
+    glfwMakeContextCurrent(window);
+    glfwGetFramebufferSize(window, &display_w, &display_h);
+    glViewport(0, 0, display_w, display_h);
+    glClearColor(clear_color.x, clear_color.y, clear_color.z, clear_color.w);
+    glClear(GL_COLOR_BUFFER_BIT);
+    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
+
+    glfwMakeContextCurrent(window);
+    glfwSwapBuffers(window);
+  }
+
+  // Cleanup
+  ImGui_ImplOpenGL3_Shutdown();
+  ImGui_ImplGlfw_Shutdown();
+  ImGui::DestroyContext();
+
+  glfwDestroyWindow(window);
+  glfwTerminate();
+
+  return 0;
+}
diff --git a/include/ilqgames/cost/player_cost.h b/include/ilqgames/cost/player_cost.h
@@ -46,6 +46,7 @@
 
 #include <ilqgames/constraint/constraint.h>
 #include <ilqgames/cost/cost.h>
+#include <ilqgames/cost/extreme_value_cost.h>
 #include <ilqgames/utils/operating_point.h>
 #include <ilqgames/utils/quadratic_cost_approximation.h>
 #include <ilqgames/utils/types.h>
@@ -69,10 +70,18 @@ class PlayerCost {
         cost_structure_(CostStructure::SUM),
         time_of_extreme_cost_(0) {}
 
+  // Reset regularizations: overwrite the stored state/control regularization
+  // value with `reg`. No validation is performed on `reg`.
+  void ResetStateRegularization(float reg) { state_regularization_ = reg; }
+  void ResetControlRegularization(float reg) { control_regularization_ = reg; }
+
   // Add new state and control costs for this player.
   void AddStateCost(const std::shared_ptr<Cost>& cost);
   void AddControlCost(PlayerIndex idx, const std::shared_ptr<Cost>& cost);
 
+  // For reach-avoid problems, two sets of state costs.
+  void SetTargetStateCost(const std::shared_ptr<ExtremeValueCost>& cost);
+  void SetFailureStateCost(const std::shared_ptr<ExtremeValueCost>& cost);
+
   // Add new state and control constraints. For now, they are only equality
   // constraints but later they should really be inequality constraints and
   // there should be some logic for maintaining sets of active constraints.
@@ -85,6 +94,8 @@ class PlayerCost {
   // state costs will be evaluated at the next time step.
   float Evaluate(Time t, const VectorXf& x,
                  const std::vector<VectorXf>& us) const;
+  float EvaluateTargetCost(Time t, const VectorXf& x) const;
+  float EvaluateFailureCost(Time t, const VectorXf& x) const;
   float Evaluate(const OperatingPoint& op, Time time_step) const;
   float Evaluate(const OperatingPoint& op) const;
   float EvaluateOffset(Time t, Time next_t, const VectorXf& next_x,
@@ -98,23 +109,40 @@ class PlayerCost {
   QuadraticCostApproximation QuadraticizeControlCosts(
       Time t, const VectorXf& x, const std::vector<VectorXf>& us) const;
 
-  // Set whether this is a time-additive, max-over-time, or min-over-time cost.
-  // At each specific time, all costs are accumulated with the given operation.
-  enum CostStructure { SUM, MAX, MIN };
+  // Set whether this is a time-additive, max/min-over-time, or reach-avoid
+  // problem. At each specific time, all costs are accumulated with the given
+  // operation.
+  enum CostStructure { SUM, MAX, MIN, REACH_AVOID };
   void SetTimeAdditive() { cost_structure_ = SUM; }
   void SetMaxOverTime() { cost_structure_ = MAX; }
   void SetMinOverTime() { cost_structure_ = MIN; }
+  void SetReachAvoid() { cost_structure_ = REACH_AVOID; }
   bool IsTimeAdditive() const { return cost_structure_ == SUM; }
   bool IsMaxOverTime() const { return cost_structure_ == MAX; }
   bool IsMinOverTime() const { return cost_structure_ == MIN; }
+  bool IsReachAvoid() const { return cost_structure_ == REACH_AVOID; }
 
-  // Keep track of the time of extreme costs.
-  size_t TimeOfExtremeCost() { return time_of_extreme_cost_; }
+  // Keep track of the time of extreme and critical costs. (Critical times are
+  // used in reach-avoid problems: they are the times, for each player, at which
+  // that player's value function does not depend upon the future.)
+  size_t TimeOfExtremeCost() const { return time_of_extreme_cost_; }
   void SetTimeOfExtremeCost(size_t kk) { time_of_extreme_cost_ = kk; }
 
   // Accessors.
-  const PtrVector<Cost>& StateCosts() const { return state_costs_; }
+  // Returns all state costs for this player: the regular state costs followed
+  // by the reach-avoid target and failure costs, when those are set. The list
+  // is assembled on each call and therefore returned by value.
+  PtrVector<Cost> StateCosts() const {
+    PtrVector<Cost> all(state_costs_);
+    // Skip unset (empty) target/failure pointers so that every entry handed
+    // back to the caller can be dereferenced.
+    // NOTE(review): presumably these stay empty unless SetTargetStateCost() /
+    // SetFailureStateCost() is called -- confirm against the constructor.
+    if (target_state_cost_) all.push_back(target_state_cost_);
+    if (failure_state_cost_) all.push_back(failure_state_cost_);
+    return all;
+  }
   const PlayerPtrMultiMap<Cost>& ControlCosts() const { return control_costs_; }
+  // Reach-avoid target/failure costs, returned as references to the stored
+  // shared pointers.
+  // NOTE(review): presumably empty until SetTargetStateCost() /
+  // SetFailureStateCost() has been called -- confirm before dereferencing.
+  const std::shared_ptr<ExtremeValueCost>& TargetStateCost() const {
+    return target_state_cost_;
+  }
+  const std::shared_ptr<ExtremeValueCost>& FailureStateCost() const {
+    return failure_state_cost_;
+  }
   const PtrVector<Constraint>& StateConstraints() const {
     return state_constraints_;
   }
@@ -133,13 +161,17 @@ class PlayerCost {
   PtrVector<Cost> state_costs_;
   PlayerPtrMultiMap<Cost> control_costs_;
 
+  // Reach-avoid-specific target/failure costs.
+  std::shared_ptr<ExtremeValueCost> target_state_cost_;
+  std::shared_ptr<ExtremeValueCost> failure_state_cost_;
+
   // State and control constraints
   PtrVector<Constraint> state_constraints_;
   PlayerPtrMultiMap<Constraint> control_constraints_;
 
   // Regularization on costs.
-  const float state_regularization_;
-  const float control_regularization_;
+  float state_regularization_;
+  float control_regularization_;
 
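-  // Ternary variable whether this objective is time-additive, max-over-time, or
-  // min-over-time.
+  // Whether this objective is time-additive, max-over-time, min-over-time, or
+  // reach-avoid.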