Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Time consistent reach avoid implementation #63

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
279 changes: 279 additions & 0 deletions exec/one_player_reach_avoid_example/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
/*
* Copyright (c) 2021, The Regents of the University of California (Regents).
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Please contact the author(s) of this library if you have any questions.
* Authors: David Fridovich-Keil ( [email protected] )
* Jaime Fisac ( [email protected] )
*/

///////////////////////////////////////////////////////////////////////////////
//
// Main GUI for OnePlayerReachAvoidExample.
//
///////////////////////////////////////////////////////////////////////////////

#include <ilqgames/examples/one_player_reach_avoid_example.h>
#include <ilqgames/gui/control_sliders.h>
#include <ilqgames/gui/cost_inspector.h>
#include <ilqgames/gui/top_down_renderer.h>
#include <ilqgames/solver/augmented_lagrangian_solver.h>
#include <ilqgames/solver/ilq_solver.h>
#include <ilqgames/solver/problem.h>
#include <ilqgames/utils/check_local_nash_equilibrium.h>
#include <ilqgames/utils/solver_log.h>

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <stdio.h>
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <imgui/imgui.h>
#include <imgui/imgui_impl_glfw.h>
#include <imgui/imgui_impl_opengl3.h>

// Command-line flags (gflags). Each one is read in main() as FLAGS_<name>.

// Optional log saving and visualization.
// --save gates writing the solver log; --last_traj and --experiment_name are
// forwarded to the log's Save() call. With --viz=false, main() returns before
// any window is created.
DEFINE_bool(save, false,
"Optionally save logs to disk.");
DEFINE_bool(viz, true, "Visualize results in a GUI.");
DEFINE_bool(last_traj, false,
"Should the solver only dump the last trajectory?");
DEFINE_string(experiment_name, "", "Name for the experiment.");

// Regularization.
// Copied into SolverParams::state_regularization / control_regularization.
DEFINE_double(state_regularization, 1.0, "State regularization.");
DEFINE_double(control_regularization, 2.0, "Control regularization.");

// Linesearch parameters.
// Copied into SolverParams: linesearch, initial_alpha_scaling,
// convergence_tolerance, and expected_decrease_fraction respectively.
DEFINE_bool(linesearch, false, "Should the solver linesearch?");
DEFINE_double(initial_alpha_scaling, 1.0, "Initial step size in linesearch.");
DEFINE_double(convergence_tolerance, 1e-3, "KKT squared error tolerance.");
DEFINE_double(expected_decrease, 0.1, "KKT sq err expected decrease per iter.");

// About OpenGL function loaders: modern OpenGL doesn't have a standard header
// file and requires individual function pointers to be loaded manually. Helper
// libraries are often used for this purpose! Here we are supporting a few
// common ones: gl3w, glew, glad. You may use another loader/header of your
// choice (glext, glLoadGen, etc.), or chose to manually implement your own.
#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
#include <GL/gl3w.h> // Initialize with gl3wInit()
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
#include <GL/glew.h> // Initialize with glewInit()
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
#include <glad/glad.h> // Initialize with gladLoadGL()
#else
#include IMGUI_IMPL_OPENGL_LOADER_CUSTOM
#endif

// Include glfw3.h after our OpenGL definitions.
#include <GLFW/glfw3.h>

// GLFW error hook: writes the numeric error code and its textual description
// to stderr on a single line.
static void glfw_error_callback(int error, const char* description) {
  static const char kMessageFormat[] = "Glfw Error %d: %s\n";
  fprintf(stderr, kMessageFormat, error, description);
}

// Entry point for the one-player reach-avoid example.
//
// Steps:
//   1. Set up logging and parse command-line flags.
//   2. Build the problem and solve it with the augmented Lagrangian solver in
//      feedback mode (params.open_loop = false).
//   3. Optionally save the solver log (--save).
//   4. Unless --viz=false, show control sliders, a top-down view and a cost
//      inspector in a GLFW / Dear ImGui window until the window is closed.
//
// Returns 0 on success (including when visualization is disabled) and 1 if
// GLFW, the window, or the OpenGL loader cannot be initialized.
int main(int argc, char** argv) {
  const std::string log_file =
      ILQGAMES_LOG_DIR + std::string("/one_player_reach_avoid_example.log");
  google::SetLogDestination(0, log_file.c_str());
  google::InitGoogleLogging(argv[0]);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  FLAGS_logtostderr = true;

  // Solver parameters for a feedback (i.e. not open-loop) information pattern.
  ilqgames::SolverParams params;
  params.max_backtracking_steps = 100;
  params.linesearch = FLAGS_linesearch;
  params.expected_decrease_fraction = FLAGS_expected_decrease;
  params.initial_alpha_scaling = FLAGS_initial_alpha_scaling;
  params.convergence_tolerance = FLAGS_convergence_tolerance;
  params.state_regularization = FLAGS_state_regularization;
  params.control_regularization = FLAGS_control_regularization;
  params.open_loop = false;

  // Set up the problem and the solver. Elapsed time is measured with a
  // monotonic clock so wall-clock adjustments cannot distort the duration.
  const auto start = std::chrono::steady_clock::now();
  auto feedback_problem =
      std::make_shared<ilqgames::OnePlayerReachAvoidExample>();
  feedback_problem->Initialize();
  ilqgames::AugmentedLagrangianSolver feedback_solver(feedback_problem, params);

  // Solve the game.
  LOG(INFO) << "Computing feedback solution.";
  const auto log = feedback_solver.Solve();
  const std::vector<std::shared_ptr<const ilqgames::SolverLog>> feedback_logs =
      {log};
  LOG(INFO) << "Solver completed in "
            << std::chrono::duration<ilqgames::Time>(
                   std::chrono::steady_clock::now() - start)
                   .count()
            << " seconds.";

  // Store the final operating point and strategies back into the problem.
  feedback_problem->OverwriteSolution(log->FinalOperatingPoint(),
                                      log->FinalStrategies());

  // TODO: the numerical check of the sufficient conditions for a local Nash
  // equilibrium is currently disabled.
  // is_local_opt = NumericalCheckLocalNashEquilibrium(
  //     *feedback_problem, kMaxPerturbation, !kOpenLoop);
  // if (is_local_opt)
  //   LOG(INFO) << "Feedback solution is a local optimum.";
  // else
  //   LOG(INFO) << "Feedback solution is not a local optimum.";

  // Dump the logs if requested.
  if (FLAGS_save) {
    if (FLAGS_experiment_name.empty()) {
      CHECK(log->Save(FLAGS_last_traj));
    } else {
      CHECK(log->Save(FLAGS_last_traj, FLAGS_experiment_name));
    }
  }

  // Without visualization there is nothing left to do.
  if (!FLAGS_viz) return 0;

  // Create a top-down renderer, control sliders, and cost inspector.
  std::shared_ptr<ilqgames::ControlSliders> sliders(
      new ilqgames::ControlSliders({feedback_logs}));
  ilqgames::TopDownRenderer top_down_renderer(sliders, {feedback_problem});
  ilqgames::CostInspector cost_inspector(sliders,
                                         {feedback_problem->PlayerCosts()});

  // Setup window.
  glfwSetErrorCallback(glfw_error_callback);
  if (!glfwInit()) return 1;

  // Decide GL+GLSL versions.
#if __APPLE__
  // GL 3.2 + GLSL 150.
  const char* glsl_version = "#version 150";
  glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
  glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
  glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
  glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);  // Required on Mac
#else
  // GL 3.0 + GLSL 130.
  const char* glsl_version = "#version 130";
  glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
  glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
  // glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
  // glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);            // 3.0+ only
#endif

  // Create window with graphics context. GLFW has been initialized at this
  // point, so every failure path below must release it before returning.
  GLFWwindow* window = glfwCreateWindow(
      1280, 720, "ILQGames: 1-Player Reachability Example", nullptr, nullptr);
  if (window == nullptr) {
    glfwTerminate();
    return 1;
  }
  glfwMakeContextCurrent(window);
  glfwSwapInterval(1);  // Enable vsync

  // Initialize OpenGL loader
#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
  const bool err = gl3wInit() != 0;
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
  const bool err = glewInit() != GLEW_OK;
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
  const bool err = gladLoadGL() == 0;
#else
  // If you use IMGUI_IMPL_OPENGL_LOADER_CUSTOM, your loader is likely to
  // require some form of initialization.
  const bool err = false;
#endif
  if (err) {
    fprintf(stderr, "Failed to initialize OpenGL loader!\n");
    glfwDestroyWindow(window);
    glfwTerminate();
    return 1;
  }

  // Setup Dear ImGui context.
  IMGUI_CHECKVERSION();
  ImGui::CreateContext();

  // Setup Dear ImGui style.
  ImGui::StyleColorsDark();
  // ImGui::StyleColorsClassic();

  // Background color.
  const ImVec4 clear_color =
      ImVec4(213.0 / 255.0, 216.0 / 255.0, 226.0 / 255.0, 1.0f);

  // Setup Platform/Renderer bindings
  ImGui_ImplGlfw_InitForOpenGL(window, true);
  ImGui_ImplOpenGL3_Init(glsl_version);

  // Main loop: one iteration per rendered frame until the window is closed.
  while (!glfwWindowShouldClose(window)) {
    // Poll and handle events (inputs, window resize, etc.).
    glfwPollEvents();

    // Start the Dear ImGui frame.
    ImGui_ImplOpenGL3_NewFrame();
    ImGui_ImplGlfw_NewFrame();
    ImGui::NewFrame();

    // Control sliders.
    sliders->Render();

    // Top down view.
    top_down_renderer.Render();

    // Cost inspector.
    cost_inspector.Render();

    // Rendering
    ImGui::Render();
    int display_w, display_h;
    glfwMakeContextCurrent(window);
    glfwGetFramebufferSize(window, &display_w, &display_h);
    glViewport(0, 0, display_w, display_h);
    glClearColor(clear_color.x, clear_color.y, clear_color.z, clear_color.w);
    glClear(GL_COLOR_BUFFER_BIT);
    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());

    glfwMakeContextCurrent(window);
    glfwSwapBuffers(window);
  }

  // Cleanup
  ImGui_ImplOpenGL3_Shutdown();
  ImGui_ImplGlfw_Shutdown();
  ImGui::DestroyContext();

  glfwDestroyWindow(window);
  glfwTerminate();

  return 0;
}
48 changes: 40 additions & 8 deletions include/ilqgames/cost/player_cost.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include <ilqgames/constraint/constraint.h>
#include <ilqgames/cost/cost.h>
#include <ilqgames/cost/extreme_value_cost.h>
#include <ilqgames/utils/operating_point.h>
#include <ilqgames/utils/quadratic_cost_approximation.h>
#include <ilqgames/utils/types.h>
Expand All @@ -69,10 +70,18 @@ class PlayerCost {
cost_structure_(CostStructure::SUM),
time_of_extreme_cost_(0) {}

// Reset regularizations. Each setter overwrites the stored coefficient with
// `reg`; no validation of the new value is performed here.
void ResetStateRegularization(float reg) { state_regularization_ = reg; }
void ResetControlRegularization(float reg) { control_regularization_ = reg; }

// Add new state and control costs for this player.
void AddStateCost(const std::shared_ptr<Cost>& cost);
void AddControlCost(PlayerIndex idx, const std::shared_ptr<Cost>& cost);

// For reach-avoid problems, two sets of state costs.
void SetTargetStateCost(const std::shared_ptr<ExtremeValueCost>& cost);
void SetFailureStateCost(const std::shared_ptr<ExtremeValueCost>& cost);

// Add new state and control constraints. For now, they are only equality
// constraints but later they should really be inequality constraints and
// there should be some logic for maintaining sets of active constraints.
Expand All @@ -85,6 +94,8 @@ class PlayerCost {
// state costs will be evaluated at the next time step.
float Evaluate(Time t, const VectorXf& x,
const std::vector<VectorXf>& us) const;
float EvaluateTargetCost(Time t, const VectorXf& x) const;
float EvaluateFailureCost(Time t, const VectorXf& x) const;
float Evaluate(const OperatingPoint& op, Time time_step) const;
float Evaluate(const OperatingPoint& op) const;
float EvaluateOffset(Time t, Time next_t, const VectorXf& next_x,
Expand All @@ -98,23 +109,40 @@ class PlayerCost {
QuadraticCostApproximation QuadraticizeControlCosts(
Time t, const VectorXf& x, const std::vector<VectorXf>& us) const;

// Set whether this is a time-additive, max-over-time, or min-over-time cost.
// At each specific time, all costs are accumulated with the given operation.
enum CostStructure { SUM, MAX, MIN };
// Set whether this is a time-additive, max/min-over-time, or reach-avoid
// problem. At each specific time, all costs are accumulated with the given
// operation.
enum CostStructure { SUM, MAX, MIN, REACH_AVOID };
// Setters: overwrite the stored cost structure with the named value.
void SetTimeAdditive() { cost_structure_ = SUM; }
void SetMaxOverTime() { cost_structure_ = MAX; }
void SetMinOverTime() { cost_structure_ = MIN; }
void SetReachAvoid() { cost_structure_ = REACH_AVOID; }
// Predicates: report whether the stored cost structure equals the named value.
bool IsTimeAdditive() const { return cost_structure_ == SUM; }
bool IsMaxOverTime() const { return cost_structure_ == MAX; }
bool IsMinOverTime() const { return cost_structure_ == MIN; }
bool IsReachAvoid() const { return cost_structure_ == REACH_AVOID; }

// Keep track of the time of extreme costs.
size_t TimeOfExtremeCost() { return time_of_extreme_cost_; }
// Keep track of the time of extreme and critical costs. (Critical times are
// used in reach-avoid problems: for each player, they are the times at which
// that player's value function does not depend upon the future.)
// The getter returns the stored value; the setter overwrites it with `kk`.
size_t TimeOfExtremeCost() const { return time_of_extreme_cost_; }
void SetTimeOfExtremeCost(size_t kk) { time_of_extreme_cost_ = kk; }

// Accessors.
const PtrVector<Cost>& StateCosts() const { return state_costs_; }
// Returns a freshly built list of this player's state costs: a copy of the
// generic state costs, followed by the reach-avoid target cost and then the
// failure cost. The target/failure pointers are appended only when they are
// non-null, so this accessor never adds a null entry for problems that did
// not set them.
PtrVector<Cost> StateCosts() const {
  PtrVector<Cost> all(state_costs_);
  if (target_state_cost_) all.push_back(target_state_cost_);
  if (failure_state_cost_) all.push_back(failure_state_cost_);
  return all;
}
// Control costs, returned as a const reference to the stored container.
const PlayerPtrMultiMap<Cost>& ControlCosts() const { return control_costs_; }
// Reach-avoid target cost pointer, returned by const reference.
// NOTE(review): presumably null until SetTargetStateCost() is called - confirm.
const std::shared_ptr<ExtremeValueCost>& TargetStateCost() const {
return target_state_cost_;
}
// Reach-avoid failure cost pointer, returned by const reference.
// NOTE(review): presumably null until SetFailureStateCost() is called - confirm.
const std::shared_ptr<ExtremeValueCost>& FailureStateCost() const {
return failure_state_cost_;
}
// State constraints, returned as a const reference to the stored container.
const PtrVector<Constraint>& StateConstraints() const {
return state_constraints_;
}
Expand All @@ -133,13 +161,17 @@ class PlayerCost {
PtrVector<Cost> state_costs_;
PlayerPtrMultiMap<Cost> control_costs_;

// Reach-avoid-specific target/failure costs.
std::shared_ptr<ExtremeValueCost> target_state_cost_;
std::shared_ptr<ExtremeValueCost> failure_state_cost_;

// State and control constraints
PtrVector<Constraint> state_constraints_;
PlayerPtrMultiMap<Constraint> control_constraints_;

// Regularization on costs.
const float state_regularization_;
const float control_regularization_;
float state_regularization_;
float control_regularization_;

// Whether this objective is time-additive, max-over-time, min-over-time, or
// reach-avoid.
Expand Down
Loading