diff --git a/exec/one_player_reach_avoid_example/main.cpp b/exec/one_player_reach_avoid_example/main.cpp new file mode 100644 index 00000000..afae81ef --- /dev/null +++ b/exec/one_player_reach_avoid_example/main.cpp @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2021, The Regents of the University of California (Regents). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Please contact the author(s) of this library if you have any questions. + * Authors: David Fridovich-Keil ( dfk@utexas.edu ) + * Jaime Fisac ( jfisac@princeton.edu ) + */ + +/////////////////////////////////////////////////////////////////////////////// +// +// Main GUI for OnePlayerReachAvoidExample. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +// Optional log saving and visualization. +DEFINE_bool(save, false, + "Optionally save logs to disk."); +DEFINE_bool(viz, true, "Visualize results in a GUI."); +DEFINE_bool(last_traj, false, + "Should the solver only dump the last trajectory?"); +DEFINE_string(experiment_name, "", "Name for the experiment."); + +// Regularization. +DEFINE_double(state_regularization, 1.0, "State regularization."); +DEFINE_double(control_regularization, 2.0, "Control regularization."); + +// Linesearch parameters. +DEFINE_bool(linesearch, false, "Should the solver linesearch?"); +DEFINE_double(initial_alpha_scaling, 1.0, "Initial step size in linesearch."); +DEFINE_double(convergence_tolerance, 1e-3, "KKT squared error tolerance."); +DEFINE_double(expected_decrease, 0.1, "KKT sq err expected decrease per iter."); + +// About OpenGL function loaders: modern OpenGL doesn't have a standard header +// file and requires individual function pointers to be loaded manually. Helper +// libraries are often used for this purpose! Here we are supporting a few +// common ones: gl3w, glew, glad. You may use another loader/header of your +// choice (glext, glLoadGen, etc.), or chose to manually implement your own. +#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W) +#include // Initialize with gl3wInit() +#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW) +#include // Initialize with glewInit() +#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD) +#include // Initialize with gladLoadGL() +#else +#include IMGUI_IMPL_OPENGL_LOADER_CUSTOM +#endif + +// Include glfw3.h after our OpenGL definitions. +#include + +static void glfw_error_callback(int error, const char* description) { + fprintf(stderr, "Glfw Error %d: %s\n", error, description); +} + +int main(int argc, char** argv) { + const std::string log_file = + ILQGAMES_LOG_DIR + std::string("/one_player_reach_avoid_example.log"); + google::SetLogDestination(0, log_file.c_str()); + google::InitGoogleLogging(argv[0]); + gflags::ParseCommandLineFlags(&argc, &argv, true); + FLAGS_logtostderr = true; + + // Solve for open-loop information pattern. + ilqgames::SolverParams params; + params.max_backtracking_steps = 100; + params.linesearch = FLAGS_linesearch; + params.expected_decrease_fraction = FLAGS_expected_decrease; + params.initial_alpha_scaling = FLAGS_initial_alpha_scaling; + params.convergence_tolerance = FLAGS_convergence_tolerance; + params.state_regularization = FLAGS_state_regularization; + params.control_regularization = FLAGS_control_regularization; + params.open_loop = false; + + // Solve for feedback equilibrium. + auto start = std::chrono::system_clock::now(); + auto feedback_problem = + std::make_shared(); + feedback_problem->Initialize(); + ilqgames::AugmentedLagrangianSolver feedback_solver(feedback_problem, params); + + // Solve the game. + LOG(INFO) << "Computing feedback solution."; + const auto log = feedback_solver.Solve(); + const std::vector> feedback_logs = + {log}; + LOG(INFO) << "Solver completed in " + << std::chrono::duration( + std::chrono::system_clock::now() - start) + .count() + << " seconds."; + + // Check if solution satisfies sufficient conditions for being a local Nash. + feedback_problem->OverwriteSolution(log->FinalOperatingPoint(), + log->FinalStrategies()); + // is_local_opt = NumericalCheckLocalNashEquilibrium( + // *feedback_problem, kMaxPerturbation, !kOpenLoop); + // if (is_local_opt) + // LOG(INFO) << "Feedback solution is a local optimum."; + // else + // LOG(INFO) << "Feedback solution is not a local optimum."; + + // Dump the logs and/or exit. + if (FLAGS_save) { + if (FLAGS_experiment_name == "") { + CHECK(log->Save(FLAGS_last_traj)); + } else { + CHECK(log->Save(FLAGS_last_traj, FLAGS_experiment_name)); + } + } + + // Create a top-down renderer, control sliders, and cost inspector. + if (!FLAGS_viz) return 0; + std::shared_ptr sliders( + new ilqgames::ControlSliders({feedback_logs})); + ilqgames::TopDownRenderer top_down_renderer( + sliders, {feedback_problem}); + ilqgames::CostInspector cost_inspector( + sliders, + {feedback_problem->PlayerCosts()}); + // std::shared_ptr sliders( + // new ilqgames::ControlSliders({feedback_logs, feedback_logs})); + // ilqgames::TopDownRenderer top_down_renderer( + // sliders, {feedback_problem, feedback_problem}); + // ilqgames::CostInspector cost_inspector( + // sliders, {feedback_problem->Solver().PlayerCosts(), + // feedback_problem->Solver().PlayerCosts()}); + + // Setup window. + glfwSetErrorCallback(glfw_error_callback); + if (!glfwInit()) return 1; + +// Decide GL+GLSL versions. +#if __APPLE__ + // GL 3.2 + GLSL 150. + const char* glsl_version = "#version 150"; + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2); + glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only + glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // Required on Mac +#else + // GL 3.0 + GLSL 130. + const char* glsl_version = "#version 130"; + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0); +// glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ +// only glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // 3.0+ only +#endif + + // Create window with graphics context + GLFWwindow* window = glfwCreateWindow( + 1280, 720, "ILQGames: 1-Player Reachability Example", NULL, NULL); + if (window == NULL) return 1; + glfwMakeContextCurrent(window); + glfwSwapInterval(1); // Enable vsync + +// Initialize OpenGL loader +#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W) + bool err = gl3wInit() != 0; +#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW) + bool err = glewInit() != GLEW_OK; +#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD) + bool err = gladLoadGL() == 0; +#else + bool err = false; // If you use IMGUI_IMPL_OPENGL_LOADER_CUSTOM, your loader + // is likely to requires some form of initialization. +#endif + if (err) { + fprintf(stderr, "Failed to initialize OpenGL loader!\n"); + return 1; + } + + // Setup Dear ImGui context. + IMGUI_CHECKVERSION(); + ImGui::CreateContext(); + + // Setup Dear ImGui style. + ImGui::StyleColorsDark(); + // ImGui::StyleColorsClassic(); + + // Background color. + const ImVec4 clear_color = + ImVec4(213.0 / 255.0, 216.0 / 255.0, 226.0 / 255.0, 1.0f); + + // Setup Platform/Renderer bindings + ImGui_ImplGlfw_InitForOpenGL(window, true); + ImGui_ImplOpenGL3_Init(glsl_version); + + // Main loop + while (!glfwWindowShouldClose(window)) { + // Poll and handle events (inputs, window resize, etc.). + glfwPollEvents(); + + // Start the Dear ImGui frame. + ImGui_ImplOpenGL3_NewFrame(); + ImGui_ImplGlfw_NewFrame(); + ImGui::NewFrame(); + + // Control sliders. + sliders->Render(); + + // Top down view. + top_down_renderer.Render(); + + // Cost inspector. + cost_inspector.Render(); + + // Rendering + ImGui::Render(); + int display_w, display_h; + glfwMakeContextCurrent(window); + glfwGetFramebufferSize(window, &display_w, &display_h); + glViewport(0, 0, display_w, display_h); + glClearColor(clear_color.x, clear_color.y, clear_color.z, clear_color.w); + glClear(GL_COLOR_BUFFER_BIT); + ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + + glfwMakeContextCurrent(window); + glfwSwapBuffers(window); + } + + // Cleanup + ImGui_ImplOpenGL3_Shutdown(); + ImGui_ImplGlfw_Shutdown(); + ImGui::DestroyContext(); + + glfwDestroyWindow(window); + glfwTerminate(); + + return 0; +} diff --git a/include/ilqgames/cost/player_cost.h b/include/ilqgames/cost/player_cost.h index f85e9706..ce36b0cd 100644 --- a/include/ilqgames/cost/player_cost.h +++ b/include/ilqgames/cost/player_cost.h @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -69,10 +70,18 @@ class PlayerCost { cost_structure_(CostStructure::SUM), time_of_extreme_cost_(0) {} + // Reset regularizations. + void ResetStateRegularization(float reg) { state_regularization_ = reg; } + void ResetControlRegularization(float reg) { control_regularization_ = reg; } + // Add new state and control costs for this player. void AddStateCost(const std::shared_ptr& cost); void AddControlCost(PlayerIndex idx, const std::shared_ptr& cost); + // For reach-avoid problems, two sets of state costs. + void SetTargetStateCost(const std::shared_ptr& cost); + void SetFailureStateCost(const std::shared_ptr& cost); + // Add new state and control constraints. For now, they are only equality // constraints but later they should really be inequality constraints and // there should be some logic for maintaining sets of active constraints. @@ -85,6 +94,8 @@ class PlayerCost { // state costs will be evaluated at the next time step. float Evaluate(Time t, const VectorXf& x, const std::vector& us) const; + float EvaluateTargetCost(Time t, const VectorXf& x) const; + float EvaluateFailureCost(Time t, const VectorXf& x) const; float Evaluate(const OperatingPoint& op, Time time_step) const; float Evaluate(const OperatingPoint& op) const; float EvaluateOffset(Time t, Time next_t, const VectorXf& next_x, @@ -98,23 +109,40 @@ class PlayerCost { QuadraticCostApproximation QuadraticizeControlCosts( Time t, const VectorXf& x, const std::vector& us) const; - // Set whether this is a time-additive, max-over-time, or min-over-time cost. - // At each specific time, all costs are accumulated with the given operation. - enum CostStructure { SUM, MAX, MIN }; + // Set whether this is a time-additive, max/min-over-time, or reach-avoid + // problem. At each specific time, all costs are accumulated with the given + // operation. + enum CostStructure { SUM, MAX, MIN, REACH_AVOID }; void SetTimeAdditive() { cost_structure_ = SUM; } void SetMaxOverTime() { cost_structure_ = MAX; } void SetMinOverTime() { cost_structure_ = MIN; } + void SetReachAvoid() { cost_structure_ = REACH_AVOID; } bool IsTimeAdditive() const { return cost_structure_ == SUM; } bool IsMaxOverTime() const { return cost_structure_ == MAX; } bool IsMinOverTime() const { return cost_structure_ == MIN; } + bool IsReachAvoid() const { return cost_structure_ == REACH_AVOID; } - // Keep track of the time of extreme costs. - size_t TimeOfExtremeCost() { return time_of_extreme_cost_; } + // Keep track of the time of extreme and critical costs. (Critical costs are + // used in reach-avoid problems and are those times for each player for which + // that player's value function does not depend upon the future.) + size_t TimeOfExtremeCost() const { return time_of_extreme_cost_; } void SetTimeOfExtremeCost(size_t kk) { time_of_extreme_cost_ = kk; } // Accessors. - const PtrVector& StateCosts() const { return state_costs_; } + PtrVector StateCosts() const { + PtrVector all; + all.insert(all.end(), state_costs_.begin(), state_costs_.end()); + all.push_back(target_state_cost_); + all.push_back(failure_state_cost_); + return all; + } const PlayerPtrMultiMap& ControlCosts() const { return control_costs_; } + const std::shared_ptr& TargetStateCost() const { + return target_state_cost_; + } + const std::shared_ptr& FailureStateCost() const { + return failure_state_cost_; + } const PtrVector& StateConstraints() const { return state_constraints_; } @@ -133,13 +161,17 @@ class PlayerCost { PtrVector state_costs_; PlayerPtrMultiMap control_costs_; + // Reach-avoid-specific target/failure costs. + std::shared_ptr target_state_cost_; + std::shared_ptr failure_state_cost_; + // State and control constraints PtrVector state_constraints_; PlayerPtrMultiMap control_constraints_; // Regularization on costs. - const float state_regularization_; - const float control_regularization_; + float state_regularization_; + float control_regularization_; // Ternary variable whether this objective is time-additive, max-over-time, or // min-over-time. diff --git a/include/ilqgames/examples/one_player_reach_avoid_example.h b/include/ilqgames/examples/one_player_reach_avoid_example.h new file mode 100644 index 00000000..30b6646c --- /dev/null +++ b/include/ilqgames/examples/one_player_reach_avoid_example.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021, The Regents of the University of California (Regents). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Please contact the author(s) of this library if you have any questions. + * Authors: David Fridovich-Keil ( dfk@utexas.edu ) + * Jaime Fisac ( jfisac@princeton.edu ) + */ + +/////////////////////////////////////////////////////////////////////////////// +// +// One player reach-avoid example. Single player choosing control reach target +// ball while staying clear of failure balls. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef ILQGAMES_EXAMPLES_ONE_PLAYER_REACH_AVOID_EXAMPLE_H +#define ILQGAMES_EXAMPLES_ONE_PLAYER_REACH_AVOID_EXAMPLE_H + +#include +#include + +namespace ilqgames { + +class OnePlayerReachAvoidExample : public TopDownRenderableProblem { + public: + ~OnePlayerReachAvoidExample() {} + OnePlayerReachAvoidExample() : TopDownRenderableProblem() {} + + // Construct dynamics, initial state, and player costs. + void ConstructDynamics(); + void ConstructInitialState(); + void ConstructPlayerCosts(); + + // Unpack x, y, heading (for each player, potentially) from a given state. + std::vector Xs(const VectorXf& x) const; + std::vector Ys(const VectorXf& x) const; + std::vector Thetas(const VectorXf& x) const; +}; // class OnePlayerReachAvoidExample + +} // namespace ilqgames + +#endif diff --git a/include/ilqgames/solver/ilq_solver.h b/include/ilqgames/solver/ilq_solver.h index 6133dfb4..3850d13b 100644 --- a/include/ilqgames/solver/ilq_solver.h +++ b/include/ilqgames/solver/ilq_solver.h @@ -77,9 +77,11 @@ class ILQSolver : public GameSolver { if (params_.open_loop) lq_solver_.reset( new LQOpenLoopSolver(problem_->Dynamics(), time::kNumTimeSteps)); - else + else { lq_solver_.reset( - new LQFeedbackSolver(problem_->Dynamics(), time::kNumTimeSteps)); + new LQFeedbackSolver(problem_->Dynamics(), time::kNumTimeSteps, + &problem->PlayerCosts(), &critical_times_)); + } // If this system is flat then compute the linearization once, now. if (problem_->Dynamics()->TreatAsLinear()) @@ -88,10 +90,17 @@ class ILQSolver : public GameSolver { // Prepopulate quadraticization. for (auto& quads : cost_quadraticization_) quads.resize(problem_->Dynamics()->NumPlayers(), - QuadraticCostApproximation(problem_->Dynamics()->XDim())); + QuadraticCostApproximation(problem_->Dynamics()->XDim(), + params.state_regularization)); // Set last quadraticization to current, to start. last_cost_quadraticization_ = cost_quadraticization_; + + // Set player cost regularizations. + for (auto& cost : problem->PlayerCosts()) { + cost.ResetStateRegularization(params.state_regularization); + cost.ResetControlRegularization(params.control_regularization); + } } // Solve this game. Returns true if converged. @@ -124,14 +133,24 @@ class ILQSolver : public GameSolver { const std::vector& dims) const; // Check if solver has converged. - virtual bool HasConverged(float current_merit_function_value) const { - return (last_merit_function_value_ - current_merit_function_value) < - params_.convergence_tolerance; + virtual bool HasConverged( + float current_merit_function_value, + const std::vector& current_total_costs) const { + // NOTE: accounting for both total costs for all players and merit function. + float total_cost_diff = 0.0; + for (size_t ii = 0; ii < current_total_costs.size(); ii++) + total_cost_diff += + std::abs(current_total_costs[ii] - last_total_costs_[ii]); + + return (std::abs(last_merit_function_value_ - + current_merit_function_value) < + params_.convergence_tolerance) && + (total_cost_diff < params_.convergence_tolerance); } // Compute overall costs and set times of extreme costs. void TotalCosts(const OperatingPoint& current_op, - std::vector* total_costs) const; + std::vector* total_costs); // Armijo condition check. Returns true if the new operating point satisfies // the Armijo condition, and also returns current merit function value. @@ -187,6 +206,14 @@ class ILQSolver : public GameSolver { // Last merit function value and expected decreases (per step length). float last_merit_function_value_; float expected_decrease_; + + // Last value of total costs. + std::vector last_total_costs_; + + // Boolean flags for which time steps are "critical" for each player. Critical + // times are those for which that player's value function does not depend upon + // the future. + std::vector> critical_times_; }; // class ILQSolver } // namespace ilqgames diff --git a/include/ilqgames/solver/lq_feedback_solver.h b/include/ilqgames/solver/lq_feedback_solver.h index 4baa5c17..e9dc3954 100644 --- a/include/ilqgames/solver/lq_feedback_solver.h +++ b/include/ilqgames/solver/lq_feedback_solver.h @@ -57,11 +57,13 @@ #ifndef ILQGAMES_SOLVER_LQ_FEEDBACK_SOLVER_H #define ILQGAMES_SOLVER_LQ_FEEDBACK_SOLVER_H +#include #include #include #include #include #include +#include #include @@ -72,8 +74,13 @@ class LQFeedbackSolver : public LQSolver { ~LQFeedbackSolver() {} LQFeedbackSolver( const std::shared_ptr& dynamics, - size_t num_time_steps) - : LQSolver(dynamics, num_time_steps) { + size_t num_time_steps, + const std::vector* player_costs = nullptr, + const std::vector>* critical_times = + nullptr) + : LQSolver(dynamics, num_time_steps), + player_costs_(player_costs), + critical_times_(critical_times) { // Cache the total number of control dimensions, since this is inefficient // to compute. const Dimension total_udim = dynamics_->TotalUDim(); @@ -140,6 +147,12 @@ class LQFeedbackSolver : public LQSolver { // Preallocate memory for intermediate variables F, beta. MatrixXf F_; VectorXf beta_; + + // Players' cost functions (used to check objective structure), + // along with list of whether each time index is critical for time-consistent + // backup. + const std::vector* player_costs_; + const std::vector>* critical_times_; }; // LQFeedbackSolver } // namespace ilqgames diff --git a/include/ilqgames/solver/problem.h b/include/ilqgames/solver/problem.h index 479d663d..29ed9e38 100644 --- a/include/ilqgames/solver/problem.h +++ b/include/ilqgames/solver/problem.h @@ -69,6 +69,12 @@ class Problem { ConstructInitialState(); ConstructInitialOperatingPoint(); ConstructInitialStrategies(); + + // Check if any player costs are reach-avoid. + any_reach_avoid_objectives_ = false; + for (const auto& cost : player_costs_) + any_reach_avoid_objectives_ |= cost.IsReachAvoid(); + initialized_ = true; } @@ -128,8 +134,15 @@ class Problem { return *strategies_; } + // Does any player have a reach-avoid objective? + bool AnyReachAvoidObjectives() const { return any_reach_avoid_objectives_; } + + // // Strategies and operating points for all players. + // std::unique_ptr operating_point_; + // std::unique_ptr> strategies_; + protected: - Problem(); + Problem() : initialized_(false) {} // Functions for initialization. By default, operating point and strategies // are initialized to zero. @@ -164,6 +177,9 @@ class Problem { // Player costs. These will not change during operation of this solver. std::vector player_costs_; + // Are any of these costs reach-avoid? + bool any_reach_avoid_objectives_; + // Initial condition. VectorXf x0_; diff --git a/include/ilqgames/solver/solver_params.h b/include/ilqgames/solver/solver_params.h index 37cf2e83..85393c7e 100644 --- a/include/ilqgames/solver/solver_params.h +++ b/include/ilqgames/solver/solver_params.h @@ -51,7 +51,7 @@ struct SolverParams { // Consider a solution converged once max elementwise difference is below this // tolerance or solver has exceeded a maximum number of iterations. float convergence_tolerance = 1e-1; - size_t max_solver_iters = 1000; + size_t max_solver_iters = 20; // Linesearch parameters. If flag is set 'true', then applied initial alpha // scaling to all strategies and backs off geometrically at the given rate for @@ -70,7 +70,7 @@ struct SolverParams { float control_regularization = 0.0; // Augmented Lagrangian parameters. - size_t unconstrained_solver_max_iters = 10; + size_t unconstrained_solver_max_iters = 1000; float geometric_mu_scaling = 1.1; float geometric_mu_downscaling = 0.5; float geometric_lambda_downscaling = 0.5; diff --git a/include/ilqgames/utils/types.h b/include/ilqgames/utils/types.h index e33d7126..972a6a86 100644 --- a/include/ilqgames/utils/types.h +++ b/include/ilqgames/utils/types.h @@ -142,6 +142,11 @@ static constexpr size_t kNumTimeSteps = static_cast((kTimeHorizon + constants::kSmallNumber) / kTimeStep); } // namespace time +// Boolean flags for which time steps are "critical" for each player. Critical +// times are those for which that player's value function does not depend upon +// the future. +enum CriticalTimeType { NOT_CRITICAL, CRITICAL_TARGET, CRITICAL_FAILURE }; + // ---------------------------- SIMPLE FUNCTIONS ---------------------------- // template diff --git a/src/ilq_solver.cpp b/src/ilq_solver.cpp index 4e199737..c65a3376 100644 --- a/src/ilq_solver.cpp +++ b/src/ilq_solver.cpp @@ -103,8 +103,11 @@ std::shared_ptr ILQSolver::Solve(bool* success, Time max_runtime) { CurrentOperatingPoint(last_operating_point, current_strategies, ¤t_operating_point); + // InitializeAlongRoute(params_.current_operating_point); + // Compute total costs. TotalCosts(current_operating_point, &total_costs); + last_total_costs_ = total_costs; // Log current iterate. Time elapsed = 0.0; @@ -156,6 +159,7 @@ std::shared_ptr ILQSolver::Solve(bool* success, Time max_runtime) { // Compute total costs and check if we've converged. TotalCosts(current_operating_point, &total_costs); + last_total_costs_ = total_costs; // Record loop runtime. elapsed += timer_.Toc(); @@ -165,6 +169,12 @@ std::shared_ptr ILQSolver::Solve(bool* success, Time max_runtime) { total_costs, elapsed, has_converged); } + // // DEBUG. + // for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) { + // std::cout << "time: " << kk << ", critical: " << critical_times_[kk][0] + // << std::endl; + // } + // Handle success flag. if (success) *success = true; @@ -218,32 +228,43 @@ void ILQSolver::CurrentOperatingPoint( // } void ILQSolver::TotalCosts(const OperatingPoint& current_op, - std::vector* total_costs) const { + std::vector* total_costs) { // Initialize appropriately. if (total_costs->size() != problem_->PlayerCosts().size()) total_costs->resize(problem_->PlayerCosts().size()); + + if (problem_->AnyReachAvoidObjectives()) { + critical_times_.resize( + time::kNumTimeSteps, + std::vector(problem_->PlayerCosts().size())); + } + for (PlayerIndex ii = 0; ii < problem_->PlayerCosts().size(); ii++) { if (problem_->PlayerCosts()[ii].IsTimeAdditive()) (*total_costs)[ii] = 0.0; else if (problem_->PlayerCosts()[ii].IsMaxOverTime()) (*total_costs)[ii] = -constants::kInfinity; + else if (problem_->PlayerCosts()[ii].IsMinOverTime()) + (*total_costs)[ii] = constants::kInfinity; else (*total_costs)[ii] = constants::kInfinity; } - // Accumulate costs. - for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) { + // Accumulate costs backward in time since that's needed for non-additive + // time-consistent solutions. + // std::cout << "----------------------------------------" << std::endl; + for (int kk = time::kNumTimeSteps - 1; kk >= 0; kk--) { const Time t = RelativeTimeTracker::RelativeTime(kk); for (size_t ii = 0; ii < problem_->PlayerCosts().size(); ii++) { const float current_cost = problem_->PlayerCosts()[ii].Evaluate( t, current_op.xs[kk], current_op.us[kk]); - if (problem_->PlayerCosts()[ii].IsTimeAdditive()) + if (problem_->PlayerCosts()[ii].IsTimeAdditive()) { (*total_costs)[ii] += problem_->PlayerCosts()[ii].Evaluate( t, current_op.xs[kk], current_op.us[kk]); - else if (problem_->PlayerCosts()[ii].IsMaxOverTime() && - current_cost > (*total_costs)[ii]) { + } else if (problem_->PlayerCosts()[ii].IsMaxOverTime() && + current_cost > (*total_costs)[ii]) { (*total_costs)[ii] = current_cost; problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk); } else if (problem_->PlayerCosts()[ii].IsMinOverTime()) { @@ -251,6 +272,27 @@ void ILQSolver::TotalCosts(const OperatingPoint& current_op, (*total_costs)[ii] = current_cost; problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk); } + } else if (problem_->PlayerCosts()[ii].IsReachAvoid()) { + // Backward-time reach-avoid computation of cost + critical times. + // NOTE: ignoring control costs. + const float target = problem_->PlayerCosts()[ii].EvaluateTargetCost( + t, current_op.xs[kk]); + const float failure = problem_->PlayerCosts()[ii].EvaluateFailureCost( + t, current_op.xs[kk]); + (*total_costs)[ii] = + std::max(std::min((*total_costs)[ii], target), failure); + + // std::cout << "time: " << kk << ", target = " << target + // << ", failure = " << failure + // << ", total = " << (*total_costs)[ii] << std::endl; + + // Find arg extremum. + if ((*total_costs)[ii] == target) + critical_times_[kk][ii] = CRITICAL_TARGET; + else if ((*total_costs)[ii] == failure) + critical_times_[kk][ii] = CRITICAL_FAILURE; + else + critical_times_[kk][ii] = NOT_CRITICAL; } } } @@ -318,18 +360,33 @@ bool ILQSolver::ModifyLQStrategies( float current_stepsize = params_.initial_alpha_scaling; CurrentOperatingPoint(last_operating_point, *strategies, current_operating_point); - if (!params_.linesearch) return true; + + // Compute total costs. + std::vector total_costs; + TotalCosts(*current_operating_point, &total_costs); + + // Compute merit function since this sets next quadraticization. + float current_merit_function_value = MeritFunction(*current_operating_point); + // std::cout << current_merit_function_value << std::endl; + + if (!params_.linesearch) { + *has_converged = HasConverged(current_merit_function_value, total_costs); + last_merit_function_value_ = current_merit_function_value; + return true; + } // Keep reducing alphas until we satisfy the Armijo condition. for (size_t ii = 0; ii < params_.max_backtracking_steps; ii++) { // Compute merit function value. - const float current_merit_function_value = - MeritFunction(*current_operating_point); + if (ii > 0) { + current_merit_function_value = MeritFunction(*current_operating_point); + TotalCosts(*current_operating_point, &total_costs); + } // Check Armijo condition. if (CheckArmijoCondition(current_merit_function_value, current_stepsize)) { // Success! Update cached terms and check convergence. - *has_converged = HasConverged(current_merit_function_value); + *has_converged = HasConverged(current_merit_function_value, total_costs); last_merit_function_value_ = current_merit_function_value; return true; } @@ -489,10 +546,34 @@ void ILQSolver::ComputeCostQuadraticization( const PlayerCost& cost = problem_->PlayerCosts()[ii]; if (cost.IsTimeAdditive() || - problem_->PlayerCosts()[ii].TimeOfExtremeCost() == kk) + ((cost.IsMinOverTime() || cost.IsMaxOverTime()) && + cost.TimeOfExtremeCost() == kk)) (*q)[kk][ii] = cost.Quadraticize(t, x, us); - else + else { + // Extremum over time cost but not at extreme cost, or in reach-avoid + // mode. In these cases, always quadraticize control and state costs + // separately. (*q)[kk][ii] = cost.QuadraticizeControlCosts(t, x, us); + + if (cost.IsReachAvoid()) { + if (critical_times_[kk][ii] == CRITICAL_TARGET) { + // std::cout << "Time: " << kk + // << ". Target is active... getting quadraticized." + // << std::endl; + CHECK_NOTNULL(cost.TargetStateCost().get()); + cost.TargetStateCost()->Quadraticize( + t, x, &((*q)[kk][ii].state.hess), &((*q)[kk][ii].state.grad)); + } else if (critical_times_[kk][ii] == CRITICAL_FAILURE) { + // std::cout << "Time: " << kk + // << ". Failure is active... getting quadraticized." + // << std::endl; + CHECK_NOTNULL(cost.FailureStateCost().get()); + + cost.FailureStateCost()->Quadraticize( + t, x, &((*q)[kk][ii].state.hess), &((*q)[kk][ii].state.grad)); + } + } + } } } } diff --git a/src/lq_feedback_solver.cpp b/src/lq_feedback_solver.cpp index 48dbfcd3..07f1c275 100644 --- a/src/lq_feedback_solver.cpp +++ b/src/lq_feedback_solver.cpp @@ -180,20 +180,29 @@ std::vector LQFeedbackSolver::Solve( // Update Zs and zetas. for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) { - zetas_[kk][ii] = - (F_.transpose() * (zetas_[kk + 1][ii] + Zs_[kk + 1][ii] * beta_) + - quad[ii].state.grad) - .eval(); - Zs_[kk][ii] = - (F_.transpose() * Zs_[kk + 1][ii] * F_ + quad[ii].state.hess).eval(); - - // Add terms for nonzero Rijs. - for (const auto& Rij_entry : quad[ii].control) { - const PlayerIndex jj = Rij_entry.first; - const MatrixXf& Rij = Rij_entry.second.hess; - const VectorXf& rij = Rij_entry.second.grad; - zetas_[kk][ii] += Ps_[jj].transpose() * (Rij * alphas_[jj] - rij); - Zs_[kk][ii] += Ps_[jj].transpose() * Rij * Ps_[jj]; + if (!player_costs_ || !(*player_costs_)[ii].IsReachAvoid() || + (*critical_times_)[kk][ii] == CriticalTimeType::NOT_CRITICAL) { + zetas_[kk][ii] = + (F_.transpose() * (zetas_[kk + 1][ii] + Zs_[kk + 1][ii] * beta_) + + quad[ii].state.grad) + .eval(); + Zs_[kk][ii] = + (F_.transpose() * Zs_[kk + 1][ii] * F_ + quad[ii].state.hess) + .eval(); + + // Add terms for nonzero Rijs. + for (const auto& Rij_entry : quad[ii].control) { + const PlayerIndex jj = Rij_entry.first; + const MatrixXf& Rij = Rij_entry.second.hess; + const VectorXf& rij = Rij_entry.second.grad; + zetas_[kk][ii] += Ps_[jj].transpose() * (Rij * alphas_[jj] - rij); + Zs_[kk][ii] += Ps_[jj].transpose() * Rij * Ps_[jj]; + } + } else { + // Time-consistent reachability backup and this player doesn't care + // about later times. + zetas_[kk][ii] = quad[ii].state.grad; + Zs_[kk][ii] = quad[ii].state.hess; } } } diff --git a/src/one_player_reach_avoid_example.cpp b/src/one_player_reach_avoid_example.cpp new file mode 100644 index 00000000..68d8fb50 --- /dev/null +++ b/src/one_player_reach_avoid_example.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2021, The Regents of the University of California (Regents). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Please contact the author(s) of this library if you have any questions. + * Authors: David Fridovich-Keil ( dfk@utexas.edu ) + * Jaime Fisac ( jfisac@princeton.edu ) + */ + +/////////////////////////////////////////////////////////////////////////////// +// +// One player reach-avoid example. Single player choosing control reach target +// ball while staying clear of failure balls. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// Initial state command-line flags. +DEFINE_double(px0, 0.0, "Initial x-position (m)."); +DEFINE_double(py0, 0.0, "Initial y-position (m)."); +DEFINE_double(theta0, 0.1, "Initial heading (rad)."); +DEFINE_double(v0, 10.0, "Initial speed (m/s)."); + +namespace ilqgames { + +namespace { + +// Target/failure radii. +static constexpr float kTargetRadius = 2.0; // m +static constexpr float kFailureRadius = 3.0; // m + +// Input cost weight. +static constexpr float kControlCostWeight = 1e-4; + +// Speed. +static constexpr float kInterAxleDistance = 4.0; // m + +// Target position. +static constexpr float kP1TargetX = 50.0; +static constexpr float kP1TargetY = 0.0; + +// Obstacle position. +static constexpr float kP1FailureX = 25.0; +static constexpr float kP1FailureY = 0.0; + +// State dimensions. +using P1 = SinglePlayerCar5D; + +static const Dimension kP1XIdx = P1::kPxIdx; +static const Dimension kP1YIdx = P1::kPyIdx; +static const Dimension kP1ThetaIdx = P1::kThetaIdx; +static const Dimension kP1PhiIdx = P1::kPhiIdx; +static const Dimension kP1VIdx = P1::kVIdx; + +// Control dimensions. +static const Dimension kP1OmegaIdx = P1::kOmegaIdx; +static const Dimension kP1AIdx = P1::kAIdx; + +} // anonymous namespace + +void OnePlayerReachAvoidExample::ConstructDynamics() { + dynamics_.reset(new ConcatenatedDynamicalSystem( + {std::make_shared(kInterAxleDistance)})); +} + +void OnePlayerReachAvoidExample::ConstructInitialState() { + // Set up initial state. + x0_ = VectorXf::Zero(dynamics_->XDim()); + x0_(kP1XIdx) = FLAGS_px0; + x0_(kP1YIdx) = FLAGS_py0; + x0_(kP1ThetaIdx) = FLAGS_theta0; + x0_(kP1VIdx) = FLAGS_v0; +} + +void OnePlayerReachAvoidExample::ConstructPlayerCosts() { + // Set up costs for all players. + player_costs_.emplace_back("P1"); + auto& p1_cost = player_costs_[0]; + + const auto control_cost = std::make_shared( + kControlCostWeight, -1, 0.0, "ControlCost"); + p1_cost.AddControlCost(0, control_cost); + + // // Constrain control effort. + // const auto p1_omega_max_constraint = + // std::make_shared(kP1OmegaIdx, kOmegaMax, + // true, + // "Input Constraint (Max)"); + // const auto p1_omega_min_constraint = + // std::make_shared( + // kP1OmegaIdx, -kOmegaMax, false, "Input Constraint (Min)"); + // p1_cost.AddControlConstraint(0, p1_omega_max_constraint); + // p1_cost.AddControlConstraint(0, p1_omega_min_constraint); + + // Target cost. + const Polyline2 target = + DrawCircle(Point2(kP1TargetX, kP1TargetY), kTargetRadius, 10); + const std::shared_ptr p1_target_cost( + new Polyline2SignedDistanceCost(target, {kP1XIdx, kP1YIdx}, 0.0, true)); + p1_cost.SetTargetStateCost(std::shared_ptr( + new ExtremeValueCost({p1_target_cost}, true, "Target"))); + + // Failure cost. + const Polyline2 failure = + DrawCircle(Point2(kP1FailureX, kP1FailureY), kFailureRadius, 10); + const std::shared_ptr p1_failure_cost( + new Polyline2SignedDistanceCost(failure, {kP1XIdx, kP1YIdx}, 0.0, false)); + p1_cost.SetFailureStateCost(std::shared_ptr( + new ExtremeValueCost({p1_failure_cost}, false, "Failure"))); + + // Make sure costs are reach-avoid. + p1_cost.SetReachAvoid(); +} + +inline std::vector OnePlayerReachAvoidExample::Xs( + const VectorXf& x) const { + return {x(kP1XIdx)}; +} + +inline std::vector OnePlayerReachAvoidExample::Ys( + const VectorXf& x) const { + return {x(kP1YIdx)}; +} + +inline std::vector OnePlayerReachAvoidExample::Thetas( + const VectorXf& x) const { + return {x(kP1ThetaIdx)}; +} + +} // namespace ilqgames diff --git a/src/player_cost.cpp b/src/player_cost.cpp index 633e867e..f6259273 100644 --- a/src/player_cost.cpp +++ b/src/player_cost.cpp @@ -110,6 +110,16 @@ void PlayerCost::AddStateCost(const std::shared_ptr& cost) { state_costs_.emplace_back(cost); } +void PlayerCost::SetTargetStateCost( + const std::shared_ptr& cost) { + target_state_cost_ = cost; +} + +void PlayerCost::SetFailureStateCost( + const std::shared_ptr& cost) { + failure_state_cost_ = cost; +} + void PlayerCost::AddControlCost(PlayerIndex idx, const std::shared_ptr& cost) { control_costs_.emplace(idx, cost); @@ -143,13 +153,24 @@ float PlayerCost::Evaluate(Time t, const VectorXf& x, return total_cost; } +float PlayerCost::EvaluateTargetCost(Time t, const VectorXf& x) const { + return (target_state_cost_) ? target_state_cost_->Evaluate(t, x) + : constants::kInfinity; +} + +float PlayerCost::EvaluateFailureCost(Time t, const VectorXf& x) const { + return (failure_state_cost_) ? failure_state_cost_->Evaluate(t, x) + : -constants::kInfinity; +} + float PlayerCost::Evaluate(const OperatingPoint& op, Time time_step) const { float cost = 0.0; - if (IsMinOverTime()) - cost = constants::kInfinity; - else if (IsMaxOverTime()) + if (IsMaxOverTime()) cost = -constants::kInfinity; + else if (IsMinOverTime() || IsReachAvoid()) + cost = constants::kInfinity; + float reach_avoid_running_worst_failure_cost = -constants::kInfinity; for (size_t kk = 0; kk < op.xs.size(); kk++) { const Time t = op.t0 + time_step * static_cast(kk); const float instantaneous_cost = Evaluate(t, op.xs[kk], op.us[kk]); @@ -158,8 +179,15 @@ float PlayerCost::Evaluate(const OperatingPoint& op, Time time_step) const { cost += instantaneous_cost; else if (IsMinOverTime()) cost = std::min(cost, instantaneous_cost); - else + else if (IsMaxOverTime()) cost = std::max(cost, instantaneous_cost); + else { + reach_avoid_running_worst_failure_cost = + std::max(reach_avoid_running_worst_failure_cost, + EvaluateFailureCost(t, op.xs[kk])); + cost = std::min(cost, std::max(reach_avoid_running_worst_failure_cost, + EvaluateTargetCost(t, op.xs[kk]))); + } } return cost; diff --git a/src/problem.cpp b/src/problem.cpp index 98780a41..d1e76b40 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -59,8 +59,6 @@ namespace ilqgames { -Problem::Problem() : initialized_(false) {} - size_t Problem::SyncToExistingProblem(const VectorXf& x0, Time t0, Time planner_runtime, OperatingPoint& op) {