ilqgames
A new real-time solver for large-scale differential games.
ilq_solver.h
1 /*
2  * Copyright (c) 2019, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Base class for all iterative LQ game solvers. Derives from FeedbackSolver.
40 //
41 ///////////////////////////////////////////////////////////////////////////////
42 
43 #ifndef ILQGAMES_SOLVER_ILQ_SOLVER_H
44 #define ILQGAMES_SOLVER_ILQ_SOLVER_H
45 
46 #include <ilqgames/cost/player_cost.h>
47 #include <ilqgames/dynamics/multi_player_dynamical_system.h>
48 #include <ilqgames/solver/game_solver.h>
49 #include <ilqgames/solver/lq_feedback_solver.h>
50 #include <ilqgames/solver/lq_solver.h>
51 #include <ilqgames/solver/solver_params.h>
52 #include <ilqgames/utils/linear_dynamics_approximation.h>
53 #include <ilqgames/utils/operating_point.h>
54 #include <ilqgames/utils/quadratic_cost_approximation.h>
55 #include <ilqgames/utils/solver_log.h>
56 #include <ilqgames/utils/strategy.h>
57 #include <ilqgames/utils/types.h>
58 
59 #include <glog/logging.h>
60 #include <limits>
61 #include <memory>
62 #include <vector>
63 
64 namespace ilqgames {
65 
66 class ILQSolver : public GameSolver {
67  public:
68  virtual ~ILQSolver() {}
69  ILQSolver(const std::shared_ptr<Problem>& problem,
70  const SolverParams& params = SolverParams())
71  : GameSolver(problem, params),
72  linearization_(time::kNumTimeSteps),
73  cost_quadraticization_(time::kNumTimeSteps),
74  last_merit_function_value_(constants::kInfinity),
75  expected_decrease_(constants::kInfinity) {
76  // Set up LQ solver.
77  if (params_.open_loop)
78  lq_solver_.reset(
79  new LQOpenLoopSolver(problem_->Dynamics(), time::kNumTimeSteps));
80  else
81  lq_solver_.reset(
82  new LQFeedbackSolver(problem_->Dynamics(), time::kNumTimeSteps));
83 
84  // If this system is flat then compute the linearization once, now.
85  if (problem_->Dynamics()->TreatAsLinear())
86  ComputeLinearization(&linearization_);
87 
88  // Prepopulate quadraticization.
89  for (auto& quads : cost_quadraticization_)
90  quads.resize(problem_->Dynamics()->NumPlayers(),
91  QuadraticCostApproximation(problem_->Dynamics()->XDim()));
92 
93  // Set last quadraticization to current, to start.
94  last_cost_quadraticization_ = cost_quadraticization_;
95  }
96 
97  // Solve this game. Returns true if converged.
98  virtual std::shared_ptr<SolverLog> Solve(
99  bool* success = nullptr,
100  Time max_runtime = std::numeric_limits<Time>::infinity());
101 
102  // Accessors.
103  // NOTE: these should be primarily used by higher-level solvers.
104  std::vector<std::vector<QuadraticCostApproximation>>* Quadraticization() {
105  return &cost_quadraticization_;
106  }
107 
108  std::vector<LinearDynamicsApproximation>* Linearization() {
109  return &linearization_;
110  }
111 
112  protected:
113  // Modify LQ strategies to improve convergence properties.
114  // This function performs an Armijo linesearch and returns true if successful.
115  bool ModifyLQStrategies(const std::vector<VectorXf>& delta_xs,
116  const std::vector<std::vector<VectorXf>>& costates,
117  std::vector<Strategy>* strategies,
118  OperatingPoint* current_operating_point,
119  bool* has_converged);
120 
121  // Compute distance (infinity norm) between states in the given dimensions.
122  // If dimensions empty, checks all dimensions.
123  float StateDistance(const VectorXf& x1, const VectorXf& x2,
124  const std::vector<Dimension>& dims) const;
125 
126  // Check if solver has converged.
127  virtual bool HasConverged(float current_merit_function_value) const {
128  return (last_merit_function_value_ - current_merit_function_value) <
129  params_.convergence_tolerance;
130  }
131 
132  // Compute overall costs and set times of extreme costs.
133  void TotalCosts(const OperatingPoint& current_op,
134  std::vector<float>* total_costs) const;
135 
136  // Armijo condition check. Returns true if the new operating point satisfies
137  // the Armijo condition, and also returns current merit function value.
138  bool CheckArmijoCondition(float current_merit_function_value,
139  float current_stepsize) const;
140 
141  // Compute current merit function value. Note that to compute the merit
142  // function at the given operating point we have to compute a full cost
143  // quadraticization there. To do so efficiently, this will overwrite the
144  // current cost quadraticization (and presume it has already been used to
145  // compute the expected decrease from the last iterate).
146  float MeritFunction(const OperatingPoint& current_op);
147 
148  // Compute expected decrease based on current cost quadraticization,
149  // (player-indexed) strategies, and (time-indexed) lists of delta states and
150  // (also player-indexed) costates.
151  float ExpectedDecrease(
152  const std::vector<Strategy>& strategies,
153  const std::vector<VectorXf>& delta_xs,
154  const std::vector<std::vector<VectorXf>>& costates) const;
155 
156  // Compute the current operating point based on the current set of
157  // strategies and the last operating point.
158  void CurrentOperatingPoint(const OperatingPoint& last_operating_point,
159  const std::vector<Strategy>& current_strategies,
160  OperatingPoint* current_operating_point) const;
161 
162  // Populate the given vector with a linearization of the dynamics about
163  // the given operating point. Provide version with no operating point for use
164  // with feedback linearizable systems.
165  void ComputeLinearization(
166  const OperatingPoint& op,
167  std::vector<LinearDynamicsApproximation>* linearization);
168  void ComputeLinearization(
169  std::vector<LinearDynamicsApproximation>* linearization);
170 
171  // Compute the quadratic cost approximation at the given operating point.
172  void ComputeCostQuadraticization(
173  const OperatingPoint& op,
174  std::vector<std::vector<QuadraticCostApproximation>>* q);
175 
176  // Linearization and quadraticization. Both are time-indexed (and
177  // quadraticizations' inner vector is indexed by player). Also keep track of
178  // the quadraticization from last iteration.
179  std::vector<LinearDynamicsApproximation> linearization_;
180  std::vector<std::vector<QuadraticCostApproximation>> cost_quadraticization_;
181  std::vector<std::vector<QuadraticCostApproximation>>
182  last_cost_quadraticization_;
183 
184  // Core LQ Solver.
185  std::unique_ptr<LQSolver> lq_solver_;
186 
187  // Last merit function value and expected decreases (per step length).
188  float last_merit_function_value_;
189  float expected_decrease_;
190 }; // class ILQSolver
191 
192 } // namespace ilqgames
193 
194 #endif