ilqgames
A new real-time solver for large-scale differential games.
lq_open_loop_solver.cpp
1 /*
2  * Copyright (c) 2020, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Core open-loop LQ game solver based on Basar and Olsder, Chapter 6. All
40 // notation matches the text, though we shall assume that `c` (additive drift in
41 // dynamics) is always `0`, which holds because these dynamics are for delta x,
42 // delta us. Also, we have modified terms slightly to account for linear terms
43 // in the stage cost for control, i.e.
44 // control penalty i = 0.5 \sum_j du_j^T R_ij (du_j + 2 r_ij)
45 //
46 // Solve a time-varying, finite horizon LQ game (finds open-loop Nash
47 // feedback strategies for both players).
48 //
49 // Assumes that dynamics are given by
50 // ``` dx_{k+1} = A_k dx_k + \sum_i Bs[i]_k du[i]_k ```
51 //
52 // Returns strategies Ps, alphas. Here, all the Ps are zero (by default), and
53 // only the alphas are nonzero.
54 //
55 // Notation is based on derivation which may be found in the PDF included in
56 // this repository named "open_loop_lq_derivation.pdf".
57 //
58 ///////////////////////////////////////////////////////////////////////////////
59 
60 #include <ilqgames/dynamics/multi_player_integrable_system.h>
61 #include <ilqgames/solver/lq_feedback_solver.h>
62 #include <ilqgames/solver/lq_open_loop_solver.h>
63 #include <ilqgames/utils/linear_dynamics_approximation.h>
64 #include <ilqgames/utils/quadratic_cost_approximation.h>
65 #include <ilqgames/utils/strategy.h>
66 
67 #include <glog/logging.h>
68 #include <Eigen/Core>
69 #include <vector>
70 
71 namespace ilqgames {
72 
73 std::vector<Strategy> LQOpenLoopSolver::Solve(
74  const std::vector<LinearDynamicsApproximation>& linearization,
75  const std::vector<std::vector<QuadraticCostApproximation>>&
76  quadraticization,
77  const VectorXf& x0, std::vector<VectorXf>* delta_xs,
78  std::vector<std::vector<VectorXf>>* costates) {
79  CHECK_EQ(linearization.size(), num_time_steps_);
80  CHECK_EQ(quadraticization.size(), num_time_steps_);
81 
82  // Make sure delta_xs and costates are the right size.
83  if (delta_xs) CHECK_NOTNULL(costates);
84  if (costates) CHECK_NOTNULL(delta_xs);
85  if (delta_xs) {
86  delta_xs->resize(num_time_steps_);
87  costates->resize(num_time_steps_);
88  for (size_t kk = 0; kk < num_time_steps_; kk++) {
89  (*delta_xs)[kk].resize(dynamics_->XDim());
90  (*costates)[kk].resize(dynamics_->NumPlayers());
91  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
92  (*costates)[kk][ii].resize(dynamics_->XDim());
93  }
94  }
95 
96  // List of player-indexed strategies (each of which is a time-indexed
97  // affine state error-feedback controller). Since this is an open-loop
98  // strategy, we will not change the default zero value of the P matrix.
99  std::vector<Strategy> strategies;
100  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
101  strategies.emplace_back(num_time_steps_, dynamics_->XDim(),
102  dynamics_->UDim(ii));
103 
104  // Initialize m^i and M^i and index first by time and then by player.
105  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
106  ms_.back()[ii] = quadraticization.back()[ii].state.grad;
107  Ms_.back()[ii] = quadraticization.back()[ii].state.hess;
108  }
109 
110  // (1) Work backward in time and cache "special" terms.
111  // NOTE: time starts from the second-to-last entry since we'll treat the
112  // final entry as a terminal cost as in Basar and Olsder, ch. 6.
113  for (int kk = num_time_steps_ - 2; kk >= 0; kk--) {
114  // Unpack linearization and quadraticization at this time step.
115  const auto& lin = linearization[kk];
116  const auto& quad = quadraticization[kk];
117 
118  // Campute capital lambdas.
119  capital_lambdas_[kk].setIdentity();
120  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
121  const auto control_iter = quad[ii].control.find(ii);
122  CHECK(control_iter != quad[ii].control.end());
123 
124  chol_Rs_[kk][ii].compute(control_iter->second.hess);
125  warped_Bs_[kk][ii] = chol_Rs_[kk][ii].solve(lin.Bs[ii].transpose());
126  warped_rs_[kk][ii] = chol_Rs_[kk][ii].solve(control_iter->second.grad);
127  capital_lambdas_[kk] += lin.Bs[ii] * warped_Bs_[kk][ii] * Ms_[kk + 1][ii];
128  }
129 
130  // Compute inv(capital lambda).
131  qr_capital_lambdas_[kk].compute(capital_lambdas_[kk]);
132 
133  // Compute Ms and ms.
134  intermediate_terms_[kk].setZero();
135  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
136  intermediate_terms_[kk] -=
137  lin.Bs[ii] *
138  (warped_Bs_[kk][ii] * ms_[kk + 1][ii] + warped_rs_[kk][ii]);
139  }
140 
141  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
142  Ms_[kk][ii] =
143  quad[ii].state.hess + lin.A.transpose() * Ms_[kk + 1][ii] *
144  qr_capital_lambdas_[kk].solve(lin.A);
145  ms_[kk][ii] =
146  quad[ii].state.grad +
147  lin.A.transpose() * (ms_[kk + 1][ii] +
148  Ms_[kk + 1][ii] * qr_capital_lambdas_[kk].solve(
149  intermediate_terms_[kk]));
150  }
151  }
152 
153  // (2) Now compute optimal state and control trajectory forward in time.
154  VectorXf x_star = x0;
155  VectorXf last_x_star;
156  for (size_t kk = 0; kk < num_time_steps_ - 1; kk++) {
157  // Maybe set delta_x.
158  if (delta_xs) (*delta_xs)[kk] = x_star;
159 
160  // Unpack linearization at this time step.
161  const auto& lin = linearization[kk];
162 
163  // Compute optimal x.
164  last_x_star = x_star;
165  x_star = qr_capital_lambdas_[kk].solve(lin.A * last_x_star +
166  intermediate_terms_[kk]);
167 
168  // Compute optimal u and store (sign flipped) in alpha.
169  // Also maybe compute costates.
170  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
171  const VectorXf intermediate_term =
172  Ms_[kk + 1][ii] * x_star + ms_[kk + 1][ii];
173  strategies[ii].alphas[kk] =
174  warped_Bs_[kk][ii] * intermediate_term + warped_rs_[kk][ii];
175 
176  if (costates) (*costates)[kk][ii] = lin.A.transpose() * intermediate_term;
177  }
178 
179  // Check dynamic feasibility.
180  // VectorXf check_x = lin.A * last_x_star;
181  // for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
182  // check_x -= lin.Bs[ii] * strategies[ii].alphas[kk];
183 
184  // CHECK_LE((x_star - check_x).cwiseAbs().maxCoeff(), 1e-1);
185  }
186 
187  // Set delta_x and costate for last time step.
188  if (delta_xs) {
189  delta_xs->back() = x_star;
190  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
191  costates->back()[ii].setZero();
192  }
193 
194  return strategies;
195 }
196 
197 } // namespace ilqgames