ilqgames
A new real-time solver for large-scale differential games.
lq_feedback_solver.cpp
1 /*
2  * Copyright (c) 2019, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Core LQ game solver from Basar and Olsder, "Preliminary Notation for
40 // Corollary 6.1" (pp. 279). All notation matches the text, though we
41 // shall assume that `c` (additive drift in dynamics) is always `0`, which
42 // holds because these dynamics are for delta x, delta us.
43 //
44 // Solve a time-varying, finite horizon LQ game (finds closed-loop Nash
45 // feedback strategies for both players).
46 //
47 // Assumes that dynamics are given by
48 // ``` dx_{k+1} = A_k dx_k + \sum_i Bs[i]_k du[i]_k ```
49 //
50 // NOTE: Bs, Qs, ls, R1s, R2s are all lists of lists of matrices.
51 // NOTE: all indices of inner lists correspond to the "current time" k except
52 // for those of the Qs, which correspond to the "next time" k+1. That is,
53 // the kth entry of Qs[i] is the state cost corresponding to time step k+1. This
54 // makes sense because there is no point assigning any state cost to the
55 // initial state x_0.
56 //
57 // Returns strategies Ps, alphas.
58 //
59 ///////////////////////////////////////////////////////////////////////////////
60 
61 #include <ilqgames/solver/lq_feedback_solver.h>
62 #include <ilqgames/utils/linear_dynamics_approximation.h>
63 #include <ilqgames/utils/quadratic_cost_approximation.h>
64 #include <ilqgames/utils/strategy.h>
65 
66 #include <glog/logging.h>
67 #include <vector>
68 
69 namespace ilqgames {
70 
71 std::vector<Strategy> LQFeedbackSolver::Solve(
72  const std::vector<LinearDynamicsApproximation>& linearization,
73  const std::vector<std::vector<QuadraticCostApproximation>>&
74  quadraticization,
75  const VectorXf& x0, std::vector<VectorXf>* delta_xs,
76  std::vector<std::vector<VectorXf>>* costates) {
77  CHECK_EQ(linearization.size(), num_time_steps_);
78  CHECK_EQ(quadraticization.size(), num_time_steps_);
79 
80  // Make sure delta_xs and costates are the right size.
81  if (delta_xs) CHECK_NOTNULL(costates);
82  if (costates) CHECK_NOTNULL(delta_xs);
83  if (delta_xs) {
84  delta_xs->resize(num_time_steps_);
85  costates->resize(num_time_steps_);
86  for (size_t kk = 0; kk < num_time_steps_; kk++) {
87  (*delta_xs)[kk].resize(dynamics_->XDim());
88  (*costates)[kk].resize(dynamics_->NumPlayers());
89  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
90  (*costates)[kk][ii].resize(dynamics_->XDim());
91  }
92  }
93 
94  // List of player-indexed strategies (each of which is a time-indexed
95  // affine state error-feedback controller).
96  std::vector<Strategy> strategies;
97  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
98  strategies.emplace_back(num_time_steps_, dynamics_->XDim(),
99  dynamics_->UDim(ii));
100 
101  // Initialize Zs and zetas at the final time.
102  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
103  Zs_[num_time_steps_ - 1][ii] = quadraticization.back()[ii].state.hess;
104  zetas_[num_time_steps_ - 1][ii] = quadraticization.back()[ii].state.grad;
105  }
106 
107  // Work backward in time and solve the dynamic program.
108  // NOTE: time starts from the second-to-last entry since we'll treat the final
109  // entry as a terminal cost as in Basar and Olsder, ch. 6.
110  for (int kk = num_time_steps_ - 2; kk >= 0; kk--) {
111  // Unpack linearization and quadraticization at this time step.
112  const auto& lin = linearization[kk];
113  const auto& quad = quadraticization[kk];
114 
115  // Populate coupling matrix S for linear matrix equation to determine X (Ps
116  // and alphas).
117  // NOTE: S is generally dense and asymmetric, though it is symmetric if all
118  // players have the same Z.
119  Dimension cumulative_udim_row = 0;
120  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
121  // // Check Nash existence condition (sufficient, not necessary).
122  // Eigen::LLT<MatrixXf> llt(quad[ii].control.find(ii)->second.hess +
123  // lin.Bs[ii].transpose() * Zs_[ii] *
124  // lin.Bs[ii]);
125  // CHECK(llt.info() != Eigen::NumericalIssue);
126 
127  // Intermediate variable to store B[ii]' * Z[ii].
128  const MatrixXf BiZi = lin.Bs[ii].transpose() * Zs_[kk + 1][ii];
129 
130  Dimension cumulative_udim_col = 0;
131  for (PlayerIndex jj = 0; jj < dynamics_->NumPlayers(); jj++) {
132  Eigen::Ref<MatrixXf> S_block =
133  S_.block(cumulative_udim_row, cumulative_udim_col,
134  dynamics_->UDim(ii), dynamics_->UDim(jj));
135 
136  if (ii == jj) {
137  // Does player ii's cost depend upon player jj's control?
138  const auto control_iter = quad[ii].control.find(ii);
139  CHECK(control_iter != quad[ii].control.end())
140  << "Player " << ii << " is missing a control Hessian.";
141 
142  S_block = BiZi * lin.Bs[ii] + control_iter->second.hess;
143  } else {
144  S_block = BiZi * lin.Bs[jj];
145  }
146 
147  // Increment cumulative_udim_col.
148  cumulative_udim_col += dynamics_->UDim(jj);
149  }
150 
151  // Set appropriate blocks of Y.
152  Y_.block(cumulative_udim_row, 0, dynamics_->UDim(ii), dynamics_->XDim()) =
153  BiZi * lin.A;
154  Y_.col(dynamics_->XDim())
155  .segment(cumulative_udim_row, dynamics_->UDim(ii)) =
156  lin.Bs[ii].transpose() * zetas_[kk + 1][ii] +
157  quad[ii].control.at(ii).grad;
158 
159  // Increment cumulative_udim_row.
160  cumulative_udim_row += dynamics_->UDim(ii);
161  }
162 
163  // Solve linear matrix equality S X = Y.
164  // NOTE: not 100% sure that this avoids dynamic memory allocation.
165  X_ = S_.householderQr().solve(Y_);
166 
167  // Set strategy at current time step.
168  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
169  strategies[ii].Ps[kk] = Ps_[ii];
170  strategies[ii].alphas[kk] = alphas_[ii];
171  }
172 
173  // Compute F and beta.
174  F_ = lin.A;
175  beta_ = VectorXf::Zero(dynamics_->XDim());
176  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
177  F_ -= lin.Bs[ii] * Ps_[ii];
178  beta_ -= lin.Bs[ii] * alphas_[ii];
179  }
180 
181  // Update Zs and zetas.
182  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
183  zetas_[kk][ii] =
184  (F_.transpose() * (zetas_[kk + 1][ii] + Zs_[kk + 1][ii] * beta_) +
185  quad[ii].state.grad)
186  .eval();
187  Zs_[kk][ii] =
188  (F_.transpose() * Zs_[kk + 1][ii] * F_ + quad[ii].state.hess).eval();
189 
190  // Add terms for nonzero Rijs.
191  for (const auto& Rij_entry : quad[ii].control) {
192  const PlayerIndex jj = Rij_entry.first;
193  const MatrixXf& Rij = Rij_entry.second.hess;
194  const VectorXf& rij = Rij_entry.second.grad;
195  zetas_[kk][ii] += Ps_[jj].transpose() * (Rij * alphas_[jj] - rij);
196  Zs_[kk][ii] += Ps_[jj].transpose() * Rij * Ps_[jj];
197  }
198  }
199  }
200 
201  // Maybe compute delta_xs and costates forward in time.
202  if (delta_xs) {
203  VectorXf x_star = x0;
204  VectorXf last_x_star;
205  for (size_t kk = 0; kk < num_time_steps_; kk++) {
206  (*delta_xs)[kk] = x_star;
207  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
208  if (kk < num_time_steps_ - 1)
209  (*costates)[kk][ii] = -Zs_[kk + 1][ii] * x_star - zetas_[kk + 1][ii];
210  else
211  (*costates)[kk][ii].setZero();
212  }
213 
214  // Unpack linearization at this time step.
215  const auto& lin = linearization[kk];
216 
217  // Compute optimal x.
218  last_x_star = x_star;
219  x_star = lin.A * last_x_star;
220  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
221  x_star -= lin.Bs[ii] * strategies[ii].alphas[kk];
222  }
223  }
224 
225  return strategies;
226 }
227 
228 } // namespace ilqgames