ilqgames
A new real-time solver for large-scale differential games.
lq_feedback_solver.h
1 /*
2  * Copyright (c) 2019, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Core LQ game solver from Basar and Olsder, "Preliminary Notation for
40 // Corollary 6.1" (pp. 279). All notation matches the text, though we
41 // shall assume that `c` (additive drift in dynamics) is always `0`, which
42 // holds because these dynamics are for delta x, delta us.
43 // Also, we have modified terms slightly to account for linear terms in the
44 // stage cost for control, i.e.
45 // control penalty i = 0.5 \sum_j du_j^T R_ij (du_j + 2 r_ij)
46 //
47 // Solve a time-varying, finite horizon LQ game (finds closed-loop Nash
48 // feedback strategies for all players).
49 //
50 // Assumes that dynamics are given by
51 // ``` dx_{k+1} = A_k dx_k + \sum_i Bs[i]_k du[i]_k ```
52 //
53 // Returns strategies Ps, alphas.
54 //
55 ///////////////////////////////////////////////////////////////////////////////
56 
57 #ifndef ILQGAMES_SOLVER_LQ_FEEDBACK_SOLVER_H
58 #define ILQGAMES_SOLVER_LQ_FEEDBACK_SOLVER_H
59 
60 #include <ilqgames/dynamics/multi_player_integrable_system.h>
61 #include <ilqgames/solver/lq_solver.h>
62 #include <ilqgames/utils/linear_dynamics_approximation.h>
63 #include <ilqgames/utils/quadratic_cost_approximation.h>
64 #include <ilqgames/utils/strategy.h>
65 
66 #include <vector>
67 
68 namespace ilqgames {
69 
70 class LQFeedbackSolver : public LQSolver {
71  public:
72  ~LQFeedbackSolver() {}
74  const std::shared_ptr<const MultiPlayerIntegrableSystem>& dynamics,
75  size_t num_time_steps)
76  : LQSolver(dynamics, num_time_steps) {
77  // Cache the total number of control dimensions, since this is inefficient
78  // to compute.
79  const Dimension total_udim = dynamics_->TotalUDim();
80 
81  // Preallocate memory for coupled Riccati solve at each time step and make
82  // Eigen::Refs to the solution.
83  S_.resize(total_udim, total_udim);
84  X_.resize(total_udim, dynamics_->XDim() + 1);
85  Y_.resize(total_udim, dynamics_->XDim() + 1);
86 
87  Dimension cumulative_udim = 0;
88  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
89  Ps_.push_back(
90  X_.block(cumulative_udim, 0, dynamics_->UDim(ii), dynamics_->XDim()));
91  alphas_.push_back(X_.col(dynamics_->XDim())
92  .segment(cumulative_udim, dynamics_->UDim(ii)));
93 
94  // Increment cumulative_udim.
95  cumulative_udim += dynamics_->UDim(ii);
96  }
97 
98  // Initialize Zs and zetas for each time and player. Note that we need to
99  // store over all time to compute optimal costates if desired.
100  Zs_.resize(num_time_steps_);
101  zetas_.resize(num_time_steps_);
102  for (size_t kk = 0; kk < num_time_steps_; kk++) {
103  Zs_[kk].resize(dynamics_->NumPlayers());
104  zetas_[kk].resize(dynamics_->NumPlayers());
105  for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
106  Zs_[kk][ii].resize(dynamics_->XDim(), dynamics_->XDim());
107  zetas_[kk][ii].resize(dynamics_->XDim());
108  }
109  }
110 
111  // Preallocate memory for intermediate variables F, beta.
112  F_.resize(dynamics_->XDim(), dynamics_->XDim());
113  beta_.resize(dynamics_->XDim());
114  }
115 
116  // Solve underlying LQ game to a feedback Nash equilibrium.
117  // Optionally return delta xs and costates.
118  std::vector<Strategy> Solve(
119  const std::vector<LinearDynamicsApproximation>& linearization,
120  const std::vector<std::vector<QuadraticCostApproximation>>&
121  quadraticization,
122  const VectorXf& x0, std::vector<VectorXf>* delta_xs = nullptr,
123  std::vector<std::vector<VectorXf>>* costates = nullptr);
124 
125  private:
126  // Quadratic/linear components of value function at the current time step in
127  // the dynamic program.
128  // NOTE: since these will be computed by solving a big
129  // linear matrix equation S [Ps, alphas] = [YPs, Yalphas] (i.e., S X = Y), we
130  // will pre-allocate the memory for that equation and define these components
131  // as Eigen::Refs.
132  MatrixXf S_, X_, Y_;
133  std::vector<Eigen::Ref<MatrixXf>> Ps_;
134  std::vector<Eigen::Ref<VectorXf>> alphas_;
135 
136  // Initialize Zs and zetas for each time and player.
137  std::vector<std::vector<MatrixXf>> Zs_;
138  std::vector<std::vector<VectorXf>> zetas_;
139 
140  // Preallocate memory for intermediate variables F, beta.
141  MatrixXf F_;
142  VectorXf beta_;
143 }; // LQFeedbackSolver
144 
145 } // namespace ilqgames
146 
147 #endif