61 #include <ilqgames/solver/lq_feedback_solver.h> 62 #include <ilqgames/utils/linear_dynamics_approximation.h> 63 #include <ilqgames/utils/quadratic_cost_approximation.h> 64 #include <ilqgames/utils/strategy.h> 66 #include <glog/logging.h> 71 std::vector<Strategy> LQFeedbackSolver::Solve(
72 const std::vector<LinearDynamicsApproximation>& linearization,
73 const std::vector<std::vector<QuadraticCostApproximation>>&
75 const VectorXf& x0, std::vector<VectorXf>* delta_xs,
76 std::vector<std::vector<VectorXf>>* costates) {
77 CHECK_EQ(linearization.size(), num_time_steps_);
78 CHECK_EQ(quadraticization.size(), num_time_steps_);
81 if (delta_xs) CHECK_NOTNULL(costates);
82 if (costates) CHECK_NOTNULL(delta_xs);
84 delta_xs->resize(num_time_steps_);
85 costates->resize(num_time_steps_);
86 for (
size_t kk = 0; kk < num_time_steps_; kk++) {
87 (*delta_xs)[kk].resize(dynamics_->XDim());
88 (*costates)[kk].resize(dynamics_->NumPlayers());
89 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
90 (*costates)[kk][ii].resize(dynamics_->XDim());
96 std::vector<Strategy> strategies;
97 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
98 strategies.emplace_back(num_time_steps_, dynamics_->XDim(),
102 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
103 Zs_[num_time_steps_ - 1][ii] = quadraticization.back()[ii].state.hess;
104 zetas_[num_time_steps_ - 1][ii] = quadraticization.back()[ii].state.grad;
110 for (
int kk = num_time_steps_ - 2; kk >= 0; kk--) {
112 const auto& lin = linearization[kk];
113 const auto& quad = quadraticization[kk];
119 Dimension cumulative_udim_row = 0;
120 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
128 const MatrixXf BiZi = lin.Bs[ii].transpose() * Zs_[kk + 1][ii];
130 Dimension cumulative_udim_col = 0;
131 for (PlayerIndex jj = 0; jj < dynamics_->NumPlayers(); jj++) {
132 Eigen::Ref<MatrixXf> S_block =
133 S_.block(cumulative_udim_row, cumulative_udim_col,
134 dynamics_->UDim(ii), dynamics_->UDim(jj));
138 const auto control_iter = quad[ii].control.find(ii);
139 CHECK(control_iter != quad[ii].control.end())
140 <<
"Player " << ii <<
" is missing a control Hessian.";
142 S_block = BiZi * lin.Bs[ii] + control_iter->second.hess;
144 S_block = BiZi * lin.Bs[jj];
148 cumulative_udim_col += dynamics_->UDim(jj);
152 Y_.block(cumulative_udim_row, 0, dynamics_->UDim(ii), dynamics_->XDim()) =
154 Y_.col(dynamics_->XDim())
155 .segment(cumulative_udim_row, dynamics_->UDim(ii)) =
156 lin.Bs[ii].transpose() * zetas_[kk + 1][ii] +
157 quad[ii].control.at(ii).grad;
160 cumulative_udim_row += dynamics_->UDim(ii);
165 X_ = S_.householderQr().solve(Y_);
168 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
169 strategies[ii].Ps[kk] = Ps_[ii];
170 strategies[ii].alphas[kk] = alphas_[ii];
175 beta_ = VectorXf::Zero(dynamics_->XDim());
176 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
177 F_ -= lin.Bs[ii] * Ps_[ii];
178 beta_ -= lin.Bs[ii] * alphas_[ii];
182 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
184 (F_.transpose() * (zetas_[kk + 1][ii] + Zs_[kk + 1][ii] * beta_) +
188 (F_.transpose() * Zs_[kk + 1][ii] * F_ + quad[ii].state.hess).eval();
191 for (
const auto& Rij_entry : quad[ii].control) {
192 const PlayerIndex jj = Rij_entry.first;
193 const MatrixXf& Rij = Rij_entry.second.hess;
194 const VectorXf& rij = Rij_entry.second.grad;
195 zetas_[kk][ii] += Ps_[jj].transpose() * (Rij * alphas_[jj] - rij);
196 Zs_[kk][ii] += Ps_[jj].transpose() * Rij * Ps_[jj];
203 VectorXf x_star = x0;
204 VectorXf last_x_star;
205 for (
size_t kk = 0; kk < num_time_steps_; kk++) {
206 (*delta_xs)[kk] = x_star;
207 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++) {
208 if (kk < num_time_steps_ - 1)
209 (*costates)[kk][ii] = -Zs_[kk + 1][ii] * x_star - zetas_[kk + 1][ii];
211 (*costates)[kk][ii].setZero();
215 const auto& lin = linearization[kk];
218 last_x_star = x_star;
219 x_star = lin.A * last_x_star;
220 for (PlayerIndex ii = 0; ii < dynamics_->NumPlayers(); ii++)
221 x_star -= lin.Bs[ii] * strategies[ii].alphas[kk];