ilqgames
A new real-time solver for large-scale differential games.
ilq_solver.cpp
1 /*
2  * Copyright (c) 2019, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Base class for all iterative LQ game solvers.
40 // Structured so that derived classes may only modify the `ModifyLQStrategies`
41 // and `HasConverged` virtual functions.
42 //
43 ///////////////////////////////////////////////////////////////////////////////
44 
45 #include <ilqgames/cost/player_cost.h>
46 #include <ilqgames/solver/ilq_solver.h>
47 #include <ilqgames/solver/lq_solver.h>
48 #include <ilqgames/utils/linear_dynamics_approximation.h>
49 #include <ilqgames/utils/loop_timer.h>
50 #include <ilqgames/utils/operating_point.h>
51 #include <ilqgames/utils/quadratic_cost_approximation.h>
52 #include <ilqgames/utils/strategy.h>
53 #include <ilqgames/utils/types.h>
54 
55 #include <glog/logging.h>
56 #include <chrono>
57 #include <memory>
58 #include <numeric>
59 #include <vector>
60 
61 namespace ilqgames {
62 
63 namespace {
64 
65 // Multiply all alphas in a set of strategies by the given constant.
66 void ScaleAlphas(float scaling, std::vector<Strategy>* strategies) {
67  CHECK_NOTNULL(strategies);
68 
69  for (auto& strategy : *strategies) {
70  for (auto& alpha : strategy.alphas) alpha *= scaling;
71  }
72 }
73 
74 } // anonymous namespace
75 
76 std::shared_ptr<SolverLog> ILQSolver::Solve(bool* success, Time max_runtime) {
77  const auto solver_call_time = Clock::now();
78 
79  // Create a new log.
80  std::shared_ptr<SolverLog> log = CreateNewLog();
81 
82  // Last and current operating points. Make sure the last one starts from the
83  // current state so that the current one will start there as well.
84  // NOTE: setting the current operating point to start at x0 is critical to the
85  // constraint satisfaction check at the first iteration.
86  OperatingPoint last_operating_point(problem_->CurrentOperatingPoint());
87  OperatingPoint current_operating_point(problem_->CurrentOperatingPoint());
88  current_operating_point.xs[0] = problem_->InitialState();
89  last_operating_point.xs[0] = problem_->InitialState();
90 
91  // Current strategies.
92  std::vector<Strategy> current_strategies(problem_->CurrentStrategies());
93 
94  // Things to keep track of during each iteration.
95  size_t num_iterations = 0;
96  bool has_converged = false;
97 
98  // Swap operating points and compute new current operating point. Future
99  // operating points will be computed during the call to `ModifyLQStrategies`
100  // which occurs after solving the LQ game.
101  std::vector<float> total_costs;
102  last_operating_point.swap(current_operating_point);
103  CurrentOperatingPoint(last_operating_point, current_strategies,
104  &current_operating_point);
105 
106  // Compute total costs.
107  TotalCosts(current_operating_point, &total_costs);
108 
109  // Log current iterate.
110  Time elapsed = 0.0;
111  log->AddSolverIterate(current_operating_point, current_strategies,
112  total_costs, elapsed, has_converged);
113 
114  // Quadraticize costs before first iteration. Subsequent quadraticizations
115  // will happen inside the linesearch every time we compute the merit function.
116  ComputeCostQuadraticization(current_operating_point, &cost_quadraticization_);
117 
118  // Maintain delta_xs and costates for linesearch.
119  std::vector<VectorXf> delta_xs;
120  std::vector<std::vector<VectorXf>> costates;
121 
122  // Main loop with timer for anytime execution.
123  while (num_iterations < params_.max_solver_iters && !has_converged &&
124  elapsed < max_runtime - timer_.RuntimeUpperBound()) {
125  // Start loop timer.
126  timer_.Tic();
127 
128  // New iteration.
129  num_iterations++;
130 
131  // Linearize dynamics about the new operating point, only if the system
132  // can't be treated as linear from the outset, in which case we've already
133  // linearized it.
134  // NOTE: we are already computing a new quadraticization
135  // during the linesearch process.
136  if (!problem_->Dynamics()->TreatAsLinear())
137  ComputeLinearization(current_operating_point, &linearization_);
138 
139  // Solve LQ game.
140  current_strategies = lq_solver_->Solve(
141  linearization_, cost_quadraticization_,
142  problem_->InitialState() - current_operating_point.xs.front(),
143  &delta_xs, &costates);
144 
145  // Modify this LQ solution.
146  if (!ModifyLQStrategies(delta_xs, costates, &current_strategies,
147  &current_operating_point, &has_converged)) {
148  // Maybe emit warning if exiting early.
149  VLOG(1) << "Solver exited due to linesearch failure.";
150 
151  // Handle success flag.
152  if (success) *success = false;
153 
154  return log;
155  }
156 
157  // Compute total costs and check if we've converged.
158  TotalCosts(current_operating_point, &total_costs);
159 
160  // Record loop runtime.
161  elapsed += timer_.Toc();
162 
163  // Log current iterate.
164  log->AddSolverIterate(current_operating_point, current_strategies,
165  total_costs, elapsed, has_converged);
166  }
167 
168  // Handle success flag.
169  if (success) *success = true;
170 
171  return log;
172 }
173 
174 void ILQSolver::CurrentOperatingPoint(
175  const OperatingPoint& last_operating_point,
176  const std::vector<Strategy>& current_strategies,
177  OperatingPoint* current_operating_point) const {
178  CHECK_NOTNULL(current_operating_point);
179 
180  // Initialize time.
181  current_operating_point->t0 = last_operating_point.t0;
182 
183  // Integrate dynamics and populate operating point, one time step at a time.
184  VectorXf x(last_operating_point.xs[0]);
185  for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
186  const Time t = RelativeTimeTracker::RelativeTime(kk);
187 
188  // Unpack.
189  const VectorXf delta_x = x - last_operating_point.xs[kk];
190  const auto& last_us = last_operating_point.us[kk];
191  auto& current_us = current_operating_point->us[kk];
192 
193  // Record state.
194  current_operating_point->xs[kk] = x;
195 
196  // Compute and record control for each player.
197  for (PlayerIndex jj = 0; jj < problem_->Dynamics()->NumPlayers(); jj++) {
198  const auto& strategy = current_strategies[jj];
199  current_us[jj] = strategy(kk, delta_x, last_us[jj]);
200  }
201 
202  // Integrate dynamics for one time step.
203  if (kk < time::kNumTimeSteps - 1)
204  x = problem_->Dynamics()->Integrate(t, time::kTimeStep, x, current_us);
205  }
206 }
207 
208 // bool ILQSolver::HasConverged(const OperatingPoint& last_op,
209 // const OperatingPoint& current_op) const {
210 // for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
211 // const float delta_x_distance = StateDistance(
212 // current_op.xs[kk], last_op.xs[kk], params_.trust_region_dimensions);
213 
214 // if (delta_x_distance > params_.convergence_tolerance) return false;
215 // }
216 
217 // return true;
218 // }
219 
220 void ILQSolver::TotalCosts(const OperatingPoint& current_op,
221  std::vector<float>* total_costs) const {
222  // Initialize appropriately.
223  if (total_costs->size() != problem_->PlayerCosts().size())
224  total_costs->resize(problem_->PlayerCosts().size());
225  for (PlayerIndex ii = 0; ii < problem_->PlayerCosts().size(); ii++) {
226  if (problem_->PlayerCosts()[ii].IsTimeAdditive())
227  (*total_costs)[ii] = 0.0;
228  else if (problem_->PlayerCosts()[ii].IsMaxOverTime())
229  (*total_costs)[ii] = -constants::kInfinity;
230  else
231  (*total_costs)[ii] = constants::kInfinity;
232  }
233 
234  // Accumulate costs.
235  for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
236  const Time t = RelativeTimeTracker::RelativeTime(kk);
237 
238  for (size_t ii = 0; ii < problem_->PlayerCosts().size(); ii++) {
239  const float current_cost = problem_->PlayerCosts()[ii].Evaluate(
240  t, current_op.xs[kk], current_op.us[kk]);
241 
242  if (problem_->PlayerCosts()[ii].IsTimeAdditive())
243  (*total_costs)[ii] += problem_->PlayerCosts()[ii].Evaluate(
244  t, current_op.xs[kk], current_op.us[kk]);
245  else if (problem_->PlayerCosts()[ii].IsMaxOverTime() &&
246  current_cost > (*total_costs)[ii]) {
247  (*total_costs)[ii] = current_cost;
248  problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk);
249  } else if (problem_->PlayerCosts()[ii].IsMinOverTime()) {
250  if (current_cost < (*total_costs)[ii]) {
251  (*total_costs)[ii] = current_cost;
252  problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk);
253  }
254  }
255  }
256  }
257 }
258 
259 float ILQSolver::StateDistance(const VectorXf& x1, const VectorXf& x2,
260  const std::vector<Dimension>& dims) const {
261  auto total_distance = [&dims](const VectorXf& x1, const VectorXf& x2) {
262  if (dims.empty()) return (x1 - x2).cwiseAbs().maxCoeff();
263 
264  float distance = 0.0;
265  for (const Dimension dim : dims) distance += std::abs(x1(dim) - x2(dim));
266 
267  return distance;
268  }; // total_distance
269 
270  if (problem_->Dynamics()->TreatAsLinear()) {
271  const auto& dyn = problem_->FlatDynamics();
272 
273  // If singular return infinite distance and throw a warning. Otherwise, use
274  // base class implementation but for nonlinear system states.
275  if (dyn.IsLinearSystemStateSingular(x1) ||
276  dyn.IsLinearSystemStateSingular(x2)) {
277  LOG(WARNING)
278  << "Singular state encountered when computing state distance.";
279  return std::numeric_limits<float>::infinity();
280  }
281 
282  return total_distance(dyn.FromLinearSystemState(x1),
283  dyn.FromLinearSystemState(x2));
284  }
285 
286  return total_distance(x1, x2);
287 }
288 
289 bool ILQSolver::ModifyLQStrategies(
290  const std::vector<VectorXf>& delta_xs,
291  const std::vector<std::vector<VectorXf>>& costates,
292  std::vector<Strategy>* strategies, OperatingPoint* current_operating_point,
293  bool* has_converged) {
294  CHECK_NOTNULL(strategies);
295  CHECK_NOTNULL(current_operating_point);
296  CHECK_NOTNULL(has_converged);
297 
298  // DEBUG: show how alphas are decaying - i.e., we're finding a fixed point.
299  // std::cout << strategies->front().alphas.front().squaredNorm() <<
300  // std::endl;
301 
302  // Precompute expected decrease before we do anything else.
303  expected_decrease_ = ExpectedDecrease(*strategies, delta_xs, costates);
304 
305  // Every computation of the merit function will overwrite the current cost
306  // quadraticization, so first swap it with the previous one so we retain a
307  // copy.
308  last_cost_quadraticization_.swap(cost_quadraticization_);
309 
310  // Initially scale alphas by a fixed amount to avoid unnecessary
311  // backtracking.
312  // NOTE: use adaptive initialization here based on history.
313  ScaleAlphas(params_.initial_alpha_scaling, strategies);
314 
315  // Compute next operating point and keep track of whether it satisfies the
316  // Armijo condition.
317  const OperatingPoint last_operating_point(*current_operating_point);
318  float current_stepsize = params_.initial_alpha_scaling;
319  CurrentOperatingPoint(last_operating_point, *strategies,
320  current_operating_point);
321  if (!params_.linesearch) return true;
322 
323  // Keep reducing alphas until we satisfy the Armijo condition.
324  for (size_t ii = 0; ii < params_.max_backtracking_steps; ii++) {
325  // Compute merit function value.
326  const float current_merit_function_value =
327  MeritFunction(*current_operating_point);
328 
329  // Check Armijo condition.
330  if (CheckArmijoCondition(current_merit_function_value, current_stepsize)) {
331  // Success! Update cached terms and check convergence.
332  *has_converged = HasConverged(current_merit_function_value);
333  last_merit_function_value_ = current_merit_function_value;
334  return true;
335  }
336 
337  // Scale down the alphas and try again.
338  ScaleAlphas(params_.geometric_alpha_scaling, strategies);
339  current_stepsize *= params_.geometric_alpha_scaling;
340  CurrentOperatingPoint(last_operating_point, *strategies,
341  current_operating_point);
342  }
343 
344  // Output a warning. Solver should revert to last valid operating point.
345  VLOG(1) << "Exceeded maximum number of backtracking steps.";
346  return false;
347 }
348 
349 bool ILQSolver::CheckArmijoCondition(float current_merit_function_value,
350  float current_stepsize) const {
351  // Adjust total expected decrease.
352  const float scaled_expected_decrease = params_.expected_decrease_fraction *
353  current_stepsize * expected_decrease_;
354 
355  // std::cout << "Improvement = "
356  // << last_merit_function_value_ - current_merit_function_value
357  // << ", expected = " << scaled_expected_decrease << std::endl;
358 
359  return (last_merit_function_value_ - current_merit_function_value >=
360  scaled_expected_decrease);
361 }
362 
363 float ILQSolver::ExpectedDecrease(
364  const std::vector<Strategy>& strategies,
365  const std::vector<VectorXf>& delta_xs,
366  const std::vector<std::vector<VectorXf>>& costates) const {
367  float expected_decrease = 0.0;
368  for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
369  const auto& lin = linearization_[kk];
370 
371  // Separate x expected decrease per step at each time (saves computation).
372  const size_t xdim = problem_->Dynamics()->XDim();
373  VectorXf expected_decrease_x = VectorXf::Zero(xdim);
374 
375  for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
376  const auto& quad = cost_quadraticization_[kk][ii];
377  const auto& costate = costates[kk][ii];
378  const auto& neg_ui =
379  strategies[ii].alphas[kk]; // NOTE: could also evaluate delta u on
380  // delta x to be more precise.
381 
382  // Handle control contribution.
383  expected_decrease -=
384  neg_ui.transpose() * quad.control.at(ii).hess *
385  (quad.control.at(ii).grad - lin.Bs[ii].transpose() * costate);
386 
387  // // Handle costate contribution (control). Keep this unmultiplied by
388  // // costate for efficiency.
389  // VectorXf expected_decrease_costate =
390  // -lin.Bs[ii] *
391  // (quad.control.at(ii).grad - lin.Bs[ii].transpose() * costate);
392 
393  // if (kk > 0) {
394  // // Handle state and costate (state) contributions. Doesn't exist at
395  // t0.
396  // // Handle final time separately from intermediate time steps.
397  // const auto& last_costate = costates[kk - 1][ii];
398 
399  // VectorXf kx = quad.state.grad + last_costate;
400  // if (kk == time::kNumTimeSteps - 1)
401  // expected_decrease_costate += kx;
402  // else {
403  // kx -= lin.A.transpose() * costate;
404  // expected_decrease_costate +=
405  // (MatrixXf::Identity(xdim, xdim) - lin.A) * kx;
406  // }
407 
408  // expected_decrease_x += quad.state.hess * kx;
409  // }
410 
411  // Update expected decrease from costate.
412  // expected_decrease += costate.transpose() * expected_decrease_costate;
413  }
414 
415  // Update expected decrease from x.
416  // expected_decrease += delta_xs[kk].transpose() * expected_decrease_x;
417  }
418 
419  return expected_decrease;
420 }
421 
422 float ILQSolver::MeritFunction(const OperatingPoint& current_op) {
423  // First, quadraticize cost and linearize dynamics around this operating
424  // point.
425  ComputeCostQuadraticization(current_op, &cost_quadraticization_);
426 
427  // Now, accumulate cost gradients (presuming that this operating point is
428  // dynamically feasible so dynamic constraints are all zero).
429  float merit = 0.0;
430  for (size_t kk = 0; kk < cost_quadraticization_.size(); kk++) {
431  for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
432  const auto& quad = cost_quadraticization_[kk][ii];
433  merit += quad.control.at(ii).grad.squaredNorm();
434 
435  if (kk > 0) {
436  // Don't accumulate state derivs at t0 since x0 can't change.
437  merit += quad.state.grad.squaredNorm();
438  }
439  }
440  }
441 
442  return 0.5 * merit;
443 }
444 
445 void ILQSolver::ComputeLinearization(
446  const OperatingPoint& op,
447  std::vector<LinearDynamicsApproximation>* linearization) {
448  CHECK_NOTNULL(linearization);
449 
450  // Check if linearization is the right length.
451  if (linearization->size() != op.xs.size())
452  linearization->resize(op.xs.size());
453 
454  // Cast dynamics to appropriate type.
455  const auto dyn = static_cast<const MultiPlayerDynamicalSystem*>(
456  problem_->Dynamics().get());
457 
458  // Populate one timestep at a time.
459  for (size_t kk = 0; kk < op.xs.size(); kk++) {
460  const Time t = RelativeTimeTracker::RelativeTime(kk);
461  (*linearization)[kk] = dyn->Linearize(t, op.xs[kk], op.us[kk]);
462  }
463 }
464 
465 void ILQSolver::ComputeLinearization(
466  std::vector<LinearDynamicsApproximation>* linearization) {
467  CHECK_NOTNULL(linearization);
468 
469  // Cast dynamics to appropriate type and make sure the system is
470  // linearizable.
471  CHECK(problem_->Dynamics()->TreatAsLinear());
472  const auto& dyn = problem_->FlatDynamics();
473 
474  // Populate one timestep at a time.
475  for (size_t kk = 0; kk < linearization->size(); kk++)
476  (*linearization)[kk] = dyn.LinearizedSystem();
477 }
478 
479 void ILQSolver::ComputeCostQuadraticization(
480  const OperatingPoint& op,
481  std::vector<std::vector<QuadraticCostApproximation>>* q) {
482  for (size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
483  const Time t = RelativeTimeTracker::RelativeTime(kk);
484  const auto& x = op.xs[kk];
485  const auto& us = op.us[kk];
486 
487  // Quadraticize costs.
488  for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
489  const PlayerCost& cost = problem_->PlayerCosts()[ii];
490 
491  if (cost.IsTimeAdditive() ||
492  problem_->PlayerCosts()[ii].TimeOfExtremeCost() == kk)
493  (*q)[kk][ii] = cost.Quadraticize(t, x, us);
494  else
495  (*q)[kk][ii] = cost.QuadraticizeControlCosts(t, x, us);
496  }
497  }
498 }
499 
500 } // namespace ilqgames