45 #include <ilqgames/cost/player_cost.h> 46 #include <ilqgames/solver/ilq_solver.h> 47 #include <ilqgames/solver/lq_solver.h> 48 #include <ilqgames/utils/linear_dynamics_approximation.h> 49 #include <ilqgames/utils/loop_timer.h> 50 #include <ilqgames/utils/operating_point.h> 51 #include <ilqgames/utils/quadratic_cost_approximation.h> 52 #include <ilqgames/utils/strategy.h> 53 #include <ilqgames/utils/types.h> 55 #include <glog/logging.h> 66 void ScaleAlphas(
float scaling, std::vector<Strategy>* strategies) {
67 CHECK_NOTNULL(strategies);
69 for (
auto& strategy : *strategies) {
70 for (
auto& alpha : strategy.alphas) alpha *= scaling;
76 std::shared_ptr<SolverLog> ILQSolver::Solve(
bool* success, Time max_runtime) {
77 const auto solver_call_time = Clock::now();
80 std::shared_ptr<SolverLog> log = CreateNewLog();
86 OperatingPoint last_operating_point(problem_->CurrentOperatingPoint());
87 OperatingPoint current_operating_point(problem_->CurrentOperatingPoint());
88 current_operating_point.xs[0] = problem_->InitialState();
89 last_operating_point.xs[0] = problem_->InitialState();
92 std::vector<Strategy> current_strategies(problem_->CurrentStrategies());
95 size_t num_iterations = 0;
96 bool has_converged =
false;
101 std::vector<float> total_costs;
102 last_operating_point.swap(current_operating_point);
103 CurrentOperatingPoint(last_operating_point, current_strategies,
104 ¤t_operating_point);
107 TotalCosts(current_operating_point, &total_costs);
111 log->AddSolverIterate(current_operating_point, current_strategies,
112 total_costs, elapsed, has_converged);
116 ComputeCostQuadraticization(current_operating_point, &cost_quadraticization_);
119 std::vector<VectorXf> delta_xs;
120 std::vector<std::vector<VectorXf>> costates;
123 while (num_iterations < params_.max_solver_iters && !has_converged &&
124 elapsed < max_runtime - timer_.RuntimeUpperBound()) {
136 if (!problem_->Dynamics()->TreatAsLinear())
137 ComputeLinearization(current_operating_point, &linearization_);
140 current_strategies = lq_solver_->Solve(
141 linearization_, cost_quadraticization_,
142 problem_->InitialState() - current_operating_point.xs.front(),
143 &delta_xs, &costates);
146 if (!ModifyLQStrategies(delta_xs, costates, ¤t_strategies,
147 ¤t_operating_point, &has_converged)) {
149 VLOG(1) <<
"Solver exited due to linesearch failure.";
152 if (success) *success =
false;
158 TotalCosts(current_operating_point, &total_costs);
161 elapsed += timer_.Toc();
164 log->AddSolverIterate(current_operating_point, current_strategies,
165 total_costs, elapsed, has_converged);
169 if (success) *success =
true;
174 void ILQSolver::CurrentOperatingPoint(
175 const OperatingPoint& last_operating_point,
176 const std::vector<Strategy>& current_strategies,
177 OperatingPoint* current_operating_point)
const {
178 CHECK_NOTNULL(current_operating_point);
181 current_operating_point->t0 = last_operating_point.t0;
184 VectorXf x(last_operating_point.xs[0]);
185 for (
size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
186 const Time t = RelativeTimeTracker::RelativeTime(kk);
189 const VectorXf delta_x = x - last_operating_point.xs[kk];
190 const auto& last_us = last_operating_point.us[kk];
191 auto& current_us = current_operating_point->us[kk];
194 current_operating_point->xs[kk] = x;
197 for (PlayerIndex jj = 0; jj < problem_->Dynamics()->NumPlayers(); jj++) {
198 const auto& strategy = current_strategies[jj];
199 current_us[jj] = strategy(kk, delta_x, last_us[jj]);
203 if (kk < time::kNumTimeSteps - 1)
204 x = problem_->Dynamics()->Integrate(t, time::kTimeStep, x, current_us);
220 void ILQSolver::TotalCosts(
const OperatingPoint& current_op,
221 std::vector<float>* total_costs)
const {
223 if (total_costs->size() != problem_->PlayerCosts().size())
224 total_costs->resize(problem_->PlayerCosts().size());
225 for (PlayerIndex ii = 0; ii < problem_->PlayerCosts().size(); ii++) {
226 if (problem_->PlayerCosts()[ii].IsTimeAdditive())
227 (*total_costs)[ii] = 0.0;
228 else if (problem_->PlayerCosts()[ii].IsMaxOverTime())
229 (*total_costs)[ii] = -constants::kInfinity;
231 (*total_costs)[ii] = constants::kInfinity;
235 for (
size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
236 const Time t = RelativeTimeTracker::RelativeTime(kk);
238 for (
size_t ii = 0; ii < problem_->PlayerCosts().size(); ii++) {
239 const float current_cost = problem_->PlayerCosts()[ii].Evaluate(
240 t, current_op.xs[kk], current_op.us[kk]);
242 if (problem_->PlayerCosts()[ii].IsTimeAdditive())
243 (*total_costs)[ii] += problem_->PlayerCosts()[ii].Evaluate(
244 t, current_op.xs[kk], current_op.us[kk]);
245 else if (problem_->PlayerCosts()[ii].IsMaxOverTime() &&
246 current_cost > (*total_costs)[ii]) {
247 (*total_costs)[ii] = current_cost;
248 problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk);
249 }
else if (problem_->PlayerCosts()[ii].IsMinOverTime()) {
250 if (current_cost < (*total_costs)[ii]) {
251 (*total_costs)[ii] = current_cost;
252 problem_->PlayerCosts()[ii].SetTimeOfExtremeCost(kk);
259 float ILQSolver::StateDistance(
const VectorXf& x1,
const VectorXf& x2,
260 const std::vector<Dimension>& dims)
const {
261 auto total_distance = [&dims](
const VectorXf& x1,
const VectorXf& x2) {
262 if (dims.empty())
return (x1 - x2).cwiseAbs().maxCoeff();
264 float distance = 0.0;
265 for (
const Dimension dim : dims) distance += std::abs(x1(dim) - x2(dim));
270 if (problem_->Dynamics()->TreatAsLinear()) {
271 const auto& dyn = problem_->FlatDynamics();
275 if (dyn.IsLinearSystemStateSingular(x1) ||
276 dyn.IsLinearSystemStateSingular(x2)) {
278 <<
"Singular state encountered when computing state distance.";
279 return std::numeric_limits<float>::infinity();
282 return total_distance(dyn.FromLinearSystemState(x1),
283 dyn.FromLinearSystemState(x2));
286 return total_distance(x1, x2);
289 bool ILQSolver::ModifyLQStrategies(
290 const std::vector<VectorXf>& delta_xs,
291 const std::vector<std::vector<VectorXf>>& costates,
292 std::vector<Strategy>* strategies, OperatingPoint* current_operating_point,
293 bool* has_converged) {
294 CHECK_NOTNULL(strategies);
295 CHECK_NOTNULL(current_operating_point);
296 CHECK_NOTNULL(has_converged);
303 expected_decrease_ = ExpectedDecrease(*strategies, delta_xs, costates);
308 last_cost_quadraticization_.swap(cost_quadraticization_);
313 ScaleAlphas(params_.initial_alpha_scaling, strategies);
317 const OperatingPoint last_operating_point(*current_operating_point);
318 float current_stepsize = params_.initial_alpha_scaling;
319 CurrentOperatingPoint(last_operating_point, *strategies,
320 current_operating_point);
321 if (!params_.linesearch)
return true;
324 for (
size_t ii = 0; ii < params_.max_backtracking_steps; ii++) {
326 const float current_merit_function_value =
327 MeritFunction(*current_operating_point);
330 if (CheckArmijoCondition(current_merit_function_value, current_stepsize)) {
332 *has_converged = HasConverged(current_merit_function_value);
333 last_merit_function_value_ = current_merit_function_value;
338 ScaleAlphas(params_.geometric_alpha_scaling, strategies);
339 current_stepsize *= params_.geometric_alpha_scaling;
340 CurrentOperatingPoint(last_operating_point, *strategies,
341 current_operating_point);
345 VLOG(1) <<
"Exceeded maximum number of backtracking steps.";
349 bool ILQSolver::CheckArmijoCondition(
float current_merit_function_value,
350 float current_stepsize)
const {
352 const float scaled_expected_decrease = params_.expected_decrease_fraction *
353 current_stepsize * expected_decrease_;
359 return (last_merit_function_value_ - current_merit_function_value >=
360 scaled_expected_decrease);
363 float ILQSolver::ExpectedDecrease(
364 const std::vector<Strategy>& strategies,
365 const std::vector<VectorXf>& delta_xs,
366 const std::vector<std::vector<VectorXf>>& costates)
const {
367 float expected_decrease = 0.0;
368 for (
size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
369 const auto& lin = linearization_[kk];
372 const size_t xdim = problem_->Dynamics()->XDim();
373 VectorXf expected_decrease_x = VectorXf::Zero(xdim);
375 for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
376 const auto& quad = cost_quadraticization_[kk][ii];
377 const auto& costate = costates[kk][ii];
379 strategies[ii].alphas[kk];
384 neg_ui.transpose() * quad.control.at(ii).hess *
385 (quad.control.at(ii).grad - lin.Bs[ii].transpose() * costate);
419 return expected_decrease;
422 float ILQSolver::MeritFunction(
const OperatingPoint& current_op) {
425 ComputeCostQuadraticization(current_op, &cost_quadraticization_);
430 for (
size_t kk = 0; kk < cost_quadraticization_.size(); kk++) {
431 for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
432 const auto& quad = cost_quadraticization_[kk][ii];
433 merit += quad.control.at(ii).grad.squaredNorm();
437 merit += quad.state.grad.squaredNorm();
445 void ILQSolver::ComputeLinearization(
446 const OperatingPoint& op,
447 std::vector<LinearDynamicsApproximation>* linearization) {
448 CHECK_NOTNULL(linearization);
451 if (linearization->size() != op.xs.size())
452 linearization->resize(op.xs.size());
455 const auto dyn =
static_cast<const MultiPlayerDynamicalSystem*
>(
456 problem_->Dynamics().get());
459 for (
size_t kk = 0; kk < op.xs.size(); kk++) {
460 const Time t = RelativeTimeTracker::RelativeTime(kk);
461 (*linearization)[kk] = dyn->Linearize(t, op.xs[kk], op.us[kk]);
465 void ILQSolver::ComputeLinearization(
466 std::vector<LinearDynamicsApproximation>* linearization) {
467 CHECK_NOTNULL(linearization);
471 CHECK(problem_->Dynamics()->TreatAsLinear());
472 const auto& dyn = problem_->FlatDynamics();
475 for (
size_t kk = 0; kk < linearization->size(); kk++)
476 (*linearization)[kk] = dyn.LinearizedSystem();
479 void ILQSolver::ComputeCostQuadraticization(
480 const OperatingPoint& op,
481 std::vector<std::vector<QuadraticCostApproximation>>* q) {
482 for (
size_t kk = 0; kk < time::kNumTimeSteps; kk++) {
483 const Time t = RelativeTimeTracker::RelativeTime(kk);
484 const auto& x = op.xs[kk];
485 const auto& us = op.us[kk];
488 for (PlayerIndex ii = 0; ii < problem_->Dynamics()->NumPlayers(); ii++) {
489 const PlayerCost& cost = problem_->PlayerCosts()[ii];
491 if (cost.IsTimeAdditive() ||
492 problem_->PlayerCosts()[ii].TimeOfExtremeCost() == kk)
493 (*q)[kk][ii] = cost.Quadraticize(t, x, us);
495 (*q)[kk][ii] = cost.QuadraticizeControlCosts(t, x, us);