ilqgames
A new real-time solver for large-scale differential games.
player_cost.cpp
1 /*
2  * Copyright (c) 2019, The Regents of the University of California (Regents).
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following
14  * disclaimer in the documentation and/or other materials provided
15  * with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived
19  * from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Please contact the author(s) of this library if you have any questions.
34  * Authors: David Fridovich-Keil ( dfk@eecs.berkeley.edu )
35  */
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 //
39 // Container to store all the cost functions for a single player, and keep track
40 // of which variables (x, u1, u2, ..., uN) they correspond to.
41 //
42 ///////////////////////////////////////////////////////////////////////////////
43 
44 #include <ilqgames/cost/cost.h>
45 #include <ilqgames/cost/player_cost.h>
46 #include <ilqgames/utils/operating_point.h>
47 #include <ilqgames/utils/quadratic_cost_approximation.h>
48 #include <ilqgames/utils/types.h>
49 
50 #include <glog/logging.h>
51 #include <unordered_map>
52 
53 namespace ilqgames {
54 
55 namespace {
56 
57 // Accumulate control costs and constraints into the given quadratic
58 // approximation.
59 template <typename T, typename F>
60 void AccumulateControlCostsBase(const PlayerPtrMultiMap<T>& costs, Time t,
61  const std::vector<VectorXf>& us,
62  float regularization,
63  QuadraticCostApproximation* q, F f) {
64  size_t cost_idx = 0;
65  for (const auto& pair : costs) {
66  const PlayerIndex player = pair.first;
67  const auto& cost = pair.second;
68 
69  // If we haven't seen this player yet, initialize R and r to zero.
70  auto iter = q->control.find(player);
71  if (iter == q->control.end()) {
72  auto inserted_pair = q->control.emplace(
73  player, SingleCostApproximation(us[player].size(), regularization));
74 
75  // Second element should be true because we definitely won't have any
76  // key collisions.
77  CHECK(inserted_pair.second);
78 
79  // Update iter to point to where the new R was inserted.
80  iter = inserted_pair.first;
81  }
82 
83  f(*cost, t, us[player], &(iter->second.hess), &(iter->second.grad));
84  cost_idx++;
85  }
86 }
87 
88 void AccumulateControlCosts(const PlayerPtrMultiMap<Cost>& costs, Time t,
89  const std::vector<VectorXf>& us,
90  float regularization,
91  QuadraticCostApproximation* q) {
92  auto f = [](const Cost& cost, Time t, const VectorXf& u, MatrixXf* hess,
93  VectorXf* grad) { cost.Quadraticize(t, u, hess, grad); };
94  AccumulateControlCostsBase(costs, t, us, regularization, q, f);
95 }
96 
97 void AccumulateControlConstraints(
98  const PlayerPtrMultiMap<Constraint>& constraints, Time t,
99  const std::vector<VectorXf>& us, float regularization,
100  QuadraticCostApproximation* q) {
101  auto f = [](const Constraint& constraint, Time t, const VectorXf& u,
102  MatrixXf* hess,
103  VectorXf* grad) { constraint.Quadraticize(t, u, hess, grad); };
104  AccumulateControlCostsBase(constraints, t, us, regularization, q, f);
105 }
106 
107 } // namespace
108 
109 void PlayerCost::AddStateCost(const std::shared_ptr<Cost>& cost) {
110  state_costs_.emplace_back(cost);
111 }
112 
113 void PlayerCost::AddControlCost(PlayerIndex idx,
114  const std::shared_ptr<Cost>& cost) {
115  control_costs_.emplace(idx, cost);
116 }
117 
118 void PlayerCost::AddStateConstraint(
119  const std::shared_ptr<Constraint>& constraint) {
120  state_constraints_.emplace_back(constraint);
121 }
122 
123 void PlayerCost::AddControlConstraint(
124  PlayerIndex idx, const std::shared_ptr<Constraint>& constraint) {
125  control_constraints_.emplace(idx, constraint);
126 }
127 
128 float PlayerCost::Evaluate(Time t, const VectorXf& x,
129  const std::vector<VectorXf>& us) const {
130  float total_cost = 0.0;
131 
132  // State costs.
133  for (const auto& cost : state_costs_) total_cost += cost->Evaluate(t, x);
134 
135  // Control costs.
136  for (const auto& pair : control_costs_) {
137  const PlayerIndex& player = pair.first;
138  const auto& cost = pair.second;
139 
140  total_cost += cost->Evaluate(t, us[player]);
141  }
142 
143  return total_cost;
144 }
145 
146 float PlayerCost::Evaluate(const OperatingPoint& op, Time time_step) const {
147  float cost = 0.0;
148  if (IsMinOverTime())
149  cost = constants::kInfinity;
150  else if (IsMaxOverTime())
151  cost = -constants::kInfinity;
152 
153  for (size_t kk = 0; kk < op.xs.size(); kk++) {
154  const Time t = op.t0 + time_step * static_cast<float>(kk);
155  const float instantaneous_cost = Evaluate(t, op.xs[kk], op.us[kk]);
156 
157  if (IsTimeAdditive())
158  cost += instantaneous_cost;
159  else if (IsMinOverTime())
160  cost = std::min(cost, instantaneous_cost);
161  else
162  cost = std::max(cost, instantaneous_cost);
163  }
164 
165  return cost;
166 }
167 
168 float PlayerCost::Evaluate(const OperatingPoint& op) const {
169  float total_cost = 0.0;
170  for (size_t kk = 0; kk < op.xs.size(); kk++) total_cost += Evaluate(op, kk);
171 
172  return total_cost;
173 }
174 
175 float PlayerCost::EvaluateOffset(Time t, Time next_t, const VectorXf& next_x,
176  const std::vector<VectorXf>& us) const {
177  float total_cost = 0.0;
178 
179  // State costs.
180  for (const auto& cost : state_costs_)
181  total_cost += cost->Evaluate(next_t, next_x);
182 
183  // Control costs.
184  for (const auto& pair : control_costs_) {
185  const PlayerIndex& player = pair.first;
186  const auto& cost = pair.second;
187 
188  total_cost += cost->Evaluate(t, us[player]);
189  }
190 
191  return total_cost;
192 }
193 
194 QuadraticCostApproximation PlayerCost::Quadraticize(
195  Time t, const VectorXf& x, const std::vector<VectorXf>& us) const {
196  QuadraticCostApproximation q(x.size(), state_regularization_);
197 
198  // Accumulate state costs.
199  for (const auto& cost : state_costs_)
200  cost->Quadraticize(t, x, &q.state.hess, &q.state.grad);
201 
202  // Accumulate control costs.
203  AccumulateControlCosts(control_costs_, t, us, control_regularization_, &q);
204 
205  // Accumulate state constraints (including augmented Lagrangian terms scaled
206  // by appropriate multipliers).
207  for (const auto& constraint : state_constraints_)
208  constraint->Quadraticize(t, x, &q.state.hess, &q.state.grad);
209 
210  // Accumulate control constraints.
211  AccumulateControlConstraints(control_constraints_, t, us,
212  control_regularization_, &q);
213 
214  return q;
215 }
216 
217 QuadraticCostApproximation PlayerCost::QuadraticizeControlCosts(
218  Time t, const VectorXf& x, const std::vector<VectorXf>& us) const {
219  QuadraticCostApproximation q(x.size(), state_regularization_);
220 
221  // Accumulate control costs.
222  AccumulateControlCosts(control_costs_, t, us, control_regularization_, &q);
223 
224  return q;
225 }
226 
227 } // namespace ilqgames