libmove3d-planners
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Friends Macros Groups Pages
policy_improvement.hpp
1 /*********************************************************************
2  * Copyright (c) 2010, Willow Garage, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Willow Garage, Inc. nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  * POSSIBILITY OF SUCH DAMAGE.
28 
29  \file policy_improvement.h
30 
31  \author Ludovic Righetti, Peter Pastor, Mrinal Kalakrishnan
32  \date May 26, 2010
33 
34  **********************************************************************/
35 
36 #ifndef POLICYIMPROVEMENT_H_
37 #define POLICYIMPROVEMENT_H_
38 
39 // ros includes
40 //#include <ros/ros.h>
41 
42 
43 #include <Eigen/Core>
44 
45 // local includes
46 #include "policy.hpp"
47 #include "task.hpp"
48 
49 #include "planner/TrajectoryOptim/Chomp/chompMultivariateGaussian.hpp"
50 
51 #include <boost/shared_ptr.hpp>
52 
53 extern std::vector<double> global_noiseTrajectory1;
54 extern std::vector<double> global_noiseTrajectory2;
55 
56 namespace stomp_motion_planner
57 {
58 
59  struct Rollout
60  {
61  std::vector<Eigen::VectorXd> parameters_;
62  std::vector<Eigen::VectorXd> noise_;
63  std::vector<Eigen::VectorXd> noise_projected_;
64  std::vector<Eigen::VectorXd> parameters_noise_projected_;
65  Eigen::VectorXd state_costs_;
66  std::vector<Eigen::VectorXd> control_costs_;
67  std::vector<Eigen::VectorXd> total_costs_;
68  std::vector<Eigen::VectorXd> cumulative_costs_;
69  std::vector<Eigen::VectorXd> probabilities_;
73  double getCost();
75  void printCost();
76  void printProbabilities();
77  };
78 
80  {
81  public:
86 
91 
100  bool initialize(const int num_rollouts, const int num_time_steps, const int num_reused_rollouts,
101  const int num_extra_rollouts,
102  boost::shared_ptr<stomp_motion_planner::Policy> policy,
103  boost::shared_ptr<stomp_motion_planner::Task> task,
104  bool use_cumulative_costs=true);
105 
111  bool setNumRollouts(const int num_rollouts, const int num_reused_rollouts, const int num_extra_rollouts);
112 
120  bool getRollouts(std::vector<std::vector<Eigen::VectorXd> >& rollouts, const std::vector<double>& noise_stddev,
121  bool get_reused, std::vector<std::vector<Eigen::VectorXd> >& reused_rollouts);
122 
130  bool setRolloutCosts(const Eigen::MatrixXd& costs, const double control_cost_weight, std::vector<double>& rollout_costs_total);
131 
138  bool improvePolicy(std::vector<Eigen::MatrixXd>& parameter_updates);
139 
143  bool addExtraRollouts(std::vector<std::vector<Eigen::VectorXd> >& rollouts, std::vector<Eigen::VectorXd>& rollout_costs);
144 
149  void testNoiseGenerators();
150 
155  void setRolloutOutOfBounds(int id, bool out_of_bounds);
156 
160  bool resetReusedRollouts();
161 
162  std::vector<Eigen::MatrixXd> projection_matrix_;
164  private:
165 
166  bool initialized_;
167 
168  int num_dimensions_;
169  std::vector<int> num_parameters_;
170  int num_rollouts_;
171  int num_time_steps_;
172  int num_rollouts_reused_;
173  int num_rollouts_extra_;
174 
175  bool rollouts_reused_;
176  bool rollouts_reused_next_;
177  bool extra_rollouts_added_;
178  int num_rollouts_gen_;
180  bool use_multiplication_by_m_;
181  bool use_cumulative_costs_;
183  boost::shared_ptr<stomp_motion_planner::Policy> policy_;
184  boost::shared_ptr<stomp_motion_planner::Task> task_;
185 
186  std::vector<Eigen::MatrixXd> control_costs_;
187  std::vector<Eigen::MatrixXd> inv_control_costs_;
188  double control_cost_weight_;
189 
190  std::vector<Eigen::MatrixXd> basis_functions_;
192  std::vector<Eigen::VectorXd> parameters_;
194  std::vector<Rollout> rollouts_;
195  std::vector<Rollout> reused_rollouts_;
196  std::vector<Rollout> extra_rollouts_;
197 
198  std::vector<MultivariateGaussian> noise_generators_;
199  std::vector<Eigen::MatrixXd> parameter_updates_;
201  // temporary variables pre-allocated for efficiency:
202  std::vector<Eigen::VectorXd> tmp_noise_;
203  std::vector<Eigen::VectorXd> tmp_parameters_;
204  Eigen::VectorXd tmp_max_cost_;
205  Eigen::VectorXd tmp_min_cost_;
206  Eigen::VectorXd tmp_max_minus_min_cost_;
207  Eigen::VectorXd tmp_sum_rollout_probabilities_;
208  std::vector<std::pair<double, int> > rollout_cost_sorter_;
210  bool preAllocateMultivariateGaussianSampler();
211  bool preAllocateTempVariables();
212  bool preComputeProjectionMatrices();
213 
214  void resampleUpdates();
215  bool computeProjectedNoise();
216  bool computeRolloutControlCosts();
217  bool computeRolloutCumulativeCosts();
218  bool computeRolloutProbabilities();
219  bool computeParameterUpdates();
220 
221  bool computeNoise(Rollout& rollout);
222  bool computeProjectedNoise(Rollout& rollout);
223  bool computeRolloutControlCosts(Rollout& rollout);
224  bool copyParametersFromPolicy();
225 
226  void addStraightLines( std::vector<int> points, Rollout& rollouts);
227  bool generateRollouts(const std::vector<double>& noise_variance);
228  };
229 
230 }
231 
232 #endif /* POLICYIMPROVEMENT_H_ */
PolicyImprovement()
Constructor for the policy improvement class.
Definition: policy_improvement.cpp:135
std::vector< Eigen::VectorXd > noise_
[num_dimensions] num_parameters
Definition: policy_improvement.hpp:62
bool getRollouts(std::vector< std::vector< Eigen::VectorXd > > &rollouts, const std::vector< double > &noise_stddev, bool get_reused, std::vector< std::vector< Eigen::VectorXd > > &reused_rollouts)
Gets the next set of rollouts.
Definition: policy_improvement.cpp:606
bool setNumRollouts(const int num_rollouts, const int num_reused_rollouts, const int num_extra_rollouts)
Resets the number of rollouts.
Definition: policy_improvement.cpp:206
std::vector< Eigen::VectorXd > noise_projected_
[num_dimensions][num_time_steps] num_parameters
Definition: policy_improvement.hpp:63
std::vector< Eigen::VectorXd > probabilities_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:69
Eigen::VectorXd state_costs_
num_time_steps
Definition: policy_improvement.hpp:65
std::vector< Eigen::VectorXd > control_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:66
bool setRolloutCosts(const Eigen::MatrixXd &costs, const double control_cost_weight, std::vector< double > &rollout_costs_total)
Set the costs of each rollout per time-step Only the first "n" rows of the costs matrix is used...
Definition: policy_improvement.cpp:658
bool resetReusedRollouts()
Reset extra rollouts.
Definition: policy_improvement.cpp:371
~PolicyImprovement()
Destructor.
Definition: policy_improvement.cpp:141
std::vector< Eigen::VectorXd > cumulative_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:68
bool improvePolicy(std::vector< Eigen::MatrixXd > &parameter_updates)
Performs the PI^2 update and provides parameter updates at every time step.
Definition: policy_improvement.cpp:849
std::vector< Eigen::VectorXd > parameters_noise_projected_
[num_dimensions][num_time_steps] num_parameters
Definition: policy_improvement.hpp:64
bool out_of_bounds_
Wether the rollout is violating dof limits.
Definition: policy_improvement.hpp:71
bool initialize(const int num_rollouts, const int num_time_steps, const int num_reused_rollouts, const int num_extra_rollouts, boost::shared_ptr< stomp_motion_planner::Policy > policy, boost::shared_ptr< stomp_motion_planner::Task > task, bool use_cumulative_costs=true)
Initializes the object which is required for all operations to succeed.
Definition: policy_improvement.cpp:149
std::vector< Eigen::MatrixXd > projection_matrix_
[num_dimensions] num_parameters x num_parameters
Definition: policy_improvement.hpp:162
bool addExtraRollouts(std::vector< std::vector< Eigen::VectorXd > > &rollouts, std::vector< Eigen::VectorXd > &rollout_costs)
Adds extra rollouts to the set of rollouts to be reused.
Definition: policy_improvement.cpp:866
std::vector< Eigen::VectorXd > total_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:67
void testNoiseGenerators()
Tests the noise generators Function added by jim.
Definition: policy_improvement.cpp:897
std::vector< Eigen::VectorXd > parameters_
[num_dimensions] num_parameters
Definition: policy_improvement.hpp:61
void setRolloutOutOfBounds(int id, bool out_of_bounds)
Set rollout as out of bounds Function added by jim.
Definition: policy_improvement.cpp:648
Definition: policy_improvement.hpp:79
Definition: policy_improvement.hpp:59
double getCost()
Gets the rollout cost = state cost + control costs per dimension.
Definition: policy_improvement.cpp:72