36 #ifndef POLICYIMPROVEMENT_H_
37 #define POLICYIMPROVEMENT_H_
49 #include "planner/TrajectoryOptim/Chomp/chompMultivariateGaussian.hpp"
51 #include <boost/shared_ptr.hpp>
53 extern std::vector<double> global_noiseTrajectory1;
54 extern std::vector<double> global_noiseTrajectory2;
56 namespace stomp_motion_planner
76 void printProbabilities();
100 bool initialize(
const int num_rollouts,
const int num_time_steps,
const int num_reused_rollouts,
101 const int num_extra_rollouts,
102 boost::shared_ptr<stomp_motion_planner::Policy> policy,
103 boost::shared_ptr<stomp_motion_planner::Task> task,
104 bool use_cumulative_costs=
true);
111 bool setNumRollouts(
const int num_rollouts,
const int num_reused_rollouts,
const int num_extra_rollouts);
120 bool getRollouts(std::vector<std::vector<Eigen::VectorXd> >& rollouts,
const std::vector<double>& noise_stddev,
121 bool get_reused, std::vector<std::vector<Eigen::VectorXd> >& reused_rollouts);
130 bool setRolloutCosts(
const Eigen::MatrixXd& costs,
const double control_cost_weight, std::vector<double>& rollout_costs_total);
138 bool improvePolicy(std::vector<Eigen::MatrixXd>& parameter_updates);
143 bool addExtraRollouts(std::vector<std::vector<Eigen::VectorXd> >& rollouts, std::vector<Eigen::VectorXd>& rollout_costs);
169 std::vector<int> num_parameters_;
172 int num_rollouts_reused_;
173 int num_rollouts_extra_;
175 bool rollouts_reused_;
176 bool rollouts_reused_next_;
177 bool extra_rollouts_added_;
178 int num_rollouts_gen_;
180 bool use_multiplication_by_m_;
181 bool use_cumulative_costs_;
183 boost::shared_ptr<stomp_motion_planner::Policy> policy_;
184 boost::shared_ptr<stomp_motion_planner::Task> task_;
186 std::vector<Eigen::MatrixXd> control_costs_;
187 std::vector<Eigen::MatrixXd> inv_control_costs_;
188 double control_cost_weight_;
190 std::vector<Eigen::MatrixXd> basis_functions_;
192 std::vector<Eigen::VectorXd> parameters_;
194 std::vector<Rollout> rollouts_;
195 std::vector<Rollout> reused_rollouts_;
196 std::vector<Rollout> extra_rollouts_;
198 std::vector<MultivariateGaussian> noise_generators_;
199 std::vector<Eigen::MatrixXd> parameter_updates_;
202 std::vector<Eigen::VectorXd> tmp_noise_;
203 std::vector<Eigen::VectorXd> tmp_parameters_;
204 Eigen::VectorXd tmp_max_cost_;
205 Eigen::VectorXd tmp_min_cost_;
206 Eigen::VectorXd tmp_max_minus_min_cost_;
207 Eigen::VectorXd tmp_sum_rollout_probabilities_;
208 std::vector<std::pair<double, int> > rollout_cost_sorter_;
210 bool preAllocateMultivariateGaussianSampler();
211 bool preAllocateTempVariables();
212 bool preComputeProjectionMatrices();
214 void resampleUpdates();
215 bool computeProjectedNoise();
216 bool computeRolloutControlCosts();
217 bool computeRolloutCumulativeCosts();
218 bool computeRolloutProbabilities();
219 bool computeParameterUpdates();
221 bool computeNoise(
Rollout& rollout);
222 bool computeProjectedNoise(
Rollout& rollout);
223 bool computeRolloutControlCosts(
Rollout& rollout);
224 bool copyParametersFromPolicy();
226 void addStraightLines( std::vector<int> points,
Rollout& rollouts);
227 bool generateRollouts(
const std::vector<double>& noise_variance);
PolicyImprovement()
Constructor for the policy improvement class.
Definition: policy_improvement.cpp:135
std::vector< Eigen::VectorXd > noise_
[num_dimensions] num_parameters
Definition: policy_improvement.hpp:62
bool getRollouts(std::vector< std::vector< Eigen::VectorXd > > &rollouts, const std::vector< double > &noise_stddev, bool get_reused, std::vector< std::vector< Eigen::VectorXd > > &reused_rollouts)
Gets the next set of rollouts.
Definition: policy_improvement.cpp:606
bool setNumRollouts(const int num_rollouts, const int num_reused_rollouts, const int num_extra_rollouts)
Resets the number of rollouts.
Definition: policy_improvement.cpp:206
std::vector< Eigen::VectorXd > noise_projected_
[num_dimensions][num_time_steps] num_parameters
Definition: policy_improvement.hpp:63
std::vector< Eigen::VectorXd > probabilities_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:69
Eigen::VectorXd state_costs_
num_time_steps
Definition: policy_improvement.hpp:65
std::vector< Eigen::VectorXd > control_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:66
bool setRolloutCosts(const Eigen::MatrixXd &costs, const double control_cost_weight, std::vector< double > &rollout_costs_total)
Set the costs of each rollout per time-step Only the first "n" rows of the costs matrix is used...
Definition: policy_improvement.cpp:658
bool resetReusedRollouts()
Reset extra rollouts.
Definition: policy_improvement.cpp:371
~PolicyImprovement()
Destructor.
Definition: policy_improvement.cpp:141
std::vector< Eigen::VectorXd > cumulative_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:68
bool improvePolicy(std::vector< Eigen::MatrixXd > ¶meter_updates)
Performs the PI^2 update and provides parameter updates at every time step.
Definition: policy_improvement.cpp:849
std::vector< Eigen::VectorXd > parameters_noise_projected_
[num_dimensions][num_time_steps] num_parameters
Definition: policy_improvement.hpp:64
bool out_of_bounds_
Wether the rollout is violating dof limits.
Definition: policy_improvement.hpp:71
bool initialize(const int num_rollouts, const int num_time_steps, const int num_reused_rollouts, const int num_extra_rollouts, boost::shared_ptr< stomp_motion_planner::Policy > policy, boost::shared_ptr< stomp_motion_planner::Task > task, bool use_cumulative_costs=true)
Initializes the object which is required for all operations to succeed.
Definition: policy_improvement.cpp:149
std::vector< Eigen::MatrixXd > projection_matrix_
[num_dimensions] num_parameters x num_parameters
Definition: policy_improvement.hpp:162
bool addExtraRollouts(std::vector< std::vector< Eigen::VectorXd > > &rollouts, std::vector< Eigen::VectorXd > &rollout_costs)
Adds extra rollouts to the set of rollouts to be reused.
Definition: policy_improvement.cpp:866
std::vector< Eigen::VectorXd > total_costs_
[num_dimensions] num_time_steps
Definition: policy_improvement.hpp:67
void testNoiseGenerators()
Tests the noise generators Function added by jim.
Definition: policy_improvement.cpp:897
std::vector< Eigen::VectorXd > parameters_
[num_dimensions] num_parameters
Definition: policy_improvement.hpp:61
void setRolloutOutOfBounds(int id, bool out_of_bounds)
Set rollout as out of bounds Function added by jim.
Definition: policy_improvement.cpp:648
Definition: policy_improvement.hpp:79
Definition: policy_improvement.hpp:59
double getCost()
Gets the rollout cost = state cost + control costs per dimension.
Definition: policy_improvement.cpp:72