MLpp
EM.hpp
1 #pragma once
2 /* (C) 2020 Roman Werpachowski. */
3 #include <memory>
4 #include <random>
5 #include <vector>
6 #include <Eigen/Cholesky>
7 #include <Eigen/Core>
8 #include "Clustering.hpp"
9 #include "dll.hpp"
10 
11 
12 namespace ml
13 {
/**
 * Gaussian Expectation-Maximisation algorithm.
 *
 * Implements the Clustering::Model interface: number_clusters() forwards to
 * number_components() and centroids() forwards to means().
 * Members marked DLL_DECLSPEC are defined out of line; their bodies are not
 * visible in this header.
 */
18  class EM: public Clustering::Model
19  {
20  public:
    /**
     * Constructs an EM ready to fit.
     * @param number_components Number of components (see number_components()).
     */
    // NOTE(review): this single-argument constructor is not `explicit`, so an
    // unsigned int converts implicitly to EM. Consider marking it explicit if no
    // caller relies on that conversion.
25  DLL_DECLSPEC EM(unsigned int number_components);
26 
    /**
     * Sets PRNG seed.
     * @param seed Seed value.
     */
30  DLL_DECLSPEC void set_seed(unsigned int seed);
31 
    /**
     * Sets absolute tolerance for convergence test.
     * @param absolute_tolerance New absolute tolerance (valid range is enforced, if at all, in the out-of-line definition).
     */
36  DLL_DECLSPEC void set_absolute_tolerance(double absolute_tolerance);
37 
    /**
     * Sets relative tolerance for convergence test.
     * @param relative_tolerance New relative tolerance (valid range is enforced, if at all, in the out-of-line definition).
     */
42  DLL_DECLSPEC void set_relative_tolerance(double relative_tolerance);
43 
    /**
     * Sets maximum number of E-M steps.
     * @param maximum_steps New limit on the number of steps.
     */
48  DLL_DECLSPEC void set_maximum_steps(unsigned int maximum_steps);
49 
    /**
     * Sets means initialiser.
     * @param means_initialiser Shared pointer to the initialiser implementation.
     */
54  DLL_DECLSPEC void set_means_initialiser(std::shared_ptr<const Clustering::CentroidsInitialiser> means_initialiser);
55 
    /**
     * Sets responsibilities initialiser.
     * @param responsibilities_initialiser Shared pointer to the initialiser implementation.
     */
60  DLL_DECLSPEC void set_responsibilities_initialiser(std::shared_ptr<const Clustering::ResponsibilitiesInitialiser> responsibilities_initialiser);
61 
    /**
     * Switches between verbose and quiet mode.
     * @param verbose Value stored in verbose_.
     */
65  void set_verbose(bool verbose)
66  {
67  verbose_ = verbose;
68  }
69 
    /**
     * Switches between starting with E or M step first.
     * @param maximise_first Value stored in maximise_first_ (presumably true means the M step runs first; confirm in fit()).
     */
73  void set_maximise_first(bool maximise_first)
74  {
75  maximise_first_ = maximise_first;
76  }
77 
    /**
     * Fits the model.
     * @param data Training data (presumably one data point per column, matching centroids(); confirm in the definition).
     * @return Boolean status; presumably whether the fit converged (see converged()) - TODO confirm.
     */
82  DLL_DECLSPEC bool fit(Eigen::Ref<const Eigen::MatrixXd> data) override;
83 
    /** Returns the number of components. */
86  auto number_components() const
87  {
88  return number_components_;
89  }
90 
    /** Returns the number of clusters, which equals number_components(). */
91  unsigned int number_clusters() const override
92  {
93  return number_components();
94  }
95 
    /** Returns a const reference to matrix containing fitted component means. */
99  const auto& means() const
100  {
101  return means_;
102  }
103 
    /** Returns a const reference to the matrix of cluster centroids (in columns); same matrix as means(). */
104  const Eigen::MatrixXd& centroids() const override
105  {
106  return means();
107  }
108 
    /** Returns a const reference to fitted component covariance matrices. */
112  const auto& covariances() const
113  {
114  return covariances_;
115  }
116 
    /**
     * Returns a const reference to fitted k-th component's covariance matrix.
     * @param k Component index (bounds handling is in the out-of-line definition).
     */
120  DLL_DECLSPEC const Eigen::MatrixXd& covariance(unsigned int k) const;
121 
    /** Returns a const reference to fitted component mixing probabilities. */
125  const auto& mixing_probabilities() const
126  {
127  return mixing_probabilities_;
128  }
129 
    /** Returns a const reference to resulting component responsibilities. */
133  const auto& responsibilities() const
134  {
135  return responsibilities_;
136  }
137 
    /** Returns the maximised log-likelihood of training data (by value). */
139  double log_likelihood() const
140  {
141  return log_likelihood_;
142  }
143 
    /** Returns a shared pointer to means initialiser implementation. */
145  std::shared_ptr<const Clustering::CentroidsInitialiser> means_initialiser() const
146  {
147  return means_initialiser_;
148  }
149 
    /**
     * Given a data point x, calculate each component's responsibilities for x and save them in u.
     * @param x Data point.
     * @param u Output vector receiving the responsibilities (presumably of length number_components(); confirm in the definition).
     */
156  DLL_DECLSPEC void assign_responsibilities(Eigen::Ref<const Eigen::VectorXd> x, Eigen::Ref<Eigen::VectorXd> u) const;
157 
    /** Returns a const reference to resulting cluster labels for each datapoint. */
158  const std::vector<unsigned int>& labels() const override
159  {
160  return labels_;
161  }
162 
    /** Reports if the model converged. */
163  bool converged() const override
164  {
165  return converged_;
166  }
167  private:
    // Implementation state. Members are written by the out-of-line definitions
    // unless an inline setter above assigns them.
    // Random engine; presumably seeded by set_seed() (definition not visible here).
168  std::default_random_engine prng_;
    // Returned by means_initialiser().
169  std::shared_ptr<const Clustering::CentroidsInitialiser> means_initialiser_;
170  std::shared_ptr<const Clustering::ResponsibilitiesInitialiser> responsibilities_initialiser_;
    // Returned by mixing_probabilities().
171  Eigen::VectorXd mixing_probabilities_;
    // Returned by means() and centroids().
172  Eigen::MatrixXd means_;
    // Returned by responsibilities().
173  Eigen::MatrixXd responsibilities_;
174  Eigen::VectorXd work_vector_;
    // Returned by covariances().
175  std::vector<Eigen::MatrixXd> covariances_;
176  std::vector<Eigen::MatrixXd> inverse_covariances_;
177  std::vector<Eigen::LLT<Eigen::MatrixXd>> covariance_decompositions_;
178  Eigen::VectorXd sqrt_covariance_determinants_;
    // Returned by labels().
179  std::vector<unsigned int> labels_;
180  double absolute_tolerance_;
181  double relative_tolerance_;
    // Returned by log_likelihood().
182  double log_likelihood_;
    // Returned by number_components().
183  unsigned int number_components_;
184  unsigned int maximum_steps_;
    // Assigned by set_verbose().
185  bool verbose_;
    // Assigned by set_maximise_first().
186  bool maximise_first_;
    // Returned by converged().
187  bool converged_;
188 
    // Private helpers; all are defined out of line, so their exact behaviour is
    // not visible in this header.
189  static Eigen::MatrixXd calculate_sample_covariance(Eigen::Ref<const Eigen::MatrixXd> data);
190 
191  void process_covariances(Eigen::Index number_dimensions);
192 
193  void expectation_step(Eigen::Ref<const Eigen::MatrixXd> data);
194 
195  void maximisation_step(Eigen::Ref<const Eigen::MatrixXd> data);
196 
197  void calculate_labels();
198  };
199 }
ml::EM::fit
bool fit(Eigen::Ref< const Eigen::MatrixXd > data) override
Fits the model.
ml::EM::assign_responsibilities
void assign_responsibilities(Eigen::Ref< const Eigen::VectorXd > x, Eigen::Ref< Eigen::VectorXd > u) const
Given a data point x, calculate each component's responsibilities for x and save them in u.
ml::EM::set_verbose
void set_verbose(bool verbose)
Switches between verbose and quiet mode.
Definition: EM.hpp:65
ml::EM::set_relative_tolerance
void set_relative_tolerance(double relative_tolerance)
Sets relative tolerance for convergence test.
ml::EM::set_seed
void set_seed(unsigned int seed)
Sets PRNG seed.
ml::EM::covariances
const auto & covariances() const
Returns a const reference to fitted component covariance matrices.
Definition: EM.hpp:112
ml
Definition: BallTree.hpp:10
dll.hpp
ml::EM::converged
bool converged() const override
Reports if the model converged.
Definition: EM.hpp:163
ml::EM::set_maximise_first
void set_maximise_first(bool maximise_first)
Switches between starting with E or M step first.
Definition: EM.hpp:73
ml::EM::labels
const std::vector< unsigned int > & labels() const override
Returns a const reference to resulting cluster labels for each datapoint. Values make sense only if fi...
Definition: EM.hpp:158
ml::EM::centroids
const Eigen::MatrixXd & centroids() const override
Returns a const reference to the matrix of cluster centroids (in columns).
Definition: EM.hpp:104
ml::EM::EM
EM(unsigned int number_components)
Constructs an EM ready to fit.
ml::Clustering::Model
Abstract clustering model.
Definition: Clustering.hpp:17
ml::EM::set_maximum_steps
void set_maximum_steps(unsigned int maximum_steps)
Sets maximum number of E-M steps.
ml::EM::means
const auto & means() const
Returns a const reference to matrix containing fitted component means.
Definition: EM.hpp:99
ml::EM::log_likelihood
double log_likelihood() const
Returns the maximised log-likelihood of training data.
Definition: EM.hpp:139
ml::EM::mixing_probabilities
const auto & mixing_probabilities() const
Returns a const reference to fitted component mixing probabilities.
Definition: EM.hpp:125
ml::EM::covariance
const Eigen::MatrixXd & covariance(unsigned int k) const
Returns a const reference to fitted k-th component's covariance matrix.
ml::EM::set_responsibilities_initialiser
void set_responsibilities_initialiser(std::shared_ptr< const Clustering::ResponsibilitiesInitialiser > responsibilities_initialiser)
Sets responsibilities initialiser.
ml::EM::set_absolute_tolerance
void set_absolute_tolerance(double absolute_tolerance)
Sets absolute tolerance for convergence test.
ml::EM
Gaussian Expectation-Maximisation algorithm.
Definition: EM.hpp:18
ml::EM::number_clusters
unsigned int number_clusters() const override
Returns the number of clusters.
Definition: EM.hpp:91
ml::EM::responsibilities
const auto & responsibilities() const
Returns a const reference to resulting component responsibilities.
Definition: EM.hpp:133
ml::EM::set_means_initialiser
void set_means_initialiser(std::shared_ptr< const Clustering::CentroidsInitialiser > means_initialiser)
Sets means initialiser.
ml::EM::means_initialiser
std::shared_ptr< const Clustering::CentroidsInitialiser > means_initialiser() const
Returns a shared pointer to means initialiser implementation.
Definition: EM.hpp:145
ml::EM::number_components
auto number_components() const
Returns the number of components.
Definition: EM.hpp:86