MLpp
KMeans.hpp
1 #pragma once
2 /* (C) 2021 Roman Werpachowski. */
3 #include "Clustering.hpp"
4 #include <vector>
5 #include <utility>
6 #include <Eigen/Core>
7 #include "dll.hpp"
8 
9 namespace ml
10 {
11  namespace Clustering
12  {
/**
 * Naive K-means clustering method.
 *
 * Derives from Model and overrides fit(), number_clusters(), labels(),
 * centroids() and converged(). Fitting parameters (PRNG seed, absolute
 * tolerance, maximum steps, number of initialisations, centroids initialiser,
 * verbosity) are configured through the set_* member functions.
 *
 * NOTE(review): std::shared_ptr and std::default_random_engine are used below,
 * but <memory> and <random> are not among this header's direct includes;
 * presumably they arrive transitively via Clustering.hpp -- confirm.
 */
18  class KMeans : public Model
19  {
20  public:
/**
 * Constructs a K-means model ready to fit.
 *
 * @param number_clusters Requested number of clusters (presumably stored in
 *        num_clusters_; the definition is out of line -- confirm).
 *
 * NOTE(review): this single-argument constructor is not `explicit`, so an
 * unsigned int converts implicitly to KMeans -- confirm this is intended.
 */
25  DLL_DECLSPEC KMeans(unsigned int number_clusters);
26 
/**
 * Fits the model.
 *
 * @param data Data matrix. Presumably one data point per column, matching the
 *        column-wise layout of centroids() -- TODO confirm.
 * @return Boolean status; presumably the same value converged() reports
 *         afterwards -- confirm against the implementation.
 */
27  DLL_DECLSPEC bool fit(Eigen::Ref<const Eigen::MatrixXd> data) override;
28 
/** Returns the number of clusters. */
29  unsigned int number_clusters() const override
30  {
31  return num_clusters_;
32  }
33 
/**
 * Returns a const reference to the resulting cluster labels for each
 * data point. The reference aliases the internal labels_ member.
 */
34  const std::vector<unsigned int>& labels() const override
35  {
36  return labels_;
37  }
38 
/**
 * Returns a const reference to the matrix of cluster centroids (in columns).
 * The reference aliases the internal centroids_ member.
 */
39  const Eigen::MatrixXd& centroids() const override
40  {
41  return centroids_;
42  }
43 
/**
 * Sets PRNG seed.
 *
 * @param seed Seed value (presumably applied to prng_ -- confirm).
 */
47  DLL_DECLSPEC void set_seed(unsigned int seed);
48 
/**
 * Sets absolute tolerance for the convergence test:
 * || old centroids - new centroids ||^2 < absolute tolerance.
 *
 * @param absolute_tolerance Tolerance value. Any validation of the argument
 *        happens in the out-of-line definition, which is not visible here.
 */
53  DLL_DECLSPEC void set_absolute_tolerance(double absolute_tolerance);
54 
/**
 * Sets maximum number of K-means steps.
 *
 * @param maximum_steps Upper limit on the number of steps.
 */
59  DLL_DECLSPEC void set_maximum_steps(unsigned int maximum_steps);
60 
/**
 * Sets number of initialisations to try, to find the clusters with
 * lowest inertia.
 *
 * @param number_initialisations Number of initialisations to try.
 */
66  DLL_DECLSPEC void set_number_initialisations(unsigned int number_initialisations);
67 
/**
 * Sets centroids initialiser.
 *
 * @param centroids_initialiser Shared pointer to the initialiser to use.
 *        Whether a null pointer is accepted is decided in the out-of-line
 *        definition -- confirm.
 */
72  DLL_DECLSPEC void set_centroids_initialiser(std::shared_ptr<const CentroidsInitialiser> centroids_initialiser);
73 
/**
 * Switches between verbose and quiet mode.
 *
 * @param verbose New value of the verbosity flag.
 */
77  void set_verbose(bool verbose)
78  {
79  verbose_ = verbose;
80  }
81 
/**
 * Given a data point x, assigns it to its cluster.
 *
 * @param x Data point.
 * @return Pair of (cluster label, squared Euclidean distance); the distance
 *         is presumably measured to the assigned centroid -- confirm.
 */
87  DLL_DECLSPEC std::pair<unsigned int, double> assign_label(Eigen::Ref<const Eigen::VectorXd> x) const;
88 
/** Returns the inertia: sum of squared distances to the nearest centroid. */
93  double inertia() const
94  {
95  return inertia_;
96  }
97 
/** Reports if the model converged. */
98  bool converged() const override
99  {
100  return converged_;
101  }
102  private:
// NOTE: no member below has an in-class initialiser, so initial values must
// come from the out-of-line constructor definition (not visible here).
103  std::vector<unsigned int> labels_; // Cluster labels, exposed by labels().
104  std::vector<unsigned int> old_labels_; // Presumably labels kept from a previous step or initialisation -- confirm.
105  Eigen::MatrixXd centroids_; // Cluster centroids (in columns), exposed by centroids().
106  Eigen::MatrixXd old_centroids_; // Presumably centroids kept for the convergence test -- confirm.
107  Eigen::VectorXd work_vector_; // Presumably scratch storage reused between calls -- confirm.
108  std::default_random_engine prng_; // Pseudo-random engine; presumably seeded via set_seed().
109  std::shared_ptr<const CentroidsInitialiser> centroids_initialiser_; // Presumably set via set_centroids_initialiser().
110  double absolute_tolerance_; // Presumably set via set_absolute_tolerance().
111  double inertia_; // Value returned by inertia().
112  unsigned int maximum_steps_; // Presumably set via set_maximum_steps().
113  unsigned int num_inits_; // Presumably set via set_number_initialisations().
114  unsigned int num_clusters_; // Value returned by number_clusters().
115  bool verbose_; // Verbosity flag written by set_verbose().
116  bool converged_; // Value returned by converged().
117 
// Presumably performs a single fit from one initialisation; fit() likely calls
// it once per initialisation -- confirm against the implementation.
118  bool fit_once(Eigen::Ref<const Eigen::MatrixXd> data);
119 
// Presumably the K-means assignment step (label each point with a centroid)
// -- confirm against the implementation.
121  void assignment_step(Eigen::Ref<const Eigen::MatrixXd> data);
122 
// Presumably the K-means update step (recompute centroids from the labels)
// -- confirm against the implementation.
124  void update_step(Eigen::Ref<const Eigen::MatrixXd> data);
125  };
126  }
127 }
ml::Clustering::KMeans
Naive K-means clustering method.
Definition: KMeans.hpp:18
ml::Clustering::KMeans::inertia
double inertia() const
Sum of squared distances to the nearest centroid.
Definition: KMeans.hpp:93
ml::Clustering::KMeans::set_seed
void set_seed(unsigned int seed)
Sets PRNG seed.
ml::Clustering::KMeans::set_absolute_tolerance
void set_absolute_tolerance(double absolute_tolerance)
Sets absolute tolerance for convergence test: || old centroids - new centroids ||^2 < absolute tolerance...
ml::Clustering::KMeans::set_number_initialisations
void set_number_initialisations(unsigned int number_initialisations)
Sets number of initialisations to try, to find the clusters with lowest inertia.
ml
Definition: BallTree.hpp:10
ml::Clustering::KMeans::labels
const std::vector< unsigned int > & labels() const override
Returns a const reference to resulting cluster labels for each datapoint. Values make sense only if fi...
Definition: KMeans.hpp:34
ml::Clustering::KMeans::number_clusters
unsigned int number_clusters() const override
Returns the number of clusters.
Definition: KMeans.hpp:29
ml::Clustering::KMeans::centroids
const Eigen::MatrixXd & centroids() const override
Returns a const reference to the matrix of cluster centroids (in columns).
Definition: KMeans.hpp:39
dll.hpp
ml::Clustering::KMeans::set_maximum_steps
void set_maximum_steps(unsigned int maximum_steps)
Sets maximum number of K-means steps.
ml::Clustering::KMeans::assign_label
std::pair< unsigned int, double > assign_label(Eigen::Ref< const Eigen::VectorXd > x) const
Given a data point x, assign it to its cluster and return the correct label and squared Euclidean distance...
ml::Clustering::Model
Abstract clustering model.
Definition: Clustering.hpp:17
ml::Clustering::KMeans::set_centroids_initialiser
void set_centroids_initialiser(std::shared_ptr< const CentroidsInitialiser > centroids_initialiser)
Sets centroids initialiser.
ml::Clustering::KMeans::set_verbose
void set_verbose(bool verbose)
Switches between verbose and quiet mode.
Definition: KMeans.hpp:77
ml::Clustering::KMeans::converged
bool converged() const override
Reports if the model converged.
Definition: KMeans.hpp:98
ml::Clustering::KMeans::KMeans
KMeans(unsigned int number_clusters)
Constructs a K-means model ready to fit.
ml::Clustering::KMeans::fit
bool fit(Eigen::Ref< const Eigen::MatrixXd > data) override
Fits the model.