PICCANTE  0.4
The hottest HDR imaging library!
k_means.hpp
Go to the documentation of this file.
1 /*
2 
3 PICCANTE
4 The hottest HDR imaging library!
5 http://vcg.isti.cnr.it/piccante
6 
7 Copyright (C) 2014
8 Visual Computing Laboratory - ISTI CNR
9 http://vcg.isti.cnr.it
10 First author: Francesco Banterle
11 
12 This Source Code Form is subject to the terms of the Mozilla Public
13 License, v. 2.0. If a copy of the MPL was not distributed with this
14 file, You can obtain one at http://mozilla.org/MPL/2.0/.
15 
16 */
17 
18 #ifndef PIC_UTIL_K_MEANS_HPP
19 #define PIC_UTIL_K_MEANS_HPP
20 
21 #include <vector>
22 #include <set>
23 #include <chrono>
24 
25 #include "../base.hpp"
26 #include "../util/array.hpp"
27 #include "../util/math.hpp"
28 #include "../util/std_util.hpp"
29 
30 namespace pic{
31 
32 template<class T>
33 class KMeans
34 {
35 protected:
36 
37  T* getMean(T *samples, T *out, int nDim, std::set<uint> *cluster)
38  {
39  Array<T>::assign(T(0), out, nDim);
40 
41  int count = 0;
42  for (auto it = cluster->begin(); it != cluster->end(); it++) {
43  int i = *it;
44  Array<T>::add(&samples[i * nDim], nDim, out);
45  count++;
46  }
47 
48  if(count > 0) {
49  Array<T>::div(out, nDim, T(count));
50  }
51 
52  return out;
53  }
54 
55  uint assignLabel(T* sample_j, int nDim, T* centers)
56  {
57  T dist = Array<T>::distanceSq(sample_j, &centers[0], nDim);
58  uint label = 0;
59 
60  for(uint i = 1; i < k; i++) {
61  T *center_i = &centers[i * nDim];
62 
63  T tmp_dist = Array<T>::distanceSq(sample_j, center_i, nDim);
64 
65  if(tmp_dist < dist) {
66  dist = tmp_dist;
67  label = i;
68  }
69  }
70 
71  return label;
72  }
73 
74  virtual T* initCenters(T *samples, int nSamples, int nDim, T* centers)
75  {
76  if(centers == NULL) {
77  centers = new T[k * nDim];
78  }
79 
80  std::mt19937 m(42);
81 
82  T *tMin = new T[nDim];
83  T *tMax = new T[nDim];
84 
85  for(int j = 0; j < nDim; j++) {
86  T s = samples[j];
87  tMin[j] = s;
88  tMax[j] = s;
89  }
90 
91  for(int i = 1; i < nSamples; i++) {
92  int index = i * nDim;
93  for(int j = 0; j < nDim; j++) {
94  T s = samples[index + j];
95 
96  tMin[j] = MIN(tMin[j], s);
97  tMax[j] = MAX(tMax[j], s);
98  }
99  }
100 
101  for(uint i = 0; i < k; i++) {
102  int index = i * nDim;
103  for(int j = 0; j < nDim; j++) {
104  centers[index + j] = T(getRandom(m()) * (tMax[j] - tMin[j]) + tMin[j]);
105  }
106  }
107 
108  delete[] tMin;
109  delete[] tMax;
110 
111  return centers;
112  }
113 
115 
116 public:
117 
119  {
120  setup(k, maxIter);
121  }
122 
123  void setup(uint k, uint maxIter = 100)
124  {
125  this->k = k;
126  this->maxIter = maxIter;
127  }
128  T* Process(T *samples, int nSamples, int nDim,
129  T* centers,
130  std::vector< std::set<uint> *> &labels)
131  {
132  if(nSamples < k) {
133  return NULL;
134  }
135 
136  labels.clear();
137  for(uint i = 0; i < k; i++) {
138  labels.push_back(new std::set<uint>);
139  }
140 
141  centers = initCenters(samples, nSamples, nDim, centers);
142 
143  for(uint i = 0; i < nSamples; i++) {
144  T *sample_i = &samples[i * nDim];
145 
146  uint label = assignLabel(sample_i, nDim, centers);
147  labels[label]->insert(i);
148  }
149 
150  T *mean = new T[k * nDim];
151 
152  for(uint i = 0; i < maxIter; i++) {
153  bool bNoChanges = true;
154 
155  for(uint j = 0; j < k; j++) {
156  //compute new means
157  int index = j * nDim;
158  std::set<uint> *tmp = labels.at(j);
159  getMean(samples, &mean[index], nDim, tmp);
160 
161  //update centers
162  float dist = Arrayf::distanceSq(&centers[index], &mean[index], nDim);
163 
164  Arrayf::assign(&mean[index], nDim, &centers[index]);
165 
166  if(dist > 1e-6f) {
167  bNoChanges = false;
168  }
169  }
170 
171  if(bNoChanges) {
172  #ifdef PIC_DEBUG
173  printf("Max iterations: %d\n", i);
174  #endif
175 
176  delete[] mean;
177  return centers;
178  } else {
179  //clear labels
180  for(uint j = 0; j < k; j++) {
181  labels[j]->clear();
182  }
183 
184  //re-assign labels
185  for(uint j = 0; j < nSamples; j++) {
186  T *sample_j = &samples[j * nDim];
187  uint label = assignLabel(sample_j, nDim, centers);
188  labels[label]->insert(j);
189  }
190  }
191  }
192 
193  delete[] mean;
194 
195  return centers;
196  }
197 
198  static T* execute(T *samples, int nSamples, int nDim,
199  T* centers, int k,
200  std::vector< std::set<uint> *> &labels,
201  uint maxIter = 100)
202  {
203 
204  KMeans<T> km(k, maxIter);
205 
206  return km.Process(samples, nSamples, nDim, centers, labels);
207  }
208 
209  static T* select(T *samples, int nSamples, int nDim,
210  std::vector< std::set<uint> *> &labels,
211  uint &k,
212  float threshold = 1e-2f,
213  uint maxIter = 100)
214  {
215  T *centers = NULL;
216  k = 1;
217  T prevErr;
218  bool bFlag = true;
219  while(bFlag) {
220  k++;
221 
222  #ifdef PIC_DEBUG
223  printf("k: %d\n", k);
224  #endif
225 
226  labels.clear();
227  centers = delete_vec_s(centers);
228 
229  centers = KMeans::execute(samples, nSamples, nDim, NULL, k, labels, maxIter);
230 
231  T err = T(0);
232  for(uint i = 0; i < labels.size(); i++) {
233  T *center_i = &centers[i * nDim];
234 
235  std::set<uint> * cluster = labels.at(i);
236  for (std::set<uint>::iterator it = cluster->begin(); it != cluster->end(); it++) {
237  int i = *it;
238  err += Array<T>::distanceSq(&samples[i * nDim], center_i, nDim);
239  }
240 
241  }
242 
243  if(k > 2) {
244  float relErr = fabsf(float(err - prevErr)) / float(prevErr);
245 
246  #ifdef PIC_DEBUG
247  printf("%f %f %f\n", err, prevErr, relErr);
248  #endif
249 
250  if(relErr < threshold) {
251  bFlag = false;
252  }
253  }
254 
255  prevErr = err;
256  }
257 
258  return centers;
259  }
260 
261 };
262 
263 } //end namespace pic
264 
265 #endif // PIC_UTIL_K_MEANS_HPP
T * getMean(T *samples, T *out, int nDim, std::set< uint > *cluster)
Definition: k_means.hpp:37
unsigned int uint
Definition: base.hpp:23
uint k
Definition: k_means.hpp:114
static T * select(T *samples, int nSamples, int nDim, std::vector< std::set< uint > *> &labels, uint &k, float threshold=1e-2f, uint maxIter=100)
Definition: k_means.hpp:209
uint maxIter
Definition: k_means.hpp:114
uint assignLabel(T *sample_j, int nDim, T *centers)
Definition: k_means.hpp:55
static T * execute(T *samples, int nSamples, int nDim, T *centers, int k, std::vector< std::set< uint > *> &labels, uint maxIter=100)
Definition: k_means.hpp:198
T * Process(T *samples, int nSamples, int nDim, T *centers, std::vector< std::set< uint > *> &labels)
Definition: k_means.hpp:128
T * delete_vec_s(T *data)
delete_vec_s
Definition: std_util.hpp:138
Definition: k_means.hpp:33
KMeans(uint k, uint maxIter)
Definition: k_means.hpp:118
static T distanceSq(T *data0, T *data1, int n)
distanceSq
Definition: array.hpp:195
virtual T * initCenters(T *samples, int nSamples, int nDim, T *centers)
Definition: k_means.hpp:74
#define MIN(a, b)
Definition: math.hpp:69
static void div(T *data, int size, T value)
div
Definition: array.hpp:353
Definition: bilateral_separation.hpp:25
void setup(uint k, uint maxIter=100)
Definition: k_means.hpp:123
PIC_INLINE float getRandom(unsigned int n)
Random returns a number in [0, 2^32 - 1] to a float in [0, 1].
Definition: math.hpp:138
static T * assign(T *data, int size, T *ret)
assign
Definition: array.hpp:464
#define MAX(a, b)
Definition: math.hpp:73
static T * add(T *data, int size, T *ret)
add
Definition: array.hpp:334