cpp-toolbox  0.0.1
A toolbox library for C++
Loading...
Searching...
No Matches
histogram_metrics.hpp
Go to the documentation of this file.
1#pragma once
2
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <numeric>
#include <stdexcept>
7
9
10namespace toolbox::metrics
11{
12
13template<typename T>
14class ChiSquaredMetric : public base_metric_t<ChiSquaredMetric<T>, T>
15{
16public:
17 using element_type = T;
18
19 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
20 {
21 T sum {};
22 for (std::size_t i = 0; i < size; ++i) {
23 T ai = a[i];
24 T bi = b[i];
25 T denominator = ai + bi;
26
27 if (denominator > std::numeric_limits<T>::epsilon()) {
28 T diff = ai - bi;
29 sum += (diff * diff) / denominator;
30 }
31 }
32 return sum * T(0.5); // Standard chi-squared distance includes 1/2 factor
33 }
34
35 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
36 {
37 T dist = distance_impl(a, b, size);
38 return dist * dist;
39 }
40};
41
42template<typename T>
43class HistogramIntersectionMetric : public base_metric_t<HistogramIntersectionMetric<T>, T>
44{
45public:
46 using element_type = T;
47
48 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
49 {
50 T intersection {};
51 T sum_a {};
52 T sum_b {};
53
54 for (std::size_t i = 0; i < size; ++i) {
55 if (a[i] < 0 || b[i] < 0) {
56 throw std::invalid_argument("Histogram values must be non-negative");
57 }
58 intersection += std::min(a[i], b[i]);
59 sum_a += a[i];
60 sum_b += b[i];
61 }
62
63 T max_sum = std::max(sum_a, sum_b);
64 if (max_sum < std::numeric_limits<T>::epsilon()) {
65 return T(0); // Both histograms are empty
66 }
67
68 // Return distance (1 - similarity)
69 return T(1) - (intersection / max_sum);
70 }
71
72 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
73 {
74 T dist = distance_impl(a, b, size);
75 return dist * dist;
76 }
77};
78
79template<typename T>
80class BhattacharyyaMetric : public base_metric_t<BhattacharyyaMetric<T>, T>
81{
82public:
83 using element_type = T;
84
85 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
86 {
87 // Normalize histograms
88 T sum_a = std::accumulate(a, a + size, T(0));
89 T sum_b = std::accumulate(b, b + size, T(0));
90
91 if (sum_a < std::numeric_limits<T>::epsilon() ||
92 sum_b < std::numeric_limits<T>::epsilon()) {
93 return T(1); // Maximum distance if either histogram is empty
94 }
95
96 // Compute Bhattacharyya coefficient
97 T bc {};
98 for (std::size_t i = 0; i < size; ++i) {
99 T normalized_a = a[i] / sum_a;
100 T normalized_b = b[i] / sum_b;
101 bc += std::sqrt(normalized_a * normalized_b);
102 }
103
104 // Bhattacharyya distance
105 if (bc >= T(1)) {
106 return T(0); // Identical histograms
107 }
108 if (bc <= T(0)) {
109 return std::numeric_limits<T>::infinity(); // No overlap
110 }
111
112 return -std::log(bc);
113 }
114
115 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
116 {
117 T dist = distance_impl(a, b, size);
118 return dist * dist;
119 }
120};
121
122// Hellinger distance (related to Bhattacharyya)
123template<typename T>
124class HellingerMetric : public base_metric_t<HellingerMetric<T>, T>
125{
126public:
127 using element_type = T;
128
129 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
130 {
131 // Normalize histograms
132 T sum_a = std::accumulate(a, a + size, T(0));
133 T sum_b = std::accumulate(b, b + size, T(0));
134
135 if (sum_a < std::numeric_limits<T>::epsilon() ||
136 sum_b < std::numeric_limits<T>::epsilon()) {
137 return T(1); // Maximum distance if either histogram is empty
138 }
139
140 // Compute sum of squared differences of square roots
141 T sum {};
142 for (std::size_t i = 0; i < size; ++i) {
143 T sqrt_a = std::sqrt(a[i] / sum_a);
144 T sqrt_b = std::sqrt(b[i] / sum_b);
145 T diff = sqrt_a - sqrt_b;
146 sum += diff * diff;
147 }
148
149 // Hellinger distance
150 return std::sqrt(sum / T(2));
151 }
152
153 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
154 {
155 T dist = distance_impl(a, b, size);
156 return dist * dist;
157 }
158};
159
160// Earth Mover's Distance (EMD) / Wasserstein-1 distance for 1D histograms
161template<typename T>
162class EMDMetric : public base_metric_t<EMDMetric<T>, T>
163{
164public:
165 using element_type = T;
166
167 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
168 {
169 // Normalize to probability distributions
170 T sum_a = std::accumulate(a, a + size, T(0));
171 T sum_b = std::accumulate(b, b + size, T(0));
172
173 if (sum_a < std::numeric_limits<T>::epsilon() ||
174 sum_b < std::numeric_limits<T>::epsilon()) {
175 return T(size); // Maximum distance
176 }
177
178 // Compute cumulative distributions and their L1 distance
179 T emd {};
180 T cumsum_a {};
181 T cumsum_b {};
182
183 for (std::size_t i = 0; i < size; ++i) {
184 cumsum_a += a[i] / sum_a;
185 cumsum_b += b[i] / sum_b;
186 emd += std::abs(cumsum_a - cumsum_b);
187 }
188
189 return emd;
190 }
191
192 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
193 {
194 T dist = distance_impl(a, b, size);
195 return dist * dist;
196 }
197};
198
199// Kullback-Leibler (KL) divergence - note: not symmetric!
200template<typename T>
201class KLDivergenceMetric : public base_metric_t<KLDivergenceMetric<T>, T>
202{
203public:
204 using element_type = T;
205
206 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
207 {
208 // Normalize to probability distributions
209 T sum_a = std::accumulate(a, a + size, T(0));
210 T sum_b = std::accumulate(b, b + size, T(0));
211
212 if (sum_a < std::numeric_limits<T>::epsilon()) {
213 return std::numeric_limits<T>::infinity();
214 }
215
216 T kl {};
217 const T epsilon = std::numeric_limits<T>::epsilon();
218
219 for (std::size_t i = 0; i < size; ++i) {
220 T p = a[i] / sum_a;
221 T q = b[i] / sum_b;
222
223 if (p > epsilon) {
224 if (q < epsilon) {
225 return std::numeric_limits<T>::infinity();
226 }
227 kl += p * std::log(p / q);
228 }
229 }
230
231 return kl;
232 }
233
234 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
235 {
236 T dist = distance_impl(a, b, size);
237 return dist * dist;
238 }
239};
240
241// Jensen-Shannon divergence (symmetric version of KL)
242template<typename T>
243class JensenShannonMetric : public base_metric_t<JensenShannonMetric<T>, T>
244{
245public:
246 using element_type = T;
247
248 constexpr T distance_impl(const T* a, const T* b, std::size_t size) const
249 {
250 // Normalize to probability distributions
251 T sum_a = std::accumulate(a, a + size, T(0));
252 T sum_b = std::accumulate(b, b + size, T(0));
253
254 if (sum_a < std::numeric_limits<T>::epsilon() ||
255 sum_b < std::numeric_limits<T>::epsilon()) {
256 return T(1);
257 }
258
259 T js {};
260 const T epsilon = std::numeric_limits<T>::epsilon();
261
262 for (std::size_t i = 0; i < size; ++i) {
263 T p = a[i] / sum_a;
264 T q = b[i] / sum_b;
265 T m = (p + q) / T(2);
266
267 if (p > epsilon && m > epsilon) {
268 js += p * std::log(p / m);
269 }
270 if (q > epsilon && m > epsilon) {
271 js += q * std::log(q / m);
272 }
273 }
274
275 return std::sqrt(js / T(2)); // Square root for metric property
276 }
277
278 constexpr T squared_distance_impl(const T* a, const T* b, std::size_t size) const
279 {
280 T dist = distance_impl(a, b, size);
281 return dist * dist;
282 }
283};
284
285} // namespace toolbox::metrics
Definition histogram_metrics.hpp:81
T element_type
Definition histogram_metrics.hpp:83
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:115
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:85
Definition histogram_metrics.hpp:15
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:35
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:19
T element_type
Definition histogram_metrics.hpp:17
Definition histogram_metrics.hpp:163
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:167
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:192
T element_type
Definition histogram_metrics.hpp:165
Definition histogram_metrics.hpp:125
T element_type
Definition histogram_metrics.hpp:127
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:129
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:153
Definition histogram_metrics.hpp:44
T element_type
Definition histogram_metrics.hpp:46
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:72
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:48
Definition histogram_metrics.hpp:244
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:278
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:248
T element_type
Definition histogram_metrics.hpp:246
Definition histogram_metrics.hpp:202
constexpr T squared_distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:234
constexpr T distance_impl(const T *a, const T *b, std::size_t size) const
Definition histogram_metrics.hpp:206
T element_type
Definition histogram_metrics.hpp:204
Definition base_metric.hpp:13
Definition angular_metrics.hpp:11