Grok 12.0.1
stats.h
Go to the documentation of this file.
1// Copyright 2024 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// https://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef HIGHWAY_HWY_STATS_H_
17#define HIGHWAY_HWY_STATS_H_
18
#include <stdint.h>
#include <stdio.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <string>
25
26#include "hwy/base.h" // HWY_ASSERT
27
28namespace hwy {
29
// Histogram with a compile-time number of bins (N), each holding an occurrence
// count. Thread-compatible.
template <size_t N>
class Bins {
 public:
  // All counters start at zero.
  Bins() { Reset(); }

  // Increments the counter of `bin`, which must lie in [0, N); this is
  // verified via HWY_ASSERT. T is any type convertible to an array index.
  template <typename T>
  void Notify(T bin) {
    HWY_ASSERT(T{0} <= bin && bin < static_cast<T>(N));
    // Index via size_t (not int32_t) so large bin indices cannot be truncated.
    counts_[static_cast<size_t>(bin)]++;
  }

  // Adds the per-bin counts of `other` to this histogram.
  void Assimilate(const Bins<N>& other) {
    for (size_t i = 0; i < N; ++i) {
      counts_[i] += other.counts_[i];
    }
  }

  // Writes `caption`, the bin count N and then one counter per line to stderr.
  // Trailing all-zero bins are omitted, but bin 0 is always printed.
  void Print(const char* caption) const {
    fprintf(stderr, "\n%s [%zu]\n", caption, N);
    // `end` is one past the last bin to print: shrink it while the final bin
    // is empty, but never below 1.
    size_t end = N;
    while (end > 1 && counts_[end - 1] == 0) {
      --end;
    }
    for (size_t i = 0; i < end; ++i) {
      fprintf(stderr, " %zu\n", counts_[i]);
    }
  }

  // Sets every counter back to zero.
  void Reset() {
    for (size_t i = 0; i < N; ++i) {
      counts_[i] = 0;
    }
  }

 private:
  size_t counts_[N];
};
71
// Descriptive statistics of a variable (4 moments), updated online one sample
// at a time. Thread-compatible.
class Stats {
 public:
  Stats() { Reset(); }

  // Incorporates one sample `x` into the count, min/max, running product and
  // the four running central-moment sums.
  void Notify(const float x) {
    ++n_;

    min_ = std::min(min_, x);
    max_ = std::max(max_, x);

    // NOTE: may overflow to infinity or underflow to zero for long sequences.
    product_ *= x;

    // Online moments. Reference: https://goo.gl/9ha694
    // All count-derived factors are computed in double: the int64_t product
    // n_ * n_ would overflow (undefined behavior) for very large counts.
    const double n = static_cast<double>(n_);
    const double d = x - m1_;
    const double d_div_n = d / n;
    const double d2n1_div_n = d * (n - 1.0) * d_div_n;
    const double n_poly = n * n - 3.0 * n + 3.0;
    m1_ += d_div_n;
    // Order matters: m4_ needs the old m3_/m2_, and m3_ needs the old m2_.
    m4_ += d_div_n * (d_div_n * (d2n1_div_n * n_poly + 6.0 * m2_) - 4.0 * m3_);
    m3_ += d_div_n * (d2n1_div_n * (n - 2.0) - 3.0 * m2_);
    m2_ += d2n1_div_n;
  }

  // Merges the samples seen by `other` into this object (defined elsewhere).
  void Assimilate(const Stats& other);

  // Number of samples passed to Notify since construction or the last Reset.
  int64_t Count() const { return n_; }

  float Min() const { return min_; }
  float Max() const { return max_; }

  // n-th root of the product of all samples; 0 if there are no samples.
  double GeometricMean() const {
    return n_ == 0 ? 0.0 : std::pow(product_, 1.0 / static_cast<double>(n_));
  }

  double Mean() const { return m1_; }
  // Same as Mu2. Assumes n_ is large.
  double SampleVariance() const {
    return n_ == 0 ? 0.0 : m2_ / static_cast<double>(n_);
  }
  // Unbiased estimator for population variance even for smaller n_.
  double Variance() const {
    if (n_ == 0) return 0.0;
    if (n_ == 1) return m2_;
    return m2_ / static_cast<double>(n_ - 1);
  }
  double StandardDeviation() const { return std::sqrt(Variance()); }
  // Near zero for normal distributions; if positive on a unimodal distribution,
  // the right tail is fatter. Assumes n_ is large.
  double SampleSkewness() const {
    if (std::abs(m2_) < 1E-7) return 0.0;
    return m3_ * std::sqrt(static_cast<double>(n_)) / std::pow(m2_, 1.5);
  }
  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
  double Skewness() const {
    if (n_ == 0) return 0.0;
    const double biased = SampleSkewness();
    const double r = (n_ - 1.0) / n_;
    return biased * std::pow(r, 1.5);
  }
  // Fourth standardized moment (mu4 / mu2^2), i.e. NOT excess kurtosis: about
  // 3 for normal distributions; smaller values indicate fewer/smaller outliers
  // and larger values more/larger outliers. Assumes n_ is large.
  double SampleKurtosis() const {
    if (std::abs(m2_) < 1E-7) return 0.0;
    return m4_ * n_ / (m2_ * m2_);
  }
  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
  double Kurtosis() const {
    if (n_ == 0) return 0.0;
    const double biased = SampleKurtosis();
    const double r = (n_ - 1.0) / n_;
    return biased * r * r;
  }

  // Central moments, useful for "method of moments"-based parameter estimation
  // of a mixture of two Gaussians. Assumes Count() != 0.
  double Mu1() const { return m1_; }
  double Mu2() const { return m2_ / static_cast<double>(n_); }
  double Mu3() const { return m3_ / static_cast<double>(n_); }
  double Mu4() const { return m4_ / static_cast<double>(n_); }

  // Which statistics to EXCLUDE in ToString. Each value is a distinct bit,
  // presumably so that several can be combined with bitwise OR.
  // NOTE(review): the values of the first four enumerators are inferred from
  // the power-of-two pattern ending in kNoGeomean = 16 - confirm.
  enum {
    kNoCount = 1,
    kNoMeanSD = 2,
    kNoMinMax = 4,
    kNoSkewKurt = 8,
    kNoGeomean = 16
  };
  // Returns a textual summary, omitting the statistics selected by `exclude`
  // (defined elsewhere).
  std::string ToString(int exclude = 0) const;

  // Returns to the empty state: no samples, neutral product, zero moments.
  void Reset() {
    n_ = 0;

    // Sentinels chosen so that the first Notify() overwrites both extremes.
    // NOTE(review): assumed equivalent to hwy::HighestValue<float>() and
    // hwy::LowestValue<float>() from hwy/base.h - confirm.
    min_ = std::numeric_limits<float>::max();
    max_ = std::numeric_limits<float>::lowest();

    product_ = 1.0;

    m1_ = 0.0;
    m2_ = 0.0;
    m3_ = 0.0;
    m4_ = 0.0;
  }

 private:
  int64_t n_;  // signed for faster conversion + safe subtraction

  float min_;
  float max_;

  double product_;  // for geomean

  // Moments: m1_ is the running mean; m2_..m4_ are running sums of powers of
  // deviations from the mean (divide by n_ to obtain central moments).
  double m1_;
  double m2_;
  double m3_;
  double m4_;
};
191
192} // namespace hwy
193
194#endif // HIGHWAY_HWY_STATS_H_
#define HWY_ASSERT(condition)
Definition base.h:237
Definition stats.h:32
size_t counts_[N]
Definition stats.h:69
void Notify(T bin)
Definition stats.h:37
Bins()
Definition stats.h:34
void Print(const char *caption)
Definition stats.h:48
void Reset()
Definition stats.h:62
void Assimilate(const Bins< N > &other)
Definition stats.h:42
Definition stats.h:73
int64_t Count() const
Definition stats.h:98
std::string ToString(int exclude=0) const
double GeometricMean() const
Definition stats.h:103
double StandardDeviation() const
Definition stats.h:118
Stats()
Definition stats.h:75
double Mean() const
Definition stats.h:107
@ kNoMinMax
Definition stats.h:157
@ kNoGeomean
Definition stats.h:159
@ kNoSkewKurt
Definition stats.h:158
@ kNoMeanSD
Definition stats.h:156
@ kNoCount
Definition stats.h:155
double Variance() const
Definition stats.h:113
void Reset()
Definition stats.h:163
double m1_
Definition stats.h:186
double Mu4() const
Definition stats.h:151
int64_t n_
Definition stats.h:178
float max_
Definition stats.h:181
double Skewness() const
Definition stats.h:126
double Kurtosis() const
Definition stats.h:139
double m4_
Definition stats.h:189
void Assimilate(const Stats &other)
double m3_
Definition stats.h:188
void Notify(const float x)
Definition stats.h:77
float min_
Definition stats.h:180
float Min() const
Definition stats.h:100
double Mu3() const
Definition stats.h:150
double Mu2() const
Definition stats.h:149
double SampleVariance() const
Definition stats.h:109
double product_
Definition stats.h:183
double SampleKurtosis() const
Definition stats.h:134
double m2_
Definition stats.h:187
double SampleSkewness() const
Definition stats.h:121
double Mu1() const
Definition stats.h:148
float Max() const
Definition stats.h:101
Definition abort.h:8
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float LowestValue< float >()
Definition base.h:2203
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float HighestValue< float >()
Definition base.h:2224