catch_stats.hpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /*
  2. * Created by Joachim on 16/04/2019.
  3. * Adapted from donated nonius code.
  4. *
  5. * Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. */
  8. // Statistical analysis tools
  9. #ifndef TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
  10. #define TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
  11. #include "../catch_clock.hpp"
  12. #include "../catch_estimate.hpp"
  13. #include "../catch_outlier_classification.hpp"
  14. #include <algorithm>
  15. #include <functional>
  16. #include <vector>
  17. #include <iterator>
  18. #include <numeric>
  19. #include <tuple>
  20. #include <cmath>
  21. #include <utility>
  22. #include <cstddef>
  23. #include <random>
  24. namespace Catch {
  25. namespace Benchmark {
  26. namespace Detail {
  // Sequence of double values. The helpers below return their resampled and
  // jackknifed estimates in this type.
  using sample = std::vector<double>;

  // Quantile helper; only declared here, the definition lives elsewhere.
  // classify_outliers() calls it with (k, q) = (1, 4) and (3, 4) and treats the
  // results as the first and third quartile, so it presumably yields the k-th
  // q-quantile of [first, last) -- TODO confirm against the definition,
  // including whether it reorders the range it is given.
  double weighted_average_quantile(int k,
                                   int q,
                                   std::vector<double>::iterator first,
                                   std::vector<double>::iterator last);
  29. template <typename Iterator>
  30. OutlierClassification classify_outliers(Iterator first, Iterator last) {
  31. std::vector<double> copy(first, last);
  32. auto q1 = weighted_average_quantile(1, 4, copy.begin(), copy.end());
  33. auto q3 = weighted_average_quantile(3, 4, copy.begin(), copy.end());
  34. auto iqr = q3 - q1;
  35. auto los = q1 - (iqr * 3.);
  36. auto lom = q1 - (iqr * 1.5);
  37. auto him = q3 + (iqr * 1.5);
  38. auto his = q3 + (iqr * 3.);
  39. OutlierClassification o;
  40. for (; first != last; ++first) {
  41. auto&& t = *first;
  42. if (t < los) ++o.low_severe;
  43. else if (t < lom) ++o.low_mild;
  44. else if (t > his) ++o.high_severe;
  45. else if (t > him) ++o.high_mild;
  46. ++o.samples_seen;
  47. }
  48. return o;
  49. }
  // Arithmetic mean of [first, last): the values are summed left to right in
  // double precision and divided by the element count (last - first).
  // An empty range divides zero by zero.
  template <typename Iterator>
  double mean(Iterator first, Iterator last) {
      auto const element_count = last - first;

      double total = 0.;
      for (auto it = first; it != last; ++it) {
          total = total + *it;
      }
      return total / element_count;
  }
  // Bootstrap resampling: evaluates `estimator` on `resamples` synthetic data
  // sets, each built by drawing (last - first) elements from [first, last) with
  // replacement using `rng`, and returns the estimates sorted ascending.
  //
  // rng        uniform random bit generator; advanced by every draw
  // resamples  number of synthetic data sets; values <= 0 yield an empty result
  // first,last random-access range to draw from
  // estimator  callable taking a std::vector<double> iterator pair and
  //            returning a value convertible to double
  //
  // An empty input range is handled without touching the generator: the
  // estimator is simply evaluated on an empty range each round.
  template <typename URng, typename Iterator, typename Estimator>
  sample resample(URng& rng, int resamples, Iterator first, Iterator last, Estimator& estimator) {
      auto n = last - first;

      sample out;
      if (resamples <= 0) {
          return out;
      }
      out.reserve(static_cast<std::size_t>(resamples));

      // uniform_int_distribution requires lower <= upper, so the upper bound is
      // pinned to 0 when there is nothing to draw from; in that case the
      // distribution is never invoked because generate_n runs zero times.
      std::uniform_int_distribution<decltype(n)> dist(0, n > 0 ? n - 1 : 0);

      // One scratch buffer is refilled for every round instead of reallocating.
      std::vector<double> resampled;
      if (n > 0) {
          resampled.reserve(static_cast<std::size_t>(n));
      }

      for (int round = 0; round < resamples; ++round) {
          resampled.clear();
          std::generate_n(std::back_inserter(resampled), n, [first, &dist, &rng] { return first[dist(rng)]; });
          out.push_back(estimator(resampled.begin(), resampled.end()));
      }

      std::sort(out.begin(), out.end());
      return out;
  }
  // Leave-one-out (jackknife) estimates: for each element of [first, last) the
  // estimator is evaluated on the remaining elements, and the results are
  // returned in input order.
  //
  // The element to leave out is swapped into the front slot and the estimator
  // is run on [first + 1, last), so no copy of the data is made. As a side
  // effect the caller's range is left rotated right by one position (the last
  // element ends up first); the set of values is unchanged.
  //
  // An empty range yields an empty result. A single-element range calls the
  // estimator once with an empty range.
  template <typename Estimator, typename Iterator>
  sample jackknife(Estimator&& estimator, Iterator first, Iterator last) {
      sample results;
      if (first == last) {
          // Nothing to leave out; also avoids stepping an iterator past the end.
          return results;
      }

      auto const n = last - first;
      results.reserve(static_cast<std::size_t>(n));

      auto const second = std::next(first);
      for (auto it = first; it != last; ++it) {
          // Park the element being left out at the front, outside [second, last).
          std::iter_swap(it, first);
          results.push_back(estimator(second, last));
      }
      return results;
  }
  // Cumulative distribution function of the standard normal distribution,
  // expressed through the complementary error function:
  //   Phi(x) = erfc(-x / sqrt(2)) / 2
  inline double normal_cdf(double x) {
      double const root_two = std::sqrt(2.0);
      double const scaled = -x / root_two;
      return 0.5 * std::erfc(scaled);
  }
  // Only declared here; defined elsewhere. Going by the name this is the
  // inverse of the complementary error function -- TODO confirm the accepted
  // domain against the definition.
  double erfc_inv(double x);

  // Only declared here; defined elsewhere. bootstrap() passes probabilities
  // (a fraction of resamples, and (1 - confidence_level) / 2) and adds the
  // results to z-values fed into normal_cdf(), so this is presumably the
  // standard normal quantile function -- TODO confirm behaviour at p = 0 and
  // p = 1 against the definition.
  double normal_quantile(double p);
  88. template <typename Iterator, typename Estimator>
  89. Estimate<double> bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) {
  90. auto n_samples = last - first;
  91. double point = estimator(first, last);
  92. // Degenerate case with a single sample
  93. if (n_samples == 1) return { point, point, point, confidence_level };
  94. sample jack = jackknife(estimator, first, last);
  95. double jack_mean = mean(jack.begin(), jack.end());
  96. double sum_squares, sum_cubes;
  97. std::tie(sum_squares, sum_cubes) = std::accumulate(jack.begin(), jack.end(), std::make_pair(0., 0.), [jack_mean](std::pair<double, double> sqcb, double x) -> std::pair<double, double> {
  98. auto d = jack_mean - x;
  99. auto d2 = d * d;
  100. auto d3 = d2 * d;
  101. return { sqcb.first + d2, sqcb.second + d3 };
  102. });
  103. double accel = sum_cubes / (6 * std::pow(sum_squares, 1.5));
  104. int n = static_cast<int>(resample.size());
  105. double prob_n = std::count_if(resample.begin(), resample.end(), [point](double x) { return x < point; }) / (double)n;
  106. // degenerate case with uniform samples
  107. if (prob_n == 0) return { point, point, point, confidence_level };
  108. double bias = normal_quantile(prob_n);
  109. double z1 = normal_quantile((1. - confidence_level) / 2.);
  110. auto cumn = [n](double x) -> int {
  111. return std::lround(normal_cdf(x) * n); };
  112. auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); };
  113. double b1 = bias + z1;
  114. double b2 = bias - z1;
  115. double a1 = a(b1);
  116. double a2 = a(b2);
  117. auto lo = (std::max)(cumn(a1), 0);
  118. auto hi = (std::min)(cumn(a2), n - 1);
  119. return { point, resample[lo], resample[hi], confidence_level };
  120. }
  121. double outlier_variance(Estimate<double> mean, Estimate<double> stddev, int n);
  122. struct bootstrap_analysis {
  123. Estimate<double> mean;
  124. Estimate<double> standard_deviation;
  125. double outlier_variance;
  126. };
  127. bootstrap_analysis analyse_samples(double confidence_level, int n_resamples, std::vector<double>::iterator first, std::vector<double>::iterator last);
  128. } // namespace Detail
  129. } // namespace Benchmark
  130. } // namespace Catch
  131. #endif // TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED