You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

274 lines
7.8 KiB

3 weeks ago
  1. #include "benchmark/benchmark.h"
  2. #include <assert.h>
  3. #include <math.h>
  4. #include <stdint.h>
  5. #include <chrono>
  6. #include <complex>
  7. #include <cstdlib>
  8. #include <iostream>
  9. #include <limits>
  10. #include <list>
  11. #include <map>
  12. #include <mutex>
  13. #include <set>
  14. #include <sstream>
  15. #include <string>
  16. #include <thread>
  17. #include <utility>
  18. #include <vector>
  // Function attribute that prevents inlining: expands to GCC's `noinline`
  // attribute when __GNUC__ is defined and to nothing on other compilers.
  #if defined(__GNUC__)
  #define BENCHMARK_NOINLINE __attribute__((noinline))
  #else
  #define BENCHMARK_NOINLINE
  #endif

  namespace {

  // Returns n! computed recursively.
  //
  // Any n <= 1 (including 0 and negative values) yields 1, so the recursion
  // always terminates. The result is an int, so it overflows for n > 12 when
  // int is 32 bits wide.
  int BENCHMARK_NOINLINE Factorial(int n) {
    return (n <= 1) ? 1 : n * Factorial(n - 1);
  }

  // Approximates pi by summing `depth` terms of an alternating series.
  //
  // The i == 0 and i == 1 terms are (-1)/(-1) and 1/1, i.e. 1 each; the
  // remaining terms are -1/3, +1/5, -1/7, ... Subtracting 1 from the sum
  // therefore leaves 1 - 1/3 + 1/5 - ..., which is then scaled by 4. With
  // depth == 0 the loop does not run and the result is -4.
  double CalculatePi(int depth) {
    double pi = 0.0;
    for (int i = 0; i < depth; ++i) {
      // -1 for even i, +1 for odd i.
      const double numerator = static_cast<double>(((i % 2) * 2) - 1);
      const double denominator = static_cast<double>((2 * i) - 1);
      pi += numerator / denominator;
    }
    return (pi - 1.0) * 4;
  }

  // Builds the set {0, 1, ..., size - 1}; despite the name, the contents are
  // not random. Returns an empty set when size <= 0.
  std::set<int64_t> ConstructRandomSet(int64_t size) {
    std::set<int64_t> s;
    // The index has the same type as `size`, so it cannot overflow before
    // reaching the bound. Keys arrive in ascending order, so each one is
    // inserted with end() as the position hint.
    for (int64_t i = 0; i < size; ++i) s.insert(s.end(), i);
    return s;
  }

  // Taken by BM_SetupTeardown around every access to test_vector made inside
  // its timed loop.
  std::mutex test_vector_mu;

  // Scratch vector shared by the threads of a benchmark run. Thread 0 of
  // BM_SetupTeardown / BM_ParallelMemset allocates it before the timed loop
  // and deletes it afterwards.
  std::vector<int>* test_vector = nullptr;

  }  // end namespace
  45. static void BM_Factorial(benchmark::State& state) {
  46. int fac_42 = 0;
  47. for (auto _ : state) fac_42 = Factorial(8);
  48. // Prevent compiler optimizations
  49. std::stringstream ss;
  50. ss << fac_42;
  51. state.SetLabel(ss.str());
  52. }
  53. BENCHMARK(BM_Factorial);
  54. BENCHMARK(BM_Factorial)->UseRealTime();
  55. static void BM_CalculatePiRange(benchmark::State& state) {
  56. double pi = 0.0;
  57. for (auto _ : state) pi = CalculatePi(static_cast<int>(state.range(0)));
  58. std::stringstream ss;
  59. ss << pi;
  60. state.SetLabel(ss.str());
  61. }
  62. BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
  63. static void BM_CalculatePi(benchmark::State& state) {
  64. static const int depth = 1024;
  65. for (auto _ : state) {
  66. double pi = CalculatePi(static_cast<int>(depth));
  67. benchmark::DoNotOptimize(pi);
  68. }
  69. }
  70. BENCHMARK(BM_CalculatePi)->Threads(8);
  71. BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
  72. BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
  73. static void BM_SetInsert(benchmark::State& state) {
  74. std::set<int64_t> data;
  75. for (auto _ : state) {
  76. state.PauseTiming();
  77. data = ConstructRandomSet(state.range(0));
  78. state.ResumeTiming();
  79. for (int j = 0; j < state.range(1); ++j) data.insert(rand());
  80. }
  81. state.SetItemsProcessed(state.iterations() * state.range(1));
  82. state.SetBytesProcessed(state.iterations() * state.range(1) *
  83. static_cast<int64_t>(sizeof(int)));
  84. }
  85. // Test many inserts at once to reduce the total iterations needed. Otherwise,
  86. // the slower, non-timed part of each iteration will make the benchmark take
  87. // forever.
  88. BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
  89. template <typename Container,
  90. typename ValueType = typename Container::value_type>
  91. static void BM_Sequential(benchmark::State& state) {
  92. ValueType v = 42;
  93. for (auto _ : state) {
  94. Container c;
  95. for (int64_t i = state.range(0); --i;) c.push_back(v);
  96. }
  97. const int64_t items_processed = state.iterations() * state.range(0);
  98. state.SetItemsProcessed(items_processed);
  99. state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v)));
  100. }
  101. BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
  102. ->Range(1 << 0, 1 << 10);
  103. BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
  104. // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
  105. #ifdef BENCHMARK_HAS_CXX11
  106. BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
  107. #endif
  108. static void BM_StringCompare(benchmark::State& state) {
  109. size_t len = static_cast<size_t>(state.range(0));
  110. std::string s1(len, '-');
  111. std::string s2(len, '-');
  112. for (auto _ : state) {
  113. auto comp = s1.compare(s2);
  114. benchmark::DoNotOptimize(comp);
  115. }
  116. }
  117. BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
  118. static void BM_SetupTeardown(benchmark::State& state) {
  119. if (state.thread_index() == 0) {
  120. // No need to lock test_vector_mu here as this is running single-threaded.
  121. test_vector = new std::vector<int>();
  122. }
  123. int i = 0;
  124. for (auto _ : state) {
  125. std::lock_guard<std::mutex> l(test_vector_mu);
  126. if (i % 2 == 0)
  127. test_vector->push_back(i);
  128. else
  129. test_vector->pop_back();
  130. ++i;
  131. }
  132. if (state.thread_index() == 0) {
  133. delete test_vector;
  134. }
  135. }
  136. BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
  137. static void BM_LongTest(benchmark::State& state) {
  138. double tracker = 0.0;
  139. for (auto _ : state) {
  140. for (int i = 0; i < state.range(0); ++i)
  141. benchmark::DoNotOptimize(tracker += i);
  142. }
  143. }
  144. BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28);
  145. static void BM_ParallelMemset(benchmark::State& state) {
  146. int64_t size = state.range(0) / static_cast<int64_t>(sizeof(int));
  147. int thread_size = static_cast<int>(size) / state.threads();
  148. int from = thread_size * state.thread_index();
  149. int to = from + thread_size;
  150. if (state.thread_index() == 0) {
  151. test_vector = new std::vector<int>(static_cast<size_t>(size));
  152. }
  153. for (auto _ : state) {
  154. for (int i = from; i < to; i++) {
  155. // No need to lock test_vector_mu as ranges
  156. // do not overlap between threads.
  157. benchmark::DoNotOptimize(test_vector->at(static_cast<size_t>(i)) = 1);
  158. }
  159. }
  160. if (state.thread_index() == 0) {
  161. delete test_vector;
  162. }
  163. }
  164. BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
  165. static void BM_ManualTiming(benchmark::State& state) {
  166. int64_t slept_for = 0;
  167. int64_t microseconds = state.range(0);
  168. std::chrono::duration<double, std::micro> sleep_duration{
  169. static_cast<double>(microseconds)};
  170. for (auto _ : state) {
  171. auto start = std::chrono::high_resolution_clock::now();
  172. // Simulate some useful workload with a sleep
  173. std::this_thread::sleep_for(
  174. std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration));
  175. auto end = std::chrono::high_resolution_clock::now();
  176. auto elapsed =
  177. std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
  178. state.SetIterationTime(elapsed.count());
  179. slept_for += microseconds;
  180. }
  181. state.SetItemsProcessed(slept_for);
  182. }
  183. BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
  184. BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
  185. #ifdef BENCHMARK_HAS_CXX11
  186. template <class... Args>
  187. void BM_with_args(benchmark::State& state, Args&&...) {
  188. for (auto _ : state) {
  189. }
  190. }
  191. BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
  192. BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"),
  193. std::pair<int, double>(42, 3.8));
  194. void BM_non_template_args(benchmark::State& state, int, double) {
  195. while (state.KeepRunning()) {
  196. }
  197. }
  198. BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
  199. #endif // BENCHMARK_HAS_CXX11
  200. static void BM_DenseThreadRanges(benchmark::State& st) {
  201. switch (st.range(0)) {
  202. case 1:
  203. assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3);
  204. break;
  205. case 2:
  206. assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4);
  207. break;
  208. case 3:
  209. assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 ||
  210. st.threads() == 14);
  211. break;
  212. default:
  213. assert(false && "Invalid test case number");
  214. }
  215. while (st.KeepRunning()) {
  216. }
  217. }
  218. BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3);
  219. BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
  220. BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);
  221. static void BM_BenchmarkName(benchmark::State& state) {
  222. for (auto _ : state) {
  223. }
  224. // Check that the benchmark name is passed correctly to `state`.
  225. assert("BM_BenchmarkName" == state.name());
  226. }
  227. BENCHMARK(BM_BenchmarkName);
  228. // regression test for #1446
  229. template <typename type>
  230. static void BM_templated_test(benchmark::State& state) {
  231. for (auto _ : state) {
  232. type created_string;
  233. benchmark::DoNotOptimize(created_string);
  234. }
  235. }
  236. static auto BM_templated_test_double = BM_templated_test<std::complex<double>>;
  237. BENCHMARK(BM_templated_test_double);
  238. BENCHMARK_MAIN();