From 207b9c7aeccd70a1c11a27504e1627ef9f8b5938 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 14 Feb 2018 13:44:41 -0700 Subject: [PATCH] Improve State packing: put important members on first cache line. (#527) * Improve State packing: put important members on first cache line. This patch does a few different things to ensure commonly accessed data is on the first cache line of the `State` object. First, it moves the `error_occurred_` member to reside after the `started_` and `finished_` bools, since there was internal padding there that was unused. Second, it moves `batch_leftover_` and `max_iterations` further up in the struct declaration. These variables are used in the calculation of `iterations()` which users might call within the loop. Therefore it's more important they exist on the first cache line. Finally, this patch turns the bool members into bitfields. Although this shouldn't have much of an effect currently, because padding is still needed between the last bool and the first size_t, it should help in future changes that require more "bool like" members. * Remove bitfield change for now * Move bools (and their padding) to end of "first cache line" vars. I think it makes the most sense to move the padding required following the group of bools to the end of the variables we want on the first cache line. This also means that the `total_iterations_` variable, which is the most accessed, has the same address as the State object. * Fix static assertion after moving bools --- CMakeLists.txt | 7 +++++++ cmake/AddCXXCompilerFlag.cmake | 10 ++++++++++ include/benchmark/benchmark.h | 30 ++++++++++++++++++------------ src/benchmark.cc | 15 ++++++++++----- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa082676..e25f7fae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,6 +120,13 @@ else() cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) endif() + # GCC doesn't report invalid -Wno-warning flags, so check the positive + # version, and if it exists add the negative. + check_cxx_warning_flag(-Winvalid-offsetof) + if (HAVE_CXX_FLAG_WINVALID_OFFSETOF) + add_cxx_compiler_flag(-Wno-invalid-offsetof) + endif() + # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a # predefined macro, which turns on all of the wonderful libc extensions. # However g++ doesn't do this in Cygwin so we have to define it ourselfs diff --git a/cmake/AddCXXCompilerFlag.cmake b/cmake/AddCXXCompilerFlag.cmake index 17d5f3dc..d0d20998 100644 --- a/cmake/AddCXXCompilerFlag.cmake +++ b/cmake/AddCXXCompilerFlag.cmake @@ -62,3 +62,13 @@ function(add_required_cxx_compiler_flag FLAG) message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler") endif() endfunction() + +function(check_cxx_warning_flag FLAG) + mangle_compiler_flag("${FLAG}" MANGLED_FLAG) + set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + # Add -Werror to ensure the compiler generates an error if the warning flag + # doesn't exist. + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror ${FLAG}") + check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") +endfunction() diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 1825054b..5fed767d 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -573,13 +573,26 @@ class State { return (max_iterations - total_iterations_ + batch_leftover_); } - private: +private: // items we expect on the first cache line (ie 64 bytes of the struct) + + // When total_iterations_ is 0, KeepRunning() and friends will return false. + // May be larger than max_iterations. + size_t total_iterations_; + + // When using KeepRunningBatch(), batch_leftover_ holds the number of + // iterations beyond max_iters that were run. Used to track + // completed_iterations_ accurately. + size_t batch_leftover_; + +public: + const size_t max_iterations; + +private: bool started_; bool finished_; - // When total_iterations_ is 0, KeepRunning() and friends will return false. - size_t total_iterations_; - // May be larger than max_iterations. + bool error_occurred_; +private: // items we don't need on the first cache line std::vector range_; size_t bytes_processed_; @@ -587,13 +600,6 @@ class State { int complexity_n_; - bool error_occurred_; - - // When using KeepRunningBatch(), batch_leftover_ holds the number of - // iterations beyond max_iters that were run. Used to track - // completed_iterations_ accurately. - size_t batch_leftover_; - public: // Container for user-defined counters. UserCounters counters; @@ -601,7 +607,7 @@ class State { const int thread_index; // Number of threads concurrently executing the benchmark. const int threads; - const size_t max_iterations; + // TODO(EricWF) make me private State(size_t max_iters, const std::vector& ranges, int thread_i, diff --git a/src/benchmark.cc b/src/benchmark.cc index 8879204a..e0fb6f92 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -397,23 +397,28 @@ std::vector RunBenchmark( State::State(size_t max_iters, const std::vector& ranges, int thread_i, int n_threads, internal::ThreadTimer* timer, internal::ThreadManager* manager) - : started_(false), + : total_iterations_(0), + batch_leftover_(0), + max_iterations(max_iters), + started_(false), finished_(false), - total_iterations_(0), + error_occurred_(false), range_(ranges), bytes_processed_(0), items_processed_(0), complexity_n_(0), - error_occurred_(false), - batch_leftover_(0), counters(), thread_index(thread_i), threads(n_threads), - max_iterations(max_iters), timer_(timer), manager_(manager) { CHECK(max_iterations != 0) << "At least one iteration must be run"; CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + + // Offset tests to ensure commonly accessed data is on the first cache line. + const int cache_line_size = 64; + static_assert(offsetof(State, error_occurred_) <= + (cache_line_size - sizeof(error_occurred_)), ""); } void State::PauseTiming() {