mirror of
https://github.com/google/benchmark.git
synced 2024-12-27 13:00:36 +08:00
Improve State packing: put important members on first cache line. (#527)
* Improve State packing: put important members on first cache line. This patch does a few different things to ensure commonly accessed data is on the first cache line of the `State` object. First, it moves the `error_occurred_` member to reside after the `started_` and `finished_` bools, since there was internal padding there that was unused. Second, it moves `batch_leftover_` and `max_iterations` further up in the struct declaration. These variables are used in the calculation of `iterations()` which users might call within the loop. Therefore it's more important they exist on the first cache line. Finally, this patch turns the bool members into bitfields. Although this shouldn't have much of an effect currently, because padding is still needed between the last bool and the first size_t, it should help in future changes that require more "bool like" members. * Remove bitfield change for now * Move bools (and their padding) to end of "first cache line" vars. I think it makes the most sense to move the padding required following the group of bools to the end of the variables we want on the first cache line. This also means that the `total_iterations_` variable, which is the most accessed, has the same address as the State object. * Fix static assertion after moving bools
This commit is contained in:
parent
3924ee7b8a
commit
207b9c7aec
@ -120,6 +120,13 @@ else()
|
||||
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
|
||||
endif()
|
||||
|
||||
# GCC doesn't report invalid -Wno-warning flags, so check the positive
|
||||
# version, and if it exists add the negative.
|
||||
check_cxx_warning_flag(-Winvalid-offsetof)
|
||||
if (HAVE_CXX_FLAG_WINVALID_OFFSETOF)
|
||||
add_cxx_compiler_flag(-Wno-invalid-offsetof)
|
||||
endif()
|
||||
|
||||
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
|
||||
# predefined macro, which turns on all of the wonderful libc extensions.
|
||||
# However g++ doesn't do this in Cygwin so we have to define it ourselfs
|
||||
|
@ -62,3 +62,13 @@ function(add_required_cxx_compiler_flag FLAG)
|
||||
message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(check_cxx_warning_flag FLAG)
|
||||
mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
|
||||
# Add -Werror to ensure the compiler generates an error if the warning flag
|
||||
# doesn't exist.
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror ${FLAG}")
|
||||
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
|
||||
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
|
||||
endfunction()
|
||||
|
@ -573,13 +573,26 @@ class State {
|
||||
return (max_iterations - total_iterations_ + batch_leftover_);
|
||||
}
|
||||
|
||||
private:
|
||||
private: // items we expect on the first cache line (ie 64 bytes of the struct)
|
||||
|
||||
// When total_iterations_ is 0, KeepRunning() and friends will return false.
|
||||
// May be larger than max_iterations.
|
||||
size_t total_iterations_;
|
||||
|
||||
// When using KeepRunningBatch(), batch_leftover_ holds the number of
|
||||
// iterations beyond max_iters that were run. Used to track
|
||||
// completed_iterations_ accurately.
|
||||
size_t batch_leftover_;
|
||||
|
||||
public:
|
||||
const size_t max_iterations;
|
||||
|
||||
private:
|
||||
bool started_;
|
||||
bool finished_;
|
||||
// When total_iterations_ is 0, KeepRunning() and friends will return false.
|
||||
size_t total_iterations_;
|
||||
// May be larger than max_iterations.
|
||||
bool error_occurred_;
|
||||
|
||||
private: // items we don't need on the first cache line
|
||||
std::vector<int> range_;
|
||||
|
||||
size_t bytes_processed_;
|
||||
@ -587,13 +600,6 @@ class State {
|
||||
|
||||
int complexity_n_;
|
||||
|
||||
bool error_occurred_;
|
||||
|
||||
// When using KeepRunningBatch(), batch_leftover_ holds the number of
|
||||
// iterations beyond max_iters that were run. Used to track
|
||||
// completed_iterations_ accurately.
|
||||
size_t batch_leftover_;
|
||||
|
||||
public:
|
||||
// Container for user-defined counters.
|
||||
UserCounters counters;
|
||||
@ -601,7 +607,7 @@ class State {
|
||||
const int thread_index;
|
||||
// Number of threads concurrently executing the benchmark.
|
||||
const int threads;
|
||||
const size_t max_iterations;
|
||||
|
||||
|
||||
// TODO(EricWF) make me private
|
||||
State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
|
||||
|
@ -397,23 +397,28 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
|
||||
State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
|
||||
int n_threads, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager)
|
||||
: started_(false),
|
||||
: total_iterations_(0),
|
||||
batch_leftover_(0),
|
||||
max_iterations(max_iters),
|
||||
started_(false),
|
||||
finished_(false),
|
||||
total_iterations_(0),
|
||||
error_occurred_(false),
|
||||
range_(ranges),
|
||||
bytes_processed_(0),
|
||||
items_processed_(0),
|
||||
complexity_n_(0),
|
||||
error_occurred_(false),
|
||||
batch_leftover_(0),
|
||||
counters(),
|
||||
thread_index(thread_i),
|
||||
threads(n_threads),
|
||||
max_iterations(max_iters),
|
||||
timer_(timer),
|
||||
manager_(manager) {
|
||||
CHECK(max_iterations != 0) << "At least one iteration must be run";
|
||||
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
|
||||
|
||||
// Offset tests to ensure commonly accessed data is on the first cache line.
|
||||
const int cache_line_size = 64;
|
||||
static_assert(offsetof(State, error_occurred_) <=
|
||||
(cache_line_size - sizeof(error_occurred_)), "");
|
||||
}
|
||||
|
||||
void State::PauseTiming() {
|
||||
|
Loading…
Reference in New Issue
Block a user