diff --git a/include/bench_common/perfcounters.hpp b/include/bench_common/perfcounters.hpp index e79faa3cd008fc92a2691c14ae1287bce8d9e0ad..115b93d2fe6115f827ae40541af1b0a2d52e46d7 100644 --- a/include/bench_common/perfcounters.hpp +++ b/include/bench_common/perfcounters.hpp @@ -1,14 +1,19 @@ #ifndef PERFCOUNTERS_HPP #define PERFCOUNTERS_HPP +extern "C" +{ #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> #include <stdint.h> #include <sys/ioctl.h> #include <sys/syscall.h> #include <unistd.h> +} +#include <algorithm> #include <array> +#include <functional> #include <limits> #include <string> #include <unordered_map> @@ -42,26 +47,34 @@ namespace perf counters(); - bool add_leader(type leader); - bool add_counter(type counter); + auto add_leader(type leader) + -> bool; + auto add_counter(type counter) + -> bool; - bool finalize(); + auto finalize() + -> bool; - inline std::vector<uint64_t> read() + auto read() { std::vector<uint64_t> counters(provided_counter_count); - std::vector<std::uint8_t> counter_data(read_format_size); + std::vector<std::uint64_t> counter_data(read_format_size); if(read_format_size != ::read(fds[leader_index], counter_data.data(), read_format_size)) { - return std::vector<uint64_t>(provided_counter_count); + //error + //return std::vector<uint64_t>(provided_counter_count); + throw std::runtime_error("Failed to read counter data"); } + constexpr std::size_t id_off = 2; + constexpr std::size_t val_off = 1; + for(size_t i = 0; i < provided_counter_count; i++) { for(size_t j = 0; j < provided_counter_count; j++) { - if(get_counter_id(counter_data.data(),i) == ids[j]) + if(counter_data[i*2 + id_off] == ids[j]) { - counters[j] = get_counter_value(counter_data.data(),i); + counters[j] = counter_data[i*2 + val_off]; } } } @@ -73,13 +86,16 @@ namespace perf tmpcounters = read(); } - inline std::vector<uint64_t> toc() + inline auto toc() { results = read(); + /* for(std::size_t i = 0; i < provided_counter_count; i ++) { results[i] -= tmpcounters[i]; - } + }*/ + std::transform(results.begin(),results.end(),tmpcounters.begin(),results.begin(), + std::minus<std::uint64_t>{}); return results; } @@ -156,9 +172,6 @@ namespace perf std::unordered_map<type,std::size_t> offsets; std::size_t leader_index = std::numeric_limits<std::size_t>::max(); - - std::uint64_t get_counter_id(void* counter_data, std::size_t offset); - std::uint64_t get_counter_value(void* counter_data, std::size_t offset); }; } diff --git a/src/perfcounters.cpp b/src/perfcounters.cpp index 2730d05ff507b675e8bc93a6a25e9256bccf4702..d6ac9b401a63e2a17d164e6a74d0ef497a53d5b3 100644 --- a/src/perfcounters.cpp +++ b/src/perfcounters.cpp @@ -12,16 +12,16 @@ perf::counters::counters() evattrs{}, names { - {"cycles", - "instructions", - "l1d reads", - "l1d read misses", - "l1d writes", - "l1d write misses", - "lld reads", - "lld read misses", - "lld writes", - "lld write misses"} + {"cycles", // NOLINT(fuchsia-default-arguments-calls) + "instructions", // NOLINT(fuchsia-default-arguments-calls) + "l1d reads", // NOLINT(fuchsia-default-arguments-calls) + "l1d read misses", // NOLINT(fuchsia-default-arguments-calls) + "l1d writes", // NOLINT(fuchsia-default-arguments-calls) + "l1d write misses", // NOLINT(fuchsia-default-arguments-calls) + "lld reads", // NOLINT(fuchsia-default-arguments-calls) + "lld read misses", // NOLINT(fuchsia-default-arguments-calls) + "lld writes", // NOLINT(fuchsia-default-arguments-calls) + "lld write misses"} // NOLINT(fuchsia-default-arguments-calls) } { perf_event_attr attr{}; @@ -32,7 +32,7 @@ perf::counters::counters() attr.exclude_kernel = 1; attr.exclude_hv = 1; attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - evattrs[static_cast<std::uint32_t>(type::cycles)] = attr; + evattrs.at(static_cast<std::uint32_t>(type::cycles)) = attr; attr.type = PERF_TYPE_HARDWARE; attr.size = sizeof(perf_event_attr); @@ -41,8 +41,9 @@ perf::counters::counters() attr.exclude_kernel = 1; attr.exclude_hv = 1; attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - evattrs[static_cast<std::uint32_t>(type::instructions)] = attr; + evattrs.at(static_cast<std::uint32_t>(type::instructions)) = attr; + constexpr std::size_t cache_counter_count = count - 2; /* From manpage: * @@ -53,118 +54,117 @@ perf::counters::counters() * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) | * (perf_hw_cache_op_result_id << 16) */ - std::array<std::uint64_t,8> configs = + + constexpr auto make_cache_config = [](std::uint32_t cache, std::uint32_t op, std::uint32_t result) { - {(PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), - (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), - (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), - (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), - (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)} + //NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers,hicpp-signed-bitwise) + return (cache | (op << 8) | (result <<16)); }; - for(std::size_t i = 0; i < 8; i++) + + //NOLINTNEXTLINE(cppcoreguidelines-macro-usage) + #define CACHE_CONFIG(CACHE,OP,RESULT)\ + make_cache_config(PERF_COUNT_HW_CACHE_ ##CACHE,\ + PERF_COUNT_HW_CACHE_OP_ ##OP,\ + PERF_COUNT_HW_CACHE_RESULT_ ##RESULT) + std::array<std::uint64_t, cache_counter_count> configs = + { + CACHE_CONFIG(L1D, READ, ACCESS), + CACHE_CONFIG(L1D, READ, MISS), + CACHE_CONFIG(L1D, WRITE, ACCESS), + CACHE_CONFIG(L1D, WRITE, MISS), + CACHE_CONFIG(LL, READ, ACCESS), + CACHE_CONFIG(LL, READ, MISS), + CACHE_CONFIG(LL, WRITE, ACCESS), + CACHE_CONFIG(LL, WRITE, MISS) + }; + #undef CACHE_CONFIG + + for(std::size_t i = 0; i < cache_counter_count; i++) { attr.type = PERF_TYPE_HW_CACHE; attr.size = sizeof(perf_event_attr); - attr.config = configs[i]; + attr.config = configs.at(i); attr.disabled = 1; attr.exclude_kernel = 1; attr.exclude_hv = 1; attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - evattrs[static_cast<std::uint32_t>(type::l1dreads) + i] = attr; - } - - for(size_t i = 0; i < count; i++) - { - fds[i] = -1; - ids[i] = 0; + evattrs.at(static_cast<std::uint32_t>(type::l1dreads) + i) = attr; } + fds.fill(-1); + ids.fill(0); } -bool perf::counters::add_leader(type leader) +auto perf::counters::add_leader(type leader) + -> bool { if(std::numeric_limits<std::size_t>::max() != leader_index) { - std::cout << "Leader already defined, can't set other leader" << std::endl; + std::cout << "Leader already defined, can't set other leader\n"; return false; } auto idx = static_cast<std::uint32_t>(leader); - auto attr = evattrs[idx]; - fds[idx] = syscall(__NR_perf_event_open, + auto attr = evattrs.at(idx); + fds.at(idx) = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); - if(-1 == fds[idx]) + if(-1 == fds.at(idx)) { - std::cout << "Counters broken (" << names[idx] << ")" << std::endl; + std::cout << "Counters broken (" << names.at(idx) << ")\n"; return false; } read_format_size += 2*sizeof(std::uint64_t); provided_counter_count++; - ioctl(fds[idx], PERF_EVENT_IOC_ID, &ids[idx]); + ioctl(fds.at(idx), PERF_EVENT_IOC_ID, &ids.at(idx)); leader_index = idx; offsets.emplace(leader,current_offset++); - provided_counter_names.push_back(names[idx]); + provided_counter_names.push_back(names.at(idx)); return true; } -bool perf::counters::add_counter(type counter) +auto perf::counters::add_counter(type counter) + -> bool { if(std::numeric_limits<std::size_t>::max() == leader_index) { - std::cout << "No leader defined, can't add counter" << std::endl; + std::cout << "No leader defined, can't add counter\n"; return false; } auto idx = static_cast<std::uint32_t>(counter); - auto attr = evattrs[idx]; - fds[idx] = syscall(__NR_perf_event_open, - &attr, 0, -1, fds[leader_index], 0); + auto attr = evattrs.at(idx); + fds.at(idx) = syscall(__NR_perf_event_open, + &attr, 0, -1, fds.at(leader_index), 0); - if(-1 == fds[idx]) + if(-1 == fds.at(idx)) { - std::cout << "Counters broken (" << names[idx] << ")" << std::endl; + std::cout << "Counters broken (" << names.at(idx) << ")\n"; return false; } read_format_size += 2*sizeof(std::uint64_t); provided_counter_count++; - ioctl(fds[idx], PERF_EVENT_IOC_ID, &ids[idx]); + ioctl(fds.at(idx), PERF_EVENT_IOC_ID, &ids.at(idx)); offsets.emplace(counter,current_offset++); - provided_counter_names.push_back(names[idx]); + provided_counter_names.push_back(names.at(idx)); return true; } -bool perf::counters::finalize() +auto perf::counters::finalize() + -> bool { - ioctl(fds[leader_index], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); - ioctl(fds[leader_index], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + ioctl(fds.at(leader_index), PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + ioctl(fds.at(leader_index), PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); tic(); overheads = toc(); return true; } - -std::uint64_t perf::counters::get_counter_id(void* counter_data, std::size_t offset) -{ - std::uint64_t id; - memcpy(&id, static_cast<std::uint8_t*>(counter_data)+sizeof(std::uint64_t)*(offset*2+2), sizeof(std::uint64_t)); - return id; -} - -std::uint64_t perf::counters::get_counter_value(void* counter_data, std::size_t offset) -{ - std::uint64_t value; - memcpy(&value, static_cast<std::uint8_t*>(counter_data)+sizeof(std::uint64_t)*(offset*2+1), sizeof(std::uint64_t)); - return value; -}