diff --git a/include/bench_common/perfcounters.hpp b/include/bench_common/perfcounters.hpp
index e79faa3cd008fc92a2691c14ae1287bce8d9e0ad..115b93d2fe6115f827ae40541af1b0a2d52e46d7 100644
--- a/include/bench_common/perfcounters.hpp
+++ b/include/bench_common/perfcounters.hpp
@@ -1,14 +1,20 @@
 #ifndef PERFCOUNTERS_HPP
 #define PERFCOUNTERS_HPP
 
+extern "C"
+{
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
 #include <stdint.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+}
 
+#include <algorithm>
 #include <array>
+#include <functional>
 #include <limits>
+#include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -42,26 +48,42 @@ namespace perf
 
         counters();
 
-        bool add_leader(type leader);
-        bool add_counter(type counter);
+        auto add_leader(type leader)
+            -> bool;
+        auto add_counter(type counter)
+            -> bool;
 
-        bool finalize();
+        auto finalize()
+            -> bool;
 
-        inline std::vector<uint64_t> read()
+        // Reads the counter group through the leader's file descriptor and
+        // collects one value per registered counter.
+        // Throws std::runtime_error if the kernel does not deliver exactly
+        // read_format_size bytes.
+        auto read()
+            -> std::vector<std::uint64_t>
         {
             std::vector<uint64_t> counters(provided_counter_count);
-            std::vector<std::uint8_t> counter_data(read_format_size);
+            // read_format_size counts bytes, the buffer holds 64-bit words;
+            // round up so the byte count handed to ::read() always fits.
+            constexpr std::size_t word_size = sizeof(std::uint64_t);
+            std::vector<std::uint64_t> counter_data((read_format_size + word_size - 1) / word_size);
             if(read_format_size != ::read(fds[leader_index], counter_data.data(), read_format_size))
             {
-                return std::vector<uint64_t>(provided_counter_count);
+                throw std::runtime_error("Failed to read counter data");
             }
+            // Group read layout for PERF_FORMAT_GROUP | PERF_FORMAT_ID:
+            // word 0 holds the entry count, followed by (value, id) pairs.
+            constexpr std::size_t id_off = 2;
+            constexpr std::size_t val_off = 1;
+
             for(size_t i = 0; i < provided_counter_count; i++)
             {
                 for(size_t j = 0; j < provided_counter_count; j++)
                 {
-                    if(get_counter_id(counter_data.data(),i) == ids[j])
+                    if(counter_data[i*2 + id_off] == ids[j])
                     {
-                        counters[j] = get_counter_value(counter_data.data(),i);
+                        counters[j] = counter_data[i*2 + val_off];
                     }
                 }
             }
@@ -73,13 +87,15 @@ namespace perf
             tmpcounters = read();
         }
 
-        inline std::vector<uint64_t> toc()
+        // Reads the counters again and subtracts, element by element, the
+        // snapshot held in tmpcounters. The differences are stored in
+        // `results` and returned by value.
+        inline auto toc()
+            -> std::vector<std::uint64_t>
         {
             results = read();
-            for(std::size_t i = 0; i < provided_counter_count; i ++)
-            {
-                results[i] -= tmpcounters[i];
-            }
+            std::transform(results.begin(),results.end(),tmpcounters.begin(),results.begin(),
+                    std::minus<std::uint64_t>{});
             return results;
         }
 
@@ -156,9 +172,6 @@ namespace perf
         std::unordered_map<type,std::size_t> offsets;
         std::size_t                          leader_index = std::numeric_limits<std::size_t>::max();
 
-
-        std::uint64_t get_counter_id(void* counter_data, std::size_t offset);
-        std::uint64_t get_counter_value(void* counter_data, std::size_t offset);
     };
 
 }
diff --git a/src/perfcounters.cpp b/src/perfcounters.cpp
index 2730d05ff507b675e8bc93a6a25e9256bccf4702..d6ac9b401a63e2a17d164e6a74d0ef497a53d5b3 100644
--- a/src/perfcounters.cpp
+++ b/src/perfcounters.cpp
@@ -12,16 +12,16 @@ perf::counters::counters()
     evattrs{},
     names
     {
-        {"cycles",
-        "instructions",
-        "l1d reads",
-        "l1d read misses",
-        "l1d writes",
-        "l1d write misses",
-        "lld reads",
-        "lld read misses",
-        "lld writes",
-        "lld write misses"}
+        {"cycles",          // NOLINT(fuchsia-default-arguments-calls)
+        "instructions",     // NOLINT(fuchsia-default-arguments-calls) 
+        "l1d reads",        // NOLINT(fuchsia-default-arguments-calls)
+        "l1d read misses",  // NOLINT(fuchsia-default-arguments-calls)
+        "l1d writes",       // NOLINT(fuchsia-default-arguments-calls)
+        "l1d write misses", // NOLINT(fuchsia-default-arguments-calls)
+        "lld reads",        // NOLINT(fuchsia-default-arguments-calls)
+        "lld read misses",  // NOLINT(fuchsia-default-arguments-calls)
+        "lld writes",       // NOLINT(fuchsia-default-arguments-calls)
+        "lld write misses"} // NOLINT(fuchsia-default-arguments-calls)
     }
 {
     perf_event_attr attr{};
@@ -32,7 +32,7 @@ perf::counters::counters()
     attr.exclude_kernel = 1;
     attr.exclude_hv     = 1;
     attr.read_format    = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
-    evattrs[static_cast<std::uint32_t>(type::cycles)] = attr;
+    evattrs.at(static_cast<std::uint32_t>(type::cycles)) = attr;
 
     attr.type           = PERF_TYPE_HARDWARE;
     attr.size           = sizeof(perf_event_attr);
@@ -41,8 +41,9 @@ perf::counters::counters()
     attr.exclude_kernel = 1;
     attr.exclude_hv     = 1;
     attr.read_format    = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
-    evattrs[static_cast<std::uint32_t>(type::instructions)] = attr;   
+    evattrs.at(static_cast<std::uint32_t>(type::instructions)) = attr;   
 
+    constexpr std::size_t cache_counter_count = count - 2;
 
     /* From manpage:
      *
@@ -53,118 +54,150 @@ perf::counters::counters()
      *         (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
      *         (perf_hw_cache_op_result_id << 16)
      */
-    std::array<std::uint64_t,8> configs =
+
+    constexpr auto make_cache_config = [](std::uint32_t cache, std::uint32_t op, std::uint32_t result)
     {
-        {(PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ  << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
-        (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ  << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
-        (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
-        (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
-        (PERF_COUNT_HW_CACHE_LL)  | (PERF_COUNT_HW_CACHE_OP_READ  << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
-        (PERF_COUNT_HW_CACHE_LL)  | (PERF_COUNT_HW_CACHE_OP_READ  << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
-        (PERF_COUNT_HW_CACHE_LL)  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
-        (PERF_COUNT_HW_CACHE_LL)  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)}
+        //NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers,hicpp-signed-bitwise)
+        return (cache | (op << 8) | (result <<16));
     };
-    for(std::size_t i = 0; i < 8; i++)
+
+    //NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+    #define CACHE_CONFIG(CACHE,OP,RESULT)\
+     make_cache_config(PERF_COUNT_HW_CACHE_ ##CACHE,\
+                       PERF_COUNT_HW_CACHE_OP_ ##OP,\
+                       PERF_COUNT_HW_CACHE_RESULT_ ##RESULT)
+    std::array<std::uint64_t, cache_counter_count> configs =
+    {
+        CACHE_CONFIG(L1D, READ,  ACCESS),
+        CACHE_CONFIG(L1D, READ,  MISS),
+        CACHE_CONFIG(L1D, WRITE, ACCESS),
+        CACHE_CONFIG(L1D, WRITE, MISS),
+        CACHE_CONFIG(LL, READ,  ACCESS),
+        CACHE_CONFIG(LL, READ,  MISS),
+        CACHE_CONFIG(LL, WRITE, ACCESS),
+        CACHE_CONFIG(LL, WRITE, MISS)
+    };
+    #undef CACHE_CONFIG
+
+    for(std::size_t i = 0; i < cache_counter_count; i++)
     {
 
         attr.type           = PERF_TYPE_HW_CACHE;
         attr.size           = sizeof(perf_event_attr);
-        attr.config         = configs[i];
+        attr.config         = configs.at(i);
         attr.disabled       = 1;
         attr.exclude_kernel = 1;
         attr.exclude_hv     = 1;
         attr.read_format    = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
 
-        evattrs[static_cast<std::uint32_t>(type::l1dreads) + i] = attr;   
-    }
-    
-    for(size_t i = 0; i < count; i++)
-    {
-        fds[i] = -1;
-        ids[i] = 0;
+        evattrs.at(static_cast<std::uint32_t>(type::l1dreads) + i) = attr;
     }
 
+    fds.fill(-1);
+    ids.fill(0);
 }
-bool perf::counters::add_leader(type leader)
+// Opens `leader` as the head of a new counter group and records its kernel id.
+// Returns false (after printing a diagnostic) if a leader already exists,
+// the event cannot be opened, or its id cannot be queried.
+auto perf::counters::add_leader(type leader)
+    -> bool
 {
     if(std::numeric_limits<std::size_t>::max() != leader_index)
     {
-        std::cout << "Leader already defined, can't set other leader" << std::endl;
+        std::cout << "Leader already defined, can't set other leader\n";
         return false;
     }
     auto idx = static_cast<std::uint32_t>(leader);
-    auto attr = evattrs[idx];
-    fds[idx] = syscall(__NR_perf_event_open, 
+    auto attr = evattrs.at(idx);
+    fds.at(idx) = syscall(__NR_perf_event_open,
             &attr, 0, -1, -1, 0);
 
-    if(-1 == fds[idx])
+    if(-1 == fds.at(idx))
     {
-        std::cout << "Counters broken (" << names[idx] << ")" << std::endl;
+        std::cout << "Counters broken (" << names.at(idx) << ")\n";
         return false;
     }
+    // Without a valid id, read() can never match this counter's values,
+    // so a failed id query is handled like a failed open.
+    if(-1 == ioctl(fds.at(idx), PERF_EVENT_IOC_ID, &ids.at(idx)))
+    {
+        std::cout << "Counter id unavailable (" << names.at(idx) << ")\n";
+        ::close(fds.at(idx));
+        fds.at(idx) = -1;
+        return false;
+    }
     read_format_size += 2*sizeof(std::uint64_t);
     provided_counter_count++;
 
-    ioctl(fds[idx], PERF_EVENT_IOC_ID, &ids[idx]);
-
     leader_index = idx;
 
     offsets.emplace(leader,current_offset++);
-    provided_counter_names.push_back(names[idx]);
+    provided_counter_names.push_back(names.at(idx));
 
     return true;
 }
 
-bool perf::counters::add_counter(type counter)
+// Opens `counter` as a member of the group headed by the current leader.
+// Returns false (after printing a diagnostic) if no leader exists,
+// the event cannot be opened, or its id cannot be queried.
+auto perf::counters::add_counter(type counter)
+    -> bool
 {
 
     if(std::numeric_limits<std::size_t>::max() == leader_index)
     {
-        std::cout << "No leader defined, can't add counter" << std::endl;
+        std::cout << "No leader defined, can't add counter\n";
         return false;
     }
 
     auto idx = static_cast<std::uint32_t>(counter);
-    auto attr = evattrs[idx];
-    fds[idx] = syscall(__NR_perf_event_open, 
-            &attr, 0, -1, fds[leader_index], 0);
+    auto attr = evattrs.at(idx);
+    fds.at(idx) = syscall(__NR_perf_event_open,
+            &attr, 0, -1, fds.at(leader_index), 0);
 
-    if(-1 == fds[idx])
+    if(-1 == fds.at(idx))
     {
-        std::cout << "Counters broken (" << names[idx] << ")" << std::endl;
+        std::cout << "Counters broken (" << names.at(idx) << ")\n";
         return false;
     }
+    // Without a valid id, read() can never match this counter's values,
+    // so a failed id query is handled like a failed open.
+    if(-1 == ioctl(fds.at(idx), PERF_EVENT_IOC_ID, &ids.at(idx)))
+    {
+        std::cout << "Counter id unavailable (" << names.at(idx) << ")\n";
+        ::close(fds.at(idx));
+        fds.at(idx) = -1;
+        return false;
+    }
     read_format_size += 2*sizeof(std::uint64_t);
     provided_counter_count++;
 
-    ioctl(fds[idx], PERF_EVENT_IOC_ID, &ids[idx]);
-
     offsets.emplace(counter,current_offset++);
-    provided_counter_names.push_back(names[idx]);
+    provided_counter_names.push_back(names.at(idx));
 
     return true;
 }
 
-bool perf::counters::finalize()
+// Resets and enables the whole counter group, then stores the result of a
+// back-to-back tic()/toc() in `overheads`.
+// Returns false if no leader was added or the group cannot be reset/enabled.
+auto perf::counters::finalize()
+    -> bool
 {
-    ioctl(fds[leader_index], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
-    ioctl(fds[leader_index], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+    if(std::numeric_limits<std::size_t>::max() == leader_index)
+    {
+        // fds.at(leader_index) would throw std::out_of_range for the sentinel.
+        std::cout << "No leader defined, can't finalize\n";
+        return false;
+    }
+    if(-1 == ioctl(fds.at(leader_index), PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) ||
+       -1 == ioctl(fds.at(leader_index), PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP))
+    {
+        std::cout << "Failed to reset/enable counter group\n";
+        return false;
+    }
 
     tic();
     overheads = toc();
     return true;
 }
-
-std::uint64_t perf::counters::get_counter_id(void* counter_data, std::size_t offset)
-{
-    std::uint64_t id;
-    memcpy(&id, static_cast<std::uint8_t*>(counter_data)+sizeof(std::uint64_t)*(offset*2+2), sizeof(std::uint64_t));
-    return id;
-}
-
-std::uint64_t perf::counters::get_counter_value(void* counter_data, std::size_t offset)
-{
-    std::uint64_t value;
-    memcpy(&value, static_cast<std::uint8_t*>(counter_data)+sizeof(std::uint64_t)*(offset*2+1), sizeof(std::uint64_t));
-    return value;
-}