File indexing completed on 2024-10-23 22:48:06
0001 #include <memory>
0002 #include <cassert>
0003 #include <atomic>
0004 #include <array>
0005 #include <mutex>
0006 #include <limits>
0007 #include <cstddef>
0008 #include <malloc.h>
0009 #define ALLOC_USE_PTHREADS
0010 #if defined(ALLOC_USE_PTHREADS)
0011 #include <pthread.h>
0012 #else
0013 #include <unistd.h>
0014 #include <sys/syscall.h>
0015 #endif
0016
0017 #include "PerfTools/AllocMonitor/interface/AllocMonitorRegistry.h"
0018 #include "FWCore/Utilities/interface/thread_safety_macros.h"
0019
0020 #include <dlfcn.h> // dlsym
0021
0022 #if !defined(__x86_64__) && !defined(__i386__)
0023 #define USE_LOCAL_MALLOC
0024 #endif
0025 #if defined(__GLIBC__) && (__GLIBC__ == 2) && (__GLIBC_MINOR__ < 28)
0026
0027 #define USE_LOCAL_MALLOC
0028 #endif
0029
0030 namespace {
/// Accessor for the process-wide "monitoring enabled" flag.
/// The flag is a function-local static, so it is created on first use and
/// every caller receives a reference to the same atomic. It starts out false.
std::atomic<bool>& alloc_monitor_running_state() {
  static std::atomic<bool> running{false};
  return running;
}
0035
/// Resolve the next definition of the symbol `iName` in the dynamic-link
/// search order (dlsym with RTLD_NEXT) and return it converted to the
/// function-pointer type `T`.
/// A failed lookup is only caught by assert, i.e. it is unchecked when
/// NDEBUG is defined.
template <typename T>
T get(const char* iName) {
  void* const symbol = dlsym(RTLD_NEXT, iName);
  assert(symbol != nullptr);
  return reinterpret_cast<T>(symbol);
}
0042
/// Identifier of the calling thread.
/// With ALLOC_USE_PTHREADS defined this is pthread_self(); otherwise it is the
/// value returned by the gettid system call. The return type therefore depends
/// on which branch is compiled.
inline auto thread_id() {
#if !defined(ALLOC_USE_PTHREADS)
  return syscall(SYS_gettid);
#else
  return pthread_self();
#endif
}
0051 #ifdef USE_LOCAL_MALLOC
0052
0053
0054
0055
0056
// Fixed-size scratch arena with a bump pointer. Originals::malloc/calloc point
// at local_malloc/local_calloc until Originals::init() has replaced them with
// the functions found through dlsym.
constexpr auto max_align = alignof(std::max_align_t);
alignas(max_align) char tmpbuff[131072];
unsigned long tmppos = 0;     // offset of the first unused byte in tmpbuff
unsigned long tmpallocs = 0;  // number of successful local_malloc calls

/// Hand out `size` bytes (rounded up to a multiple of max_align) from tmpbuff.
/// Returns nullptr when the request does not fit in the remaining space.
/// Memory obtained here is never recycled. No locking is performed.
void* local_malloc(size_t size) noexcept {
  // Reject oversized requests before rounding: `size + max_align - 1` would
  // otherwise wrap around for sizes close to SIZE_MAX and turn a huge request
  // into a tiny (or empty) one that appears to succeed.
  if (size >= sizeof(tmpbuff)) {
    return nullptr;
  }
  const size_t rounded = ((size + max_align - 1) / max_align) * max_align;
  // tmppos is always strictly below sizeof(tmpbuff), so the subtraction cannot
  // underflow; this is the overflow-free form of `tmppos + rounded < sizeof(tmpbuff)`.
  if (rounded >= sizeof(tmpbuff) - tmppos) {
    return nullptr;
  }
  void* retptr = tmpbuff + tmppos;
  tmppos += rounded;
  ++tmpallocs;
  return retptr;
}

/// calloc counterpart of local_malloc. Returns nullptr when nitems * item_size
/// overflows size_t or does not fit. The bytes are not cleared explicitly:
/// tmpbuff has static storage duration (zero-initialised) and each region is
/// handed out only once.
void* local_calloc(size_t nitems, size_t item_size) noexcept {
  if (item_size != 0 && nitems > std::numeric_limits<size_t>::max() / item_size) {
    return nullptr;
  }
  return local_malloc(nitems * item_size);
}

/// True when `ptr` lies inside the part of tmpbuff handed out so far.
/// The upper bound is inclusive on purpose: a zero-size request returns
/// tmpbuff + tmppos without advancing tmppos.
inline bool is_local_alloc(void* ptr) noexcept {
  return ptr >= static_cast<void*>(tmpbuff) && ptr <= static_cast<void*>(tmpbuff + tmppos);
}
0079
0080
0081
0082
0083 struct Originals {
0084 inline static void init() noexcept {
0085 if (not set) {
0086 set = true;
0087 malloc = get<decltype(&::malloc)>("malloc");
0088 calloc = get<decltype(&::calloc)>("calloc");
0089 }
0090 }
0091 CMS_SA_ALLOW static decltype(&::malloc) malloc;
0092 CMS_SA_ALLOW static decltype(&::calloc) calloc;
0093 CMS_SA_ALLOW static bool set;
0094 };
0095
0096 decltype(&::malloc) Originals::malloc = local_malloc;
0097 decltype(&::calloc) Originals::calloc = local_calloc;
0098 bool Originals::set = false;
0099 #else
/// Variant used when USE_LOCAL_MALLOC is not defined: there is no local arena,
/// so no pointer is ever a local allocation.
constexpr inline bool is_local_alloc([[maybe_unused]] void* ptr) noexcept {
  return false;
}
0101 #endif
0102
0103 struct ThreadTracker {
0104 static constexpr unsigned int kEntries = 128;
0105 using entry_type = decltype(thread_id());
0106 std::array<std::atomic<entry_type>, kEntries> used_threads_;
0107 std::array<std::mutex, kEntries> used_threads_mutex_;
0108
0109 ThreadTracker() {
0110
0111 entry_type entry = 0;
0112 for (auto& v : used_threads_) {
0113 v = ++entry;
0114 }
0115 }
0116
0117 std::size_t thread_index(entry_type id) const {
0118 #if defined(ALLOC_USE_PTHREADS)
0119 return (id / 0x700) % kEntries;
0120 #else
0121 return id % kEntries;
0122 #endif
0123 }
0124
0125
0126 bool stop_reporting() {
0127 auto id = thread_id();
0128 auto index = thread_index(id);
0129
0130 if (id == used_threads_[index]) {
0131 return false;
0132 }
0133 used_threads_mutex_[index].lock();
0134 used_threads_[index] = id;
0135 return true;
0136 }
0137
0138 void start_reporting() {
0139 auto id = thread_id();
0140 auto index = thread_index(id);
0141 auto& v = used_threads_[index];
0142 if (v == static_cast<entry_type>(index + 1)) {
0143 return;
0144 }
0145 assert(v == id);
0146 v = index + 1;
0147 used_threads_mutex_[index].unlock();
0148 }
0149 };
0150
0151 static ThreadTracker& getTracker() {
0152 static ThreadTracker s_tracker;
0153 return s_tracker;
0154 }
0155
0156 }
0157
0158 using namespace cms::perftools;
0159
0160 extern "C" {
0161 void alloc_monitor_start() { alloc_monitor_running_state() = true; }
0162 void alloc_monitor_stop() { alloc_monitor_running_state() = false; }
0163
0164 bool alloc_monitor_stop_thread_reporting() { return getTracker().stop_reporting(); }
0165
0166 void alloc_monitor_start_thread_reporting() { getTracker().start_reporting(); }
0167
0168
0169
0170
0171 #ifdef USE_LOCAL_MALLOC
0172 void* malloc(size_t size) noexcept {
0173 const auto original = Originals::malloc;
0174 Originals::init();
0175 if (not alloc_monitor_running_state()) {
0176 return original(size);
0177 }
0178 auto& reg = AllocMonitorRegistry::instance();
0179 return reg.allocCalled(
0180 size, [size, original]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0181 }
0182
0183 void* calloc(size_t nitems, size_t item_size) noexcept {
0184 const auto original = Originals::calloc;
0185 Originals::init();
0186 if (not alloc_monitor_running_state()) {
0187 return original(nitems, item_size);
0188 }
0189 auto& reg = AllocMonitorRegistry::instance();
0190 return reg.allocCalled(
0191 nitems * item_size,
0192 [nitems, item_size, original]() { return original(nitems, item_size); },
0193 [](auto ret) { return malloc_usable_size(ret); });
0194 }
0195 #else
0196 void* malloc(size_t size) noexcept {
0197 CMS_SA_ALLOW static const auto original = get<decltype(&::malloc)>("malloc");
0198 if (not alloc_monitor_running_state()) {
0199 return original(size);
0200 }
0201 auto& reg = AllocMonitorRegistry::instance();
0202 return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0203 }
0204
0205 void* calloc(size_t nitems, size_t item_size) noexcept {
0206 CMS_SA_ALLOW static const auto original = get<decltype(&::calloc)>("calloc");
0207 if (not alloc_monitor_running_state()) {
0208 return original(nitems, item_size);
0209 }
0210 auto& reg = AllocMonitorRegistry::instance();
0211 return reg.allocCalled(
0212 nitems * item_size,
0213 [nitems, item_size]() { return original(nitems, item_size); },
0214 [](auto ret) { return malloc_usable_size(ret); });
0215 }
0216 #endif
0217
0218 void* realloc(void* ptr, size_t size) noexcept {
0219 CMS_SA_ALLOW static const auto original = get<decltype(&::realloc)>("realloc");
0220 if (not alloc_monitor_running_state()) {
0221 return original(ptr, size);
0222 }
0223 size_t oldsize = malloc_usable_size(ptr);
0224 void* ret;
0225 auto& reg = AllocMonitorRegistry::instance();
0226 {
0227
0228 [[maybe_unused]] auto g = reg.makeGuard();
0229 ret = original(ptr, size);
0230 }
0231 size_t used = malloc_usable_size(ret);
0232 if (used != oldsize) {
0233 reg.deallocCalled(ptr, [](auto) {}, [oldsize](auto) { return oldsize; });
0234 reg.allocCalled(size, [ret]() { return ret; }, [used](auto) { return used; });
0235 }
0236 return ret;
0237 }
0238
0239 void* aligned_alloc(size_t alignment, size_t size) noexcept {
0240 CMS_SA_ALLOW static const auto original = get<decltype(&::aligned_alloc)>("aligned_alloc");
0241 if (not alloc_monitor_running_state()) {
0242 return original(alignment, size);
0243 }
0244
0245 auto& reg = AllocMonitorRegistry::instance();
0246 return reg.allocCalled(
0247 size,
0248 [alignment, size]() { return original(alignment, size); },
0249 [](auto ret) { return malloc_usable_size(ret); });
0250 }
0251
0252
0253 int posix_memalign(void** memptr, size_t alignment, size_t size) noexcept {
0254 CMS_SA_ALLOW static const auto original = get<decltype(&::posix_memalign)>("posix_memalign");
0255 if (not alloc_monitor_running_state()) {
0256 return original(memptr, alignment, size);
0257 }
0258
0259 auto& reg = AllocMonitorRegistry::instance();
0260 int ret;
0261 reg.allocCalled(
0262 size,
0263 [&ret, memptr, alignment, size]() {
0264 ret = original(memptr, alignment, size);
0265 return *memptr;
0266 },
0267 [](auto ret) { return malloc_usable_size(ret); });
0268 return ret;
0269 }
0270
0271
0272 void* memalign(size_t alignment, size_t size) noexcept {
0273 CMS_SA_ALLOW static const auto original = get<decltype(&::memalign)>("memalign");
0274 if (not alloc_monitor_running_state()) {
0275 return original(alignment, size);
0276 }
0277
0278 auto& reg = AllocMonitorRegistry::instance();
0279 return reg.allocCalled(
0280 size,
0281 [alignment, size]() { return original(alignment, size); },
0282 [](auto ret) { return malloc_usable_size(ret); });
0283 }
0284
0285 void free(void* ptr) noexcept {
0286 CMS_SA_ALLOW static const auto original = get<decltype(&::free)>("free");
0287
0288 if (not is_local_alloc(ptr)) {
0289 if (not alloc_monitor_running_state()) {
0290 original(ptr);
0291 return;
0292 }
0293
0294 auto& reg = AllocMonitorRegistry::instance();
0295 reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0296 }
0297 }
0298 }
0299
0300
0301
0302
0303 #define CPP_MEM_OVERRIDE
0304
0305 #if defined(CPP_MEM_OVERRIDE)
0306 #include <new>
0307
0308 void* operator new(std::size_t size) {
0309 CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t)>("_Znwm");
0310 if (not alloc_monitor_running_state()) {
0311 return original(size);
0312 }
0313
0314 auto& reg = AllocMonitorRegistry::instance();
0315 return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0316 }
0317
0318 void operator delete(void* ptr) noexcept {
0319 CMS_SA_ALLOW static const auto original = get<void (*)(void*)>("_ZdlPv");
0320 if (not alloc_monitor_running_state()) {
0321 original(ptr);
0322 return;
0323 }
0324
0325 auto& reg = AllocMonitorRegistry::instance();
0326 reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0327 }
0328
0329 void* operator new[](std::size_t size) {
0330 CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t)>("_Znam");
0331 if (not alloc_monitor_running_state()) {
0332 return original(size);
0333 }
0334
0335 auto& reg = AllocMonitorRegistry::instance();
0336 return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0337 }
0338
0339 void operator delete[](void* ptr) noexcept {
0340 CMS_SA_ALLOW static const auto original = get<void (*)(void*)>("_ZdaPv");
0341
0342 if (not alloc_monitor_running_state()) {
0343 original(ptr);
0344 return;
0345 }
0346 auto& reg = AllocMonitorRegistry::instance();
0347 reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0348 }
0349
0350 void* operator new(std::size_t size, std::align_val_t al) {
0351 CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t, std::align_val_t)>("_ZnwmSt11align_val_t");
0352 if (not alloc_monitor_running_state()) {
0353 return original(size, al);
0354 }
0355
0356 auto& reg = AllocMonitorRegistry::instance();
0357 return reg.allocCalled(
0358 size, [size, al]() { return original(size, al); }, [](auto ret) { return malloc_usable_size(ret); });
0359 }
0360
0361 void* operator new[](std::size_t size, std::align_val_t al) {
0362 CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t, std::align_val_t)>("_ZnamSt11align_val_t");
0363
0364 if (not alloc_monitor_running_state()) {
0365 return original(size, al);
0366 }
0367
0368 auto& reg = AllocMonitorRegistry::instance();
0369 return reg.allocCalled(
0370 size, [size, al]() { return original(size, al); }, [](auto ret) { return malloc_usable_size(ret); });
0371 }
0372
0373 void* operator new(std::size_t size, const std::nothrow_t& tag) noexcept {
0374 CMS_SA_ALLOW static const auto original =
0375 get<void* (*)(std::size_t, const std::nothrow_t&) noexcept>("_ZnwmRKSt9nothrow_t");
0376
0377 if (not alloc_monitor_running_state()) {
0378 return original(size, tag);
0379 }
0380
0381 auto& reg = AllocMonitorRegistry::instance();
0382 return reg.allocCalled(
0383 size, [size, &tag]() { return original(size, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0384 }
0385
0386 void* operator new[](std::size_t size, const std::nothrow_t& tag) noexcept {
0387 CMS_SA_ALLOW static const auto original =
0388 get<void* (*)(std::size_t, const std::nothrow_t&) noexcept>("_ZnamRKSt9nothrow_t");
0389
0390 if (not alloc_monitor_running_state()) {
0391 return original(size, tag);
0392 }
0393
0394 auto& reg = AllocMonitorRegistry::instance();
0395 return reg.allocCalled(
0396 size, [size, &tag]() { return original(size, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0397 }
0398
0399 void* operator new(std::size_t size, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0400 CMS_SA_ALLOW static const auto original =
0401 get<void* (*)(std::size_t, std::align_val_t, const std::nothrow_t&) noexcept>(
0402 "_ZnwmSt11align_val_tRKSt9nothrow_t");
0403
0404 if (not alloc_monitor_running_state()) {
0405 return original(size, al, tag);
0406 }
0407
0408 auto& reg = AllocMonitorRegistry::instance();
0409 return reg.allocCalled(
0410 size, [size, al, &tag]() { return original(size, al, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0411 }
0412
0413 void* operator new[](std::size_t size, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0414 CMS_SA_ALLOW static const auto original =
0415 get<void* (*)(std::size_t, std::align_val_t, const std::nothrow_t&) noexcept>(
0416 "_ZnamSt11align_val_tRKSt9nothrow_t");
0417
0418 if (not alloc_monitor_running_state()) {
0419 return original(size, al, tag);
0420 }
0421
0422 auto& reg = AllocMonitorRegistry::instance();
0423 return reg.allocCalled(
0424 size, [size, al, &tag]() { return original(size, al, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0425 }
0426
0427 void operator delete(void* ptr, std::align_val_t al) noexcept {
0428 CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::align_val_t) noexcept>("_ZdlPvSt11align_val_t");
0429
0430 if (not alloc_monitor_running_state()) {
0431 original(ptr, al);
0432 return;
0433 }
0434 auto& reg = AllocMonitorRegistry::instance();
0435 reg.deallocCalled(ptr, [al](auto ptr) { original(ptr, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0436 }
0437
0438 void operator delete[](void* ptr, std::align_val_t al) noexcept {
0439 CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::align_val_t) noexcept>("_ZdaPvSt11align_val_t");
0440
0441 if (not alloc_monitor_running_state()) {
0442 original(ptr, al);
0443 return;
0444 }
0445 auto& reg = AllocMonitorRegistry::instance();
0446 reg.deallocCalled(ptr, [al](auto ptr) { original(ptr, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0447 }
0448
0449 void operator delete(void* ptr, std::size_t sz) noexcept {
0450 CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::size_t) noexcept>("_ZdlPvm");
0451
0452 if (not alloc_monitor_running_state()) {
0453 original(ptr, sz);
0454 return;
0455 }
0456 auto& reg = AllocMonitorRegistry::instance();
0457 reg.deallocCalled(ptr, [sz](auto ptr) { original(ptr, sz); }, [](auto ptr) { return malloc_usable_size(ptr); });
0458 }
0459
0460 void operator delete[](void* ptr, std::size_t sz) noexcept {
0461 CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::size_t) noexcept>("_ZdaPvm");
0462
0463 if (not alloc_monitor_running_state()) {
0464 original(ptr, sz);
0465 return;
0466 }
0467 auto& reg = AllocMonitorRegistry::instance();
0468 reg.deallocCalled(ptr, [sz](auto ptr) { original(ptr, sz); }, [](auto ptr) { return malloc_usable_size(ptr); });
0469 }
0470
0471 void operator delete(void* ptr, std::size_t sz, std::align_val_t al) noexcept {
0472 CMS_SA_ALLOW static const auto original =
0473 get<void (*)(void*, std::size_t, std::align_val_t) noexcept>("_ZdlPvmSt11align_val_t");
0474
0475 if (not alloc_monitor_running_state()) {
0476 original(ptr, sz, al);
0477 return;
0478 }
0479 auto& reg = AllocMonitorRegistry::instance();
0480 reg.deallocCalled(
0481 ptr, [sz, al](auto ptr) { original(ptr, sz, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0482 }
0483
0484 void operator delete[](void* ptr, std::size_t sz, std::align_val_t al) noexcept {
0485 CMS_SA_ALLOW static const auto original =
0486 get<void (*)(void*, std::size_t, std::align_val_t) noexcept>("_ZdaPvmSt11align_val_t");
0487
0488 if (not alloc_monitor_running_state()) {
0489 original(ptr, sz, al);
0490 return;
0491 }
0492 auto& reg = AllocMonitorRegistry::instance();
0493 reg.deallocCalled(
0494 ptr, [sz, al](auto ptr) { original(ptr, sz, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0495 }
0496
0497 void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
0498 CMS_SA_ALLOW static const auto original =
0499 get<void (*)(void*, const std::nothrow_t&) noexcept>("_ZdlPvRKSt9nothrow_t");
0500
0501 if (not alloc_monitor_running_state()) {
0502 original(ptr, tag);
0503 return;
0504 }
0505 auto& reg = AllocMonitorRegistry::instance();
0506 reg.deallocCalled(ptr, [&tag](auto ptr) { original(ptr, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0507 }
0508
0509 void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
0510 CMS_SA_ALLOW static const auto original =
0511 get<void (*)(void*, const std::nothrow_t&) noexcept>("_ZdaPvRKSt9nothrow_t");
0512
0513 if (not alloc_monitor_running_state()) {
0514 original(ptr, tag);
0515 return;
0516 }
0517 auto& reg = AllocMonitorRegistry::instance();
0518 reg.deallocCalled(ptr, [&tag](auto ptr) { original(ptr, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0519 }
0520
0521 void operator delete(void* ptr, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0522 CMS_SA_ALLOW static const auto original =
0523 get<void (*)(void*, std::align_val_t, const std::nothrow_t&) noexcept>("_ZdlPvSt11align_val_tRKSt9nothrow_t");
0524
0525 if (not alloc_monitor_running_state()) {
0526 original(ptr, al, tag);
0527 return;
0528 }
0529 auto& reg = AllocMonitorRegistry::instance();
0530 reg.deallocCalled(
0531 ptr, [al, &tag](auto ptr) { original(ptr, al, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0532 }
0533
0534 void operator delete[](void* ptr, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0535 CMS_SA_ALLOW static const auto original =
0536 get<void (*)(void*, std::align_val_t, const std::nothrow_t&) noexcept>("_ZdaPvSt11align_val_tRKSt9nothrow_t");
0537
0538 if (not alloc_monitor_running_state()) {
0539 original(ptr, al, tag);
0540 return;
0541 }
0542 auto& reg = AllocMonitorRegistry::instance();
0543 reg.deallocCalled(
0544 ptr, [al, &tag](auto ptr) { original(ptr, al, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0545 }
0546
0547 #endif