Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-10-23 22:48:06

0001 #include <memory>
0002 #include <cassert>
0003 #include <atomic>
0004 #include <array>
0005 #include <mutex>
0006 #include <limits>
0007 #include <cstddef>
0008 #include <malloc.h>
0009 #define ALLOC_USE_PTHREADS
0010 #if defined(ALLOC_USE_PTHREADS)
0011 #include <pthread.h>
0012 #else
0013 #include <unistd.h>
0014 #include <sys/syscall.h>
0015 #endif
0016 
0017 #include "PerfTools/AllocMonitor/interface/AllocMonitorRegistry.h"
0018 #include "FWCore/Utilities/interface/thread_safety_macros.h"
0019 
0020 #include <dlfcn.h>  // dlsym
0021 
0022 #if !defined(__x86_64__) && !defined(__i386__)
0023 #define USE_LOCAL_MALLOC
0024 #endif
0025 #if defined(__GLIBC__) && (__GLIBC__ == 2) && (__GLIBC_MINOR__ < 28)
0026 //needed for sl7
0027 #define USE_LOCAL_MALLOC
0028 #endif
0029 
0030 namespace {
0031   std::atomic<bool>& alloc_monitor_running_state() {
0032     static std::atomic<bool> s_state = false;
0033     return s_state;
0034   }
0035 
0036   template <typename T>
0037   T get(const char* iName) {
0038     void* original = dlsym(RTLD_NEXT, iName);
0039     assert(original);
0040     return reinterpret_cast<T>(original);
0041   }
0042 
0043   inline auto thread_id() {
0044 #if defined(ALLOC_USE_PTHREADS)
0045     /*NOTE: if use pthread_self, the values returned by linux had                                                                                                                                                                                      lots of hash collisions when using a simple % hash. Worked                                                                                                                                                                                        better if first divided value by 0x700 and then did %.                                                                                                                                                                                            [test done on el8] */
0046     return pthread_self();
0047 #else
0048     return syscall(SYS_gettid);
0049 #endif
0050   }
0051 #ifdef USE_LOCAL_MALLOC
0052   // this is a very simple-minded allocator used for any allocations
0053   // before we've finished our setup.  In particular, this avoids a
0054   // chicken/egg problem if dlsym() allocates any memory.
0055   // Size was chosen to be 2x what ARM64 uses as an emergency buffer
0056   // for libstdc++ exception handling.
0057   constexpr auto max_align = alignof(std::max_align_t);
0058   alignas(max_align) char tmpbuff[131072];
0059   unsigned long tmppos = 0;
0060   unsigned long tmpallocs = 0;
0061 
0062   void* local_malloc(size_t size) noexcept {
0063     // round up so next alloc is aligned
0064     size = ((size + max_align - 1) / max_align) * max_align;
0065     if (tmppos + size < sizeof(tmpbuff)) {
0066       void* retptr = tmpbuff + tmppos;
0067       tmppos += size;
0068       ++tmpallocs;
0069       return retptr;
0070     } else {
0071       return nullptr;
0072     }
0073   }
0074 
0075   //can use local_malloc since static memory buffers are guaranteed to be zero initialized
0076   void* local_calloc(size_t nitems, size_t item_size) noexcept { return local_malloc(nitems * item_size); }
0077 
0078   inline bool is_local_alloc(void* ptr) noexcept { return ptr >= (void*)tmpbuff && ptr <= (void*)(tmpbuff + tmppos); }
0079 
0080   // the pointers in this struct should only be modified during
0081   // global construction at program startup, so thread safety
0082   // should not be an issue.
0083   struct Originals {
0084     inline static void init() noexcept {
0085       if (not set) {
0086         set = true;  // must be first to avoid recursion
0087         malloc = get<decltype(&::malloc)>("malloc");
0088         calloc = get<decltype(&::calloc)>("calloc");
0089       }
0090     }
0091     CMS_SA_ALLOW static decltype(&::malloc) malloc;
0092     CMS_SA_ALLOW static decltype(&::calloc) calloc;
0093     CMS_SA_ALLOW static bool set;
0094   };
0095 
0096   decltype(&::malloc) Originals::malloc = local_malloc;
0097   decltype(&::calloc) Originals::calloc = local_calloc;
0098   bool Originals::set = false;
0099 #else
0100   constexpr inline bool is_local_alloc(void* ptr) noexcept { return false; }
0101 #endif
0102 
0103   struct ThreadTracker {
0104     static constexpr unsigned int kEntries = 128;
0105     using entry_type = decltype(thread_id());
0106     std::array<std::atomic<entry_type>, kEntries> used_threads_;
0107     std::array<std::mutex, kEntries> used_threads_mutex_;
0108 
0109     ThreadTracker() {
0110       //put a value which will not match the % used when looking up the entry
0111       entry_type entry = 0;
0112       for (auto& v : used_threads_) {
0113         v = ++entry;
0114       }
0115     }
0116 
0117     std::size_t thread_index(entry_type id) const {
0118 #if defined(ALLOC_USE_PTHREADS)
0119       return (id / 0x700) % kEntries;
0120 #else
0121       return id % kEntries;
0122 #endif
0123     }
0124 
0125     //returns true if the thread had not already stopped reporting
0126     bool stop_reporting() {
0127       auto id = thread_id();
0128       auto index = thread_index(id);
0129       //are we already in this thread?
0130       if (id == used_threads_[index]) {
0131         return false;
0132       }
0133       used_threads_mutex_[index].lock();
0134       used_threads_[index] = id;
0135       return true;
0136     }
0137 
0138     void start_reporting() {
0139       auto id = thread_id();
0140       auto index = thread_index(id);
0141       auto& v = used_threads_[index];
0142       if (v == static_cast<entry_type>(index + 1)) {
0143         return;
0144       }
0145       assert(v == id);
0146       v = index + 1;
0147       used_threads_mutex_[index].unlock();
0148     }
0149   };
0150 
0151   static ThreadTracker& getTracker() {
0152     static ThreadTracker s_tracker;
0153     return s_tracker;
0154   }
0155 
0156 }  // namespace
0157 
0158 using namespace cms::perftools;
0159 
0160 extern "C" {
0161 void alloc_monitor_start() { alloc_monitor_running_state() = true; }
0162 void alloc_monitor_stop() { alloc_monitor_running_state() = false; }
0163 
0164 bool alloc_monitor_stop_thread_reporting() { return getTracker().stop_reporting(); }
0165 
0166 void alloc_monitor_start_thread_reporting() { getTracker().start_reporting(); }
0167 
0168 //----------------------------------------------------------------
0169 //C memory functions
0170 
0171 #ifdef USE_LOCAL_MALLOC
0172 void* malloc(size_t size) noexcept {
0173   const auto original = Originals::malloc;
0174   Originals::init();
0175   if (not alloc_monitor_running_state()) {
0176     return original(size);
0177   }
0178   auto& reg = AllocMonitorRegistry::instance();
0179   return reg.allocCalled(
0180       size, [size, original]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0181 }
0182 
0183 void* calloc(size_t nitems, size_t item_size) noexcept {
0184   const auto original = Originals::calloc;
0185   Originals::init();
0186   if (not alloc_monitor_running_state()) {
0187     return original(nitems, item_size);
0188   }
0189   auto& reg = AllocMonitorRegistry::instance();
0190   return reg.allocCalled(
0191       nitems * item_size,
0192       [nitems, item_size, original]() { return original(nitems, item_size); },
0193       [](auto ret) { return malloc_usable_size(ret); });
0194 }
0195 #else
0196 void* malloc(size_t size) noexcept {
0197   CMS_SA_ALLOW static const auto original = get<decltype(&::malloc)>("malloc");
0198   if (not alloc_monitor_running_state()) {
0199     return original(size);
0200   }
0201   auto& reg = AllocMonitorRegistry::instance();
0202   return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0203 }
0204 
0205 void* calloc(size_t nitems, size_t item_size) noexcept {
0206   CMS_SA_ALLOW static const auto original = get<decltype(&::calloc)>("calloc");
0207   if (not alloc_monitor_running_state()) {
0208     return original(nitems, item_size);
0209   }
0210   auto& reg = AllocMonitorRegistry::instance();
0211   return reg.allocCalled(
0212       nitems * item_size,
0213       [nitems, item_size]() { return original(nitems, item_size); },
0214       [](auto ret) { return malloc_usable_size(ret); });
0215 }
0216 #endif
0217 
0218 void* realloc(void* ptr, size_t size) noexcept {
0219   CMS_SA_ALLOW static const auto original = get<decltype(&::realloc)>("realloc");
0220   if (not alloc_monitor_running_state()) {
0221     return original(ptr, size);
0222   }
0223   size_t oldsize = malloc_usable_size(ptr);
0224   void* ret;
0225   auto& reg = AllocMonitorRegistry::instance();
0226   {
0227     //incase this calls malloc/free
0228     [[maybe_unused]] auto g = reg.makeGuard();
0229     ret = original(ptr, size);
0230   }
0231   size_t used = malloc_usable_size(ret);
0232   if (used != oldsize) {
0233     reg.deallocCalled(ptr, [](auto) {}, [oldsize](auto) { return oldsize; });
0234     reg.allocCalled(size, [ret]() { return ret; }, [used](auto) { return used; });
0235   }
0236   return ret;
0237 }
0238 
0239 void* aligned_alloc(size_t alignment, size_t size) noexcept {
0240   CMS_SA_ALLOW static const auto original = get<decltype(&::aligned_alloc)>("aligned_alloc");
0241   if (not alloc_monitor_running_state()) {
0242     return original(alignment, size);
0243   }
0244 
0245   auto& reg = AllocMonitorRegistry::instance();
0246   return reg.allocCalled(
0247       size,
0248       [alignment, size]() { return original(alignment, size); },
0249       [](auto ret) { return malloc_usable_size(ret); });
0250 }
0251 
0252 //used by tensorflow
0253 int posix_memalign(void** memptr, size_t alignment, size_t size) noexcept {
0254   CMS_SA_ALLOW static const auto original = get<decltype(&::posix_memalign)>("posix_memalign");
0255   if (not alloc_monitor_running_state()) {
0256     return original(memptr, alignment, size);
0257   }
0258 
0259   auto& reg = AllocMonitorRegistry::instance();
0260   int ret;
0261   reg.allocCalled(
0262       size,
0263       [&ret, memptr, alignment, size]() {
0264         ret = original(memptr, alignment, size);
0265         return *memptr;
0266       },
0267       [](auto ret) { return malloc_usable_size(ret); });
0268   return ret;
0269 }
0270 
0271 //used by libc
0272 void* memalign(size_t alignment, size_t size) noexcept {
0273   CMS_SA_ALLOW static const auto original = get<decltype(&::memalign)>("memalign");
0274   if (not alloc_monitor_running_state()) {
0275     return original(alignment, size);
0276   }
0277 
0278   auto& reg = AllocMonitorRegistry::instance();
0279   return reg.allocCalled(
0280       size,
0281       [alignment, size]() { return original(alignment, size); },
0282       [](auto ret) { return malloc_usable_size(ret); });
0283 }
0284 
0285 void free(void* ptr) noexcept {
0286   CMS_SA_ALLOW static const auto original = get<decltype(&::free)>("free");
0287   // ignore memory allocated from our static array at startup
0288   if (not is_local_alloc(ptr)) {
0289     if (not alloc_monitor_running_state()) {
0290       original(ptr);
0291       return;
0292     }
0293 
0294     auto& reg = AllocMonitorRegistry::instance();
0295     reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0296   }
0297 }
0298 }  // extern "C"
0299 
0300 //----------------------------------------------------------------
0301 //C++ memory functions
0302 
0303 #define CPP_MEM_OVERRIDE
0304 
0305 #if defined(CPP_MEM_OVERRIDE)
0306 #include <new>
0307 
0308 void* operator new(std::size_t size) {
0309   CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t)>("_Znwm");
0310   if (not alloc_monitor_running_state()) {
0311     return original(size);
0312   }
0313 
0314   auto& reg = AllocMonitorRegistry::instance();
0315   return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0316 }  //_Znwm
0317 
0318 void operator delete(void* ptr) noexcept {
0319   CMS_SA_ALLOW static const auto original = get<void (*)(void*)>("_ZdlPv");
0320   if (not alloc_monitor_running_state()) {
0321     original(ptr);
0322     return;
0323   }
0324 
0325   auto& reg = AllocMonitorRegistry::instance();
0326   reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0327 }  //_ZdlPv
0328 
0329 void* operator new[](std::size_t size) {
0330   CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t)>("_Znam");
0331   if (not alloc_monitor_running_state()) {
0332     return original(size);
0333   }
0334 
0335   auto& reg = AllocMonitorRegistry::instance();
0336   return reg.allocCalled(size, [size]() { return original(size); }, [](auto ret) { return malloc_usable_size(ret); });
0337 }  //_Znam
0338 
0339 void operator delete[](void* ptr) noexcept {
0340   CMS_SA_ALLOW static const auto original = get<void (*)(void*)>("_ZdaPv");
0341 
0342   if (not alloc_monitor_running_state()) {
0343     original(ptr);
0344     return;
0345   }
0346   auto& reg = AllocMonitorRegistry::instance();
0347   reg.deallocCalled(ptr, [](auto ptr) { original(ptr); }, [](auto ptr) { return malloc_usable_size(ptr); });
0348 }  //_ZdaPv
0349 
0350 void* operator new(std::size_t size, std::align_val_t al) {
0351   CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t, std::align_val_t)>("_ZnwmSt11align_val_t");
0352   if (not alloc_monitor_running_state()) {
0353     return original(size, al);
0354   }
0355 
0356   auto& reg = AllocMonitorRegistry::instance();
0357   return reg.allocCalled(
0358       size, [size, al]() { return original(size, al); }, [](auto ret) { return malloc_usable_size(ret); });
0359 }  //_ZnwmSt11align_val_t
0360 
0361 void* operator new[](std::size_t size, std::align_val_t al) {
0362   CMS_SA_ALLOW static const auto original = get<void* (*)(std::size_t, std::align_val_t)>("_ZnamSt11align_val_t");
0363 
0364   if (not alloc_monitor_running_state()) {
0365     return original(size, al);
0366   }
0367 
0368   auto& reg = AllocMonitorRegistry::instance();
0369   return reg.allocCalled(
0370       size, [size, al]() { return original(size, al); }, [](auto ret) { return malloc_usable_size(ret); });
0371 }  //_ZnamSt11align_val_t
0372 
0373 void* operator new(std::size_t size, const std::nothrow_t& tag) noexcept {
0374   CMS_SA_ALLOW static const auto original =
0375       get<void* (*)(std::size_t, const std::nothrow_t&) noexcept>("_ZnwmRKSt9nothrow_t");
0376 
0377   if (not alloc_monitor_running_state()) {
0378     return original(size, tag);
0379   }
0380 
0381   auto& reg = AllocMonitorRegistry::instance();
0382   return reg.allocCalled(
0383       size, [size, &tag]() { return original(size, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0384 }  //_ZnwmRKSt9nothrow_t
0385 
0386 void* operator new[](std::size_t size, const std::nothrow_t& tag) noexcept {
0387   CMS_SA_ALLOW static const auto original =
0388       get<void* (*)(std::size_t, const std::nothrow_t&) noexcept>("_ZnamRKSt9nothrow_t");
0389 
0390   if (not alloc_monitor_running_state()) {
0391     return original(size, tag);
0392   }
0393 
0394   auto& reg = AllocMonitorRegistry::instance();
0395   return reg.allocCalled(
0396       size, [size, &tag]() { return original(size, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0397 }  //_ZnamRKSt9nothrow_t
0398 
0399 void* operator new(std::size_t size, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0400   CMS_SA_ALLOW static const auto original =
0401       get<void* (*)(std::size_t, std::align_val_t, const std::nothrow_t&) noexcept>(
0402           "_ZnwmSt11align_val_tRKSt9nothrow_t");
0403 
0404   if (not alloc_monitor_running_state()) {
0405     return original(size, al, tag);
0406   }
0407 
0408   auto& reg = AllocMonitorRegistry::instance();
0409   return reg.allocCalled(
0410       size, [size, al, &tag]() { return original(size, al, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0411 }  //_ZnwmSt11align_val_tRKSt9nothrow_t
0412 
0413 void* operator new[](std::size_t size, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0414   CMS_SA_ALLOW static const auto original =
0415       get<void* (*)(std::size_t, std::align_val_t, const std::nothrow_t&) noexcept>(
0416           "_ZnamSt11align_val_tRKSt9nothrow_t");
0417 
0418   if (not alloc_monitor_running_state()) {
0419     return original(size, al, tag);
0420   }
0421 
0422   auto& reg = AllocMonitorRegistry::instance();
0423   return reg.allocCalled(
0424       size, [size, al, &tag]() { return original(size, al, tag); }, [](auto ret) { return malloc_usable_size(ret); });
0425 }  //_ZnamSt11align_val_tRKSt9nothrow_t
0426 
0427 void operator delete(void* ptr, std::align_val_t al) noexcept {
0428   CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::align_val_t) noexcept>("_ZdlPvSt11align_val_t");
0429 
0430   if (not alloc_monitor_running_state()) {
0431     original(ptr, al);
0432     return;
0433   }
0434   auto& reg = AllocMonitorRegistry::instance();
0435   reg.deallocCalled(ptr, [al](auto ptr) { original(ptr, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0436 }  //_ZdlPvSt11align_val_t
0437 
0438 void operator delete[](void* ptr, std::align_val_t al) noexcept {
0439   CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::align_val_t) noexcept>("_ZdaPvSt11align_val_t");
0440 
0441   if (not alloc_monitor_running_state()) {
0442     original(ptr, al);
0443     return;
0444   }
0445   auto& reg = AllocMonitorRegistry::instance();
0446   reg.deallocCalled(ptr, [al](auto ptr) { original(ptr, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0447 }  //_ZdaPvSt11align_val_t
0448 
0449 void operator delete(void* ptr, std::size_t sz) noexcept {
0450   CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::size_t) noexcept>("_ZdlPvm");
0451 
0452   if (not alloc_monitor_running_state()) {
0453     original(ptr, sz);
0454     return;
0455   }
0456   auto& reg = AllocMonitorRegistry::instance();
0457   reg.deallocCalled(ptr, [sz](auto ptr) { original(ptr, sz); }, [](auto ptr) { return malloc_usable_size(ptr); });
0458 }  //_ZdlPvm
0459 
0460 void operator delete[](void* ptr, std::size_t sz) noexcept {
0461   CMS_SA_ALLOW static const auto original = get<void (*)(void*, std::size_t) noexcept>("_ZdaPvm");
0462 
0463   if (not alloc_monitor_running_state()) {
0464     original(ptr, sz);
0465     return;
0466   }
0467   auto& reg = AllocMonitorRegistry::instance();
0468   reg.deallocCalled(ptr, [sz](auto ptr) { original(ptr, sz); }, [](auto ptr) { return malloc_usable_size(ptr); });
0469 }  //_ZdaPvm
0470 
0471 void operator delete(void* ptr, std::size_t sz, std::align_val_t al) noexcept {
0472   CMS_SA_ALLOW static const auto original =
0473       get<void (*)(void*, std::size_t, std::align_val_t) noexcept>("_ZdlPvmSt11align_val_t");
0474 
0475   if (not alloc_monitor_running_state()) {
0476     original(ptr, sz, al);
0477     return;
0478   }
0479   auto& reg = AllocMonitorRegistry::instance();
0480   reg.deallocCalled(
0481       ptr, [sz, al](auto ptr) { original(ptr, sz, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0482 }  //_ZdlPvmSt11align_val_t
0483 
0484 void operator delete[](void* ptr, std::size_t sz, std::align_val_t al) noexcept {
0485   CMS_SA_ALLOW static const auto original =
0486       get<void (*)(void*, std::size_t, std::align_val_t) noexcept>("_ZdaPvmSt11align_val_t");
0487 
0488   if (not alloc_monitor_running_state()) {
0489     original(ptr, sz, al);
0490     return;
0491   }
0492   auto& reg = AllocMonitorRegistry::instance();
0493   reg.deallocCalled(
0494       ptr, [sz, al](auto ptr) { original(ptr, sz, al); }, [](auto ptr) { return malloc_usable_size(ptr); });
0495 }  //_ZdaPvmSt11align_val_t
0496 
0497 void operator delete(void* ptr, const std::nothrow_t& tag) noexcept {
0498   CMS_SA_ALLOW static const auto original =
0499       get<void (*)(void*, const std::nothrow_t&) noexcept>("_ZdlPvRKSt9nothrow_t");
0500 
0501   if (not alloc_monitor_running_state()) {
0502     original(ptr, tag);
0503     return;
0504   }
0505   auto& reg = AllocMonitorRegistry::instance();
0506   reg.deallocCalled(ptr, [&tag](auto ptr) { original(ptr, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0507 }  //_ZdlPvRKSt9nothrow_t
0508 
0509 void operator delete[](void* ptr, const std::nothrow_t& tag) noexcept {
0510   CMS_SA_ALLOW static const auto original =
0511       get<void (*)(void*, const std::nothrow_t&) noexcept>("_ZdaPvRKSt9nothrow_t");
0512 
0513   if (not alloc_monitor_running_state()) {
0514     original(ptr, tag);
0515     return;
0516   }
0517   auto& reg = AllocMonitorRegistry::instance();
0518   reg.deallocCalled(ptr, [&tag](auto ptr) { original(ptr, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0519 }  //_ZdaPvRKSt9nothrow_t
0520 
0521 void operator delete(void* ptr, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0522   CMS_SA_ALLOW static const auto original =
0523       get<void (*)(void*, std::align_val_t, const std::nothrow_t&) noexcept>("_ZdlPvSt11align_val_tRKSt9nothrow_t");
0524 
0525   if (not alloc_monitor_running_state()) {
0526     original(ptr, al, tag);
0527     return;
0528   }
0529   auto& reg = AllocMonitorRegistry::instance();
0530   reg.deallocCalled(
0531       ptr, [al, &tag](auto ptr) { original(ptr, al, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0532 }  //_ZdlPvSt11align_val_tRKSt9nothrow_t
0533 
0534 void operator delete[](void* ptr, std::align_val_t al, const std::nothrow_t& tag) noexcept {
0535   CMS_SA_ALLOW static const auto original =
0536       get<void (*)(void*, std::align_val_t, const std::nothrow_t&) noexcept>("_ZdaPvSt11align_val_tRKSt9nothrow_t");
0537 
0538   if (not alloc_monitor_running_state()) {
0539     original(ptr, al, tag);
0540     return;
0541   }
0542   auto& reg = AllocMonitorRegistry::instance();
0543   reg.deallocCalled(
0544       ptr, [al, &tag](auto ptr) { original(ptr, al, tag); }, [](auto ptr) { return malloc_usable_size(ptr); });
0545 }  //_ZdaPvSt11align_val_tRKSt9nothrow_t
0546 
0547 #endif