File indexing completed on 2022-06-11 03:13:10
0001 #include "FWCore/Utilities/interface/RootHandlers.h"
0002
0003 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
0004 #include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0005 #include "DataFormats/Common/interface/RefCoreStreamer.h"
0006 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0007 #include "FWCore/MessageLogger/interface/ELseverityLevel.h"
0008 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/PluginManager/interface/PluginCapabilities.h"
0011 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0012 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0013 #include "FWCore/Utilities/interface/EDMException.h"
0014 #include "FWCore/Reflection/interface/TypeWithDict.h"
0015 #include "FWCore/Utilities/interface/UnixSignalHandlers.h"
0016 #include "FWCore/ServiceRegistry/interface/CurrentModuleOnThread.h"
0017 #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
0018
0019 #include "oneapi/tbb/concurrent_unordered_set.h"
0020 #include "oneapi/tbb/task.h"
0021 #include "oneapi/tbb/task_scheduler_observer.h"
0022 #include "oneapi/tbb/global_control.h"
0023 #include <memory>
0024
0025 #include <thread>
0026 #include <sys/wait.h>
0027 #include <sstream>
0028 #include <cstring>
0029 #include <poll.h>
0030 #include <atomic>
0031 #include <algorithm>
0032 #include <vector>
0033 #include <string>
0034 #include <array>
0035
0036
0037
0038
0039 #ifdef __linux__
0040 #include <syscall.h>
0041 #endif
0042
0043 #include "TROOT.h"
0044 #include "TError.h"
0045 #include "TFile.h"
0046 #include "TInterpreter.h"
0047 #include "TH1.h"
0048 #include "TSystem.h"
0049 #include "TUnixSystem.h"
0050 #include "TTree.h"
0051 #include "TVirtualStreamerInfo.h"
0052
0053 #include "TClassTable.h"
0054
0055 #include <memory>
0056
namespace {

  // Capacity, in bytes, of each fixed-size text buffer that holds one
  // "Module: ..." line.  It sizes the elements of
  // InitRootHandlers::moduleListBuffers_ and bounds every strlcpy/strlcat
  // performed by the signal handlers in this file.
  constexpr std::size_t moduleBufferSize{128};

}  // namespace
0062
0063 namespace edm {
0064 class ConfigurationDescriptions;
0065 class ParameterSet;
0066 class ActivityRegistry;
0067
0068 namespace service {
0069 class InitRootHandlers : public RootHandlers {
0070 friend int cmssw_stacktrace(void*);
0071
0072 public:
0073 class ThreadTracker : public oneapi::tbb::task_scheduler_observer {
0074 public:
0075 typedef oneapi::tbb::concurrent_unordered_set<pthread_t> Container_type;
0076
0077 ThreadTracker() : oneapi::tbb::task_scheduler_observer() { observe(); }
0078 ~ThreadTracker() override = default;
0079
0080 void on_scheduler_entry(bool) override {
0081
0082
0083
0084
0085
0086 edm::CurrentModuleOnThread::getCurrentModuleOnThread();
0087 threadIDs_.insert(pthread_self());
0088 }
0089 void on_scheduler_exit(bool) override {}
0090 const Container_type& IDs() { return threadIDs_; }
0091
0092 private:
0093 Container_type threadIDs_;
0094 };
0095
0096 explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
0097 ~InitRootHandlers() override;
0098
0099 static void fillDescriptions(ConfigurationDescriptions& descriptions);
0100 static void stacktraceFromThread();
0101 static const ThreadTracker::Container_type& threadIDs() {
0102 static const ThreadTracker::Container_type empty;
0103 if (threadTracker_) {
0104 return threadTracker_->IDs();
0105 }
0106 return empty;
0107 }
0108 static int stackTracePause() { return stackTracePause_; }
0109
0110 static std::vector<std::array<char, moduleBufferSize>> moduleListBuffers_;
0111 static std::atomic<std::size_t> nextModule_, doneModules_;
0112
0113 private:
0114 static char const* const* getPstackArgv();
0115 void enableWarnings_() override;
0116 void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override;
0117 void willBeUsingThreads() override;
0118
0119 void cachePidInfo();
0120 static void stacktraceHelperThread();
0121
0122 static constexpr int pidStringLength_ = 200;
0123 static char pidString_[pidStringLength_];
0124 static char const* const pstackArgv_[];
0125 static int parentToChild_[2];
0126 static int childToParent_[2];
0127 static std::unique_ptr<std::thread> helperThread_;
0128 static std::unique_ptr<ThreadTracker> threadTracker_;
0129 static int stackTracePause_;
0130
0131 bool unloadSigHandler_;
0132 bool resetErrHandler_;
0133 bool loadAllDictionaries_;
0134 bool autoLibraryLoader_;
0135 bool interactiveDebug_;
0136 std::shared_ptr<const void> sigBusHandler_;
0137 std::shared_ptr<const void> sigSegvHandler_;
0138 std::shared_ptr<const void> sigIllHandler_;
0139 std::shared_ptr<const void> sigTermHandler_;
0140 std::shared_ptr<const void> sigAbrtHandler_;
0141 };
0142
// Overload selected for InitRootHandlers pointers; it unconditionally returns true.
// NOTE(review): presumably consulted by the service framework to decide that a
// single instance is shared by the whole process -- confirm against the caller.
0143 inline bool isProcessWideService(InitRootHandlers const*) { return true; }
0144
0145 }
0146 }
0147
0148 namespace edm {
0149 namespace service {
0150 int cmssw_stacktrace(void*);
0151 }
0152 }
0153
0154 namespace {
// Per-thread severity threshold.  RootErrorHandlerImpl downgrades to kInfo any
// ROOT message whose mapped severity is <= this value.  It is written by
// InitRootHandlers::ignoreWarnings_() and reset to kInfo by enableWarnings_().
0155 thread_local edm::RootHandlers::SeverityLevel s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo;
0156
// Compile-time switch: when true, RootErrorHandlerImpl treats every ROOT
// message as kInfo regardless of its level.
0157 constexpr bool s_ignoreEverything = false;
0158
/// Reports whether any entry of `substrs` occurs as a substring of `search`.
///
/// @param search   text to inspect
/// @param substrs  NUL-terminated candidate substrings
/// @return true on the first candidate found inside `search`, false if none
///         matches (including when `substrs` is empty)
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  for (const char* const candidate : substrs) {
    if (search.find(candidate) != std::string::npos) {
      return true;
    }
  }
  return false;
}
0165
0166
// Message fragments: a ROOT message containing any of these is downgraded to
// kInfo by RootErrorHandlerImpl.
constexpr std::array<const char* const, 9> in_message = {{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "oneapi::tbb::global_control is limiting",
}};

// Location fragments: a ROOT message whose location contains any of these is
// downgraded to kInfo by RootErrorHandlerImpl.
constexpr std::array<const char* const, 7> in_location = {{
    "Fit",
    "TDecompChol::Solve",
    "THistPainter::PaintInit",
    "TUnixSystem::SetDisplay",
    "TGClient::GetFontByName",
    "Inverter::Dinv",
    "RTaskArenaWrapper",
}};

// Message fragments that are reported through LogError("Root_Error") but are
// then treated as kInfo, so they never raise an exception.
constexpr std::array<const char* const, 3> in_message_print_error = {{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
0190
0191 void RootErrorHandlerImpl(int level, char const* location, char const* message) {
0192 bool die = false;
0193
0194
0195
0196 edm::RootHandlers::SeverityLevel el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0197
0198 if (level >= kFatal) {
0199 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0200 } else if (level >= kSysError) {
0201 el_severity = edm::RootHandlers::SeverityLevel::kSysError;
0202 } else if (level >= kError) {
0203 el_severity = edm::RootHandlers::SeverityLevel::kError;
0204 } else if (level >= kWarning) {
0205 el_severity = edm::RootHandlers::SeverityLevel::kWarning;
0206 }
0207
0208 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
0209 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0210 }
0211
0212
0213
0214
0215 std::string el_location = "@SUB=?";
0216 if (location != nullptr)
0217 el_location = std::string("@SUB=") + std::string(location);
0218
0219 std::string el_message = "?";
0220 if (message != nullptr)
0221 el_message = message;
0222
0223
0224
0225
0226
0227
0228
0229 std::string el_identifier = "ROOT";
0230
0231 std::string precursor("class ");
0232 size_t index1 = el_message.find(precursor);
0233 if (index1 != std::string::npos) {
0234 size_t index2 = index1 + precursor.length();
0235 size_t index3 = el_message.find_first_of(" :", index2);
0236 if (index3 != std::string::npos) {
0237 size_t substrlen = index3 - index2;
0238 el_identifier += "-";
0239 el_identifier += el_message.substr(index2, substrlen);
0240 }
0241 } else {
0242 index1 = el_location.find("::");
0243 if (index1 != std::string::npos) {
0244 el_identifier += "/";
0245 el_identifier += el_location.substr(0, index1);
0246 }
0247 }
0248
0249
0250
0251 if ((el_location.find("TBranchElement::Fill") != std::string::npos) &&
0252 (el_message.find("fill branch") != std::string::npos) && (el_message.find("address") != std::string::npos) &&
0253 (el_message.find("not set") != std::string::npos)) {
0254 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0255 }
0256
0257 if ((el_message.find("Tree branches") != std::string::npos) &&
0258 (el_message.find("different numbers of entries") != std::string::npos)) {
0259 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0260 }
0261
0262
0263
0264 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
0265 (level < kError and (el_location.find("CINTTypedefBuilder::Setup") != std::string::npos) and
0266 (el_message.find("possible entries are in use!") != std::string::npos))) {
0267 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0268 }
0269
0270
0271
0272 bool alreadyPrinted = false;
0273 if (find_if_string(el_message, in_message_print_error)) {
0274 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0275 edm::LogError("Root_Error") << el_location << el_message;
0276 alreadyPrinted = true;
0277 }
0278
0279 if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0280
0281 die = false;
0282 } else {
0283 die = true;
0284 }
0285
0286
0287
0288
0289
0290
0291 if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
0292 std::ostringstream sstr;
0293 sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
0294 edm::Exception except(edm::errors::FatalRootError, sstr.str());
0295 except.addAdditionalInfo(except.message());
0296 except.clearMessage();
0297 throw except;
0298 }
0299
0300
0301
0302
0303 if (!alreadyPrinted) {
0304 if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
0305 edm::LogError("Root_Fatal") << el_location << el_message;
0306 } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
0307 edm::LogError("Root_Severe") << el_location << el_message;
0308 } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
0309 edm::LogError("Root_Error") << el_location << el_message;
0310 } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
0311 edm::LogWarning("Root_Warning") << el_location << el_message;
0312 } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0313 edm::LogInfo("Root_Information") << el_location << el_message;
0314 }
0315 }
0316 }
0317
// Four-argument adapter registered with SetErrorHandler() by the
// InitRootHandlers constructor.  The bool argument is ignored; the other three
// are forwarded unchanged to RootErrorHandlerImpl, so any exception thrown
// there propagates through this function.
0318 void RootErrorHandler(int level, bool, char const* location, char const* message) {
0319 RootErrorHandlerImpl(level, location, message);
0320 }
0321
0322 extern "C" {
// Restores the default disposition of the five signals this file installs
// custom handlers for: SIGILL, SIGSEGV, SIGBUS, SIGTERM and SIGABRT.
void set_default_signals() {
  constexpr int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGABRT};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
0330
// Writes the whole NUL-terminated string `text` to `fd`, retrying after short
// writes and after EINTR.
//
// Returns 0 once every byte has been written, or -errno on the first error
// other than EINTR.  Uses only strlen() and write().
static int full_write(int fd, const char* text) {
  const char* cursor = text;
  size_t remaining = strlen(text);
  while (remaining > 0) {
    const ssize_t n = write(fd, cursor, remaining);
    if (n == -1) {
      if (errno != EINTR) {
        return -errno;
      }
      continue;  // interrupted before anything was written: try again
    }
    remaining -= n;
    cursor += n;
  }
  return 0;
}
0349
// Reads exactly `len` bytes from `fd` into `inbuf`.
//
// timeout_s < 0  : the descriptor's flags are left untouched and read() is
//                  simply retried until `len` bytes have arrived.
// timeout_s >= 0 : the descriptor is switched to non-blocking mode for the
//                  duration of the call (its original flags are restored on
//                  every exit path) and poll() is used to wait for data until
//                  the deadline.
//
// Returns 0 on success, -ETIMEDOUT when the deadline passes, -EPIPE when the
// peer closed the descriptor before `len` bytes arrived (read() returned 0),
// or -errno for any other failure.  EINTR / EAGAIN / EWOULDBLOCK are retried.
static int full_read(int fd, char* inbuf, size_t len, int timeout_s = -1) {
  char* buf = inbuf;
  size_t count = len;
  const std::chrono::time_point<std::chrono::steady_clock> end_time =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0) {
    // Pretend the descriptor is already non-blocking so that its real flags
    // are neither queried nor modified below.
    flags = O_NONBLOCK;
  } else if (-1 == (flags = fcntl(fd, F_GETFL))) {
    return -errno;
  }
  const bool restoreFlags = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (restoreFlags && -1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
    return -errno;
  }

  // Puts the descriptor back into its original mode and hands back `rc`.
  // Callers pass -errno as the argument, so errno is captured before fcntl()
  // can overwrite it.
  const auto finish = [fd, flags, restoreFlags](int rc) {
    if (restoreFlags) {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (count) {
    if (timeout_s >= 0) {
      struct pollfd poll_info {
        fd, POLLIN, 0
      };
      const int ms_remaining = static_cast<int>(
          std::chrono::duration_cast<std::chrono::milliseconds>(end_time - std::chrono::steady_clock::now()).count());
      if (ms_remaining > 0) {
        const int rc = poll(&poll_info, 1, ms_remaining);
        if (rc < 0) {
          if (errno == EINTR || errno == EAGAIN) {
            continue;
          }
          return finish(-errno);
        }
        if (rc == 0) {
          return finish(-ETIMEDOUT);
        }
      } else if (ms_remaining < 0) {
        return finish(-ETIMEDOUT);
      }
      // ms_remaining == 0: make one last non-blocking read attempt below.
    }

    const ssize_t complete = read(fd, buf, count);
    if (complete == -1) {
      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
        continue;
      }
      return finish(-errno);
    }
    if (complete == 0) {
      // End of file: the write end is closed, so the remaining bytes can never
      // arrive.  Without this check the loop would spin without progress.
      return finish(-EPIPE);
    }
    count -= complete;
    buf += complete;
  }
  return finish(0);
}
0419
// Convenience wrapper: writes `text` to file descriptor 2 via full_write() and
// returns full_write()'s result (0 on success, -errno on failure).
0420 static int full_cerr_write(const char* text) { return full_write(2, text); }
0421
0422
0423
0424
0425
// Signals used to park the other threads while a stack trace is taken and to
// wake them afterwards.  Real-time signals are used when SIGRTMAX exists;
// otherwise SIGINFO / SIGALRM are used when SIGINFO exists.  When neither is
// available both macros stay undefined and the pause/resume code is compiled
// out by the #ifdef guards in the handlers.
// RESUME_SIGNAL is parenthesized so that it expands correctly inside any
// surrounding expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
0433
0434
// Deliberately empty handler that sig_dostack_then_abort installs for
// RESUME_SIGNAL.
// NOTE(review): presumably its only job is to let delivery of RESUME_SIGNAL
// interrupt the sleep() in sig_pause_for_stacktrace -- confirm.
0435 void sig_resume_handler(int sig, siginfo_t*, void*) {}
0436
0437
0438 void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
0439 using namespace edm::service;
0440
0441 #ifdef RESUME_SIGNAL
0442 sigset_t sigset;
0443 sigemptyset(&sigset);
0444 sigaddset(&sigset, RESUME_SIGNAL);
0445 pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
0446 #endif
0447
0448 sleep(InitRootHandlers::stackTracePause());
0449
0450 if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
0451 auto i = InitRootHandlers::nextModule_++;
0452 if (i < InitRootHandlers::moduleListBuffers_.size()) {
0453 char* buff = InitRootHandlers::moduleListBuffers_[i].data();
0454
0455 strlcpy(buff, "\nModule: ", moduleBufferSize);
0456 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0457 strlcat(buff,
0458 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0459 moduleBufferSize);
0460 strlcat(buff, ":", moduleBufferSize);
0461 strlcat(buff,
0462 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0463 moduleBufferSize);
0464 } else {
0465 strlcat(buff, "none", moduleBufferSize);
0466 }
0467 ++edm::service::InitRootHandlers::doneModules_;
0468 }
0469 }
0470 }
0471
// Handler that the InitRootHandlers constructor installs for SIGBUS, SIGSEGV,
// SIGILL, SIGTERM and SIGABRT.  In order, it:
//   1. sends PAUSE_SIGNAL to every other tracked thread (only when a positive
//      pause time is configured and more than one thread is tracked),
//   2. prints the signal name and requests a stack trace through
//      InitRootHandlers::stacktraceFromThread(),
//   3. sends RESUME_SIGNAL to the other threads,
//   4. prints the module running on this thread and the module lines the
//      other threads left in moduleListBuffers_,
//   5. restores the default disposition and re-raises the signal (or aborts).
0472 void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
0473 using namespace edm::service;
0474
0475 const auto& tids = InitRootHandlers::threadIDs();
0476
0477 const auto self = pthread_self();
0478 #ifdef PAUSE_SIGNAL
0479 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0480
// Install the pause handler for PAUSE_SIGNAL.
0481 struct sigaction act;
0482 act.sa_sigaction = sig_pause_for_stacktrace;
0483 act.sa_flags = 0;
0484 sigemptyset(&act.sa_mask);
0485 sigaction(PAUSE_SIGNAL, &act, nullptr);
0486
0487
// Make sure PAUSE_SIGNAL is not blocked.
0488 sigset_t pausesigset;
0489 sigemptyset(&pausesigset);
0490 sigaddset(&pausesigset, PAUSE_SIGNAL);
0491 sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
0492
0493
// Signal every tracked thread except the one running this handler.
0494 for (auto id : tids) {
0495 if (self != id) {
0496 pthread_kill(id, PAUSE_SIGNAL);
0497 }
0498 }
0499
0500 #ifdef RESUME_SIGNAL
0501
// Reuse `act` (same flags and mask) to install the empty resume handler.
0502 act.sa_sigaction = sig_resume_handler;
0503 sigaction(RESUME_SIGNAL, &act, nullptr);
0504 #endif
0505 }
0506 #endif
0507
// Human-readable name for the signal; stays "unknown" for anything else.
0508 const char* signalname = "unknown";
0509 switch (sig) {
0510 case SIGBUS: {
0511 signalname = "bus error";
0512 break;
0513 }
0514 case SIGSEGV: {
0515 signalname = "segmentation violation";
0516 break;
0517 }
0518 case SIGILL: {
0519 signalname = "illegal instruction";
0520 break;
0521 }
0522 case SIGTERM: {
0523 signalname = "external termination request";
0524 break;
0525 }
0526 case SIGABRT: {
0527 signalname = "abort signal";
0528 break;
0529 }
0530 default:
0531 break;
0532 }
0533 full_cerr_write("\n\nA fatal system signal has occurred: ");
0534 full_cerr_write(signalname);
0535 full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
0536
// Ask the helper thread to produce the trace; this waits for its reply.
0537 edm::service::InitRootHandlers::stacktraceFromThread();
0538
0539
0540
0541
0542
0543 #ifdef RESUME_SIGNAL
// Wake the other threads; `notified` counts the successful pthread_kill calls
// and is the number of module lines waited for further down.
0544 std::size_t notified = 0;
0545 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0546 for (auto id : tids) {
0547 if (self != id) {
0548 if (pthread_kill(id, RESUME_SIGNAL) == 0)
0549 ++notified;
0550 }
0551 }
0552 }
0553 #endif
0554
0555 full_cerr_write("\nCurrent Modules:\n");
0556
0557
0558
0559
0560
0561
// Report the module of the crashing thread itself.  Threads that were never
// recorded by the ThreadTracker are reported as "non-CMSSW".
0562 if (tids.count(self) > 0) {
0563 char buff[moduleBufferSize] = "\nModule: ";
0564 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0565 strlcat(buff,
0566 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0567 moduleBufferSize);
0568 strlcat(buff, ":", moduleBufferSize);
0569 strlcat(buff,
0570 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0571 moduleBufferSize);
0572 } else {
0573 strlcat(buff, "none", moduleBufferSize);
0574 }
0575 strlcat(buff, " (crashed)", moduleBufferSize);
0576 full_cerr_write(buff);
0577 } else {
0578 full_cerr_write("\nModule: non-CMSSW (crashed)");
0579 }
0580
0581 #ifdef PAUSE_SIGNAL
0582
0583
// Give the resumed threads a bounded time (at most 999 sleeps of 1000 ns each)
// to finish writing their module lines, then print however many are done.
0584 if (InitRootHandlers::doneModules_.is_lock_free()) {
0585 int spincount = 0;
0586 timespec t = {0, 1000};
0587 while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) {
0588 nanosleep(&t, nullptr);
0589 }
0590 for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
0591 full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
0592 }
0593 }
0594 #endif
0595
0596 full_cerr_write("\n\nA fatal system signal has occurred: ");
0597 full_cerr_write(signalname);
0598 full_cerr_write("\n");
0599
0600
0601
// For the five handled signals, restore the default action and re-raise the
// same signal; for any other signal reset all five and abort.
0602 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT)) {
0603 signal(sig, SIG_DFL);
0604 raise(sig);
0605 } else {
0606 set_default_signals();
0607 ::abort();
0608 }
0609 }
0610
// Handler swapped in for SIGBUS, SIGSEGV, SIGILL and SIGTERM by the deleters of
// the sig*Handler_ members, i.e. once the InitRootHandlers service is being
// destroyed.  It prints a short notice and terminates without a stack trace.
0611 void sig_abort(int sig, siginfo_t*, void*) {
0612 full_cerr_write("\n\nFatal system signal has occurred during exit\n");
0613
0614
// Restore the default action for this signal and deliver it again.
0615 signal(sig, SIG_DFL);
0616 raise(sig);
0617
0618
// Fallback in case raise() returned: reset all handled signals, wait ten
// seconds, then abort.
0619 set_default_signals();
0620 ::sleep(10);
0621 ::abort();
0622 }
0623 }
0624 }
0625
0626 namespace edm {
0627 namespace service {
0628
0629
0630
0631
0632
0633
0634
0635 static void cmssw_stacktrace_fork();
0636
0637 void InitRootHandlers::stacktraceHelperThread() {
0638 int toParent = childToParent_[1];
0639 int fromParent = parentToChild_[0];
0640 char buf[2];
0641 buf[1] = '\0';
0642
0643 while (true) {
0644 int result = full_read(fromParent, buf, 1);
0645 if (result < 0) {
0646
0647
0648
0649 set_default_signals();
0650 close(toParent);
0651 full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
0652 full_cerr_write(strerror(-result));
0653 full_cerr_write("\n");
0654 ::abort();
0655 }
0656 if (buf[0] == '1') {
0657 set_default_signals();
0658 cmssw_stacktrace_fork();
0659 full_write(toParent, buf);
0660 } else if (buf[0] == '2') {
0661
0662
0663 close(toParent);
0664 close(fromParent);
0665 toParent = childToParent_[1];
0666 fromParent = parentToChild_[0];
0667 } else if (buf[0] == '3') {
0668 break;
0669 } else {
0670 set_default_signals();
0671 close(toParent);
0672 full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
0673 full_cerr_write(buf);
0674 full_cerr_write("\n");
0675 ::abort();
0676 }
0677 }
0678 }
0679
0680 void InitRootHandlers::stacktraceFromThread() {
0681 int result = full_write(parentToChild_[1], "1");
0682 if (result < 0) {
0683 full_cerr_write("\n\nAttempt to request stacktrace failed: ");
0684 full_cerr_write(strerror(-result));
0685 full_cerr_write("\n");
0686 return;
0687 }
0688 char buf[2];
0689 buf[1] = '\0';
0690 if ((result = full_read(childToParent_[0], buf, 1, 5 * 60)) < 0) {
0691 full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
0692 if (result == -ETIMEDOUT) {
0693 full_cerr_write("timed out waiting for GDB to complete.");
0694 } else {
0695 full_cerr_write(strerror(-result));
0696 }
0697 full_cerr_write("\n");
0698 return;
0699 }
0700 }
0701
0702 void cmssw_stacktrace_fork() {
0703 char child_stack[4 * 1024];
0704 char* child_stack_ptr = child_stack + 4 * 1024;
0705
0706
0707
0708
0709 int pid =
0710 #ifdef __linux__
0711 clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM | CLONE_FS | SIGCHLD, nullptr);
0712 #else
0713 fork();
0714 if (child_stack_ptr) {
0715 }
0716 if (pid == 0) {
0717 edm::service::cmssw_stacktrace(nullptr);
0718 }
0719 #endif
0720 if (pid == -1) {
0721 full_cerr_write("(Attempt to perform stack dump failed.)\n");
0722 } else {
0723 int status;
0724 if (waitpid(pid, &status, 0) == -1) {
0725 full_cerr_write("(Failed to wait on stack dump output.)\n");
0726 }
0727 if (status) {
0728 full_cerr_write("(GDB stack trace failed unexpectedly)\n");
0729 }
0730 }
0731 }
0732
// Entry point of the stack-dump child created by cmssw_stacktrace_fork().
// It resets the handled signals to their defaults and replaces the process
// image with /bin/sh, using the argv prepared in InitRootHandlers::pstackArgv_
// (whose command string is formatted by cachePidInfo()).  On Linux execve is
// invoked through syscall() directly.  The function only continues past the
// exec call if that call failed, in which case it aborts.
0733 int cmssw_stacktrace(void* ) {
0734 set_default_signals();
0735
0736 char const* const* argv = edm::service::InitRootHandlers::getPstackArgv();
0737
0738
0739
0740 #ifdef __linux__
0741 syscall(SYS_execve, "/bin/sh", argv, __environ);
0742 #else
0743 execv("/bin/sh", argv);
0744 #endif
0745 ::abort();
0746 return 1;
0747 }
0748
// Definitions of the static data shared between the service, the helper
// thread and the signal handlers.
// argv[0] and argv[1] handed to /bin/sh by cmssw_stacktrace().
0749 static constexpr char pstackName[] = "(CMSSW stack trace helper)";
0750 static constexpr char dashC[] = "-c";
// Zero-initialized; filled with the shell command by cachePidInfo().
0751 char InitRootHandlers::pidString_[InitRootHandlers::pidStringLength_] = {};
// Null-terminated argv: { name, "-c", <command string>, nullptr }.
0752 char const* const InitRootHandlers::pstackArgv_[] = {pstackName, dashC, InitRootHandlers::pidString_, nullptr};
// Pipe descriptors start at -1 until cachePidInfo() creates the pipes.
0753 int InitRootHandlers::parentToChild_[2] = {-1, -1};
0754 int InitRootHandlers::childToParent_[2] = {-1, -1};
0755 std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
0756 std::unique_ptr<InitRootHandlers::ThreadTracker> InitRootHandlers::threadTracker_;
// Default pause of 300 seconds; overwritten in the constructor from the
// "StackTracePauseTime" parameter.
0757 int InitRootHandlers::stackTracePause_ = 300;
// Starts empty; resized in the constructor's preallocate callback.
0758 std::vector<std::array<char, moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
0759 std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
0760
// Constructs the service from its untracked configuration parameters and
// applies the requested process-wide ROOT and signal settings.
//
// pset: supplies UnloadRootSigHandler, ResetRootErrHandler, LoadAllDictionaries,
//       AutoLibraryLoader, InteractiveDebug, StackTracePauseTime, AbortOnSignal,
//       DebugLevel and EnableIMT.
// iReg: used to register the post-end-job and preallocate callbacks.
0761 InitRootHandlers::InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg)
0762 : RootHandlers(),
0763 unloadSigHandler_(pset.getUntrackedParameter<bool>("UnloadRootSigHandler")),
0764 resetErrHandler_(pset.getUntrackedParameter<bool>("ResetRootErrHandler")),
0765 loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
// Auto-loading is forced on whenever LoadAllDictionaries is set.
0766 autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool>("AutoLibraryLoader")),
0767 interactiveDebug_(pset.getUntrackedParameter<bool>("InteractiveDebug")) {
0768 stackTracePause_ = pset.getUntrackedParameter<int>("StackTracePauseTime");
0769
// Create the thread tracker once and stop it observing after the job ends.
0770 if (not threadTracker_) {
0771 threadTracker_ = std::make_unique<ThreadTracker>();
0772 iReg.watchPostEndJob([]() {
0773 if (threadTracker_) {
0774 threadTracker_->observe(false);
0775 }
0776 });
0777 }
0778
0779 if (unloadSigHandler_) {
0780
// Call gSystem->ResetSignal() for each of the signals listed below.
0781 gSystem->ResetSignal(kSigChild);
0782 gSystem->ResetSignal(kSigBus);
0783 gSystem->ResetSignal(kSigSegmentationViolation);
0784 gSystem->ResetSignal(kSigIllegalInstruction);
0785 gSystem->ResetSignal(kSigSystem);
0786 gSystem->ResetSignal(kSigPipe);
0787 gSystem->ResetSignal(kSigAlarm);
0788 gSystem->ResetSignal(kSigUrgent);
0789 gSystem->ResetSignal(kSigFloatingException);
0790 gSystem->ResetSignal(kSigWindowChanged);
0791 } else if (pset.getUntrackedParameter<bool>("AbortOnSignal")) {
// Prepare the gdb command, the pipes and the helper thread up front.
0792 cachePidInfo();
0793
0794
0795
// Install sig_dostack_then_abort for the fatal signals.  Each sig*Handler_
// member is a null shared_ptr whose deleter runs when the member is destroyed;
// for SIGBUS/SIGSEGV/SIGILL/SIGTERM it swaps in sig_abort, and for SIGABRT it
// restores the default action.
0796 gSystem->ResetSignal(kSigBus);
0797 gSystem->ResetSignal(kSigSegmentationViolation);
0798 gSystem->ResetSignal(kSigIllegalInstruction);
0799 installCustomHandler(SIGBUS, sig_dostack_then_abort);
0800 sigBusHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGBUS, sig_abort); });
0801 installCustomHandler(SIGSEGV, sig_dostack_then_abort);
0802 sigSegvHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGSEGV, sig_abort); });
0803 installCustomHandler(SIGILL, sig_dostack_then_abort);
0804 sigIllHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGILL, sig_abort); });
0805 installCustomHandler(SIGTERM, sig_dostack_then_abort);
0806 sigTermHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGTERM, sig_abort); });
0807 installCustomHandler(SIGABRT, sig_dostack_then_abort);
0808 sigAbrtHandler_ = std::shared_ptr<const void>(nullptr, [](void*) {
0809 signal(SIGABRT, SIG_DFL);
0810 });
0811 }
0812
// Grow (never shrink) the module-name buffers to one per thread.
0813 iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds) {
0814 if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
0815 moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
0816 }
0817 });
0818
0819 if (resetErrHandler_) {
0820
// Route ROOT diagnostics through RootErrorHandler defined in this file.
0821 SetErrorHandler(RootErrorHandler);
0822 }
0823
0824
0825 if (autoLibraryLoader_) {
0826 gInterpreter->SetClassAutoloading(1);
0827 }
0828
0829
// Process-wide ROOT settings applied unconditionally.
0830 TTree::SetMaxTreeSize(kMaxLong64);
0831 TH1::AddDirectory(kFALSE);
0832
0833
0834
0835 setRefCoreStreamerInTClass();
0836
0837
// If no dictionary is found for vector<vector<unsigned int>>, look the type up
// by name.  NOTE(review): presumably the lookup triggers loading of the
// dictionary -- confirm.
0838 if (!hasDictionary(typeid(std::vector<std::vector<unsigned int>>))) {
0839 TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
0840 }
0841
// Only a positive DebugLevel overrides ROOT's gDebug.
0842 int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
0843 if (debugLevel > 0) {
0844 gDebug = debugLevel;
0845 }
0846
0847
// Enable ROOT implicit multithreading (unless already enabled), passing TBB's
// currently active max_allowed_parallelism value.
0848 bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
0849 if (imt && not ROOT::IsImplicitMTEnabled()) {
0850
0851
0852 ROOT::EnableImplicitMT(
0853 oneapi::tbb::global_control::active_value(oneapi::tbb::global_control::max_allowed_parallelism));
0854 }
0855 }
0856
0857 InitRootHandlers::~InitRootHandlers() {
0858
0859 TIter iter(gROOT->GetListOfFiles());
0860 TObject* obj = nullptr;
0861 while (nullptr != (obj = iter.Next())) {
0862 TFile* f = dynamic_cast<TFile*>(obj);
0863 if (f) {
0864
0865
0866 f->Close();
0867 iter = TIter(gROOT->GetListOfFiles());
0868 }
0869 }
0870
0871 threadTracker_.reset();
0872 }
0873
// Override of RootHandlers::willBeUsingThreads().  Applies three process-wide
// ROOT settings.
// NOTE(review): the purpose of each call is inferred from the ROOT function
// names only -- confirm against the ROOT documentation.
0874 void InitRootHandlers::willBeUsingThreads() {
0875
0876 ROOT::EnableThreadSafety();
0877
0878
// Passes false to TObject::SetObjectStat.
0879 TObject::SetObjectStat(false);
0880
0881
// Passes false to TVirtualStreamerInfo::Optimize.
0882 TVirtualStreamerInfo::Optimize(false);
0883 }
0884
// Declares the service's untracked configuration parameters, with their
// default values and help text, and registers the description under the label
// "InitRootHandlers".  The parameter names here are the ones read back in the
// constructor.
0885 void InitRootHandlers::fillDescriptions(ConfigurationDescriptions& descriptions) {
0886 ParameterSetDescription desc;
0887 desc.setComment("Centralized interface to ROOT.");
0888 desc.addUntracked<bool>("UnloadRootSigHandler", false)
0889 ->setComment("If True, signals are handled by this service, rather than by ROOT.");
0890 desc.addUntracked<bool>("ResetRootErrHandler", true)
0891 ->setComment(
0892 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
0893 desc.addUntracked<bool>("AutoLibraryLoader", true)
0894 ->setComment("If True, enables automatic loading of data dictionaries.");
0895 desc.addUntracked<bool>("LoadAllDictionaries", false)->setComment("If True, loads all ROOT dictionaries.");
0896 desc.addUntracked<bool>("EnableIMT", true)->setComment("If True, calls ROOT::EnableImplicitMT().");
0897 desc.addUntracked<bool>("AbortOnSignal", true)
0898 ->setComment(
0899 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
0900 "attempts to do a clean shutdown.");
0901 desc.addUntracked<bool>("InteractiveDebug", false)
0902 ->setComment(
0903 "If True, leave gdb attached to cmsRun after a crash; "
0904 "if False, attach gdb, print a stack trace, and quit gdb");
0905 desc.addUntracked<int>("DebugLevel", 0)->setComment("Sets ROOT's gDebug value.");
// The default of 300 matches the initial value of stackTracePause_.
0906 desc.addUntracked<int>("StackTracePauseTime", 300)
0907 ->setComment("Seconds to pause other threads during stack trace.");
0908 descriptions.add("InitRootHandlers", desc);
0909 }
0910
// Returns the null-terminated argv array (pstackArgv_) that cmssw_stacktrace()
// passes to /bin/sh.
0911 char const* const* InitRootHandlers::getPstackArgv() { return pstackArgv_; }
0912
// Resets the calling thread's ignore threshold (s_ignoreWarnings) to kInfo.
0913 void InitRootHandlers::enableWarnings_() { s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo; }
0914
// Sets the calling thread's ignore threshold: RootErrorHandlerImpl treats ROOT
// messages with severity <= `level` as informational on this thread.
0915 void InitRootHandlers::ignoreWarnings_(edm::RootHandlers::SeverityLevel level) { s_ignoreWarnings = level; }
0916
0917 void InitRootHandlers::cachePidInfo() {
0918 if (helperThread_) {
0919
0920
0921
0922 return;
0923 }
0924 std::string gdbcmd{"date; gdb -quiet -p %d"};
0925 if (!interactiveDebug_) {
0926 gdbcmd +=
0927 " 2>&1 <<EOF |\n"
0928 "set width 0\n"
0929 "set height 0\n"
0930 "set pagination no\n"
0931 "thread apply all bt\n"
0932 "EOF\n"
0933 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
0934 }
0935 if (snprintf(pidString_, pidStringLength_ - 1, gdbcmd.c_str(), getpid()) >= pidStringLength_) {
0936 std::ostringstream sstr;
0937 sstr << "Unable to pre-allocate stacktrace handler information";
0938 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0939 throw except;
0940 }
0941
0942
0943
0944
0945 close(childToParent_[0]);
0946 close(childToParent_[1]);
0947 childToParent_[0] = -1;
0948 childToParent_[1] = -1;
0949 close(parentToChild_[0]);
0950 close(parentToChild_[1]);
0951 parentToChild_[0] = -1;
0952 parentToChild_[1] = -1;
0953
0954 if (-1 == pipe2(childToParent_, O_CLOEXEC)) {
0955 std::ostringstream sstr;
0956 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0957 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0958 throw except;
0959 }
0960
0961 if (-1 == pipe2(parentToChild_, O_CLOEXEC)) {
0962 close(childToParent_[0]);
0963 close(childToParent_[1]);
0964 childToParent_[0] = -1;
0965 childToParent_[1] = -1;
0966 std::ostringstream sstr;
0967 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0968 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0969 throw except;
0970 }
0971
0972 helperThread_ = std::make_unique<std::thread>(stacktraceHelperThread);
0973 helperThread_->detach();
0974 }
0975
0976 }
0977 }
0978
0979 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
0980
0981 using edm::service::InitRootHandlers;
0982 typedef edm::serviceregistry::AllArgsMaker<edm::RootHandlers, InitRootHandlers> RootHandlersMaker;
0983 DEFINE_FWK_SERVICE_MAKER(InitRootHandlers, RootHandlersMaker);