File indexing completed on 2022-09-22 23:03:38
0001 #include "FWCore/Utilities/interface/RootHandlers.h"
0002
0003 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
0004 #include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0005 #include "DataFormats/Common/interface/RefCoreStreamer.h"
0006 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0007 #include "FWCore/MessageLogger/interface/ELseverityLevel.h"
0008 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/PluginManager/interface/PluginCapabilities.h"
0011 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0012 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0013 #include "FWCore/Utilities/interface/EDMException.h"
0014 #include "FWCore/Reflection/interface/TypeWithDict.h"
0015 #include "FWCore/Utilities/interface/UnixSignalHandlers.h"
0016 #include "FWCore/ServiceRegistry/interface/CurrentModuleOnThread.h"
0017 #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
0018
0019 #include "oneapi/tbb/concurrent_unordered_set.h"
0020 #include "oneapi/tbb/task.h"
0021 #include "oneapi/tbb/task_scheduler_observer.h"
0022 #include "oneapi/tbb/global_control.h"
0023 #include <memory>
0024
0025 #include <thread>
0026 #include <sys/wait.h>
0027 #include <sstream>
0028 #include <cstring>
0029 #include <poll.h>
0030 #include <atomic>
0031 #include <algorithm>
0032 #include <vector>
0033 #include <string>
0034 #include <array>
0035
0036
0037
0038
0039 #ifdef __linux__
0040 #include <syscall.h>
0041 #endif
0042
0043 #include "TROOT.h"
0044 #include "TError.h"
0045 #include "TFile.h"
0046 #include "TInterpreter.h"
0047 #include "TH1.h"
0048 #include "TSystem.h"
0049 #include "TUnixSystem.h"
0050 #include "TTree.h"
0051 #include "TVirtualStreamerInfo.h"
0052
0053 #include "TClassTable.h"
0054
0055 #include <memory>
0056
namespace {
  /// Capacity in bytes (terminating NUL included) of every fixed-size buffer
  /// that receives one "Module: ..." line of the crash report. It is the size
  /// bound handed to strlcpy/strlcat wherever such a line is assembled.
  constexpr std::size_t moduleBufferSize{128};
}  // namespace
0062
0063 namespace edm {
0064 class ConfigurationDescriptions;
0065 class ParameterSet;
0066 class ActivityRegistry;
0067
0068 namespace service {
0069 class InitRootHandlers : public RootHandlers {
0070 friend int cmssw_stacktrace(void*);
0071
0072 public:
0073 class ThreadTracker : public oneapi::tbb::task_scheduler_observer {
0074 public:
0075 typedef oneapi::tbb::concurrent_unordered_set<pthread_t> Container_type;
0076
0077 ThreadTracker() : oneapi::tbb::task_scheduler_observer() { observe(); }
0078 ~ThreadTracker() override = default;
0079
0080 void on_scheduler_entry(bool) override {
0081
0082
0083
0084
0085
0086 edm::CurrentModuleOnThread::getCurrentModuleOnThread();
0087 threadIDs_.insert(pthread_self());
0088 }
0089 void on_scheduler_exit(bool) override {}
0090 const Container_type& IDs() { return threadIDs_; }
0091
0092 private:
0093 Container_type threadIDs_;
0094 };
0095
0096 explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
0097 ~InitRootHandlers() override;
0098
0099 static void fillDescriptions(ConfigurationDescriptions& descriptions);
0100 static void stacktraceFromThread();
0101 static const ThreadTracker::Container_type& threadIDs() {
0102 static const ThreadTracker::Container_type empty;
0103 if (threadTracker_) {
0104 return threadTracker_->IDs();
0105 }
0106 return empty;
0107 }
0108 static int stackTracePause() { return stackTracePause_; }
0109
0110 static std::vector<std::array<char, moduleBufferSize>> moduleListBuffers_;
0111 static std::atomic<std::size_t> nextModule_, doneModules_;
0112
0113 private:
0114 static char const* const* getPstackArgv();
0115 void enableWarnings_() override;
0116 void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override;
0117 void willBeUsingThreads() override;
0118
0119 void cachePidInfo();
0120 static void stacktraceHelperThread();
0121
0122 static constexpr int pidStringLength_ = 200;
0123 static char pidString_[pidStringLength_];
0124 static char const* const pstackArgv_[];
0125 static int parentToChild_[2];
0126 static int childToParent_[2];
0127 static std::unique_ptr<std::thread> helperThread_;
0128 static std::unique_ptr<ThreadTracker> threadTracker_;
0129 static int stackTracePause_;
0130
0131 bool unloadSigHandler_;
0132 bool resetErrHandler_;
0133 bool loadAllDictionaries_;
0134 bool autoLibraryLoader_;
0135 bool interactiveDebug_;
0136 std::shared_ptr<const void> sigBusHandler_;
0137 std::shared_ptr<const void> sigSegvHandler_;
0138 std::shared_ptr<const void> sigIllHandler_;
0139 std::shared_ptr<const void> sigTermHandler_;
0140 std::shared_ptr<const void> sigAbrtHandler_;
0141 std::shared_ptr<const void> sigFpeHandler_;
0142 };
0143
0144 inline bool isProcessWideService(InitRootHandlers const*) { return true; }
0145
0146 }
0147 }
0148
0149 namespace edm {
0150 namespace service {
0151 int cmssw_stacktrace(void*);
0152 }
0153 }
0154
0155 namespace {
0156 thread_local edm::RootHandlers::SeverityLevel s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo;
0157
0158 constexpr bool s_ignoreEverything = false;
0159
/// True when at least one entry of `substrs` occurs somewhere inside `search`.
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  const auto occursInSearch = [&search](const char* needle) { return search.find(needle) != std::string::npos; };
  return std::any_of(substrs.begin(), substrs.end(), occursInSearch);
}
0166
0167
// Message fragments: a ROOT message containing any of these is demoted to
// informational severity.
constexpr std::array<const char* const, 9> in_message{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "oneapi::tbb::global_control is limiting"};

// Location fragments: a ROOT message issued from any of these places is
// demoted to informational severity.
constexpr std::array<const char* const, 7> in_location{
    "Fit",
    "TDecompChol::Solve",
    "THistPainter::PaintInit",
    "TUnixSystem::SetDisplay",
    "TGClient::GetFontByName",
    "Inverter::Dinv",
    "RTaskArenaWrapper"};

// Message fragments that are reported through LogError but never abort the job.
constexpr std::array<const char* const, 3> in_message_print_error{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent"};
0191
0192 void RootErrorHandlerImpl(int level, char const* location, char const* message) {
0193 bool die = false;
0194
0195
0196
0197 edm::RootHandlers::SeverityLevel el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0198
0199 if (level >= kFatal) {
0200 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0201 } else if (level >= kSysError) {
0202 el_severity = edm::RootHandlers::SeverityLevel::kSysError;
0203 } else if (level >= kError) {
0204 el_severity = edm::RootHandlers::SeverityLevel::kError;
0205 } else if (level >= kWarning) {
0206 el_severity = edm::RootHandlers::SeverityLevel::kWarning;
0207 }
0208
0209 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
0210 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0211 }
0212
0213
0214
0215
0216 std::string el_location = "@SUB=?";
0217 if (location != nullptr)
0218 el_location = std::string("@SUB=") + std::string(location);
0219
0220 std::string el_message = "?";
0221 if (message != nullptr)
0222 el_message = message;
0223
0224
0225
0226
0227
0228
0229
0230 std::string el_identifier = "ROOT";
0231
0232 std::string precursor("class ");
0233 size_t index1 = el_message.find(precursor);
0234 if (index1 != std::string::npos) {
0235 size_t index2 = index1 + precursor.length();
0236 size_t index3 = el_message.find_first_of(" :", index2);
0237 if (index3 != std::string::npos) {
0238 size_t substrlen = index3 - index2;
0239 el_identifier += "-";
0240 el_identifier += el_message.substr(index2, substrlen);
0241 }
0242 } else {
0243 index1 = el_location.find("::");
0244 if (index1 != std::string::npos) {
0245 el_identifier += "/";
0246 el_identifier += el_location.substr(0, index1);
0247 }
0248 }
0249
0250
0251
0252 if ((el_location.find("TBranchElement::Fill") != std::string::npos) &&
0253 (el_message.find("fill branch") != std::string::npos) && (el_message.find("address") != std::string::npos) &&
0254 (el_message.find("not set") != std::string::npos)) {
0255 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0256 }
0257
0258 if ((el_message.find("Tree branches") != std::string::npos) &&
0259 (el_message.find("different numbers of entries") != std::string::npos)) {
0260 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0261 }
0262
0263
0264
0265 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
0266 (level < kError and (el_location.find("CINTTypedefBuilder::Setup") != std::string::npos) and
0267 (el_message.find("possible entries are in use!") != std::string::npos))) {
0268 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0269 }
0270
0271
0272
0273 bool alreadyPrinted = false;
0274 if (find_if_string(el_message, in_message_print_error)) {
0275 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0276 edm::LogError("Root_Error") << el_location << el_message;
0277 alreadyPrinted = true;
0278 }
0279
0280 if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0281
0282 die = false;
0283 } else {
0284 die = true;
0285 }
0286
0287
0288
0289
0290
0291
0292 if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
0293 std::ostringstream sstr;
0294 sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
0295 edm::Exception except(edm::errors::FatalRootError, sstr.str());
0296 except.addAdditionalInfo(except.message());
0297 except.clearMessage();
0298 throw except;
0299 }
0300
0301
0302
0303
0304 if (!alreadyPrinted) {
0305 if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
0306 edm::LogError("Root_Fatal") << el_location << el_message;
0307 } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
0308 edm::LogError("Root_Severe") << el_location << el_message;
0309 } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
0310 edm::LogError("Root_Error") << el_location << el_message;
0311 } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
0312 edm::LogWarning("Root_Warning") << el_location << el_message;
0313 } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0314 edm::LogInfo("Root_Information") << el_location << el_message;
0315 }
0316 }
0317 }
0318
0319 void RootErrorHandler(int level, bool, char const* location, char const* message) {
0320 RootErrorHandlerImpl(level, location, message);
0321 }
0322
0323 extern "C" {
/// Put the six fatal signals handled by this file back to their default
/// disposition.
void set_default_signals() {
  static constexpr int kFatalSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGFPE, SIGABRT};
  for (const int signum : kFatalSignals) {
    signal(signum, SIG_DFL);
  }
}
0332
/// Write the whole NUL-terminated string `text` to `fd`, retrying after short
/// writes and EINTR.
/// @return 0 on success, otherwise the negated errno of the failing write().
static int full_write(int fd, const char* text) {
  const char* cursor = text;
  for (size_t remaining = strlen(text); remaining != 0;) {
    const ssize_t n = write(fd, cursor, remaining);
    if (n == -1) {
      if (errno != EINTR) {
        return -errno;
      }
      continue;  // interrupted before anything was written: try again
    }
    remaining -= n;
    cursor += n;
  }
  return 0;
}
0351
/// Read exactly `len` bytes from `fd` into `inbuf`.
///
/// @param timeout_s  Overall deadline in seconds. A negative value means "no
///                   deadline": the descriptor's flags are left untouched and
///                   read() is simply called until the buffer is full.
///                   Otherwise the descriptor is switched to non-blocking mode
///                   for the duration of the call (and restored afterwards)
///                   and poll() enforces the deadline.
/// @return 0 on success; -ETIMEDOUT when the deadline expires; -EPIPE when the
///         peer closed the descriptor before `len` bytes arrived; otherwise
///         the negated errno of the failing system call.
static int full_read(int fd, char* inbuf, size_t len, int timeout_s = -1) {
  char* buf = inbuf;
  size_t count = len;
  const auto end_time = std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0) {
    // Pretend the descriptor is already non-blocking so that its flags are
    // neither changed nor restored below.
    flags = O_NONBLOCK;
  } else if ((flags = fcntl(fd, F_GETFL)) == -1) {
    return -errno;
  }
  const bool mustRestore = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (mustRestore && fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
    return -errno;
  }
  // Every exit after this point goes through here so the original flags are
  // always put back. The result code is computed by the caller before the
  // fcntl() below can disturb errno.
  const auto finish = [fd, flags, mustRestore](int rc) {
    if (mustRestore) {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (count) {
    if (timeout_s >= 0) {
      const auto ms_remaining =
          std::chrono::duration_cast<std::chrono::milliseconds>(end_time - std::chrono::steady_clock::now()).count();
      if (ms_remaining < 0) {
        return finish(-ETIMEDOUT);
      }
      // ms_remaining == 0 becomes a zero-timeout poll: data that is already
      // available is still consumed, otherwise we time out at once instead of
      // spinning on EAGAIN until the clock ticks over.
      struct pollfd poll_info {
        fd, POLLIN, 0
      };
      const int rc = poll(&poll_info, 1, static_cast<int>(ms_remaining));
      if (rc < 0) {
        if (errno == EINTR || errno == EAGAIN) {
          continue;
        }
        return finish(-errno);
      }
      if (rc == 0) {
        return finish(-ETIMEDOUT);
      }
    }
    const ssize_t complete = read(fd, buf, count);
    if (complete == -1) {
      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
        continue;
      }
      return finish(-errno);
    }
    if (complete == 0) {
      // End of file: the writer is gone, so no further byte can ever arrive.
      // Looping here (as the code used to) would spin forever.
      return finish(-EPIPE);
    }
    count -= complete;
    buf += complete;
  }
  return finish(0);
}
0421
0422 static int full_cerr_write(const char* text) { return full_write(2, text); }
0423
0424
0425
0426
0427
// Signals used to park the other threads while the stack trace is taken
// (PAUSE_SIGNAL) and to release them again afterwards (RESUME_SIGNAL).
// Real-time signals are preferred where the platform offers them.
// RESUME_SIGNAL is parenthesized so that it expands safely inside any
// expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
0435
0436
/// Deliberately empty handler; it does nothing with its arguments.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {
}
0438
0439
0440 void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
0441 using namespace edm::service;
0442
0443 #ifdef RESUME_SIGNAL
0444 sigset_t sigset;
0445 sigemptyset(&sigset);
0446 sigaddset(&sigset, RESUME_SIGNAL);
0447 pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
0448 #endif
0449
0450 sleep(InitRootHandlers::stackTracePause());
0451
0452 if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
0453 auto i = InitRootHandlers::nextModule_++;
0454 if (i < InitRootHandlers::moduleListBuffers_.size()) {
0455 char* buff = InitRootHandlers::moduleListBuffers_[i].data();
0456
0457 strlcpy(buff, "\nModule: ", moduleBufferSize);
0458 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0459 strlcat(buff,
0460 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0461 moduleBufferSize);
0462 strlcat(buff, ":", moduleBufferSize);
0463 strlcat(buff,
0464 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0465 moduleBufferSize);
0466 } else {
0467 strlcat(buff, "none", moduleBufferSize);
0468 }
0469 ++edm::service::InitRootHandlers::doneModules_;
0470 }
0471 }
0472 }
0473
0474 void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
0475 using namespace edm::service;
0476
0477 const auto& tids = InitRootHandlers::threadIDs();
0478
0479 const auto self = pthread_self();
0480 #ifdef PAUSE_SIGNAL
0481 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0482
0483 struct sigaction act;
0484 act.sa_sigaction = sig_pause_for_stacktrace;
0485 act.sa_flags = 0;
0486 sigemptyset(&act.sa_mask);
0487 sigaction(PAUSE_SIGNAL, &act, nullptr);
0488
0489
0490 sigset_t pausesigset;
0491 sigemptyset(&pausesigset);
0492 sigaddset(&pausesigset, PAUSE_SIGNAL);
0493 sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
0494
0495
0496 for (auto id : tids) {
0497 if (self != id) {
0498 pthread_kill(id, PAUSE_SIGNAL);
0499 }
0500 }
0501
0502 #ifdef RESUME_SIGNAL
0503
0504 act.sa_sigaction = sig_resume_handler;
0505 sigaction(RESUME_SIGNAL, &act, nullptr);
0506 #endif
0507 }
0508 #endif
0509
0510 const char* signalname = "unknown";
0511 switch (sig) {
0512 case SIGBUS: {
0513 signalname = "bus error";
0514 break;
0515 }
0516 case SIGSEGV: {
0517 signalname = "segmentation violation";
0518 break;
0519 }
0520 case SIGILL: {
0521 signalname = "illegal instruction";
0522 break;
0523 }
0524 case SIGFPE: {
0525 signalname = "floating point exception";
0526 break;
0527 }
0528 case SIGTERM: {
0529 signalname = "external termination request";
0530 break;
0531 }
0532 case SIGABRT: {
0533 signalname = "abort signal";
0534 break;
0535 }
0536 default:
0537 break;
0538 }
0539 full_cerr_write("\n\nA fatal system signal has occurred: ");
0540 full_cerr_write(signalname);
0541 full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
0542
0543 edm::service::InitRootHandlers::stacktraceFromThread();
0544
0545
0546
0547
0548
0549 #ifdef RESUME_SIGNAL
0550 std::size_t notified = 0;
0551 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0552 for (auto id : tids) {
0553 if (self != id) {
0554 if (pthread_kill(id, RESUME_SIGNAL) == 0)
0555 ++notified;
0556 }
0557 }
0558 }
0559 #endif
0560
0561 full_cerr_write("\nCurrent Modules:\n");
0562
0563
0564
0565
0566
0567
0568 if (tids.count(self) > 0) {
0569 char buff[moduleBufferSize] = "\nModule: ";
0570 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0571 strlcat(buff,
0572 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0573 moduleBufferSize);
0574 strlcat(buff, ":", moduleBufferSize);
0575 strlcat(buff,
0576 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0577 moduleBufferSize);
0578 } else {
0579 strlcat(buff, "none", moduleBufferSize);
0580 }
0581 strlcat(buff, " (crashed)", moduleBufferSize);
0582 full_cerr_write(buff);
0583 } else {
0584 full_cerr_write("\nModule: non-CMSSW (crashed)");
0585 }
0586
0587 #ifdef PAUSE_SIGNAL
0588
0589
0590 if (InitRootHandlers::doneModules_.is_lock_free()) {
0591 int spincount = 0;
0592 timespec t = {0, 1000};
0593 while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) {
0594 nanosleep(&t, nullptr);
0595 }
0596 for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
0597 full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
0598 }
0599 }
0600 #endif
0601
0602 full_cerr_write("\n\nA fatal system signal has occurred: ");
0603 full_cerr_write(signalname);
0604 full_cerr_write("\n");
0605
0606
0607
0608 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGFPE) ||
0609 (sig == SIGABRT)) {
0610 signal(sig, SIG_DFL);
0611 raise(sig);
0612 } else {
0613 set_default_signals();
0614 ::abort();
0615 }
0616 }
0617
0618 void sig_abort(int sig, siginfo_t*, void*) {
0619 full_cerr_write("\n\nFatal system signal has occurred during exit\n");
0620
0621
0622 signal(sig, SIG_DFL);
0623 raise(sig);
0624
0625
0626 set_default_signals();
0627 ::sleep(10);
0628 ::abort();
0629 }
0630 }
0631 }
0632
0633 namespace edm {
0634 namespace service {
0635
0636
0637
0638
0639
0640
0641
0642 static void cmssw_stacktrace_fork();
0643
0644 void InitRootHandlers::stacktraceHelperThread() {
0645 int toParent = childToParent_[1];
0646 int fromParent = parentToChild_[0];
0647 char buf[2];
0648 buf[1] = '\0';
0649
0650 while (true) {
0651 int result = full_read(fromParent, buf, 1);
0652 if (result < 0) {
0653
0654
0655
0656 set_default_signals();
0657 close(toParent);
0658 full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
0659 full_cerr_write(strerror(-result));
0660 full_cerr_write("\n");
0661 ::abort();
0662 }
0663 if (buf[0] == '1') {
0664 set_default_signals();
0665 cmssw_stacktrace_fork();
0666 full_write(toParent, buf);
0667 } else if (buf[0] == '2') {
0668
0669
0670 close(toParent);
0671 close(fromParent);
0672 toParent = childToParent_[1];
0673 fromParent = parentToChild_[0];
0674 } else if (buf[0] == '3') {
0675 break;
0676 } else {
0677 set_default_signals();
0678 close(toParent);
0679 full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
0680 full_cerr_write(buf);
0681 full_cerr_write("\n");
0682 ::abort();
0683 }
0684 }
0685 }
0686
0687 void InitRootHandlers::stacktraceFromThread() {
0688 int result = full_write(parentToChild_[1], "1");
0689 if (result < 0) {
0690 full_cerr_write("\n\nAttempt to request stacktrace failed: ");
0691 full_cerr_write(strerror(-result));
0692 full_cerr_write("\n");
0693 return;
0694 }
0695 char buf[2];
0696 buf[1] = '\0';
0697 if ((result = full_read(childToParent_[0], buf, 1, 5 * 60)) < 0) {
0698 full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
0699 if (result == -ETIMEDOUT) {
0700 full_cerr_write("timed out waiting for GDB to complete.");
0701 } else {
0702 full_cerr_write(strerror(-result));
0703 }
0704 full_cerr_write("\n");
0705 return;
0706 }
0707 }
0708
0709 void cmssw_stacktrace_fork() {
0710 char child_stack[4 * 1024];
0711 char* child_stack_ptr = child_stack + 4 * 1024;
0712
0713
0714
0715
0716 int pid =
0717 #ifdef __linux__
0718 clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM | CLONE_FS | SIGCHLD, nullptr);
0719 #else
0720 fork();
0721 if (child_stack_ptr) {
0722 }
0723 if (pid == 0) {
0724 edm::service::cmssw_stacktrace(nullptr);
0725 }
0726 #endif
0727 if (pid == -1) {
0728 full_cerr_write("(Attempt to perform stack dump failed.)\n");
0729 } else {
0730 int status;
0731 if (waitpid(pid, &status, 0) == -1) {
0732 full_cerr_write("(Failed to wait on stack dump output.)\n");
0733 }
0734 if (status) {
0735 full_cerr_write("(GDB stack trace failed unexpectedly)\n");
0736 }
0737 }
0738 }
0739
0740 int cmssw_stacktrace(void* ) {
0741 set_default_signals();
0742
0743 char const* const* argv = edm::service::InitRootHandlers::getPstackArgv();
0744
0745
0746
0747 #ifdef __linux__
0748 syscall(SYS_execve, "/bin/sh", argv, __environ);
0749 #else
0750 execv("/bin/sh", argv);
0751 #endif
0752 ::abort();
0753 return 1;
0754 }
0755
0756 static constexpr char pstackName[] = "(CMSSW stack trace helper)";
0757 static constexpr char dashC[] = "-c";
0758 char InitRootHandlers::pidString_[InitRootHandlers::pidStringLength_] = {};
0759 char const* const InitRootHandlers::pstackArgv_[] = {pstackName, dashC, InitRootHandlers::pidString_, nullptr};
0760 int InitRootHandlers::parentToChild_[2] = {-1, -1};
0761 int InitRootHandlers::childToParent_[2] = {-1, -1};
0762 std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
0763 std::unique_ptr<InitRootHandlers::ThreadTracker> InitRootHandlers::threadTracker_;
0764 int InitRootHandlers::stackTracePause_ = 300;
0765 std::vector<std::array<char, moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
0766 std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
0767
0768 InitRootHandlers::InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg)
0769 : RootHandlers(),
0770 unloadSigHandler_(pset.getUntrackedParameter<bool>("UnloadRootSigHandler")),
0771 resetErrHandler_(pset.getUntrackedParameter<bool>("ResetRootErrHandler")),
0772 loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
0773 autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool>("AutoLibraryLoader")),
0774 interactiveDebug_(pset.getUntrackedParameter<bool>("InteractiveDebug")) {
0775 stackTracePause_ = pset.getUntrackedParameter<int>("StackTracePauseTime");
0776
0777 if (not threadTracker_) {
0778 threadTracker_ = std::make_unique<ThreadTracker>();
0779 iReg.watchPostEndJob([]() {
0780 if (threadTracker_) {
0781 threadTracker_->observe(false);
0782 }
0783 });
0784 }
0785
0786 if (unloadSigHandler_) {
0787
0788 gSystem->ResetSignal(kSigChild);
0789 gSystem->ResetSignal(kSigBus);
0790 gSystem->ResetSignal(kSigSegmentationViolation);
0791 gSystem->ResetSignal(kSigIllegalInstruction);
0792 gSystem->ResetSignal(kSigSystem);
0793 gSystem->ResetSignal(kSigPipe);
0794 gSystem->ResetSignal(kSigAlarm);
0795 gSystem->ResetSignal(kSigUrgent);
0796 gSystem->ResetSignal(kSigFloatingException);
0797 gSystem->ResetSignal(kSigWindowChanged);
0798 } else if (pset.getUntrackedParameter<bool>("AbortOnSignal")) {
0799 cachePidInfo();
0800
0801
0802
0803 gSystem->ResetSignal(kSigBus);
0804 gSystem->ResetSignal(kSigSegmentationViolation);
0805 gSystem->ResetSignal(kSigIllegalInstruction);
0806 gSystem->ResetSignal(kSigFloatingException);
0807 installCustomHandler(SIGBUS, sig_dostack_then_abort);
0808 sigBusHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGBUS, sig_abort); });
0809 installCustomHandler(SIGSEGV, sig_dostack_then_abort);
0810 sigSegvHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGSEGV, sig_abort); });
0811 installCustomHandler(SIGILL, sig_dostack_then_abort);
0812 sigIllHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGILL, sig_abort); });
0813 installCustomHandler(SIGTERM, sig_dostack_then_abort);
0814 sigTermHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGTERM, sig_abort); });
0815 installCustomHandler(SIGFPE, sig_dostack_then_abort);
0816 sigFpeHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGFPE, sig_abort); });
0817 installCustomHandler(SIGABRT, sig_dostack_then_abort);
0818 sigAbrtHandler_ = std::shared_ptr<const void>(nullptr, [](void*) {
0819 signal(SIGABRT, SIG_DFL);
0820 });
0821 }
0822
0823 iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds) {
0824 if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
0825 moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
0826 }
0827 });
0828
0829 if (resetErrHandler_) {
0830
0831 SetErrorHandler(RootErrorHandler);
0832 }
0833
0834
0835 if (autoLibraryLoader_) {
0836 gInterpreter->SetClassAutoloading(1);
0837 }
0838
0839
0840 TTree::SetMaxTreeSize(kMaxLong64);
0841 TH1::AddDirectory(kFALSE);
0842
0843
0844
0845 setRefCoreStreamerInTClass();
0846
0847
0848 if (!hasDictionary(typeid(std::vector<std::vector<unsigned int>>))) {
0849 TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
0850 }
0851
0852 int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
0853 if (debugLevel > 0) {
0854 gDebug = debugLevel;
0855 }
0856
0857
0858 bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
0859 if (imt && not ROOT::IsImplicitMTEnabled()) {
0860
0861
0862 ROOT::EnableImplicitMT(
0863 oneapi::tbb::global_control::active_value(oneapi::tbb::global_control::max_allowed_parallelism));
0864 }
0865 }
0866
0867 InitRootHandlers::~InitRootHandlers() {
0868
0869 TIter iter(gROOT->GetListOfFiles());
0870 TObject* obj = nullptr;
0871 while (nullptr != (obj = iter.Next())) {
0872 TFile* f = dynamic_cast<TFile*>(obj);
0873 if (f) {
0874
0875
0876 f->Close();
0877 iter = TIter(gROOT->GetListOfFiles());
0878 }
0879 }
0880
0881 threadTracker_.reset();
0882 }
0883
0884 void InitRootHandlers::willBeUsingThreads() {
0885
0886 ROOT::EnableThreadSafety();
0887
0888
0889 TObject::SetObjectStat(false);
0890
0891
0892 TVirtualStreamerInfo::Optimize(false);
0893 }
0894
0895 void InitRootHandlers::fillDescriptions(ConfigurationDescriptions& descriptions) {
0896 ParameterSetDescription desc;
0897 desc.setComment("Centralized interface to ROOT.");
0898 desc.addUntracked<bool>("UnloadRootSigHandler", false)
0899 ->setComment("If True, signals are handled by this service, rather than by ROOT.");
0900 desc.addUntracked<bool>("ResetRootErrHandler", true)
0901 ->setComment(
0902 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
0903 desc.addUntracked<bool>("AutoLibraryLoader", true)
0904 ->setComment("If True, enables automatic loading of data dictionaries.");
0905 desc.addUntracked<bool>("LoadAllDictionaries", false)->setComment("If True, loads all ROOT dictionaries.");
0906 desc.addUntracked<bool>("EnableIMT", true)->setComment("If True, calls ROOT::EnableImplicitMT().");
0907 desc.addUntracked<bool>("AbortOnSignal", true)
0908 ->setComment(
0909 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
0910 "attempts to do a clean shutdown.");
0911 desc.addUntracked<bool>("InteractiveDebug", false)
0912 ->setComment(
0913 "If True, leave gdb attached to cmsRun after a crash; "
0914 "if False, attach gdb, print a stack trace, and quit gdb");
0915 desc.addUntracked<int>("DebugLevel", 0)->setComment("Sets ROOT's gDebug value.");
0916 desc.addUntracked<int>("StackTracePauseTime", 300)
0917 ->setComment("Seconds to pause other threads during stack trace.");
0918 descriptions.add("InitRootHandlers", desc);
0919 }
0920
0921 char const* const* InitRootHandlers::getPstackArgv() { return pstackArgv_; }
0922
0923 void InitRootHandlers::enableWarnings_() { s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo; }
0924
0925 void InitRootHandlers::ignoreWarnings_(edm::RootHandlers::SeverityLevel level) { s_ignoreWarnings = level; }
0926
0927 void InitRootHandlers::cachePidInfo() {
0928 if (helperThread_) {
0929
0930
0931
0932 return;
0933 }
0934 std::string gdbcmd{"date; gdb -quiet -p %d"};
0935 if (!interactiveDebug_) {
0936 gdbcmd +=
0937 " 2>&1 <<EOF |\n"
0938 "set width 0\n"
0939 "set height 0\n"
0940 "set pagination no\n"
0941 "thread apply all bt\n"
0942 "EOF\n"
0943 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
0944 }
0945 if (snprintf(pidString_, pidStringLength_ - 1, gdbcmd.c_str(), getpid()) >= pidStringLength_) {
0946 std::ostringstream sstr;
0947 sstr << "Unable to pre-allocate stacktrace handler information";
0948 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0949 throw except;
0950 }
0951
0952
0953
0954
0955 close(childToParent_[0]);
0956 close(childToParent_[1]);
0957 childToParent_[0] = -1;
0958 childToParent_[1] = -1;
0959 close(parentToChild_[0]);
0960 close(parentToChild_[1]);
0961 parentToChild_[0] = -1;
0962 parentToChild_[1] = -1;
0963
0964 if (-1 == pipe2(childToParent_, O_CLOEXEC)) {
0965 std::ostringstream sstr;
0966 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0967 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0968 throw except;
0969 }
0970
0971 if (-1 == pipe2(parentToChild_, O_CLOEXEC)) {
0972 close(childToParent_[0]);
0973 close(childToParent_[1]);
0974 childToParent_[0] = -1;
0975 childToParent_[1] = -1;
0976 std::ostringstream sstr;
0977 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0978 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0979 throw except;
0980 }
0981
0982 helperThread_ = std::make_unique<std::thread>(stacktraceHelperThread);
0983 helperThread_->detach();
0984 }
0985
0986 }
0987 }
0988
0989 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
0990
0991 using edm::service::InitRootHandlers;
0992 typedef edm::serviceregistry::AllArgsMaker<edm::RootHandlers, InitRootHandlers> RootHandlersMaker;
0993 DEFINE_FWK_SERVICE_MAKER(InitRootHandlers, RootHandlersMaker);