File indexing completed on 2025-03-07 01:53:27
0001 #include "FWCore/AbstractServices/interface/RootHandlers.h"
0002
0003 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
0004 #include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0005 #include "DataFormats/Common/interface/RefCoreStreamer.h"
0006 #include "DataFormats/Provenance/interface/ModuleDescription.h"
0007 #include "FWCore/MessageLogger/interface/ELseverityLevel.h"
0008 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/PluginManager/interface/PluginCapabilities.h"
0011 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0012 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0013 #include "FWCore/Utilities/interface/EDMException.h"
0014 #include "FWCore/Reflection/interface/TypeWithDict.h"
0015 #include "FWCore/Utilities/interface/UnixSignalHandlers.h"
0016 #include "FWCore/ServiceRegistry/interface/CurrentModuleOnThread.h"
0017 #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
0018
0019 #include "oneapi/tbb/concurrent_unordered_set.h"
0020 #include "oneapi/tbb/task.h"
0021 #include "oneapi/tbb/task_scheduler_observer.h"
0022 #include "oneapi/tbb/global_control.h"
0023 #include <memory>
0024
0025 #include <thread>
0026 #include <sys/wait.h>
0027 #include <sstream>
0028 #include <cstring>
0029 #include <poll.h>
0030 #include <atomic>
0031 #include <algorithm>
0032 #include <vector>
0033 #include <string>
0034 #include <array>
0035
0036
0037
0038
0039 #ifdef __linux__
0040 #include <syscall.h>
0041 #endif
0042
0043 #include "TROOT.h"
0044 #include "TError.h"
0045 #include "TFile.h"
0046 #include "TInterpreter.h"
0047 #include "TH1.h"
0048 #include "TSystem.h"
0049 #include "TUnixSystem.h"
0050 #include "TTree.h"
0051 #include "TVirtualStreamerInfo.h"
0052
0053 #include "TClassTable.h"
0054
0055 #include <memory>
0056
namespace {

  // Capacity, in bytes, of every fixed-size text buffer that holds one
  // "Module: <name>:<label>" line. It sizes the elements of
  // InitRootHandlers::moduleListBuffers_ and bounds every strlcpy/strlcat
  // performed on those buffers in the signal handlers.
  constexpr std::size_t moduleBufferSize{128};

}  // namespace
0062
0063 namespace edm {
0064 class ConfigurationDescriptions;
0065 class ParameterSet;
0066 class ActivityRegistry;
0067
0068 namespace service {
0069 class InitRootHandlers : public RootHandlers {
0070 friend int cmssw_stacktrace(void*);
0071
0072 public:
0073 class ThreadTracker : public oneapi::tbb::task_scheduler_observer {
0074 public:
0075 typedef oneapi::tbb::concurrent_unordered_set<pthread_t> Container_type;
0076
0077 ThreadTracker() : oneapi::tbb::task_scheduler_observer() { observe(); }
0078 ~ThreadTracker() override = default;
0079
0080 void on_scheduler_entry(bool) override {
0081
0082
0083
0084
0085
0086 edm::CurrentModuleOnThread::getCurrentModuleOnThread();
0087 threadIDs_.insert(pthread_self());
0088 }
0089 void on_scheduler_exit(bool) override {}
0090 const Container_type& IDs() { return threadIDs_; }
0091
0092 private:
0093 Container_type threadIDs_;
0094 };
0095
0096 explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
0097 ~InitRootHandlers() override;
0098
0099 static void fillDescriptions(ConfigurationDescriptions& descriptions);
0100 static void stacktraceFromThread();
0101 static const ThreadTracker::Container_type& threadIDs() {
0102 static const ThreadTracker::Container_type empty;
0103 if (threadTracker_) {
0104 return threadTracker_->IDs();
0105 }
0106 return empty;
0107 }
0108 static int stackTracePause() { return stackTracePause_; }
0109
0110 static std::vector<std::array<char, moduleBufferSize>> moduleListBuffers_;
0111 static std::atomic<std::size_t> nextModule_, doneModules_;
0112
0113 private:
0114 static char const* const* getPstackArgv();
0115 void enableWarnings_() override;
0116 void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override;
0117 void willBeUsingThreads() override;
0118
0119 void cachePidInfo();
0120 static void stacktraceHelperThread();
0121
0122 static constexpr int pidStringLength_ = 200;
0123 static char pidString_[pidStringLength_];
0124 static char const* const pstackArgv_[];
0125 static int parentToChild_[2];
0126 static int childToParent_[2];
0127 static std::unique_ptr<std::thread> helperThread_;
0128 static std::unique_ptr<ThreadTracker> threadTracker_;
0129 static int stackTracePause_;
0130
0131 bool unloadSigHandler_;
0132 bool resetErrHandler_;
0133 bool loadAllDictionaries_;
0134 bool autoLibraryLoader_;
0135 bool autoClassParser_;
0136 bool interactiveDebug_;
0137 std::shared_ptr<const void> sigBusHandler_;
0138 std::shared_ptr<const void> sigSegvHandler_;
0139 std::shared_ptr<const void> sigIllHandler_;
0140 std::shared_ptr<const void> sigTermHandler_;
0141 std::shared_ptr<const void> sigAbrtHandler_;
0142 std::shared_ptr<const void> sigFpeHandler_;
0143 };
0144
0145 inline bool isProcessWideService(InitRootHandlers const*) { return true; }
0146
0147 }
0148 }
0149
namespace edm {
  namespace service {
    // Entry point executed in the child created for the stack dump; the
    // definition appears later in this file.
    int cmssw_stacktrace(void*);
  }  // namespace service
}  // namespace edm
0155
0156 namespace {
0157 thread_local edm::RootHandlers::SeverityLevel s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo;
0158
0159 constexpr bool s_ignoreEverything = false;
0160
/// Returns true when at least one entry of `substrs` occurs somewhere inside
/// `search`, false otherwise.
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  for (const char* const candidate : substrs) {
    if (search.find(candidate) != std::string::npos) {
      return true;
    }
  }
  return false;
}
0167
0168
// Message fragments: a ROOT message containing any of these is downgraded to
// informational severity.
constexpr std::array<const char* const, 11> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "oneapi::tbb::global_control is limiting",
    "ufirst < fXmin, fXmin is used",
    "ulast > fXmax, fXmax is used",
}};

// Location fragments: a ROOT message issued from any of these places is
// downgraded to informational severity.
constexpr std::array<const char* const, 7> in_location{{
    "Fit",
    "TDecompChol::Solve",
    "THistPainter::PaintInit",
    "TUnixSystem::SetDisplay",
    "TGClient::GetFontByName",
    "Inverter::Dinv",
    "RTaskArenaWrapper",
}};

// Message fragments that are reported through LogError but never abort the job.
constexpr std::array<const char* const, 4> in_message_print_error{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
    "VariableMetricBuilder Initial matrix not pos.def.",
}};
0196
0197 void RootErrorHandlerImpl(int level, char const* location, char const* message) {
0198 bool die = false;
0199
0200
0201
0202 edm::RootHandlers::SeverityLevel el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0203
0204 if (level >= kFatal) {
0205 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0206 } else if (level >= kSysError) {
0207 el_severity = edm::RootHandlers::SeverityLevel::kSysError;
0208 } else if (level >= kError) {
0209 el_severity = edm::RootHandlers::SeverityLevel::kError;
0210 } else if (level >= kWarning) {
0211 el_severity = edm::RootHandlers::SeverityLevel::kWarning;
0212 }
0213
0214 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
0215 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0216 }
0217
0218
0219
0220
0221 std::string el_location = "@SUB=?";
0222 if (location != nullptr)
0223 el_location = std::string("@SUB=") + std::string(location);
0224
0225 std::string el_message = "?";
0226 if (message != nullptr)
0227 el_message = message;
0228
0229
0230
0231
0232
0233
0234
0235 std::string el_identifier = "ROOT";
0236
0237 std::string precursor("class ");
0238 size_t index1 = el_message.find(precursor);
0239 if (index1 != std::string::npos) {
0240 size_t index2 = index1 + precursor.length();
0241 size_t index3 = el_message.find_first_of(" :", index2);
0242 if (index3 != std::string::npos) {
0243 size_t substrlen = index3 - index2;
0244 el_identifier += "-";
0245 el_identifier += el_message.substr(index2, substrlen);
0246 }
0247 } else {
0248 index1 = el_location.find("::");
0249 if (index1 != std::string::npos) {
0250 el_identifier += "/";
0251 el_identifier += el_location.substr(0, index1);
0252 }
0253 }
0254
0255
0256
0257 if ((el_location.find("TBranchElement::Fill") != std::string::npos) &&
0258 (el_message.find("fill branch") != std::string::npos) && (el_message.find("address") != std::string::npos) &&
0259 (el_message.find("not set") != std::string::npos)) {
0260 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0261 }
0262
0263 if ((el_message.find("Tree branches") != std::string::npos) &&
0264 (el_message.find("different numbers of entries") != std::string::npos)) {
0265 el_severity = edm::RootHandlers::SeverityLevel::kFatal;
0266 }
0267
0268
0269
0270 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
0271 (level < kError and (el_location.find("CINTTypedefBuilder::Setup") != std::string::npos) and
0272 (el_message.find("possible entries are in use!") != std::string::npos))) {
0273 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0274 }
0275
0276
0277
0278 bool alreadyPrinted = false;
0279 if (find_if_string(el_message, in_message_print_error)) {
0280 el_severity = edm::RootHandlers::SeverityLevel::kInfo;
0281 edm::LogError("Root_Error") << el_location << el_message;
0282 alreadyPrinted = true;
0283 }
0284
0285 if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0286
0287 die = false;
0288 } else {
0289 die = true;
0290 }
0291
0292
0293
0294
0295
0296
0297 if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
0298 std::ostringstream sstr;
0299 sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
0300 edm::Exception except(edm::errors::FatalRootError, sstr.str());
0301 except.addAdditionalInfo(except.message());
0302 except.clearMessage();
0303 throw except;
0304 }
0305
0306
0307
0308
0309 if (!alreadyPrinted) {
0310 if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
0311 edm::LogError("Root_Fatal") << el_location << el_message;
0312 } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
0313 edm::LogError("Root_Severe") << el_location << el_message;
0314 } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
0315 edm::LogError("Root_Error") << el_location << el_message;
0316 } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
0317 edm::LogWarning("Root_Warning") << el_location << el_message;
0318 } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
0319 edm::LogInfo("Root_Information") << el_location << el_message;
0320 }
0321 }
0322 }
0323
0324 void RootErrorHandler(int level, bool, char const* location, char const* message) {
0325 RootErrorHandlerImpl(level, location, message);
0326 }
0327
0328 extern "C" {
/// Restores the default disposition of the six signals this file installs
/// handlers for, so that a second fault is not routed back into them.
void set_default_signals() {
  const int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGFPE, SIGABRT};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
0337
/// Writes the whole NUL-terminated string `text` to `fd`, retrying after
/// partial writes and after EINTR.
///
/// @return 0 on success, or the negated errno of the failing write().
static int full_write(int fd, const char* text) {
  const char* cursor = text;
  for (size_t remaining = strlen(text); remaining > 0;) {
    const ssize_t written = write(fd, cursor, remaining);
    if (written == -1) {
      if (errno != EINTR) {
        return -errno;
      }
      // Interrupted before anything was written: just try again.
      continue;
    }
    remaining -= written;
    cursor += written;
  }
  return 0;
}
0356
/// Reads exactly `len` bytes from `fd` into `inbuf`.
///
/// @param fd         descriptor to read from.
/// @param inbuf      destination buffer with room for at least `len` bytes.
/// @param len        number of bytes that must be read.
/// @param timeout_s  overall time limit in seconds. A negative value means
///                   "no limit": the descriptor's flags are left untouched and
///                   read() is simply retried. With a non-negative value the
///                   descriptor is switched to non-blocking mode for the
///                   duration of the call (and restored before returning) and
///                   poll() is used to wait for data.
/// @return 0 on success; -ETIMEDOUT when the time limit expires; -EPIPE when
///         end-of-file is reached before `len` bytes arrived; otherwise the
///         negated errno of the failing system call.
///
/// EINTR and EAGAIN/EWOULDBLOCK are retried. End-of-file is reported as an
/// error because read() returning 0 makes no progress, and retrying it would
/// spin forever.
static int full_read(int fd, char* inbuf, size_t len, int timeout_s = -1) {
  const bool useTimeout = (timeout_s >= 0);
  const std::chrono::time_point<std::chrono::steady_clock> deadline =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  // Without a timeout, pretend O_NONBLOCK is already set so that the
  // descriptor flags are never modified.
  int flags = O_NONBLOCK;
  if (useTimeout && -1 == (flags = fcntl(fd, F_GETFL))) {
    return -errno;
  }
  const bool mustRestoreFlags = ((flags & O_NONBLOCK) != O_NONBLOCK);
  if (mustRestoreFlags && -1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
    return -errno;
  }

  // Single exit path: put the original flags back, then hand `rc` through.
  // `rc` is computed by the caller before fcntl() can overwrite errno.
  const auto finish = [fd, flags, mustRestoreFlags](int rc) {
    if (mustRestoreFlags) {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  char* cursor = inbuf;
  size_t remaining = len;
  while (remaining > 0) {
    if (useTimeout) {
      const auto msLeft =
          std::chrono::duration_cast<std::chrono::milliseconds>(deadline - std::chrono::steady_clock::now()).count();
      if (msLeft < 0) {
        return finish(-ETIMEDOUT);
      }
      if (msLeft > 0) {
        struct pollfd pollInfo {
          fd, POLLIN, 0
        };
        const int ready = poll(&pollInfo, 1, static_cast<int>(msLeft));
        if (ready < 0) {
          if (errno == EINTR || errno == EAGAIN) {
            continue;
          }
          return finish(-errno);
        }
        if (ready == 0) {
          return finish(-ETIMEDOUT);
        }
      }
      // msLeft == 0: skip poll() and make one last non-blocking attempt.
    }

    const ssize_t got = read(fd, cursor, remaining);
    if (got == -1) {
      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
        continue;
      }
      return finish(-errno);
    }
    if (got == 0) {
      // The peer closed its end; no further data can ever arrive.
      return finish(-EPIPE);
    }
    remaining -= got;
    cursor += got;
  }
  return finish(0);
}
0426
0427 static int full_cerr_write(const char* text) { return full_write(2, text); }
0428
0429
0430
0431
0432
// Signals used to park the other threads while the stack trace is being
// produced (PAUSE_SIGNAL) and to wake them up again afterwards
// (RESUME_SIGNAL). Real-time signals are used where available, otherwise
// SIGINFO/SIGALRM. Neither macro is defined when the platform offers neither
// option; all users are guarded with #ifdef.
// RESUME_SIGNAL is parenthesised so that it expands safely inside any expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
0440
0441
/// Handler installed for RESUME_SIGNAL. It intentionally does nothing.
/// NOTE(review): presumably the signal's delivery alone is what cuts short the
/// sleep() in sig_pause_for_stacktrace() -- confirm.
void sig_resume_handler(int sig, siginfo_t*, void*) {
  (void)sig;
}
0443
0444
0445 void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
0446 using namespace edm::service;
0447
0448 #ifdef RESUME_SIGNAL
0449 sigset_t sigset;
0450 sigemptyset(&sigset);
0451 sigaddset(&sigset, RESUME_SIGNAL);
0452 pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
0453 #endif
0454
0455 sleep(InitRootHandlers::stackTracePause());
0456
0457 if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
0458 auto i = InitRootHandlers::nextModule_++;
0459 if (i < InitRootHandlers::moduleListBuffers_.size()) {
0460 char* buff = InitRootHandlers::moduleListBuffers_[i].data();
0461
0462 strlcpy(buff, "\nModule: ", moduleBufferSize);
0463 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0464 strlcat(buff,
0465 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0466 moduleBufferSize);
0467 strlcat(buff, ":", moduleBufferSize);
0468 strlcat(buff,
0469 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0470 moduleBufferSize);
0471 } else {
0472 strlcat(buff, "none", moduleBufferSize);
0473 }
0474 ++edm::service::InitRootHandlers::doneModules_;
0475 }
0476 }
0477 }
0478
0479 void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
0480 using namespace edm::service;
0481
0482 const auto& tids = InitRootHandlers::threadIDs();
0483
0484 const auto self = pthread_self();
0485 #ifdef PAUSE_SIGNAL
0486 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0487
0488 struct sigaction act;
0489 act.sa_sigaction = sig_pause_for_stacktrace;
0490 act.sa_flags = 0;
0491 sigemptyset(&act.sa_mask);
0492 sigaction(PAUSE_SIGNAL, &act, nullptr);
0493
0494
0495 sigset_t pausesigset;
0496 sigemptyset(&pausesigset);
0497 sigaddset(&pausesigset, PAUSE_SIGNAL);
0498 sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
0499
0500
0501 for (auto id : tids) {
0502 if (self != id) {
0503 pthread_kill(id, PAUSE_SIGNAL);
0504 }
0505 }
0506
0507 #ifdef RESUME_SIGNAL
0508
0509 act.sa_sigaction = sig_resume_handler;
0510 sigaction(RESUME_SIGNAL, &act, nullptr);
0511 #endif
0512 }
0513 #endif
0514
0515 const char* signalname = "unknown";
0516 switch (sig) {
0517 case SIGBUS: {
0518 signalname = "bus error";
0519 break;
0520 }
0521 case SIGSEGV: {
0522 signalname = "segmentation violation";
0523 break;
0524 }
0525 case SIGILL: {
0526 signalname = "illegal instruction";
0527 break;
0528 }
0529 case SIGFPE: {
0530 signalname = "floating point exception";
0531 break;
0532 }
0533 case SIGTERM: {
0534 signalname = "external termination request";
0535 break;
0536 }
0537 case SIGABRT: {
0538 signalname = "abort signal";
0539 break;
0540 }
0541 default:
0542 break;
0543 }
0544 full_cerr_write("\n\nA fatal system signal has occurred: ");
0545 full_cerr_write(signalname);
0546 full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
0547
0548 edm::service::InitRootHandlers::stacktraceFromThread();
0549
0550
0551
0552
0553
0554 #ifdef RESUME_SIGNAL
0555 std::size_t notified = 0;
0556 if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
0557 for (auto id : tids) {
0558 if (self != id) {
0559 if (pthread_kill(id, RESUME_SIGNAL) == 0)
0560 ++notified;
0561 }
0562 }
0563 }
0564 #endif
0565
0566 full_cerr_write("\nCurrent Modules:\n");
0567
0568
0569
0570
0571
0572
0573 if (tids.count(self) > 0) {
0574 char buff[moduleBufferSize] = "\nModule: ";
0575 if (edm::CurrentModuleOnThread::getCurrentModuleOnThread() != nullptr) {
0576 strlcat(buff,
0577 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
0578 moduleBufferSize);
0579 strlcat(buff, ":", moduleBufferSize);
0580 strlcat(buff,
0581 edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
0582 moduleBufferSize);
0583 } else {
0584 strlcat(buff, "none", moduleBufferSize);
0585 }
0586 strlcat(buff, " (crashed)", moduleBufferSize);
0587 full_cerr_write(buff);
0588 } else {
0589 full_cerr_write("\nModule: non-CMSSW (crashed)");
0590 }
0591
0592 #ifdef PAUSE_SIGNAL
0593
0594
0595 if (InitRootHandlers::doneModules_.is_lock_free()) {
0596 int spincount = 0;
0597 timespec t = {0, 1000};
0598 while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) {
0599 nanosleep(&t, nullptr);
0600 }
0601 for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
0602 full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
0603 }
0604 }
0605 #endif
0606
0607 full_cerr_write("\n\nA fatal system signal has occurred: ");
0608 full_cerr_write(signalname);
0609 full_cerr_write("\n");
0610
0611
0612
0613 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGFPE) ||
0614 (sig == SIGABRT)) {
0615 signal(sig, SIG_DFL);
0616 raise(sig);
0617 } else {
0618 set_default_signals();
0619 ::abort();
0620 }
0621 }
0622
0623 void sig_abort(int sig, siginfo_t*, void*) {
0624 full_cerr_write("\n\nFatal system signal has occurred during exit\n");
0625
0626
0627 signal(sig, SIG_DFL);
0628 raise(sig);
0629
0630
0631 set_default_signals();
0632 ::sleep(10);
0633 ::abort();
0634 }
0635 }
0636 }
0637
0638 namespace edm {
0639 namespace service {
0640
0641
0642
0643
0644
0645
0646
0647 static void cmssw_stacktrace_fork();
0648
0649 void InitRootHandlers::stacktraceHelperThread() {
0650 int toParent = childToParent_[1];
0651 int fromParent = parentToChild_[0];
0652 char buf[2];
0653 buf[1] = '\0';
0654
0655 while (true) {
0656 int result = full_read(fromParent, buf, 1);
0657 if (result < 0) {
0658
0659
0660
0661 set_default_signals();
0662 close(toParent);
0663 full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
0664 full_cerr_write(strerror(-result));
0665 full_cerr_write("\n");
0666 ::abort();
0667 }
0668 if (buf[0] == '1') {
0669 set_default_signals();
0670 cmssw_stacktrace_fork();
0671 full_write(toParent, buf);
0672 } else if (buf[0] == '2') {
0673
0674
0675 close(toParent);
0676 close(fromParent);
0677 toParent = childToParent_[1];
0678 fromParent = parentToChild_[0];
0679 } else if (buf[0] == '3') {
0680 break;
0681 } else {
0682 set_default_signals();
0683 close(toParent);
0684 full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
0685 full_cerr_write(buf);
0686 full_cerr_write("\n");
0687 ::abort();
0688 }
0689 }
0690 }
0691
0692 void InitRootHandlers::stacktraceFromThread() {
0693 int result = full_write(parentToChild_[1], "1");
0694 if (result < 0) {
0695 full_cerr_write("\n\nAttempt to request stacktrace failed: ");
0696 full_cerr_write(strerror(-result));
0697 full_cerr_write("\n");
0698 return;
0699 }
0700 char buf[2];
0701 buf[1] = '\0';
0702 if ((result = full_read(childToParent_[0], buf, 1, 5 * 60)) < 0) {
0703 full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
0704 if (result == -ETIMEDOUT) {
0705 full_cerr_write("timed out waiting for GDB to complete.");
0706 } else {
0707 full_cerr_write(strerror(-result));
0708 }
0709 full_cerr_write("\n");
0710 return;
0711 }
0712 }
0713
0714 void cmssw_stacktrace_fork() {
0715 char child_stack[4 * 1024];
0716 char* child_stack_ptr = child_stack + 4 * 1024;
0717
0718
0719
0720
0721 int pid =
0722 #ifdef __linux__
0723 clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM | CLONE_FS | SIGCHLD, nullptr);
0724 #else
0725 fork();
0726 if (child_stack_ptr) {
0727 }
0728 if (pid == 0) {
0729 edm::service::cmssw_stacktrace(nullptr);
0730 }
0731 #endif
0732 if (pid == -1) {
0733 full_cerr_write("(Attempt to perform stack dump failed.)\n");
0734 } else {
0735 int status;
0736 if (waitpid(pid, &status, 0) == -1) {
0737 full_cerr_write("(Failed to wait on stack dump output.)\n");
0738 }
0739 if (status) {
0740 full_cerr_write("(GDB stack trace failed unexpectedly)\n");
0741 }
0742 }
0743 }
0744
0745 int cmssw_stacktrace(void* ) {
0746 set_default_signals();
0747
0748 char const* const* argv = edm::service::InitRootHandlers::getPstackArgv();
0749
0750
0751
0752 #ifdef __linux__
0753 syscall(SYS_execve, "/bin/sh", argv, __environ);
0754 #else
0755 execv("/bin/sh", argv);
0756 #endif
0757 ::abort();
0758 return 1;
0759 }
0760
0761 static constexpr char pstackName[] = "(CMSSW stack trace helper)";
0762 static constexpr char dashC[] = "-c";
0763 char InitRootHandlers::pidString_[InitRootHandlers::pidStringLength_] = {};
0764 char const* const InitRootHandlers::pstackArgv_[] = {pstackName, dashC, InitRootHandlers::pidString_, nullptr};
0765 int InitRootHandlers::parentToChild_[2] = {-1, -1};
0766 int InitRootHandlers::childToParent_[2] = {-1, -1};
0767 std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
0768 std::unique_ptr<InitRootHandlers::ThreadTracker> InitRootHandlers::threadTracker_;
0769 int InitRootHandlers::stackTracePause_ = 300;
0770 std::vector<std::array<char, moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
0771 std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
0772
0773 InitRootHandlers::InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg)
0774 : RootHandlers(),
0775 unloadSigHandler_(pset.getUntrackedParameter<bool>("UnloadRootSigHandler")),
0776 resetErrHandler_(pset.getUntrackedParameter<bool>("ResetRootErrHandler")),
0777 loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
0778 autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool>("AutoLibraryLoader")),
0779 autoClassParser_(pset.getUntrackedParameter<bool>("AutoClassParser")),
0780 interactiveDebug_(pset.getUntrackedParameter<bool>("InteractiveDebug")) {
0781 stackTracePause_ = pset.getUntrackedParameter<int>("StackTracePauseTime");
0782
0783 if (not threadTracker_) {
0784 threadTracker_ = std::make_unique<ThreadTracker>();
0785 iReg.watchPostEndJob([]() {
0786 if (threadTracker_) {
0787 threadTracker_->observe(false);
0788 }
0789 });
0790 }
0791
0792 if (unloadSigHandler_) {
0793
0794 gSystem->ResetSignal(kSigChild);
0795 gSystem->ResetSignal(kSigBus);
0796 gSystem->ResetSignal(kSigSegmentationViolation);
0797 gSystem->ResetSignal(kSigIllegalInstruction);
0798 gSystem->ResetSignal(kSigSystem);
0799 gSystem->ResetSignal(kSigPipe);
0800 gSystem->ResetSignal(kSigAlarm);
0801 gSystem->ResetSignal(kSigUrgent);
0802 gSystem->ResetSignal(kSigFloatingException);
0803 gSystem->ResetSignal(kSigWindowChanged);
0804 } else if (pset.getUntrackedParameter<bool>("AbortOnSignal")) {
0805 cachePidInfo();
0806
0807
0808
0809 gSystem->ResetSignal(kSigBus);
0810 gSystem->ResetSignal(kSigSegmentationViolation);
0811 gSystem->ResetSignal(kSigIllegalInstruction);
0812 gSystem->ResetSignal(kSigFloatingException);
0813 installCustomHandler(SIGBUS, sig_dostack_then_abort);
0814 sigBusHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGBUS, sig_abort); });
0815 installCustomHandler(SIGSEGV, sig_dostack_then_abort);
0816 sigSegvHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGSEGV, sig_abort); });
0817 installCustomHandler(SIGILL, sig_dostack_then_abort);
0818 sigIllHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGILL, sig_abort); });
0819 installCustomHandler(SIGTERM, sig_dostack_then_abort);
0820 sigTermHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGTERM, sig_abort); });
0821 installCustomHandler(SIGFPE, sig_dostack_then_abort);
0822 sigFpeHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGFPE, sig_abort); });
0823 installCustomHandler(SIGABRT, sig_dostack_then_abort);
0824 sigAbrtHandler_ = std::shared_ptr<const void>(nullptr, [](void*) {
0825 signal(SIGABRT, SIG_DFL);
0826 });
0827 }
0828
0829 iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds) {
0830 if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
0831 moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
0832 }
0833 });
0834
0835 if (resetErrHandler_) {
0836
0837 SetErrorHandler(RootErrorHandler);
0838 }
0839
0840
0841 if (autoLibraryLoader_) {
0842 gInterpreter->SetClassAutoloading(1);
0843 }
0844
0845
0846 if (not autoClassParser_) {
0847
0848 iReg.watchPreModuleConstruction(
0849 [](edm::ModuleDescription const&) { gInterpreter->SetClassAutoparsing(false); });
0850 iReg.watchPostModuleConstruction(
0851 [](edm::ModuleDescription const&) { gInterpreter->SetClassAutoparsing(true); });
0852 }
0853
0854
0855 TTree::SetMaxTreeSize(kMaxLong64);
0856 TH1::AddDirectory(kFALSE);
0857
0858
0859
0860 setRefCoreStreamerInTClass();
0861
0862
0863 if (!hasDictionary(typeid(std::vector<std::vector<unsigned int>>))) {
0864 TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
0865 }
0866
0867 int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
0868 if (debugLevel > 0) {
0869 gDebug = debugLevel;
0870 }
0871
0872
0873 bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
0874 if (imt && not ROOT::IsImplicitMTEnabled()) {
0875
0876
0877 ROOT::EnableImplicitMT(
0878 oneapi::tbb::global_control::active_value(oneapi::tbb::global_control::max_allowed_parallelism));
0879 }
0880 }
0881
0882 InitRootHandlers::~InitRootHandlers() {
0883
0884 TIter iter(gROOT->GetListOfFiles());
0885 TObject* obj = nullptr;
0886 while (nullptr != (obj = iter.Next())) {
0887 TFile* f = dynamic_cast<TFile*>(obj);
0888 if (f) {
0889
0890
0891 f->Close();
0892 iter = TIter(gROOT->GetListOfFiles());
0893 }
0894 }
0895
0896 threadTracker_.reset();
0897 }
0898
0899 void InitRootHandlers::willBeUsingThreads() {
0900
0901 ROOT::EnableThreadSafety();
0902
0903
0904 TObject::SetObjectStat(false);
0905
0906
0907 TVirtualStreamerInfo::Optimize(false);
0908 }
0909
0910 void InitRootHandlers::fillDescriptions(ConfigurationDescriptions& descriptions) {
0911 ParameterSetDescription desc;
0912 desc.setComment("Centralized interface to ROOT.");
0913 desc.addUntracked<bool>("UnloadRootSigHandler", false)
0914 ->setComment("If True, signals are handled by this service, rather than by ROOT.");
0915 desc.addUntracked<bool>("ResetRootErrHandler", true)
0916 ->setComment(
0917 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
0918 desc.addUntracked<bool>("AutoLibraryLoader", true)
0919 ->setComment("If True, enables automatic loading of data dictionaries.");
0920 desc.addUntracked<bool>("AutoClassParser", true)
0921 ->setComment(
0922 "If False, the automatic parsing of class headers for dictionaries when pre-built dictionaries are "
0923 "missing is disable during module construction. The current implementation of disabling the parsing is "
0924 "fragile, and may work only in a single-thread job that does not use reco::parser::cutParser() or "
0925 "reco::parser::expressionParser() (and it certainly does not work on multiple threads).");
0926 desc.addUntracked<bool>("LoadAllDictionaries", false)->setComment("If True, loads all ROOT dictionaries.");
0927 desc.addUntracked<bool>("EnableIMT", true)->setComment("If True, calls ROOT::EnableImplicitMT().");
0928 desc.addUntracked<bool>("AbortOnSignal", true)
0929 ->setComment(
0930 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
0931 "attempts to do a clean shutdown.");
0932 desc.addUntracked<bool>("InteractiveDebug", false)
0933 ->setComment(
0934 "If True, leave gdb attached to cmsRun after a crash; "
0935 "if False, attach gdb, print a stack trace, and quit gdb");
0936 desc.addUntracked<int>("DebugLevel", 0)->setComment("Sets ROOT's gDebug value.");
0937 desc.addUntracked<int>("StackTracePauseTime", 300)
0938 ->setComment("Seconds to pause other threads during stack trace.");
0939 descriptions.add("InitRootHandlers", desc);
0940 }
0941
0942 char const* const* InitRootHandlers::getPstackArgv() { return pstackArgv_; }
0943
0944 void InitRootHandlers::enableWarnings_() { s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo; }
0945
0946 void InitRootHandlers::ignoreWarnings_(edm::RootHandlers::SeverityLevel level) { s_ignoreWarnings = level; }
0947
0948 void InitRootHandlers::cachePidInfo() {
0949 if (helperThread_) {
0950
0951
0952
0953 return;
0954 }
0955 std::string gdbcmd{"date; gdb -quiet -p %d"};
0956 if (!interactiveDebug_) {
0957 gdbcmd +=
0958 " 2>&1 <<EOF |\n"
0959 "set width 0\n"
0960 "set height 0\n"
0961 "set pagination no\n"
0962 "thread apply all bt\n"
0963 "EOF\n"
0964 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
0965 }
0966 if (snprintf(pidString_, pidStringLength_ - 1, gdbcmd.c_str(), getpid()) >= pidStringLength_) {
0967 std::ostringstream sstr;
0968 sstr << "Unable to pre-allocate stacktrace handler information";
0969 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0970 throw except;
0971 }
0972
0973
0974
0975
0976 close(childToParent_[0]);
0977 close(childToParent_[1]);
0978 childToParent_[0] = -1;
0979 childToParent_[1] = -1;
0980 close(parentToChild_[0]);
0981 close(parentToChild_[1]);
0982 parentToChild_[0] = -1;
0983 parentToChild_[1] = -1;
0984
0985 if (-1 == pipe2(childToParent_, O_CLOEXEC)) {
0986 std::ostringstream sstr;
0987 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0988 edm::Exception except(edm::errors::OtherCMS, sstr.str());
0989 throw except;
0990 }
0991
0992 if (-1 == pipe2(parentToChild_, O_CLOEXEC)) {
0993 close(childToParent_[0]);
0994 close(childToParent_[1]);
0995 childToParent_[0] = -1;
0996 childToParent_[1] = -1;
0997 std::ostringstream sstr;
0998 sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
0999 edm::Exception except(edm::errors::OtherCMS, sstr.str());
1000 throw except;
1001 }
1002
1003 helperThread_ = std::make_unique<std::thread>(stacktraceHelperThread);
1004 helperThread_->detach();
1005 }
1006
1007 }
1008 }
1009
1010 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
1011
1012 using edm::service::InitRootHandlers;
1013 typedef edm::serviceregistry::AllArgsMaker<edm::RootHandlers, InitRootHandlers> RootHandlersMaker;
1014 DEFINE_FWK_SERVICE_MAKER(InitRootHandlers, RootHandlersMaker);