Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:29:11

0001 // -*- C++ -*-
0002 //
0003 // Package:     Services
0004 // Class  :     ResourceEnforcer
0005 //
0006 // Implementation:
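//     After events finish, compares the process's VSize, RSS and elapsed
//     real time with the configured limits and throws an edm::Exception
//     when an enabled limit is exceeded.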
0008 //
0009 // Original Author:  Chris Jones
0010 //         Created:  Sun May  6 12:16:49 CDT 2012
0011 //
0012 
// system include files
#include <atomic>

// user include files
#include "FWCore/Services/plugins/ProcInfoFetcher.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
#include "FWCore/Utilities/interface/CPUTimer.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
#include "FWCore/ServiceRegistry/interface/SystemBounds.h"
0023 
0024 namespace edm {
0025   class Event;
0026   class EventSetup;
0027 
0028   namespace service {
0029     class ResourceEnforcer {
0030     public:
0031       ResourceEnforcer(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iAR);
0032 
0033       static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0034 
0035     private:
0036       void check();
0037       void postEventProcessing(edm::StreamContext const&);
0038 
0039       ProcInfoFetcher m_fetcher;
0040       CPUTimer m_timer;
0041 
0042       double m_maxVSize;
0043       double m_maxRSS;
0044       double m_maxTime;
0045       unsigned int m_nEventsToSkip;
0046       std::atomic<unsigned int> m_eventCount;
0047       std::atomic<bool> m_doingCheck;
0048     };
0049   }  // namespace service
0050 }  // namespace edm
0051 
0052 using namespace edm::service;
0053 //
0054 // constants, enums and typedefs
0055 //
0056 
0057 //
0058 // static data member definitions
0059 //
0060 
0061 //
0062 // constructors and destructor
0063 //
0064 ResourceEnforcer::ResourceEnforcer(edm::ParameterSet const& iConfig, ActivityRegistry& iReg)
0065     : m_maxVSize(iConfig.getUntrackedParameter<double>("maxVSize", 0) * 1000.),  //convert to MB
0066       m_maxRSS(iConfig.getUntrackedParameter<double>("maxRSS", 0) * 1000.),
0067       m_maxTime(iConfig.getUntrackedParameter<double>("maxTime", 0) * 60. * 60.),  //convert from hours to seconds
0068       m_nEventsToSkip(0),
0069       m_eventCount(0),
0070       m_doingCheck(false) {
0071   iReg.watchPostEvent(this, &ResourceEnforcer::postEventProcessing);
0072   iReg.watchPreallocate([this](edm::service::SystemBounds const& iBounds) {
0073     //We do not want the frequency of checking to be dependent on
0074     // how many parallel streams are running
0075     m_nEventsToSkip = iBounds.maxNumberOfStreams() - 1;
0076   });
0077   m_timer.start();
0078 }
0079 
0080 //
0081 // member functions
0082 //
0083 
0084 //
0085 // const member functions
0086 //
0087 
0088 void ResourceEnforcer::postEventProcessing(StreamContext const&) {
0089   //If another thread is already doing a check, we don't need to do another one
0090   auto count = ++m_eventCount;
0091   if (count > m_nEventsToSkip) {
0092     bool expected = false;
0093     if (m_doingCheck.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) {
0094       this->check();
0095       m_doingCheck.store(false, std::memory_order_release);
0096       m_eventCount.store(0, std::memory_order_release);
0097     }
0098   }
0099 }
0100 
0101 void ResourceEnforcer::check() {
0102   ProcInfo pi = m_fetcher.fetch();
0103 
0104   if (0 != m_maxVSize && m_maxVSize < pi.vsize) {
0105     throw edm::Exception(errors::ExceededResourceVSize)
0106         << "Exceeded maximum allowed VSize of " << m_maxVSize / 1000. << " GB (VSize is " << pi.vsize / 1000. << ")";
0107   }
0108 
0109   if (0 != m_maxRSS && m_maxRSS < pi.rss) {
0110     throw edm::Exception(errors::ExceededResourceRSS)
0111         << "Exceeded maximum allowed RSS of " << m_maxRSS / 1000. << " GB (VSize is " << pi.rss / 1000. << ")";
0112   }
0113 
0114   if (0 != m_maxTime && m_maxTime < m_timer.realTime()) {
0115     throw edm::Exception(errors::ExceededResourceTime)
0116         << "Exceeded maximum allowed time of " << m_maxTime / 60. / 60. << " hours";
0117   }
0118 }
0119 
0120 //
0121 // static member functions
0122 //
0123 
0124 void ResourceEnforcer::fillDescriptions(ConfigurationDescriptions& descriptions) {
0125   ParameterSetDescription desc;
0126   desc.addUntracked<double>("maxVSize", 0.)
0127       ->setComment("Maximum allowed VSize for the job in GB. Ignored if set to 0.");
0128   desc.addUntracked<double>("maxRSS", 0.)->setComment("Maximum allowd RSS for the job in GB. Ignored if set to 0.");
0129   desc.addUntracked<double>("maxTime", 0.)
0130       ->setComment("Maximum allowd wallclock time for the job in hours. Ignored if set to 0.");
0131   descriptions.add("ResourceEnforcer", desc);
0132 }
0133 
0134 DEFINE_FWK_SERVICE(ResourceEnforcer);