Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:04:21

0001 #ifndef DataFormats_Histograms_MonitorElementCollection_h
0002 #define DataFormats_Histograms_MonitorElementCollection_h
0003 // -*- C++ -*-
0004 //
0005 // Package:     DataFormats/Histograms
0006 // Class  :     MonitorElementCollection
0007 //
0008 /**\class MonitorElementCollection MonitorElementCollection.h "DataFormats/Histograms/interface/MonitorElementCollection.h"
0009 
0010  Description: Product to represent DQM data in LuminosityBlocks and Runs.
0011  The MonitorElements are represented by a simple struct that only contains the 
0012  required fields to represent a ME. The only operation allowed on these objects
0013  is merging, which is an important part of the DQM functionality and should be
0014  handled by EDM.
0015 
0016  Usage: This product should only be handled by the DQMStore, which provides 
0017  access to the MEs inside. The DQMStore will wrap the MonitorElementData in
0018  real MonitorElements, which allow various operations on the underlying 
0019  histograms, depending on the current stage of processing: In the RECO step,
0020  only filling is allowed, while in HARVESTING, the same data will be wrapped in
0021  a MonitorElement that also allows access to the ROOT objects.
0022 
0023  Currently, the product types are not used as products and all data is passed
0024  through the edm::Service<DQMStore>.
0025 
0026 */
0027 //
0028 // Original Author:  Marcel Schneider
0029 //         Created:  2018-05-02
0030 //
0031 //
#include "DataFormats/Provenance/interface/LuminosityBlockID.h"
#include "FWCore/Utilities/interface/propagate_const.h"

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "TH1.h"
0041 
0042 struct MonitorElementData {
0043   // This is technically a union, but the struct is safer.
0044   struct Scalar {
0045     int64_t num = 0;
0046     double real = 0;
0047     std::string str;
0048   };
0049 
0050   // Quality test result types.
0051   // These are inherited from DQMNet/old DQMStore, and left unchanged to avoid
0052   // another layer of wrapping. The APIs are used in some places in subsystem
0053   // code, and could be changed, but not removed.
0054   class QReport {
0055   public:
0056     struct QValue {
0057       int code;
0058       float qtresult;
0059       std::string message;
0060       std::string qtname;
0061       std::string algorithm;
0062     };
0063     struct DQMChannel {
0064       int binx;       //< bin # in x-axis (or bin # for 1D histogram)
0065       int biny;       //< bin # in y-axis (for 2D or 3D histograms)
0066       int binz;       //< bin # in z-axis (for 3D histograms)
0067       float content;  //< bin content
0068       float RMS;      //< RMS of bin content
0069 
0070       int getBin() { return getBinX(); }
0071       int getBinX() { return binx; }
0072       int getBinY() { return biny; }
0073       int getBinZ() { return binz; }
0074       float getContents() { return content; }
0075       float getRMS() { return RMS; }
0076 
0077       DQMChannel(int bx, int by, int bz, float data, float rms) {
0078         binx = bx;
0079         biny = by;
0080         binz = bz;
0081         content = data;
0082         RMS = rms;
0083       }
0084 
0085       DQMChannel() {
0086         binx = 0;
0087         biny = 0;
0088         binz = 0;
0089         content = 0;
0090         RMS = 0;
0091       }
0092     };
0093 
0094     /// access underlying value
0095     QValue& getValue() { return qvalue_; };
0096     QValue const& getValue() const { return qvalue_; };
0097 
0098     /// get test status
0099     int getStatus() const { return qvalue_.code; }
0100 
0101     /// get test result i.e. prob value
0102     float getQTresult() const { return qvalue_.qtresult; }
0103 
0104     /// get message attached to test
0105     const std::string& getMessage() const { return qvalue_.message; }
0106 
0107     /// get name of quality test
0108     const std::string& getQRName() const { return qvalue_.qtname; }
0109 
0110     /// get quality test algorithm
0111     const std::string& getAlgorithm() const { return qvalue_.algorithm; }
0112 
0113     /// get vector of channels that failed test
0114     /// (not relevant for all quality tests!)
0115     const std::vector<DQMChannel>& getBadChannels() const { return badChannels_; }
0116 
0117     void setBadChannels(std::vector<DQMChannel> badChannels) { badChannels_ = badChannels; }
0118 
0119     QReport(QValue value) : qvalue_(value) {}
0120 
0121   private:
0122     QValue qvalue_;                        //< Pointer to the actual data.
0123     std::vector<DQMChannel> badChannels_;  //< Bad channels from QCriterion.
0124   };
0125 
0126   // These values are compatible to DQMNet, but DQMNet is not likely to exist
0127   // in the future.
0128   enum class Kind {
0129     INVALID = 0x0,
0130     INT = 0x1,
0131     REAL = 0x2,
0132     STRING = 0x3,
0133     TH1F = 0x10,
0134     TH1S = 0x11,
0135     TH1D = 0x12,
0136     TH1I = 0x13,
0137     TH2F = 0x20,
0138     TH2S = 0x21,
0139     TH2D = 0x22,
0140     TH2I = 0x23,
0141     TH3F = 0x30,
0142     TPROFILE = 0x40,
0143     TPROFILE2D = 0x41
0144   };
0145 
0146   // Which window of time the ME is supposed to cover.
0147   // There is space for a granularity level between runs and lumisections,
0148   // maybe blocks of 10LS or some fixed number of events or integrated
0149   // luminosity. We also want to be able to change the granularity centrally
0150   // depending on the use case. That is what the default is for, and it should
0151   // be used unless some specific granularity is really required.
0152   // We'll also need to switch the default to JOB for multi-run harvesting.
0153   enum Scope { JOB = 1, RUN = 2, LUMI = 3 /*, BLOCK = 4 */ };
0154 
  // The main ME data: a scalar payload, an owned ROOT object, and the list of
  // quality reports attached to the ME.
  // NOTE(review): an earlier version of this comment said that QTest results
  // are not kept and that only the fields stored in DQMIO files are used, but
  // `qreports_` is a member here -- confirm which statement is current.
  struct Value {
    // Scalar payload; presumably the one used for the INT/REAL/STRING kinds.
    Scalar scalar_;
    // Owned ROOT histogram. edm::propagate_const is expected to forward the
    // constness of the Value to the pointee -- see propagate_const.h.
    edm::propagate_const<std::unique_ptr<TH1>> object_;
    // Quality test reports attached to this ME.
    std::vector<QReport> qreports_;
  };
0162 
0163   struct Path {
0164   private:
0165     // We could use pointers to interned strings here to save some space.
0166     std::string dirname_;
0167     std::string objname_;
0168 
0169   public:
0170     enum class Type { DIR, DIR_AND_NAME };
0171 
0172     std::string const& getDirname() const { return dirname_; }
0173     std::string const& getObjectname() const { return objname_; }
0174     std::string getFullname() const { return dirname_ + objname_; }
0175 
0176     // Clean up the path and normalize it to preserve certain invariants.
0177     // Instead of reasoning about whatever properties of paths, we just parse
0178     // the thing and build a normalized instance with no slash in the beginning
0179     // and a slash in the end.
0180     // Type of string `path` could be just directory name, or
0181     // directory name followed by the name of the monitor element
0182     void set(std::string path, Path::Type type) {
0183       //rebuild 'path' to be in canonical form
0184 
0185       //remove any leading '/'
0186       while (not path.empty() and path.front() == '/') {
0187         path.erase(path.begin());
0188       }
0189 
0190       //handle '..' and '//'
0191       // the 'dir' tokens are separate by a single '/'
0192       std::string::size_type tokenStartPos = 0;
0193       while (tokenStartPos < path.size()) {
0194         auto tokenEndPos = path.find('/', tokenStartPos);
0195         if (tokenEndPos == std::string::npos) {
0196           tokenEndPos = path.size();
0197         }
0198         if (0 == tokenEndPos - tokenStartPos) {
0199           //we are sitting on a '/'
0200           path.erase(path.begin() + tokenStartPos);
0201           continue;
0202         } else if (2 == tokenEndPos - tokenStartPos) {
0203           if (path[tokenStartPos] == '.' and path[tokenStartPos + 1] == '.') {
0204             //need to go backwards and remove previous directory
0205             auto endOfLastToken = tokenStartPos;
0206             if (tokenStartPos > 1) {
0207               endOfLastToken -= 2;
0208             }
0209             auto startOfLastToken = path.rfind('/', endOfLastToken);
0210             if (startOfLastToken == std::string::npos) {
0211               //we are at the very beginning of 'path' since no '/' found
0212               path.erase(path.begin(), path.begin() + tokenEndPos);
0213               tokenStartPos = 0;
0214             } else {
0215               path.erase(path.begin() + startOfLastToken + 1, path.begin() + tokenEndPos);
0216               tokenStartPos = startOfLastToken + 1;
0217             }
0218             continue;
0219           }
0220         }
0221         tokenStartPos = tokenEndPos + 1;
0222       }
0223 
0224       //separate into objname_ and dirname_;
0225       objname_.clear();
0226       if (type == Path::Type::DIR) {
0227         if (not path.empty() and path.back() != '/') {
0228           path.append(1, '/');
0229         }
0230         dirname_ = std::move(path);
0231       } else {
0232         auto lastSlash = path.rfind('/');
0233         if (lastSlash == std::string::npos) {
0234           objname_ = std::move(path);
0235           dirname_.clear();
0236         } else {
0237           objname_ = path.substr(lastSlash + 1);
0238           path.erase(path.begin() + lastSlash + 1, path.end());
0239           dirname_ = std::move(path);
0240         }
0241       }
0242     }
0243 
0244     bool operator==(Path const& other) const {
0245       return this->dirname_ == other.dirname_ && this->objname_ == other.objname_;
0246     }
0247   };
0248 
0249   // Metadata about the ME. The range is included here in case we have e.g.
0250   // multiple per-lumi histograms in one collection. For a logical comparison,
0251   // one should look only at the name.
0252   struct Key {
0253     Path path_;
0254 
0255     // Run number (and optionally lumi number) that the ME belongs to.
0256     edm::LuminosityBlockID id_;
0257     Scope scope_;
0258     Kind kind_;
0259 
0260     bool operator<(Key const& other) const {
0261       auto makeKeyTuple = [](Key const& k) {
0262         return std::make_tuple(
0263             k.path_.getDirname(), k.path_.getObjectname(), k.scope_, k.id_.run(), k.id_.luminosityBlock());
0264       };
0265 
0266       return makeKeyTuple(*this) < makeKeyTuple(other);
0267     }
0268   };
0269 
0270   bool operator<(MonitorElementData const& other) const { return this->key_ < other.key_; }
0271 
0272   // The only non class/struct members
0273   Key key_;
0274   Value value_;
0275 };
0276 
0277 // For now, no additional (meta-)data is needed apart from the MEs themselves.
0278 // The framework will take care of tracking the plugin and LS/run that the MEs
0279 // belong to.
0280 // Unused for now.
0281 class MonitorElementCollection {
0282   std::vector<std::unique_ptr<const MonitorElementData>> data_;
0283 
0284 public:
0285   void push_back(std::unique_ptr<const MonitorElementData> value) {
0286     // enforce ordering
0287     assert(data_.empty() || data_[data_.size() - 1] <= value);
0288     data_.push_back(std::move(value));
0289   }
0290 
0291   void swap(MonitorElementCollection& other) { data_.swap(other.data_); }
0292 
0293   auto begin() const { return data_.begin(); }
0294 
0295   auto end() const { return data_.end(); }
0296 
0297   bool mergeProduct(MonitorElementCollection const& product) {
0298     // discussion: https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuidePerRunAndPerLumiBlockData#Merging_Run_and_Luminosity_Block
0299     assert(!"Not implemented yet.");
0300     return false;
0301     // Things to decide:
0302     // - Should we allow merging collections of different sets of MEs? (probably not.) [0]
0303     // - Should we assume the MEs to be ordered? (probably yes.)
0304     // - How to handle incompatible MEs (different binning)? (fail hard.) [1]
0305     // - Can multiple MEs with same (dirname, objname) exist? (probably yes.) [2]
0306     // - Shall we modify the (immutable?) ROOT objects? (probably yes.)
0307     //
0308     // [0] Merging should increase the statistics, but not change the number of
0309     // MEs, at least with the current workflows. It might be convenient to
0310     // allow it, but for the beginning, it would only mask errors.
0311     // [1] The DQM framework should guarantee same booking parameters as long
0312     // as we stay within the Scope of the MEs.
0313     // [2] To implement e.g. MEs covering blocks of 10LS, we'd store them in a
0314     // run product, but have as many MEs with same name but different range as
0315     // needed to perserve the wanted granularity. Merging then can merge or
0316     // concatenate as possible/needed.
0317     // Problem: We need to keep copies in memory until the end of run, even
0318     // though we could save them to the output file as soon as it is clear that
0319     // the nexe LS will not fall into the same block. Instead, we could drop
0320     // them into the next lumi block we see; the semantics would be weird (the
0321     // MEs in the lumi block don't actually correspond to the lumi block they
0322     // are in) but the DQMIO output should be able to handle that.
0323   }
0324 };
0325 
0326 #endif