Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:56:53

/***********************************************************************************
 * Macro was updated for the purpose of the TrackerAlignment group "All-In-One tool".
 * Reference to the original macro by Samantha Hewamanage <samanthaATSPAMMENOTfnal.gov>:
 * https://github.com/hkaushalya/haddws
 * The changes are mostly informational (log messages) and deal with inconsistent binning.
 * @Author: Tomas Kello <tomas DOT kello AT cern.ch>
 ***********************************************************************************/
0008 
0009 #include "Riostream.h"
0010 #include "TChain.h"
0011 #include "TFile.h"
0012 #include "TH1.h"
0013 #include "TKey.h"
0014 #include "TProfile.h"
0015 #include "TTree.h"
0016 #include <cstdlib>
0017 #include <cstring>
0018 #include <sstream>
0019 #include <utility>
0020 #include <vector>
0021 
0022 using namespace std;
0023 
0024 TFile *Target;
0025 typedef vector<pair<TFile *, double> > vec_pair;
0026 typedef vector<pair<TFile *, double> >::const_iterator vec_pair_it;
0027 
0028 void MergeRootfile(TDirectory *target, const vector<pair<TFile *, double> > &vFileList);
0029 void loginfo(char log_type, std::string text);
0030 
/**
 * Print one tagged log line to standard output.
 *
 * Severity tags:
 *    'i' -> INFO    = Informative text
 *    'w' -> WARNING = Notify user about unpredictable changes or missing files which do not result in abort
 *    'e' -> ERROR   = Error in logic. Can be fixed by user (missing input, settings clash ...)
 *    'f' -> FATAL   = Fatal error. Cannot be fixed by user (the way how input is produced has changed or bug ...)
 *
 * This function only prints; aborting after an ERROR/FATAL message is the caller's job.
 * Any other value of log_type is printed with an "[UNKNOWN]" tag instead of being
 * silently discarded, so that no message gets lost because of a mistyped severity.
 *
 * @param log_type severity selector, one of 'i', 'w', 'e', 'f' (default 'i')
 * @param text     message to print (default: empty)
 */
void loginfo(char log_type = 'i', std::string text = "") {
  // All tags have the same width so that the messages stay column-aligned.
  const char *tag = "[UNKNOWN]  ";
  switch (log_type) {
    case 'i':
      tag = "[INFO]     ";
      break;
    case 'w':
      tag = "[WARNING]  ";
      break;
    case 'e':
      tag = "[ERROR]    ";
      break;
    case 'f':
      tag = "[FATAL]    ";
      break;
    default:
      break;
  }

  const std::string source = "haddws:                    ";
  std::cout << source << tag << text << std::endl;
}
0052 
0053 /* entry point for commandline executable */
0054 int main(int argc, char *argv[]) {
0055   if (argc < 2) {
0056     cout << "\n======================== HADDWS USAGE INFO ===============================================" << endl;
0057     cout << "       ./haddws file1.root file2.root w1 w2\n" << endl;
0058     cout << "       w1, w2: are doubleing point numbers." << endl;
0059     cout << "          Each input file should have a corresponding weight (>0)." << endl;
0060     cout << "          A weight of 1 will leave those histograms/trees unchanged." << endl;
0061     cout << "       Files names and weights can be given in any order." << endl;
0062     cout << "          But the first weight found will be assigned to the first file listed," << endl;
0063     cout << "          second weight found will be assigned to the second file listed and so on." << endl;
0064     cout << "       Output file will be named results.root and will be recreated with subseqent" << endl;
0065     cout << "          executions." << endl;
0066     cout << "       Return value:" << endl;
0067     cout << "          0 in any error, 1 upon successful completion." << endl;
0068     cout << "======================== END USAGE INFO ==================================================\n" << endl;
0069     return 0;
0070   }
0071 
0072   vec_pair inputList;
0073   vector<string> inputFileNames;
0074   vector<double> inputWeights;
0075 
0076   try {
0077     for (int i = 1; i < argc; ++i) {
0078       //const char *argi = argv[i];
0079       string sargi(argv[i]);
0080 
0081       //check if this a number
0082       if (sargi.find_first_not_of("1234567890.") == string::npos) {
0083         const double w = atof(argv[i]);
0084         inputWeights.push_back(w);
0085       } else {
0086         inputFileNames.push_back(sargi);
0087       }
0088     }
0089   } catch (exception &e) {
0090     cout << e.what() << endl;
0091   }
0092 
0093   //do some basic sanity checks
0094   //if no weights are given, just add them. else use the weights.
0095   if (!inputWeights.empty()) {
0096     if (inputFileNames.size() != inputWeights.size()) {
0097       loginfo('e', "Every input root file must have a corresponding weight! please check!");
0098       return 0;
0099     }
0100   }
0101   //check if all files exists and readable
0102   for (vector<string>::const_iterator it = inputFileNames.begin(); it != inputFileNames.end(); ++it) {
0103     TFile *f = new TFile(it->c_str());
0104     if (f->IsZombie()) {
0105       cout << "File: " << (*it) << " not found or readable!" << endl;
0106       return 0;
0107     }
0108     f->Close();
0109     delete f;
0110   }
0111 
0112   vec_pair vFileList;
0113 
0114   for (unsigned i = 0; i < inputFileNames.size(); ++i) {
0115     double w = 1.0;
0116     if (!inputWeights.empty())
0117       w = inputWeights.at(i);
0118     vFileList.push_back(make_pair(TFile::Open(inputFileNames.at(i).c_str()), w));
0119 
0120     string msg("");
0121     msg += "File";
0122     if (!inputWeights.empty())
0123       msg += "/Weight";
0124     msg += " = ";
0125     msg += vFileList.at(i).first->GetName();
0126     if (!inputWeights.empty()) {
0127       stringstream swgt;
0128       swgt << " <- " << vFileList.at(i).second;
0129       msg += swgt.str();
0130     }
0131     loginfo('i', msg);
0132   }
0133 
0134   Target = TFile::Open("result.root", "RECREATE");
0135   MergeRootfile(Target, vFileList);
0136 
0137   //now cleanup. This makes valgrind happy :)
0138   Target->Close();
0139   for (vec_pair_it it = vFileList.begin(); it != vFileList.end(); ++it) {
0140     delete (it->first);
0141   }
0142 
0143   return 1;
0144 }
0145 
0146 void haddws() {
0147   /**********************************************************
0148    * in an interactive ROOT session, edit the file names, 
0149     * corresponding weights, and target name. Then
0150    * root:> .x haddws.C+
0151     **********************************************************/
0152 
0153   Target = TFile::Open("result.root", "RECREATE");
0154 
0155   vec_pair vFileList;
0156   vFileList.push_back(make_pair(TFile::Open("simple1.root"), 1.0));
0157   vFileList.push_back(make_pair(TFile::Open("simple2.root"), 0.5));
0158 
0159   for (vec_pair_it it = vFileList.begin(); it != vFileList.end(); ++it) {
0160     cout << "File/weight = " << it->first->GetName() << "/" << it->second << endl;
0161   }
0162 
0163   MergeRootfile(Target, vFileList);
0164   Target->Close();
0165 
0166   for (vec_pair_it it = vFileList.begin(); it != vFileList.end(); ++it) {
0167     delete (it->first);
0168   }
0169 }
0170 
0171 void MergeRootfile(TDirectory *target, const vector<pair<TFile *, double> > &vFileList) {
0172   //cout << "Target path: " << target->GetPath() << endl;
0173   TString path((char *)strstr(target->GetPath(), ":"));
0174   path.Remove(0, 2);
0175 
0176   vec_pair_it it = vFileList.begin();
0177   TFile *first_source = (*it).first;
0178   const double first_weight = (*it).second;
0179   first_source->cd(path);
0180   TDirectory *current_sourcedir = gDirectory;
0181   //gain time, do not add the objects in the list in memory
0182   Bool_t status = TH1::AddDirectoryStatus();
0183   TH1::AddDirectory(kFALSE);
0184 
0185   // loop over all keys in this directory
0186   TChain *globChain = nullptr;
0187   TIter nextkey(current_sourcedir->GetListOfKeys());
0188   TKey *key, *oldkey = nullptr;
0189   while ((key = (TKey *)nextkey())) {
0190     //keep only the highest cycle number for each key
0191     if (oldkey && !strcmp(oldkey->GetName(), key->GetName()))
0192       continue;
0193 
0194     // read object from first source file
0195     first_source->cd(path);
0196     TObject *obj = key->ReadObj();
0197 
0198     if (obj->IsA()->InheritsFrom(TH1::Class())) {
0199       // descendant of TH1 -> merge it
0200 
0201       //cout << "Merging histogram " << obj->GetName() << endl;
0202       TH1 *h1 = (TH1 *)obj;
0203       if (first_weight > 0) {
0204         h1->Scale(first_weight);
0205       }
0206 
0207       // loop over all source files and add the content of the
0208       // correspondant histogram to the one pointed to by "h1"
0209       for (vec_pair_it nextsrc = vFileList.begin() + 1; nextsrc != vFileList.end(); ++nextsrc) {
0210         // make sure we are at the correct directory level by cd'ing to path
0211         (*nextsrc).first->cd(path);
0212         const double next_weight = (*nextsrc).second;
0213         TKey *key2 = (TKey *)gDirectory->GetListOfKeys()->FindObject(h1->GetName());
0214 
0215         //check for the same number of bins and same bin labels, then scale and add
0216         if (key2) {
0217           TH1 *h2 = (TH1 *)key2->ReadObj();
0218           if (next_weight > 0) {
0219             h2->Scale(next_weight);
0220           }
0221           if (h1->GetNbinsX() == h2->GetNbinsX()) {
0222             h1->Add(h2);
0223           } else {
0224             loginfo('w',
0225                     "Inconsistent binning detected:. Merge will possibly fail for histogram: " +
0226                         std::string(h2->GetName()));
0227             h1->Add(h2);
0228           }
0229           delete h2;
0230         }
0231       }
0232 
0233     } else if (obj->IsA()->InheritsFrom(TTree::Class())) {
0234       // loop over all source files create a chain of Trees "globChain"
0235       const char *obj_name = obj->GetName();
0236 
0237       globChain = new TChain(obj_name);
0238       globChain->Add(first_source->GetName());
0239       for (vec_pair_it nextsrc = vFileList.begin() + 1; nextsrc != vFileList.end(); ++nextsrc) {
0240         globChain->Add(nextsrc->first->GetName());
0241       }
0242 
0243     } else if (obj->IsA()->InheritsFrom(TDirectory::Class())) {
0244       // it's a subdirectory
0245       loginfo('i', "Found subdirectory " + std::string(obj->GetName()));
0246 
0247       // create a new subdir of same name and title in the target file
0248       target->cd();
0249       TDirectory *newdir = target->mkdir(obj->GetName(), obj->GetTitle());
0250 
0251       // newdir is now the starting point of another round of merging
0252       // newdir still knows its depth within the target file via
0253       // GetPath(), so we can still figure out where we are in the recursion
0254       MergeRootfile(newdir, vFileList);
0255 
0256     } else {
0257       // object is of no type that we know or can handle
0258       cout << "Unknown object type, name: " << obj->GetName() << " title: " << obj->GetTitle() << endl;
0259     }
0260 
0261     // now write the merged histogram (which is "in" obj) to the target file
0262     // note that this will just store obj in the current directory level,
0263     // which is not persistent until the complete directory itself is stored
0264     // by "target->Write()" below
0265     if (obj) {
0266       target->cd();
0267 
0268       //!!if the object is a tree, it is stored in globChain...
0269       if (obj->IsA()->InheritsFrom(TTree::Class()))
0270         globChain->Merge(target->GetFile(), 0, "keep");
0271       else
0272         obj->Write(key->GetName());
0273     }
0274 
0275   }  // while ( ( TKey *key = (TKey*)nextkey() ) )
0276 
0277   // save modifications to target file
0278   target->SaveSelf(kTRUE);
0279   TH1::AddDirectory(status);
0280 }