Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-06-12 23:30:07

0001 #include "lst.h"
0002 #include "LSTPrepareInput.h"
0003 
0004 #include <typeinfo>
0005 
0006 using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::LSTEvent;
0007 using LSTInputDeviceCollection = ALPAKA_ACCELERATOR_NAMESPACE::lst::LSTInputDeviceCollection;
0008 using namespace ::lst;
0009 
0010 //___________________________________________________________________________________________________________________________________________________________________________________________
0011 int main(int argc, char **argv) {
0012   //********************************************************************************
0013   //
0014   // 0. Preliminary operations
0015   //
0016   //********************************************************************************
0017 
0018   // Checking the TRACKLOOPERDIR is set
0019   ana.track_looper_dir_path = gSystem->Getenv("TRACKLOOPERDIR");
0020   if (ana.track_looper_dir_path.IsNull()) {
0021     RooUtil::error(
0022         "TRACKLOOPERDIR is not set! Did you run $ source setup.sh from TrackLooper/ main repository directory?");
0023   }
0024   RooUtil::print(TString::Format("TRACKLOOPERDIR=%s", ana.track_looper_dir_path.Data()));
0025 
0026   // Write the command line used to run it
0027   // N.B. This needs to be before the argument parsing as it will change some values
0028   std::vector<std::string> allArgs(argv, argv + argc);
0029   ana.full_cmd_line = "";
0030   for (auto &str : allArgs) {
0031     ana.full_cmd_line += TString::Format(" %s", str.c_str());
0032   }
0033 
0034   //********************************************************************************
0035   //
0036   // 1. Parsing options
0037   //
0038   //********************************************************************************
0039 
0040   // cxxopts is just a tool to parse argc, and argv easily
0041 
0042   // Grand option setting
0043   cxxopts::Options options("\n  $ lst",
0044                            "\n         **********************\n         *                    *\n         *       "
0045                            "Looper       *\n         *                    *\n         **********************\n");
0046 
0047   // Read the options
0048   options.add_options()("m,mode", "Run mode (NOT DEFINED)", cxxopts::value<int>()->default_value("5"))(
0049       "i,input",
0050       "Comma separated input file list OR if just a directory is provided it will glob all in the directory BUT must "
0051       "end with '/' for the path",
0052       cxxopts::value<std::string>()->default_value("muonGun"))(
0053       "t,tree",
0054       "Name of the tree in the root file to open and loop over",
0055       cxxopts::value<std::string>()->default_value("trackingNtuple/tree"))(
0056       "o,output", "Output file name", cxxopts::value<std::string>())(
0057       "N,nmatch", "N match for MTV-like matching", cxxopts::value<int>()->default_value("9"))(
0058       "p,ptCut", "Min pT cut In GeV", cxxopts::value<float>()->default_value("0.8"))(
0059       "n,nevents", "N events to loop over", cxxopts::value<int>()->default_value("-1"))(
0060       "x,event_index", "specific event index to process", cxxopts::value<int>()->default_value("-1"))(
0061       "g,pdg_id", "The simhit pdgId match option", cxxopts::value<int>()->default_value("0"))(
0062       "v,verbose",
0063       "Verbose mode (0: no print, 1: only final timing, 2: object multiplitcity",
0064       cxxopts::value<int>()->default_value("0"))(
0065       "w,write_ntuple", "Write Ntuple", cxxopts::value<int>()->default_value("1"))(
0066       "s,streams", "Set number of streams", cxxopts::value<int>()->default_value("1"))(
0067       "d,debug", "Run debug job. i.e. overrides output option to 'debug.root' and 'recreate's the file.")(
0068       "l,lower_level", "write lower level objects ntuple results")("G,gnn_ntuple", "write gnn input variable ntuple")(
0069       "j,nsplit_jobs", "Enable splitting jobs by N blocks (--job_index must be set)", cxxopts::value<int>())(
0070       "I,job_index",
0071       "job_index of split jobs (--nsplit_jobs must be set. index starts from 0. i.e. 0, 1, 2, 3, etc...)",
0072       cxxopts::value<int>())("3,tc_pls_triplets", "Allow triplet pLSs in TC collection")(
0073       "2,no_pls_dupclean", "Disable pLS duplicate cleaning (both steps)")("h,help", "Print help");
0074 
0075   auto result = options.parse(argc, argv);
0076 
0077   // NOTE: When an option was provided (e.g. -i or --input), then the result.count("<option name>") is more than 0
0078   // Therefore, the option can be parsed easily by asking the condition if (result.count("<option name>");
0079   // That's how the several options are parsed below
0080 
0081   //_______________________________________________________________________________
0082   // --help
0083   if (result.count("help")) {
0084     std::cout << options.help() << std::endl;
0085     exit(1);
0086   }
0087 
0088   //_______________________________________________________________________________
0089   // --input
0090   ana.input_raw_string = result["input"].as<std::string>();
0091 
0092   // A default value one
0093   TString TrackingNtupleDir = gSystem->Getenv("TRACKINGNTUPLEDIR");
0094   if (ana.input_raw_string.EqualTo("muonGun"))
0095     ana.input_file_list_tstring = TString::Format("%s/trackingNtuple_10mu_pt_0p5_2.root", TrackingNtupleDir.Data());
0096   else if (ana.input_raw_string.EqualTo("muonGun_highPt"))
0097     ana.input_file_list_tstring = TString::Format("%s/trackingNtuple_10mu_pt_0p5_50.root", TrackingNtupleDir.Data());
0098   else if (ana.input_raw_string.EqualTo("pionGun"))
0099     ana.input_file_list_tstring =
0100         TString::Format("%s/trackingNtuple_6pion_1k_pt_0p5_50.root", TrackingNtupleDir.Data());
0101   else if (ana.input_raw_string.EqualTo("PU200"))
0102     ana.input_file_list_tstring = TString::Format("%s/trackingNtuple_ttbar_PU200.root", TrackingNtupleDir.Data());
0103   else if (ana.input_raw_string.EqualTo("PU200RelVal"))
0104     ana.input_file_list_tstring = TString::Format(
0105         "%s/RelValTTbar_14TeV_CMSSW_12_5_0_pre3/",
0106         (TrackingNtupleDir.Replace(31, 1, "5").Replace(38, 1, "3"))
0107             .Data());  // RelVal files under CMSSW_12_5_0_pre3 directory, not CMSSW_12_2_0_pre2 as is the case for the rest of the samples
0108   else if (ana.input_raw_string.EqualTo("cube5"))
0109     ana.input_file_list_tstring =
0110         TString::Format("%s/trackingNtuple_10mu_10k_pt_0p5_2_5cm_cube.root", TrackingNtupleDir.Data());
0111   else if (ana.input_raw_string.EqualTo("cube5_highPt"))
0112     ana.input_file_list_tstring =
0113         TString::Format("%s/trackingNtuple_10mu_10k_pt_0p5_50_5cm_cube.root", TrackingNtupleDir.Data());
0114   else if (ana.input_raw_string.EqualTo("cube50"))
0115     ana.input_file_list_tstring =
0116         TString::Format("%s/trackingNtuple_10mu_10k_pt_0p5_2_50cm_cube.root", TrackingNtupleDir.Data());
0117   else if (ana.input_raw_string.EqualTo("cube50_highPt"))
0118     ana.input_file_list_tstring =
0119         TString::Format("%s/trackingNtuple_10mu_10k_pt_0p5_50_50cm_cube.root", TrackingNtupleDir.Data());
0120   else {
0121     ana.input_file_list_tstring = ana.input_raw_string;
0122   }
0123 
0124   //_______________________________________________________________________________
0125   // --tree
0126   ana.input_tree_name = result["tree"].as<std::string>();
0127 
0128   //_______________________________________________________________________________
0129   // --debug
0130   if (result.count("debug")) {
0131     ana.output_tfile = new TFile("debug.root", "recreate");
0132   } else {
0133     //_______________________________________________________________________________
0134     // --output
0135     if (result.count("output")) {
0136       ana.output_tfile = new TFile(result["output"].as<std::string>().c_str(), "create");
0137       if (not ana.output_tfile->IsOpen()) {
0138         std::cout << options.help() << std::endl;
0139         std::cout << "ERROR: output already exists! provide new output name or delete old file. OUTPUTFILE="
0140                   << result["output"].as<std::string>() << std::endl;
0141         exit(1);
0142       }
0143     } else {
0144       std::cout
0145           << "Warning: Output file name is not provided! Check your arguments. Output file will be set to 'debug.root'"
0146           << std::endl;
0147       ana.output_tfile = new TFile("debug.root", "recreate");
0148     }
0149   }
0150 
0151   //_______________________________________________________________________________
0152   // --ptCut
0153   ana.ptCut = result["ptCut"].as<float>();
0154 
0155   //_______________________________________________________________________________
0156   // --nmatch
0157   ana.nmatch_threshold = result["nmatch"].as<int>();
0158 
0159   //_______________________________________________________________________________
0160   // --nevents
0161   ana.n_events = result["nevents"].as<int>();
0162   ana.specific_event_index = result["event_index"].as<int>();
0163 
0164   //_______________________________________________________________________________
0165   // --pdg_id
0166   ana.pdg_id = result["pdg_id"].as<int>();
0167 
0168   //_______________________________________________________________________________
0169   // --nsplit_jobs
0170   if (result.count("nsplit_jobs")) {
0171     ana.nsplit_jobs = result["nsplit_jobs"].as<int>();
0172     if (ana.nsplit_jobs <= 0) {
0173       std::cout << options.help() << std::endl;
0174       std::cout << "ERROR: option string --nsplit_jobs" << ana.nsplit_jobs << " has zero or negative value!"
0175                 << std::endl;
0176       std::cout << "I am not sure what this means..." << std::endl;
0177       exit(1);
0178     }
0179   } else {
0180     ana.nsplit_jobs = -1;
0181   }
0182 
0183   //_______________________________________________________________________________
0184   // --job_index
0185   if (result.count("job_index")) {
0186     ana.job_index = result["job_index"].as<int>();
0187     if (ana.job_index < 0) {
0188       std::cout << options.help() << std::endl;
0189       std::cout << "ERROR: option string --job_index" << ana.job_index << " has negative value!" << std::endl;
0190       std::cout << "I am not sure what this means..." << std::endl;
0191       exit(1);
0192     }
0193   } else {
0194     ana.job_index = -1;
0195   }
0196 
0197   // Sanity check for split jobs (if one is set the other must be set too)
0198   if (result.count("job_index") or result.count("nsplit_jobs")) {
0199     // If one is not provided then throw error
0200     if (not(result.count("job_index") and result.count("nsplit_jobs"))) {
0201       std::cout << options.help() << std::endl;
0202       std::cout << "ERROR: option string --job_index and --nsplit_jobs must be set at the same time!" << std::endl;
0203       exit(1);
0204     }
0205     // If it is set then check for sanity
0206     else {
0207       if (ana.job_index >= ana.nsplit_jobs) {
0208         std::cout << options.help() << std::endl;
0209         std::cout << "ERROR: --job_index >= --nsplit_jobs ! This does not make sense..." << std::endl;
0210         exit(1);
0211       }
0212     }
0213   }
0214 
0215   //_______________________________________________________________________________
0216   // --verbose
0217   ana.verbose = result["verbose"].as<int>();
0218 
0219   //_______________________________________________________________________________
0220   // --streams
0221   ana.streams = result["streams"].as<int>();
0222 
0223   //_______________________________________________________________________________
0224   // --mode
0225   ana.mode = result["mode"].as<int>();
0226 
0227   //_______________________________________________________________________________
0228   // --write_ntuple
0229   ana.do_write_ntuple = result["write_ntuple"].as<int>();
0230 
0231   //_______________________________________________________________________________
0232   // --optimization
0233 
0234   //_______________________________________________________________________________
0235   // --lower_level
0236   if (result.count("lower_level")) {
0237     ana.do_lower_level = true;
0238   } else {
0239     ana.do_lower_level = false;
0240   }
0241 
0242   //_______________________________________________________________________________
0243   // --gnn_ntuple
0244   if (result.count("gnn_ntuple")) {
0245     ana.gnn_ntuple = true;
0246     // If one is not provided then throw error
0247     if (not ana.do_write_ntuple) {
0248       std::cout << options.help() << std::endl;
0249       std::cout << "ERROR: option string --write_ntuple 1 and --gnn_ntuple must be set at the same time!" << std::endl;
0250       exit(1);
0251     }
0252   } else {
0253     ana.gnn_ntuple = false;
0254   }
0255 
0256   //_______________________________________________________________________________
0257   // --tc_pls_triplets
0258   ana.tc_pls_triplets = result["tc_pls_triplets"].as<bool>();
0259 
0260   //_______________________________________________________________________________
0261   // --no_pls_dupclean
0262   ana.no_pls_dupclean = result["no_pls_dupclean"].as<bool>();
0263 
0264   // Printing out the option settings overview
0265   std::cout << "=========================================================" << std::endl;
0266   std::cout << " Running for Acc = " << alpaka::getAccName<ALPAKA_ACCELERATOR_NAMESPACE::Acc3D>() << std::endl;
0267   std::cout << " Setting of the analysis job based on provided arguments " << std::endl;
0268   std::cout << "---------------------------------------------------------" << std::endl;
0269   std::cout << " ana.input_file_list_tstring: " << ana.input_file_list_tstring << std::endl;
0270   std::cout << " ana.output_tfile: " << ana.output_tfile->GetName() << std::endl;
0271   std::cout << " ana.n_events: " << ana.n_events << std::endl;
0272   std::cout << " ana.nsplit_jobs: " << ana.nsplit_jobs << std::endl;
0273   std::cout << " ana.job_index: " << ana.job_index << std::endl;
0274   std::cout << " ana.specific_event_index: " << ana.specific_event_index << std::endl;
0275   std::cout << " ana.do_write_ntuple: " << ana.do_write_ntuple << std::endl;
0276   std::cout << " ana.mode: " << ana.mode << std::endl;
0277   std::cout << " ana.streams: " << ana.streams << std::endl;
0278   std::cout << " ana.verbose: " << ana.verbose << std::endl;
0279   std::cout << " ana.nmatch_threshold: " << ana.nmatch_threshold << std::endl;
0280   std::cout << " ana.tc_pls_triplets: " << ana.tc_pls_triplets << std::endl;
0281   std::cout << " ana.no_pls_dupclean: " << ana.no_pls_dupclean << std::endl;
0282   std::cout << "=========================================================" << std::endl;
0283 
0284   // Create the TChain that holds the TTree's of the baby ntuples
0285   ana.events_tchain = RooUtil::FileUtil::createTChain(ana.input_tree_name, ana.input_file_list_tstring);
0286   ana.looper.init(ana.events_tchain, &trk, ana.n_events);
0287   ana.looper.setSilent();
0288 
0289   // Set the cutflow object output file
0290   ana.cutflow.setTFile(ana.output_tfile);
0291 
0292   // Create ttree to output to the ana.output_tfile
0293   ana.output_ttree = new TTree("tree", "tree");
0294 
0295   // Create TTreeX instance that will take care of the interface part of TTree
0296   ana.tx = new RooUtil::TTreeX(ana.output_ttree);
0297 
0298   // Write metadata related to this run
0299   writeMetaData();
0300 
0301   // Run the code
0302   run_lst();
0303 
0304   return 0;
0305 }
0306 
0307 //___________________________________________________________________________________________________________________________________________________________________________________________
0308 void run_lst() {
0309   ALPAKA_ACCELERATOR_NAMESPACE::Device devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u);
0310   std::vector<ALPAKA_ACCELERATOR_NAMESPACE::Queue> queues;
0311   for (int s = 0; s < ana.streams; s++) {
0312     queues.push_back(ALPAKA_ACCELERATOR_NAMESPACE::Queue(devAcc));
0313   }
0314 
0315   // Load various maps used in the lst reconstruction
0316   TStopwatch full_timer;
0317   full_timer.Start();
0318   // Determine which maps to use based on given pt cut for standalone.
0319   std::string ptCutString = (ana.ptCut >= 0.8) ? "0.8" : "0.6";
0320   auto hostESData = lst::loadAndFillESHost(ptCutString);
0321   auto deviceESData =
0322       cms::alpakatools::CopyToDevice<lst::LSTESData<alpaka_common::DevHost>>::copyAsync(queues[0], *hostESData.get());
0323   float timeForMapLoading = full_timer.RealTime() * 1000;
0324 
0325   if (ana.do_write_ntuple) {
0326     createOutputBranches();
0327     if (ana.gnn_ntuple) {
0328       createGnnNtupleBranches();
0329     }
0330   }
0331 
0332   std::vector<LSTInputHostCollection> out_lstInputHC;
0333   std::vector<int> evt_num;
0334   std::vector<TString> file_name;
0335 
0336   // Looping input file
0337   full_timer.Reset();
0338   full_timer.Start();
0339   while (ana.looper.nextEvent()) {
0340     if (ana.verbose >= 1)
0341       std::cout << "PreLoading event number = " << ana.looper.getCurrentEventIndex() << std::endl;
0342 
0343     if (not goodEvent())
0344       continue;
0345 
0346     auto lstInputHC = prepareInput(trk.getVF("see_px"),
0347                                    trk.getVF("see_py"),
0348                                    trk.getVF("see_pz"),
0349                                    trk.getVF("see_dxy"),
0350                                    trk.getVF("see_dz"),
0351                                    trk.getVF("see_ptErr"),
0352                                    trk.getVF("see_etaErr"),
0353                                    trk.getVF("see_stateTrajGlbX"),
0354                                    trk.getVF("see_stateTrajGlbY"),
0355                                    trk.getVF("see_stateTrajGlbZ"),
0356                                    trk.getVF("see_stateTrajGlbPx"),
0357                                    trk.getVF("see_stateTrajGlbPy"),
0358                                    trk.getVF("see_stateTrajGlbPz"),
0359                                    trk.getVI("see_q"),
0360                                    trk.getVVI("see_hitIdx"),
0361                                    trk.getVU("see_algo"),
0362                                    trk.getVU("ph2_detId"),
0363                                    trk.getVF("ph2_x"),
0364                                    trk.getVF("ph2_y"),
0365                                    trk.getVF("ph2_z"),
0366                                    ana.ptCut,
0367                                    queues[0]);
0368 
0369     out_lstInputHC.push_back(std::move(lstInputHC));
0370 
0371     evt_num.push_back(ana.looper.getCurrentEventIndex());
0372     file_name.push_back(ana.looper.getCurrentFileName());
0373   }
0374   float timeForInputLoading = full_timer.RealTime() * 1000;
0375 
0376   full_timer.Reset();
0377   full_timer.Start();
0378   std::vector<LSTEvent *> events;
0379   std::vector<ALPAKA_ACCELERATOR_NAMESPACE::Queue *> event_queues;
0380   for (int s = 0; s < ana.streams; s++) {
0381     LSTEvent *event = new LSTEvent(ana.verbose >= 2, ana.ptCut, queues[s], &deviceESData);
0382     events.push_back(event);
0383     event_queues.push_back(&queues[s]);
0384   }
0385   float timeForEventCreation = full_timer.RealTime() * 1000;
0386 
0387   std::vector<std::vector<float>> timevec;
0388   full_timer.Reset();
0389   full_timer.Start();
0390   float full_elapsed = 0;
0391 #pragma omp parallel num_threads(ana.streams)  // private(event)
0392   {
0393     std::vector<std::vector<float>> timing_information;
0394     float timing_input_loading;
0395     float timing_MD;
0396     float timing_LS;
0397     float timing_T3;
0398     float timing_T5;
0399     float timing_pLS;
0400     float timing_pT5;
0401     float timing_pT3;
0402     float timing_TC;
0403 
0404 #pragma omp for  // nowait// private(event)
0405     for (int evt = 0; evt < static_cast<int>(out_lstInputHC.size()); evt++) {
0406       if (ana.verbose >= 1)
0407         std::cout << "Running Event number = " << evt << " " << omp_get_thread_num() << std::endl;
0408 
0409       events.at(omp_get_thread_num())->initSync();
0410 
0411       // We need to initialize it here so that it stays in scope
0412       auto &queue = *event_queues.at(omp_get_thread_num());
0413       LSTInputDeviceCollection lstInputDC(out_lstInputHC.at(evt).sizes(), queue);
0414 
0415       timing_input_loading =
0416           addInputsToEventPreLoad(events.at(omp_get_thread_num()), &out_lstInputHC.at(evt), &lstInputDC, queue);
0417 
0418       timing_MD = runMiniDoublet(events.at(omp_get_thread_num()), evt);
0419       timing_LS = runSegment(events.at(omp_get_thread_num()));
0420       timing_T3 = runT3(events.at(omp_get_thread_num()));
0421       timing_T5 = runQuintuplet(events.at(omp_get_thread_num()));
0422 
0423       timing_pLS = runPixelLineSegment(events.at(omp_get_thread_num()), ana.no_pls_dupclean);
0424 
0425       timing_pT5 = runPixelQuintuplet(events.at(omp_get_thread_num()));
0426       timing_pT3 = runpT3(events.at(omp_get_thread_num()));
0427       timing_TC = runTrackCandidate(events.at(omp_get_thread_num()), ana.no_pls_dupclean, ana.tc_pls_triplets);
0428 
0429       if (ana.verbose == 4) {
0430 #pragma omp critical
0431         {
0432           // TODO BROKEN //
0433           // printAllObjects(events.at(omp_get_thread_num()));
0434         }
0435       }
0436 
0437       if (ana.verbose == 5) {
0438 #pragma omp critical
0439         {
0440           // TODO: debugPrintOutlierMultiplicities
0441         }
0442       }
0443 
0444       if (ana.do_write_ntuple) {
0445 #pragma omp critical
0446         {
0447           unsigned int trkev = evt_num.at(evt);
0448           TString fname = file_name.at(evt);
0449           TFile *f = TFile::Open(fname.Data(), "open");
0450           TTree *t = (TTree *)f->Get(ana.input_tree_name.Data());
0451           trk.Init(t);
0452           trk.GetEntry(trkev);
0453           fillOutputBranches(events.at(omp_get_thread_num()));
0454           f->Close();
0455         }
0456       }
0457 
0458       // Clear this event
0459       TStopwatch my_timer;
0460       my_timer.Start();
0461       events.at(omp_get_thread_num())->resetEventSync();
0462       float timing_resetEvent = my_timer.RealTime();
0463 
0464       timing_information.push_back({timing_input_loading,
0465                                     timing_MD,
0466                                     timing_LS,
0467                                     timing_T3,
0468                                     timing_T5,
0469                                     timing_pLS,
0470                                     timing_pT5,
0471                                     timing_pT3,
0472                                     timing_TC,
0473                                     timing_resetEvent});
0474     }
0475 
0476     full_elapsed =
0477         full_timer.RealTime() *
0478         1000.f;  // for loop has implicit barrier I think. So this stops onces all cpu threads have finished but before the next critical section.
0479 #pragma omp critical
0480     timevec.insert(timevec.end(), timing_information.begin(), timing_information.end());
0481   }
0482 
0483   float avg_elapsed = full_elapsed / out_lstInputHC.size();
0484 
0485   std::cout << "Time for map loading = " << timeForMapLoading << " ms\n";
0486   std::cout << "Time for input loading = " << timeForInputLoading << " ms\n";
0487   std::cout << "Time for event creation = " << timeForEventCreation << " ms\n";
0488   printTimingInformation(timevec, full_elapsed, avg_elapsed);
0489 
0490   if (ana.do_write_ntuple) {
0491     // Writing ttree output to file
0492     ana.output_tfile->cd();
0493     ana.cutflow.saveOutput();
0494     ana.output_ttree->Write();
0495   }
0496 
0497   for (int s = 0; s < ana.streams; s++) {
0498     delete events.at(s);
0499   }
0500 
0501   delete ana.output_tfile;
0502 }