Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:56:36

0001 #!/usr/bin/env python3
0002 
0003 # Original author: Joerg Behr
0004 # Translation from Perl to Python: Gregor Mittag
0005 #
0006 # This script reads the histogram file produced by Pede and it extracts the plot
0007 # showing the average chi2/ndf per Mille binary number.  After reading the MPS
0008 # database, for which the file name has to be provided, an output file called
0009 # chi2pedehis.txt is produced where the first column corresponds to the
0010 # associated name, the second column corresponds to the Mille binary number, and
0011 # the last column is equal to <chi2/ndf>. As a further argument this script
0012 # expects the file name of the Pede histogram file -- usually millepede.his. The
0013 # last required argument represents the location of the Python config which was
0014 # used by CMSSW.
0015 #
0016 # Use createChi2ndfplot.C to plot the output of this script.
0017 
0018 from __future__ import print_function
0019 import os
0020 import sys
0021 import re
0022 import argparse
0023 
0024 import Alignment.MillePedeAlignmentAlgorithm.mpslib.tools as mps_tools
0025 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass as mpslib
0026 
0027 
0028 ################################################################################
def main(argv=None):
    """Main routine of the script.

    Reads the MPS database, the pede job configuration and the pede histogram
    file, then writes 'chi2pedehis.txt' to the current working directory.
    Each output line holds the name associated with a Mille binary, the
    zero-padded binary number and the matching <chi2/ndf> histogram entry.

    The script prints a message and exits with status 1 if an input file is
    missing, if the number of histogram entries differs from the number of
    used binaries, or if a used binary has no entry in the MPS database.

    Arguments:
    - `argv`: arguments passed to the main routine (default: `sys.argv[1:]`)
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Analysis pede histogram file")
    parser.add_argument("-d", "--mps-db", dest="mps_db", required=True,
                        metavar="PATH", help="MPS database file ('mps.db')")
    parser.add_argument("--his", dest="his_file", required=True,
                        metavar="PATH", help="pede histogram file")
    parser.add_argument("-c", "--cfg", dest="cfg", metavar="PATH", required=True,
                        help="python configuration file of pede job")
    parser.add_argument("-b", "--no-binary-check", dest="no_binary_check",
                        default=False, action="store_true",
                        help=("skip check for existing binaries "
                              "(possibly needed if used interactively)"))
    args = parser.parse_args(argv)

    for input_file in (args.mps_db, args.his_file, args.cfg):
        if not os.path.exists(input_file):
            print("Could not find input file:", input_file)
            sys.exit(1)

    ids, names = get_all_ids_names(args.mps_db)
    used_binaries = get_used_binaries(args.cfg, args.no_binary_check)
    his_data = get_his_data(args.his_file)

    if len(his_data) != len(used_binaries):
        print("The number of used binaries is", len(used_binaries), end=' ')
        print("whereas in contrast, however, the <chi2/ndf> histogram in Pede has", end=' ')
        print(len(his_data), "bins (Pede version >= rev92 might help if #bins < #binaries).", end=' ')
        print("Exiting.")
        sys.exit(1)

    # Build the ID -> name lookup once instead of scanning `ids` for every
    # binary.  `setdefault` keeps the first occurrence of a duplicated ID,
    # which is the entry `list.index` would have selected.
    name_by_id = {}
    for job_id, job_name in zip(ids, names):
        name_by_id.setdefault(job_id, job_name)

    # Check all IDs before touching the output file, so that a failure does
    # not leave a truncated 'chi2pedehis.txt' behind.
    missing = [b for b in used_binaries if b not in name_by_id]
    if missing:
        print("Could not find the following binary IDs in the MPS database:",
              ", ".join(str(b) for b in missing))
        sys.exit(1)

    with open("chi2pedehis.txt", "w") as f:
        # The lengths were verified to be equal above, so `zip` pairs every
        # used binary with its histogram entry.
        for b, chi2_ndf in zip(used_binaries, his_data):
            f.write(" ".join([name_by_id[b], "{:03d}".format(b), chi2_ndf])+"\n")
0074 
0075 
0076 ################################################################################
def get_all_ids_names(mps_db):
    """Read the MPS database and return the mille job IDs and their names.

    The result is a pair `(ids, names)`: the first `nJobs` entries of the
    database's `JOBNUMBER` list and of its `JOBSP3` list, respectively.

    Arguments:
    - `mps_db`: path to the MPS database file
    """

    database = mpslib.jobdatabase()
    database.read_db(mps_db)

    n_jobs = database.nJobs
    return database.JOBNUMBER[:n_jobs], database.JOBSP3[:n_jobs]
0091 
0092 
def get_used_binaries(cfg, no_binary_check):
    """Return the numbers of the Mille binaries used by the pede job.

    The binary file names are taken from the `mergeBinaryFiles` setting of the
    alignment algorithm configuration found in `cfg`; the number embedded in
    each name is returned as an integer.

    Arguments:
    - `cfg`: python config used to run the pede job
    - `no_binary_check`: if 'True' a check for file existence is skipped
    """

    process = mps_tools.get_process_object(cfg)
    file_names = process.AlignmentProducer.algoConfig.mergeBinaryFiles

    if not no_binary_check:
        # The binaries are looked up next to the config file, so this check
        # works only if 'cfg' was run from the directory it is located in.
        cfg_dir = os.path.dirname(cfg)
        file_names = [file_name for file_name in file_names
                      if os.path.exists(os.path.join(cfg_dir, file_name))]

    binary_ids = []
    for file_name in file_names:
        number = re.sub(r"milleBinary(\d+)\.dat", r"\1", file_name)
        binary_ids.append(int(number))

    return binary_ids
0115 
0116 
def get_his_data(his_file):
    """Parse the pede histogram file.

    Scans the file for the histogram titled
    'final <Chi^2/Ndf> from accepted local fits vs file number' and collects
    the last whitespace-separated column of each of its data lines, up to the
    line containing 'end of xy-data'.  A data line is one that contains at
    least one digit and no lowercase letter.

    Returns a list of strings (the values are not converted to numbers); the
    list is empty if the histogram is not present in the file.

    Arguments:
    - `his_file`: pede histogram file
    """

    chi2_title = "final <Chi^2/Ndf> from accepted local fits vs file number"
    # Raw-string patterns: the former non-raw "\d" is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    has_digit = re.compile(r"\d")
    has_lowercase = re.compile(r"[a-z]")

    his_data = []
    with open(his_file, "r") as his:
        found_chi2_start = False

        for line in his:
            if chi2_title in line:
                found_chi2_start = True
            if not found_chi2_start:
                continue
            if "end of xy-data" in line:
                break
            # Skip blank lines and textual lines (titles, axis labels, ...);
            # the title line itself is dropped here because it has letters.
            if not has_digit.search(line) or has_lowercase.search(line):
                continue
            his_data.append(line.split()[-1])

    return his_data
0141 
0142     
0143 ################################################################################
# Run the main routine only when this file is executed as a script; importing
# it as a module triggers no action.
if __name__ == "__main__":
    main()