File indexing completed on 2024-11-25 02:29:03
0001
0002
0003 """ Print the total number of events processed by the mille jobs per dataset
0004
0005 The information is taken from the `mps.db' file. Will group entries of the
0006 same dataset and also datasets the script *thinks* belong to the same
0007 data type, e.g. 0T cosmics. This grouping is implemented very simply, so its
0008 result should always be checked by the user.
0009
0010 Usage:
0011
0012 `python mps_list_evts.py <mps.db file name>' or, after `scram b'
0013 `mps_list_evts.py <mps.db file name>'
0014
0015 M. Schroeder, DESY Hamburg 26-May-2014
0016 """
0017
0018 import sys
0019
0020
0021 mps_db = "mps.db"
0022
0023
def get_mille_lines(db_file=None):
    """ Return list of mps.db lines that correspond to a mille job

    Arguments:
    - `db_file`: path of the mps.db file to parse. If omitted (or None),
                 the module-level `mps_db' is used, so existing calls
                 without an argument keep working.

    Each returned entry is the list of the 13 colon-separated fields of
    one line. A line is kept only if it has exactly 13 fields and its
    second field equals "job" followed by its first field; every other
    line is skipped.
    """
    if db_file is None:
        # resolved at call time, so a later rebinding of `mps_db' is honoured
        db_file = mps_db

    mille_lines = []
    with open(db_file, "r") as db:
        for line in db:
            parts = line.rstrip('\n').split(":")
            if len(parts) == 13 and parts[1] == "job"+parts[0]:
                mille_lines.append(parts)

    return mille_lines
0038
0039
0040
def get_num_evts_per_dataset(mille_lines):
    """ Sum up the number of events for each dataset label

    `mille_lines' is an iterable of field lists. For every entry, field 12
    is taken as the dataset label and field 6 is converted with int() and
    added to that label's running total.

    Returns a dict `<dataset>:<num_evts>'.
    """
    totals = {}
    for fields in mille_lines:
        label = fields[12]
        # a label seen for the first time starts from zero
        totals[label] = totals.get(label, 0) + int(fields[6])

    return totals
0057
0058
0059
def get_num_evts_per_merged_dataset(merged_datasets,num_evts_per_dataset):
    """ Sum up the number of events for each merged dataset

    `merged_datasets' maps a merged-dataset label to the list of dataset
    labels it consists of; `num_evts_per_dataset' maps a dataset label to
    its number of events. Every listed dataset must be a key of
    `num_evts_per_dataset'.

    Returns a dict `<merged_dataset>:<num_evts>'.
    """
    return {
        label: sum(num_evts_per_dataset[member] for member in members)
        for label, members in merged_datasets.items()
    }
0074
0075
0076
def merge_datasets(num_evts_per_dataset):
    """ Return dict `<merged_dataset> : list of <dataset>'

    Associates all datasets in `num_evts_per_dataset' that belong by their
    name to the same PD but to a different run era. The merged name is the
    part of the dataset name before the first occurrence of "run", with
    trailing underscores removed. For example:

    isolated_mu_runa_v1, isolated_mu_runb_v1, isolated_mu_runc_v2 --> isolated_mu

    A dataset whose name does not contain "run" is kept under its own,
    unchanged name.

    The returned dict has as value a list of the merged datasets.
    """
    merged_datasets = {}
    for dataset in num_evts_per_dataset:
        run_pos = dataset.find("run")
        if run_pos < 0:
            # find() returns -1 if "run" is absent; slicing with it would
            # silently chop off the last character of the name
            bare_name = dataset
        else:
            bare_name = dataset[:run_pos].rstrip("_")
        merged_datasets.setdefault(bare_name, []).append(dataset)

    return merged_datasets
0097
0098
0099
def print_merging_scheme(merged_datasets):
    """ Print which datasets have been combined into each merged dataset

    `merged_datasets' maps a merged-dataset label to the list of dataset
    labels it consists of. After a heading line, each merged label is
    printed followed by one line per contributing dataset. No event
    numbers are printed here.
    """
    print("Defining the following merged datasets:")
    for label in merged_datasets:
        print("".join(("\n `", label, "' from:")))
        for member in merged_datasets[label]:
            print("".join((" `", member, "'")))
0111
0112
0113
def print_num_evts_per_dataset(num_evts_per_dataset):
    """ Print an aligned table of the number of events per dataset

    `num_evts_per_dataset' is a dict `<dataset>:<num_evts>'. After a
    heading line, one row per dataset is printed in sorted order of the
    labels. Labels are left-aligned and numbers right-aligned, each padded
    to the widest entry of its column.
    """
    print("The following number of events per dataset have been processed:")
    labels = sorted(num_evts_per_dataset.keys())
    counts = [str(num_evts_per_dataset[label]) for label in labels]

    # the leading 0 keeps max() well-defined for an empty dict
    label_width = max([0] + [len(label) for label in labels])
    count_width = max([0] + [len(count) for count in counts])

    row = " {0: <"+str(label_width)+"}" + " : " + " {1: >"+str(count_width)+"}"
    for label, count in zip(labels, counts):
        print(row.format(label, count))
0133
0134
if __name__ == '__main__':
    # Script entry point: parse the mps.db file given as first argument
    # and print the event counts per dataset and per merged dataset.

    if len(sys.argv) < 2:
        for usage_line in ('ERROR',
                           'usage:',
                           ' python mps_list_evts.py <mps.db file name> or, after scram b',
                           ' mps_list_evts.py <mps.db file name>'):
            print(usage_line)
        sys.exit(1)

    # get_mille_lines() reads the module-level `mps_db', so rebind it here
    mps_db = sys.argv[1]
    print('Parsing '+mps_db)

    evts_by_dataset = get_num_evts_per_dataset(get_mille_lines())
    merging_scheme = merge_datasets(evts_by_dataset)
    evts_by_merged_dataset = get_num_evts_per_merged_dataset(merging_scheme,evts_by_dataset)

    # three sections, separated by blank lines
    print("\n")
    print_num_evts_per_dataset(evts_by_dataset)
    print("\n\n")
    print_merging_scheme(merging_scheme)
    print("\n\n")
    print_num_evts_per_dataset(evts_by_merged_dataset)
0159
0160