File indexing completed on 2024-04-06 11:56:36
0001
0002
0003 """ Print the total number of events processed by the mille jobs per dataset
0004
0005 The information is taken from the `mps.db' file. Will group entries of the
0006 same dataset and also datasets the script *thinks* belong to the same
0007 data type, e.g. 0T cosmics. This grouping is implemented in a very simple
0008 way and its result should always be checked by the user.
0009
0010 Usage:
0011
0012 `python mps_list_evts.py <mps.db file name>' or, after `scram b'
0013 `mps_list_evts.py <mps.db file name>'
0014
0015 M. Schroeder, DESY Hamburg 26-May-2014
0016 """
0017 from __future__ import print_function
0018
0019 import sys
0020
0021
0022 mps_db = "mps.db"
0023
0024
def get_mille_lines(db_file=None):
    """ Return list of mps.db lines that correspond to a mille job

    Each returned entry is the list of the 13 `:'-separated fields of
    one line. A line is selected if it has exactly 13 fields and its
    second field equals `job' followed by its first field.

    `db_file' is the name of the file to parse. If it is omitted, the
    module-level `mps_db' name is used, so existing callers that set
    `mps_db' before calling keep working.
    """
    if db_file is None:
        db_file = mps_db

    mille_lines = []
    with open(db_file, "r") as db:
        for line in db:
            parts = line.rstrip('\n').split(":")
            # lines with any other number of fields are not job entries
            if len(parts) != 13:
                continue
            # keep only entries whose name field is `job<first field>'
            if parts[1] == "job"+parts[0]:
                mille_lines.append(parts)

    return mille_lines
0039
0040
0041
def get_num_evts_per_dataset(mille_lines):
    """ Sum up the number of events of all mille lines per dataset

    `mille_lines' is a list of field lists. For each entry, field 12
    is used as the dataset label and field 6 is converted to an
    integer event count.

    Returns a dict `<dataset>:<num_evts>' holding the summed counts.
    """
    totals = {}
    for fields in mille_lines:
        label = fields[12]
        # a label seen for the first time starts from zero
        totals[label] = totals.get(label, 0) + int(fields[6])

    return totals
0058
0059
0060
def get_num_evts_per_merged_dataset(merged_datasets,num_evts_per_dataset):
    """ Sum up the number of events per merged dataset

    `merged_datasets' maps each merged dataset name to the list of
    dataset names it consists of; `num_evts_per_dataset' maps each
    dataset name to its number of events.

    Returns a dict `<merged_dataset>:<num_evts>', where <num_evts> is
    the sum over all datasets listed for that merged dataset.
    """
    totals = {}
    for merged_name, members in merged_datasets.items():
        totals[merged_name] = sum(num_evts_per_dataset[member] for member in members)

    return totals
0075
0076
0077
def merge_datasets(num_evts_per_dataset):
    """ Return dict `<merged_dataset> : list of <dataset>'

    Groups all dataset names (the keys of `num_evts_per_dataset') by
    the part of the name in front of the first occurrence of `run',
    with trailing underscores removed. For example:

      isolated_mu_runa_v1, isolated_mu_runb_v1, isolated_mu_runc_v2 --> isolated_mu

    A dataset name that does not contain `run' forms a group under its
    own full name (again without trailing underscores).

    The returned dict has as value the list of datasets in that group.
    """
    merged_datasets = {}
    for dataset in num_evts_per_dataset:
        # partition() returns the complete name if `run' is absent;
        # slicing up to find()'s -1 would cut off the last character
        bare_name = dataset.partition("run")[0].rstrip("_")
        merged_datasets.setdefault(bare_name, []).append(dataset)

    return merged_datasets
0098
0099
0100
def print_merging_scheme(merged_datasets):
    """ Print which datasets make up each merged dataset

    `merged_datasets' maps a merged dataset name to the list of
    dataset names it was built from. After a header line, each merged
    name is printed followed by one line per contributing dataset.
    """
    report = ["Defining the following merged datasets:"]
    for merged_name, members in merged_datasets.items():
        report.append("\n `"+merged_name+"' from:")
        report.extend(" `"+member+"'" for member in members)
    # one print of the joined lines gives the same text as one print per line
    print("\n".join(report))
0112
0113
0114
def print_num_evts_per_dataset(num_evts_per_dataset):
    """ Print a table of the number of events per dataset

    `num_evts_per_dataset' maps a name to its number of events. After
    a header line, one row per name is printed in sorted order. Names
    are left-aligned and counts right-aligned, each padded to the
    longest entry of its column.
    """
    print("The following number of events per dataset have been processed:")
    names = sorted(num_evts_per_dataset.keys())
    counts = [str(num_evts_per_dataset[name]) for name in names]

    # the extra 0 keeps max() defined when there is nothing to print
    name_width = max([len(name) for name in names] + [0])
    count_width = max([len(count) for count in counts] + [0])

    row = " {0: <"+str(name_width)+"}" + " : " + " {1: >"+str(count_width)+"}"
    for name, count in zip(names, counts):
        print(row.format(name, count))
0134
0135
if __name__ == '__main__':
    # Script entry point: parse the mps.db file given as first command
    # line argument and print the event counts per dataset, the merging
    # scheme, and the event counts per merged dataset.

    if not sys.argv[1:]:
        # no file name given: print the usage and exit with an error code
        for usage_line in ('ERROR',
                           'usage:',
                           ' python mps_list_evts.py <mps.db file name> or, after scram b',
                           ' mps_list_evts.py <mps.db file name>'):
            print(usage_line)
        sys.exit(1)

    # get_mille_lines() reads the file name from this module-level name
    mps_db = sys.argv[1]
    print('Parsing '+mps_db)

    mille_lines = get_mille_lines()
    num_evts_per_dataset = get_num_evts_per_dataset(mille_lines)
    merged_datasets = merge_datasets(num_evts_per_dataset)
    num_evts_per_merged_dataset = get_num_evts_per_merged_dataset(
        merged_datasets, num_evts_per_dataset)

    print("\n")
    print_num_evts_per_dataset(num_evts_per_dataset)
    print("\n\n")
    print_merging_scheme(merged_datasets)
    print("\n\n")
    print_num_evts_per_dataset(num_evts_per_merged_dataset)
0160
0161