File indexing completed on 2023-03-17 11:15:51
0001
0002
0003
0004
0005 from __future__ import print_function
0006 import os
0007 import pprint
0008 import pickle
0009 import shutil
0010
# Longest command string (in characters) that hadd() will hand to the shell
# in one go; anything longer is split into two smaller merges.
MAX_ARG_STRLEN = 128 * 1024
0012
def haddPck(file, odir, idirs):
    '''Add pck files in directories idirs to a directory odir.

    All dirs in idirs must have the same subdirectory structure.
    Each pickle file will be opened, and the corresponding objects added
    (with ``+=``) to a destination pickle in odir. A text dump of the summed
    object is written next to the destination pickle, with the ``.pck``
    extension replaced by ``.txt``.

    Args:
        file: path of the pickle file inside ``idirs[0]``. The matching file
            in every other input directory is located by substituting the
            directory name in this path.
        odir: destination directory; the sub-directory that will hold the
            output file must already exist.
        idirs: list of input directories.

    Raises:
        IOError/OSError: if an input file is missing or the output cannot
            be written.
        ImportError: if unpickling needs a module that cannot be imported.

    NOTE(review): pickle.load can execute arbitrary code stored in the file;
    only run this on chunk directories you trust.
    '''
    total = None
    for idir in idirs:
        fileName = file.replace(idirs[0], idir)
        # Pickles are binary data: read them in 'rb' mode (mandatory on
        # Python 3) and close the handle as soon as the object is loaded.
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += is not implemented for this object type: deliberately
                # keep what has been accumulated so far and carry on.
                pass

    oFileName = file.replace(idirs[0], odir)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)

    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
0040
0041
def hadd(file, odir, idirs, appx=''):
    '''Merge one file that exists in every directory of idirs into odir.

    ``.pck`` files are merged with haddPck(); ``.root`` files are merged by
    running the external ``hadd`` command through the shell; any other file
    type is ignored.

    If the ``hadd`` command line would be longer than MAX_ARG_STRLEN, the
    input directories are split in two halves which are merged separately
    into intermediate files, and these are then merged into the final target.

    Args:
        file: path of the file inside ``idirs[0]``.
        odir: destination directory.
        idirs: list of input directories sharing the same layout.
        appx: suffix inserted before ``.root`` in the output file name; used
            for the intermediate files of a split merge.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError:
            # Best effort: unpickling fails when the pickled object's module
            # cannot be imported here. Skip the file, but say so.
            print('warning: could not import what is needed to read', file,
                  '- skipping')
        return
    elif not file.endswith('.root'):
        return

    target = file.replace(idirs[0], odir)
    haddCmd = ['hadd', target.replace('.root', appx + '.root')]
    haddCmd.extend(file.replace(idirs[0], idir) for idir in idirs)

    cmd = ' '.join(haddCmd)
    print(cmd)
    if len(cmd) > MAX_ARG_STRLEN:
        print('Command longer than maximum unix string length; dividing into 2')
        # Floor division: a plain '/' gives a float on Python 3, which is
        # not a valid slice index.
        half = len(idirs) // 2
        # Extend the current suffix instead of using bare '1'/'2', so that
        # nested splits never reuse the name of the file they are producing.
        firstAppx = appx + '1'
        secondAppx = appx + '2'
        hadd(file, odir, idirs[:half], firstAppx)
        hadd(file.replace(idirs[0], idirs[half]), odir, idirs[half:], secondAppx)
        haddCmd = [
            'hadd',
            target.replace('.root', appx + '.root'),
            target.replace('.root', firstAppx + '.root'),
            target.replace('.root', secondAppx + '.root'),
        ]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
        os.system(cmd)
    else:
        os.system(cmd)
0071
0072
def haddRec(odir, idirs):
    '''Mirror the directory tree of idirs[0] under odir and merge its files.

    odir is created first and must not exist yet. The tree below idirs[0] is
    then walked top-down: every sub-directory is re-created under odir, and
    every file is passed to hadd() together with the full list of input
    directories.

    Raises:
        OSError: if odir (or a mirrored sub-directory) cannot be created,
            e.g. because it already exists.
    '''
    print('adding', idirs)
    print('to', odir)

    try:
        os.mkdir(odir)
    except OSError:
        print()
        print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
        print()
        raise

    template = idirs[0]
    for root, subdirs, fileNames in os.walk(template):
        # Parents are visited before their children, so each mirrored
        # directory can be created with a plain mkdir.
        for sub in subdirs:
            mirrored = (root + '/' + sub).replace(template, odir)
            os.mkdir(mirrored)
        for fileName in fileNames:
            hadd(root + '/' + fileName, odir, idirs)
0098
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Group the "<component>_Chunk<N>" directories of idir and merge each group.

    Args:
        idir: directory scanned for chunk sub-directories.
        removeDestDir: if true, an existing destination directory is deleted
            before merging.
        cleanUp: if true, a fresh 'Chunks' directory is created in the
            current working directory and all chunk directories are moved
            into it after merging.
        odir_cmd: prefix prepended to "<idir>/<component>" to build each
            destination directory.
    '''
    chunks = {}
    for entry in sorted(os.listdir(idir)):
        entryPath = idir + '/' + entry
        if not os.path.isdir(entryPath):
            continue
        pieces = entry.split('_Chunk')
        if len(pieces) != 2:
            # Not of the form "<component>_Chunk<N>": ignore it.
            continue
        chunks.setdefault(pieces[0], list()).append(entryPath)

    if not chunks:
        print('warning: no chunk found.')
        return

    for comp, cchunks in chunks.items():
        odir = odir_cmd + '/' + idir + '/' + comp
        print(odir, cchunks)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, cchunks)

    if cleanUp:
        chunkDir = 'Chunks'
        if os.path.isdir(chunkDir):
            shutil.rmtree(chunkDir)
        os.mkdir(chunkDir)
        print(chunks)
        for compChunks in chunks.values():
            for chunk in compChunks:
                shutil.move(chunk, chunkDir)
0132
0133
if __name__ == '__main__':
    # Command-line entry point: hadd.py [options] <dir> [<odir>]

    import os
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog [options] <dir> [<odir>]
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example:
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r","--remove", dest="remove",
                      default=False,action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c","--clean", dest="clean",
                      default=False,action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options,args) = parser.parse_args()

    if not args:
        # Without this guard args[0] below dies with a bare IndexError.
        parser.print_usage()
        print('provide the source directory as first argument')
        sys.exit(1)

    if len(args)>2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    dir = args[0]
    # The destination prefix is optional and defaults to the current directory.
    if len(args) > 1:
        odir = args[1]
    else:
        odir = './'

    haddChunks(dir, options.remove, options.clean, odir)
0168