Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-03-03 02:26:45

0001 #!/usr/bin/env python3
0002 # Copyright (C) 2014 Colin Bernet
0003 # https://github.com/cbernet/heppy/blob/master/LICENSE
0004 
0005 from __future__ import print_function
0006 import os
0007 import pprint
0008 import pickle
0009 import shutil
0010 
0011 MAX_ARG_STRLEN = 131072
0012 
def haddPck(file, odir, idirs):
    '''Add pck files in directories idirs to a directory odir.

    All dirs in idirs must have the same subdirectory structure.
    Each pickle file is opened, and the corresponding objects are added
    (with ``+=``) to a destination pickle in odir. A ``str()`` dump of the
    result is also written next to it, with '.pck' replaced by '.txt'.

    file  -- path of the pickle file inside idirs[0]
    odir  -- destination directory, substituted for idirs[0] in file
    idirs -- input directories; idirs[0] is the reference one

    An object that cannot be added with ``+=`` (TypeError) is skipped and
    the running result is left unchanged.
    '''
    total = None
    for idir in idirs:
        fileName = file.replace(idirs[0], idir)
        # pickle streams are binary: text mode breaks under python 3.
        # NOTE: unpickling can execute arbitrary code; only use on trusted files.
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented, nevermind
                pass

    oFileName = file.replace(idirs[0], odir)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)
    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
0040     
0041 
def hadd(file, odir, idirs, appx=''):
    '''Merge one file of idirs[0] with its counterparts in all idirs.

    file  -- path of the file inside idirs[0]
    odir  -- destination directory, substituted for idirs[0] in file
    idirs -- input directories; idirs[0] is the reference one
    appx  -- suffix inserted before '.root' in the output file name
             (used for the intermediate files of a split merge)

    '.pck' files are delegated to haddPck. '.root' files are merged by
    running the external ``hadd`` command through os.system (its exit
    status is not checked). Any other file is ignored.

    When the command string is longer than MAX_ARG_STRLEN, the inputs are
    split in two halves, each half is merged into an intermediate file
    (suffixes appx+'1' and appx+'2'), and the two intermediate files are
    merged into the final output. Intermediate files are left in place.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError:
            # presumably the pickled class is not importable here; skip the file
            pass
        return
    elif not file.endswith('.root'):
        return

    target = file.replace(idirs[0], odir)
    haddCmd = ['hadd', target.replace('.root', appx + '.root')]
    haddCmd.extend(file.replace(idirs[0], idir) for idir in idirs)
    cmd = ' '.join(haddCmd)
    print(cmd)

    # integer division: a float is not a valid slice bound in python 3
    half = len(idirs) // 2
    # with a single input there is nothing to split (would recurse forever)
    if len(cmd) > MAX_ARG_STRLEN and half > 0:
        print('Command longer than maximum unix string length; dividing into 2')
        # suffixes extend appx so that nested splits never reuse a file name
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half]), odir, idirs[half:], appx + '2')
        haddCmd = ['hadd',
                   target.replace('.root', appx + '.root'),
                   target.replace('.root', appx + '1.root'),
                   target.replace('.root', appx + '2.root')]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
        os.system(cmd)
    else:
        os.system(cmd)
0071 
0072 
def haddRec(odir, idirs):
    '''Recursively merge the content of all idirs into a new directory odir.

    odir  -- destination directory; it must not exist yet. Missing parent
             directories are created.
    idirs -- input directories; the tree of idirs[0] is walked, its
             sub-directories are recreated under odir and hadd is called
             for every file found.

    Raises OSError if odir cannot be created, e.g. because it already exists.
    '''
    print('adding', idirs)
    print('to', odir) 

    try:
        # makedirs: the parent of odir may not exist when a separate
        # destination directory was requested
        os.makedirs(odir)
    except OSError:
        # only give the "already there" hint when that is the actual problem
        if os.path.exists(odir):
            print() 
            print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
            print() 
        raise
    for root, dirs, files in os.walk(idirs[0]):
        for subdir in dirs:
            # mirror the sub-directory of idirs[0] under odir
            os.mkdir('/'.join([root, subdir]).replace(idirs[0], odir))
        for name in files:
            hadd('/'.join([root, name]), odir, idirs)
0098 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Group the <comp>_Chunk<N> sub-directories of idir and merge each group.

    idir          -- directory scanned for chunk sub-directories
    removeDestDir -- if true, an existing destination directory is removed first
    cleanUp       -- if true, the chunk directories are moved to 'Chunks'
                     (relative to the current directory) after merging;
                     an existing 'Chunks' directory is removed first
    odir_cmd      -- prefix of the destination: odir_cmd/idir/<comp>

    Returns None. Prints a warning and returns early when no chunk is found.
    '''
    groups = {}
    for entry in sorted(os.listdir(idir)):
        entry_path = '/'.join([idir, entry])
        if not os.path.isdir(entry_path):
            continue
        pieces = entry.split('_Chunk')
        if len(pieces) != 2:
            # ok, not a chunk
            continue
        groups.setdefault(pieces[0], []).append(entry_path)

    if not groups:
        print('warning: no chunk found.')
        return

    for comp, comp_chunks in groups.items():
        odir = odir_cmd + '/' + '/'.join([idir, comp])
        print(odir, comp_chunks)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, comp_chunks)

    if not cleanUp:
        return
    chunkDir = 'Chunks'
    if os.path.isdir(chunkDir):
        shutil.rmtree(chunkDir)
    os.mkdir(chunkDir)
    print(groups)
    for comp_chunks in groups.values():
        for chunk in comp_chunks:
            shutil.move(chunk, chunkDir)
0132         
0133 
if __name__ == '__main__':
    # Command line entry point: haddChunks on <dir>, optionally writing
    # the merged components below a second, destination directory.

    import os
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example: 
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r","--remove", dest="remove",
                      default=False,action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c","--clean", dest="clean",
                      default=False,action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options,args) = parser.parse_args()

    if not args:
        # without this check args[0] below fails with a bare IndexError
        parser.print_usage()
        print('provide the source directory as first argument')
        sys.exit(1)

    if len(args)>2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)

    idir = args[0]
    # the destination defaults to the current directory
    odir = args[1] if len(args) > 1 else './'

    haddChunks(idir, options.remove, options.clean, odir)
0168