Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:51

0001 #!/usr/bin/env python3
0002 # Copyright (C) 2014 Colin Bernet
0003 # https://github.com/cbernet/heppy/blob/master/LICENSE
0004 
0005 import os
0006 import pprint
0007 import pickle
0008 import shutil
0009 
# Upper bound on the length of a single hadd shell command: hadd() splits
# the list of inputs in two when the joined command string is longer.
# NOTE(review): 131072 (128 KiB) presumably mirrors the Linux kernel's
# per-argument MAX_ARG_STRLEN limit -- confirm.
MAX_ARG_STRLEN = 131072
0011 
def haddPck(file, odir, idirs):
    '''Add up the pickled objects stored at the same place in every input directory.

    The pickle to read in each input directory is found by substituting
    ``idirs[0]`` with that directory in ``file``, so all directories in
    ``idirs`` must have the same subdirectory structure. The objects are
    accumulated with ``+=`` and the result is pickled to the corresponding
    path under ``odir``; ``str()`` of the result is also written next to it
    in a ``.txt`` file.

    Args:
        file: path of the pickle file inside ``idirs[0]``.
        odir: destination directory (the sub-directory receiving the output
            file must already exist).
        idirs: input directories; ``idirs[0]`` must appear in ``file``.

    NOTE: unpickling can execute arbitrary code -- only run this on chunk
    directories you trust.
    '''
    total = None
    for idir in idirs:
        fileName = file.replace(idirs[0], idir)
        # Pickle streams are bytes: the file must be opened in binary mode.
        with open(fileName, 'rb') as pckfile:
            obj = pickle.load(pckfile)
        if total is None:
            total = obj
        else:
            try:
                total += obj
            except TypeError:
                # += not implemented for this type: keep what we have so far
                pass

    oFileName = file.replace(idirs[0], odir)
    with open(oFileName, 'wb') as pckfile:
        pickle.dump(total, pckfile)
    # Human-readable companion of the merged pickle.
    txtFileName = oFileName.replace('.pck', '.txt')
    with open(txtFileName, 'w') as txtFile:
        txtFile.write(str(total))
        txtFile.write('\n')
0039     
0040 
def hadd(file, odir, idirs, appx=''):
    '''Merge the copies of ``file`` found in every directory of ``idirs`` into ``odir``.

    ``.pck`` files are handed to haddPck; ``.root`` files are merged by
    running the external ``hadd`` command through the shell; any other file
    is ignored. The path of the file in each input directory, and of the
    output, is obtained by substituting ``idirs[0]`` in ``file``.

    When the command string is longer than MAX_ARG_STRLEN the inputs are
    split in two halves, each half is merged into an intermediate file
    (suffix ``appx + '1'`` / ``appx + '2'``) and the two intermediates are
    then merged into the final output. Intermediate files are left on disk.

    Args:
        file: path of the file inside ``idirs[0]``.
        odir: destination directory.
        idirs: input directories; ``idirs[0]`` must appear in ``file``.
        appx: suffix inserted before ``.root`` in the output file name; used
            by the recursive calls to name intermediate files.
    '''
    if file.endswith('.pck'):
        try:
            haddPck(file, odir, idirs)
        except ImportError as err:
            # Best effort: a pickle whose classes cannot be imported here is
            # skipped, but say so instead of dropping it silently.
            print('warning: could not merge', file, '-', err)
        return
    elif not file.endswith('.root'):
        return
    target = file.replace(idirs[0], odir)
    haddCmd = ['hadd', target.replace('.root', appx + '.root')]
    haddCmd.extend(file.replace(idirs[0], idir) for idir in idirs)
    cmd = ' '.join(haddCmd)
    print(cmd)
    # A single input cannot be split any further: run it as is.
    if len(cmd) > MAX_ARG_STRLEN and len(idirs) > 1:
        print('Command longer than maximum unix string length; dividing into 2')
        # Floor division: slice bounds must be integers in Python 3.
        half = len(idirs) // 2
        # Build the suffixes on top of appx so that nested splits never use
        # their own output file as one of their inputs.
        hadd(file, odir, idirs[:half], appx + '1')
        hadd(file.replace(idirs[0], idirs[half]), odir, idirs[half:], appx + '2')
        haddCmd = [
            'hadd',
            target.replace('.root', appx + '.root'),
            target.replace('.root', appx + '1.root'),
            target.replace('.root', appx + '2.root'),
        ]
        cmd = ' '.join(haddCmd)
        print('Running merge cmd:', cmd)
        os.system(cmd)
    else:
        os.system(cmd)
0070 
0071 
def haddRec(odir, idirs):
    '''Recreate the directory tree of ``idirs[0]`` under ``odir`` and hadd every file found in it.

    ``odir`` is created here; if that fails an error message is printed and
    the OSError is re-raised. For every directory visited in ``idirs[0]``
    the sub-directories are mirrored under ``odir`` first, then each file is
    passed to hadd together with the full list of input directories.
    '''
    print('adding', idirs)
    print('to', odir)

    try:
        os.mkdir(odir)
    except OSError:
        print()
        print('ERROR: directory in the way. Maybe you ran hadd already in this directory? Remove it and try again')
        print()
        raise

    source = idirs[0]
    for here, subdirs, names in os.walk(source):
        # Mirror the sub-directories before merging, so the outputs of the
        # deeper levels have a place to go.
        for sub in subdirs:
            mirrored = (here + '/' + sub).replace(source, odir)
            os.mkdir(mirrored)
        for name in names:
            hadd(here + '/' + name, odir, idirs)
0097 
def haddChunks(idir, removeDestDir, cleanUp=False, odir_cmd='./'):
    '''Group the ``<name>_Chunk<N>`` sub-directories of ``idir`` by name and hadd each group.

    Args:
        idir: directory scanned for chunk sub-directories.
        removeDestDir: if true, an existing destination directory is removed
            before merging into it.
        cleanUp: if true, the chunk directories are moved into a freshly
            created ``Chunks`` directory (relative to the current working
            directory) once everything is merged.
        odir_cmd: prefix of the destination path; each group is merged into
            ``odir_cmd + '/' + idir + '/' + <name>``.
    '''
    groups = {}
    for entry in sorted(os.listdir(idir)):
        path = idir + '/' + entry
        if not os.path.isdir(path):
            continue
        pieces = entry.split('_Chunk')
        if len(pieces) != 2:
            # ok, not a chunk
            continue
        groups.setdefault(pieces[0], []).append(path)

    if not groups:
        print('warning: no chunk found.')
        return

    for comp, members in groups.items():
        odir = odir_cmd + '/' + idir + '/' + comp
        print(odir, members)
        if removeDestDir and os.path.isdir(odir):
            shutil.rmtree(odir)
        haddRec(odir, members)

    if cleanUp:
        chunkDir = 'Chunks'
        # Start from an empty Chunks directory.
        if os.path.isdir(chunkDir):
            shutil.rmtree(chunkDir)
        os.mkdir(chunkDir)
        print(groups)
        for members in groups.values():
            for member in members:
                shutil.move(member, chunkDir)
0131         
0132 
if __name__ == '__main__':
    # Command-line entry point: haddChunks.py [-r] [-c] <dir> [<destination>]

    import os
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = """
    %prog <dir>
    Find chunks in dir, and run recursive hadd to group all chunks.
    For example: 
    DYJets_Chunk0/, DYJets_Chunk1/ ... -> hadd -> DYJets/
    WJets_Chunk0/, WJets_Chunk1/ ... -> hadd -> WJets/
    """
    parser.add_option("-r","--remove", dest="remove",
                      default=False,action="store_true",
                      help="remove existing destination directories.")
    parser.add_option("-c","--clean", dest="clean",
                      default=False,action="store_true",
                      help="move chunks to Chunks/ after processing.")

    (options,args) = parser.parse_args()

    if len(args)>2:
        print('provide at most 2 directory as arguments: first the source, then the destination (optional)')
        sys.exit(1)
    if not args:
        # Without this check args[0] below dies with a bare IndexError.
        parser.print_usage()
        print('provide the source directory as first argument')
        sys.exit(1)

    idir = args[0]
    # The destination prefix is optional and defaults to the current directory.
    odir = args[1] if len(args) > 1 else './'

    haddChunks(idir, options.remove, options.clean, odir)
0167