Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-26 05:07:19

0001 from __future__ import print_function
0002 from __future__ import absolute_import
0003 ################################################################################
0004 # RelMon: a tool for automatic Release Comparison                              
0005 # https://twiki.cern.ch/twiki/bin/view/CMSPublic/RelMon
0006 #
0007 #
0008 #                                                                              
0009 # Danilo Piparo CERN - danilo.piparo@cern.ch                                   
0010 #                                                                              
0011 ################################################################################
0012 
0013 from builtins import range
0014 from copy import deepcopy
0015 from os import chdir,getcwd,makedirs
0016 from os.path import abspath,exists,join, basename
0017 from re import sub,search
0018 from re import compile as recompile
0019 from sys import exit,stderr,version_info
0020 from threading import Thread,activeCount
0021 from time import sleep
0022 if version_info[0]==2:
0023   from urllib2  import Request,build_opener,urlopen
0024 else:
0025   from urllib.request  import Request,build_opener,urlopen
0026 
0027 import sys
0028 argv=sys.argv
0029 import ROOT
0030 sys.argv=argv
0031 
0032 ROOT.gROOT.SetBatch(True)
0033 
0034 from .authentication import X509CertOpen
0035 from .dirstructure import Comparison,Directory,tcanvas_print_processes
0036 from .utils import Chi2,KS,BinToBin,Statistical_Tests,literal2root
0037 
0038 #-------------------------------------------------------------------------------  
0039 
class Error(Exception):
    """Common ancestor of every exception raised by this module."""
0043 
class DQM_DB_Communication(Error):
    """Signals a failure while talking to the DQM server.

    The text given at construction time is stored in the ``msg`` attribute.
    """

    def __init__(self, msg):
        # Keep the description reachable as an attribute for the handlers.
        self.msg = msg
0049 
class InvalidNumberOfArguments(Error):
    """Raised when a method is called with an unsupported number of arguments.

    ``msg`` is optional so that the class can also be raised bare
    (``raise InvalidNumberOfArguments``); with a mandatory ``msg`` that
    statement fails with a TypeError instead of raising this exception.
    The message is stored in the ``msg`` attribute and forwarded to the base
    class so that ``str(exc)`` shows it.
    """

    def __init__(self,msg="Invalid number of arguments"):
        Error.__init__(self,msg)
        self.msg = msg
0054 
0055 #-----------------------------------------------------------------------------    
0056 
class DQMcommunicator(object):

    """Communicate with the DQM Document server.

    The object keeps a shell-like "current directory" on the server
    (``DQMpwd``) that is changed with :meth:`cd` and listed with :meth:`ls`.
    The remaining methods are helpers built on top of these two.
    """

    #-----------------------------------------------------------------------------

    base_dir='/data/json/archive/'

    def __init__(self,
                 server,
                 is_private=False,
                 ident="DQMToJson/1.0 python/%d.%d.%d" % version_info[:3]):
        """Store the connection parameters.

        server     -- prefix prepended to every path that is requested.
        is_private -- when False an opener built around X509CertOpen is
                      used; when True the plain urlopen is used.
        ident      -- value sent in the User-agent header.
        """
        self.ident = ident
        self.server = server
        self.is_private = is_private
        self.DQMpwd=DQMcommunicator.base_dir
        self.prevDQMpwd=self.DQMpwd
        # cd() stores and reads the previous directory as "oldDQMpwd":
        # initialise it here so that cd("-") works before any other cd().
        self.oldDQMpwd=self.DQMpwd
        self.opener=None
        if not self.is_private:
            self.opener=build_opener(X509CertOpen())
    #-----------------------------------------------------------------------------

    def open_url(self,url):
        """Open url (blanks are escaped as %20) and return the response object."""
        url=url.replace(' ','%20')
        datareq = Request(url)
        datareq.add_header('User-agent', self.ident)
        if not self.is_private:
            return self.opener.open(datareq)
        return urlopen(datareq)

    #-----------------------------------------------------------------------------

    def get_data(self, full_url):
        """Download full_url and return its content as text.

        Occurrences of "-inf", " inf" and " nan" are replaced by 0 and doubly
        quoted CMSSW names are reduced to singly quoted ones, so that the
        text can be evaluated by the callers.
        """
        data = self.open_url(full_url).read()

        # In Python 3 read() returns bytes, on which the str patterns below
        # cannot be applied. In Python 2 bytes is str and nothing is done.
        # NOTE(review): the payload is assumed to be UTF-8 - confirm.
        if isinstance(data, bytes) and not isinstance(data, str):
            data = data.decode("utf-8")

        data = sub("-inf", '0', data)
        data = sub(r"\s+inf", '0', data)
        data = sub(r"\s+nan", '0', data)
        data = sub('""(CMSSW.*?)""', '"\\1"', data)

        return data

    #-----------------------------------------------------------------------------

    def ls_url(self, url):
        """List the content of url (a path relative to the server).

        Returns a dictionary name -> description. Subdirectories are
        described by {"type": 'dir'}; objects by a dictionary with the keys
        'type', 'kind' and 'obj_as_string' (empty string when the reply
        carries no "rootobj" entry).

        Raises DQM_DB_Communication if the listing cannot be obtained after
        all the retries.
        """
        url=url.replace(" ","%20")
        url=self.server+url
        form_folder={}
        raw_folder=None
        # SECURITY NOTE: the reply of the server is passed to eval(). This is
        # acceptable only as long as the server is trusted.
        try:
            raw_folder=eval(self.get_data(url))
        except Exception:
            print("Retrying..")
            for ntrials in range(5):
                try:
                    # No pause before the first retry, 2 seconds afterwards
                    if ntrials!=0:
                        sleep(2)
                    raw_folder=eval(self.get_data(url))
                    break
                except Exception:
                    print("Could not fetch %s. Retrying" %url)

        # Fail with the dedicated exception instead of a TypeError raised by
        # subscripting None below.
        if raw_folder is None:
            raise DQM_DB_Communication("Could not fetch %s" %url)

        for content_dict in raw_folder["contents"]:
            if "subdir" in content_dict:
                form_folder[content_dict["subdir"]]={"type":'dir'}
            elif "obj" in content_dict:
                properties=content_dict["properties"]
                obj_name=content_dict["obj"]
                obj_type=properties["type"]
                obj_kind=properties["kind"]
                obj_as_string=content_dict.get("rootobj",'')
                form_folder[obj_name]={'type':obj_type,'obj_as_string':obj_as_string,"kind":obj_kind}

        return form_folder

    #-----------------------------------------------------------------------------

    def ls(self, url='', fetch_root=False):
        """List url, or the current directory if url is empty.

        With fetch_root the "rootcontent=1" query is added to the request.
        The returned dictionary is described in ls_url.
        """
        if len(url)==0:
            url=join(self.DQMpwd,url)

        if fetch_root:
            url='%s?rootcontent=1'%url

        return self.ls_url(url)

    #-----------------------------------------------------------------------------

    def cd(self, *args):
        """Change the current directory.

        Accepted forms:
          cd(folder)            -- "..", "-" (previous directory), "" (base
                                   directory) or a name appended to the
                                   current directory
          cd(dataset,run,folder)

        Raises InvalidNumberOfArguments for any other number of arguments.
        """
        len_args=len(args)
        full_url=""
        if len_args!=1 and len_args!=3:
            raise InvalidNumberOfArguments("cd() accepts 1 or 3 arguments, %d given" %len_args)
        if len_args==3:
            dataset, run, folder = args
            # NOTE(review): this form includes self.server in the directory
            # while ls_url prepends the server as well - confirm it is wanted.
            full_url='%s/data/json/archive/%s/%s/%s' % (self.server, dataset, run, folder)
        if len_args==1:
            folder=args[0]
            if folder==self.DQMpwd:
                full_url=self.DQMpwd
            elif folder=="..":
                full_url=self.DQMpwd[:self.DQMpwd.rfind("/")]
            elif folder=="-":
                full_url=self.oldDQMpwd
            elif folder=="":
                full_url=DQMcommunicator.base_dir
            else:
                full_url=self.DQMpwd+"/"+folder

        full_url=full_url.replace(' ','%20')

        self.oldDQMpwd=self.DQMpwd
        self.DQMpwd=full_url

    #-----------------------------------------------------------------------------

    def get_samples(self, samples_string="*"):
        """
        A sample contains, among the other things, a data type, a dataset name 
        and a run.
        Returns the "samples" entry of the reply to the samples query.
        """
        full_url='%s/data/json/samples?match=%s' % (self.server, samples_string)
        # SECURITY NOTE: eval() on the reply of the server, see ls_url.
        samples_dict=eval(self.get_data(full_url))
        return samples_dict["samples"]

    #-----------------------------------------------------------------------------

    def get_datasets_list(self, dataset_string=""):
        """Return the dataset names matching dataset_string, without
        duplicates and in order of first appearance.
        """
        datasets_list=[]
        for sample in self.get_samples(dataset_string):
            for item in sample['items']:
                dataset=item["dataset"]
                if dataset not in datasets_list:
                    datasets_list.append(dataset)
        return datasets_list

    #-----------------------------------------------------------------------------

    def get_RelVal_CMSSW_versions(self,query):
        """Get the available cmssw versions for the relvals.
        """
        relvals_list=self.get_datasets_list(query)
        # The samples are of the form /RelValTHISISMYFAVOURITECHANNEL/CMSSW_VERSION/GEN-SIM-WHATEVER-RECO
        return list(set(relval.split("/")[2] for relval in relvals_list))

    #-----------------------------------------------------------------------------    

    def get_runs_list(self, dataset_string):
        """Return the runs of all the items of all the samples matching
        dataset_string (leading and trailing slashes are ignored).
        """
        # str.strip removes every leading and trailing slash at once
        dataset_string=dataset_string.strip("/")
        runlist=[]
        # Explicit loops: a map() used for its side effects is never
        # evaluated in Python 3 and would leave the list empty.
        for sample in self.get_samples(dataset_string):
            for item in sample['items']:
                runlist.append(item['run'])
        return runlist

    #-----------------------------------------------------------------------------  

    def get_dataset_runs(self,dataset_string):
        """Return a dictionary dataset name -> list of its runs."""
        dataset_runs={}
        for dataset in self.get_datasets_list(dataset_string):
            dataset_runs[dataset]=self.get_runs_list(dataset)
        return dataset_runs

    #-----------------------------------------------------------------------------  

    def get_common_runs(self,dataset_string1,dataset_string2):
        """Return the runs present in both datasets (in no particular order)."""
        set1=set(self.get_runs_list(dataset_string1))
        set2=set(self.get_runs_list(dataset_string2))
        set1.intersection_update(set2)
        # set1 holds the intersection, set2 is untouched
        return list (set1)

    #-----------------------------------------------------------------------------  

    def get_root_objects_list(self, url=""):
        """Return the list of the objects of kind ROOT found in url (current
        directory if empty), each converted with literal2root.
        """
        if len(url)==0:
            url=self.DQMpwd
        else:
            url="/"+url    
        url = url.replace(" ","%20")
        objects=[]
        for name,description in self.ls(url,True).items():     
            if "dir" not in description["type"]  and "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"],description["type"]))
        return objects

    #-----------------------------------------------------------------------------  

    def get_root_objects(self, url=""):
        """Same as get_root_objects_list, but returns a dictionary
        name -> object.
        """
        if len(url)==0:
            url=self.DQMpwd
        else:
            # NOTE(review): the server is prepended here and once more in
            # ls_url, unlike in get_root_objects_list - confirm it is wanted.
            url=self.server+"/"+url    
        url = url.replace(" ","%20")
        objects={}
        for name,description in self.ls(url,True).items():     
            if "dir" not in description["type"] and "ROOT" in description["kind"]:
                objects[name]=literal2root(description["obj_as_string"],description["type"])
        return objects

     #-------------------------------------------------------------------------------

    def get_root_objects_list_recursive(self, url=""):
        """Return the list of the ROOT objects found in url (current
        directory if empty) and in all its subdirectories.

        NOTE(review): after a recursive call both the callee and the caller
        execute cd(".."). This seems to work only because cd() joins with "/"
        a name which already starts with "/": do not reorder the statements
        without checking the resulting DQMpwd.
        """
        null_url = (len(url)==0)    
        if len(url)==0:
            url=self.DQMpwd
        else:
            url="/"+url    
        url = url.replace(" ","%20")      
        if not null_url: 
            self.cd(url)
        objects=[]
        for name,description in self.ls("",True).items():     
            if "dir" in description["type"]:
                objects+=self.get_root_objects_list_recursive(name)
                self.cd("..")
            elif  "ROOT" in description["kind"]:
                objects.append(literal2root(description["obj_as_string"],description["type"]))
        if not null_url: 
            self.cd("..")
        return objects

     #-------------------------------------------------------------------------------

    def get_root_objects_names_list_recursive(self, url="",present_url=""):
        """Return the names of the ROOT objects found in url and in all its
        subdirectories. Each name is built as <dir1>_<dir2>_..._<object>;
        present_url carries the prefix accumulated by the recursion.
        """
        null_url = (len(url)==0)
        if (not null_url):
            if len(present_url)==0:
                present_url=url
            else:
                present_url+="_%s"%url
        if len(url)==0:
            url=self.DQMpwd
        else:
            url="/"+url    
        url = url.replace(" ","%20")
        if not null_url:
            self.cd(url)
        objects_names=[]
        for name,description in self.ls("",False).items():     
            if "dir" in description["type"]:        
                objects_names+=self.get_root_objects_names_list_recursive(name,present_url)
                self.cd("..")
            elif  "ROOT" in description["kind"]:
                objects_names.append("%s_%s"%(present_url,name))
        if not null_url: 
            self.cd("..")
        return objects_names

     #-------------------------------------------------------------------------------

    def get_root_objects_recursive(self, url="",present_url=""):
        """Return a dictionary name -> object with the ROOT objects found in
        url and in all its subdirectories. The names are built as in
        get_root_objects_names_list_recursive.

        Unlike the other recursive methods, cd(url) and the final cd("..")
        are executed for an empty url as well.
        """
        null_url = (len(url)==0)
        if (not null_url):
            if len(present_url)==0:
                present_url=url
            else:
                present_url+="_%s"%url
        if len(url)==0:
            url=self.DQMpwd
        else:
            url="/"+url    
        url = url.replace(" ","%20")
        #if not null_url:
        self.cd(url)
        objects={}
        for name,description in self.ls("",True).items():     
            if "dir" in description["type"]:
                objects.update(self.get_root_objects_recursive(name,present_url))
                self.cd("..")
            elif  "ROOT" in description["kind"]:
                objects["%s_%s"%(present_url,name)]=literal2root(description["obj_as_string"],description["type"])
        #if not null_url:
        self.cd("..")
        return objects
0353 
0354 #-------------------------------------------------------------------------------
0355 
class DirID(object):
    """Identifier of a directory of the walked tree: it bundles the name,
    the depth and, optionally, the name of the mother directory.
    The name is also used as a regular expression in the comparisons.
    """
    def __init__(self, name, depth, mother=""):
        self.name = name
        self.depth = depth
        self.mother = mother
        self.compname = recompile(name)

    def __eq__(self, dirid):
        """Two identifiers are equal if one name matches (as regular
        expression) the other one and the depths are equal or of opposite
        sign. If both carry a mother, the mothers must coincide too.
        """
        other_depth = dirid.depth
        other_pattern = dirid.compname
        other_name = dirid.name
        both_have_mother = len(self.mother) * len(dirid.mother) > 0

        names_match = (search(self.compname, other_name) is not None
                       or search(other_pattern, self.name) is not None)
        if names_match:
            verdict = self.depth * other_depth < 0 or self.depth == other_depth
        else:
            verdict = False

        if both_have_mother:
            verdict = verdict and self.mother == dirid.mother
        return verdict

    def __repr__(self):
        return "Directory %s at level %s" % (self.name, self.depth)
0379 
0380 #-------------------------------------------------------------------------------
class DirFetcher(Thread):
    """Thread listing one single "directory" of the dqm.
    Once the thread has run, the listing is available in ``contents``
    (None before).
    """
    def __init__(self, comm, directory):
        super(DirFetcher, self).__init__()
        self.comm, self.directory = comm, directory
        self.contents = None

    def run(self):
        # The second argument asks the communicator for the root content too
        listing = self.comm.ls(self.directory, True)
        self.contents = listing
0391 
0392 #-------------------------------------------------------------------------------
0393 
class DirWalkerDB(Thread):
    """An interface to the DQM document db. It is threaded to compensate the 
    latency introduced by the finite response time of the server.

    One walker compares the content of one directory as seen through two
    communicators and starts one further walker per common subdirectory.
    The results are appended to the Directory object received at
    construction time (its ``comparisons`` and ``subdirs`` lists).
    """
    def __init__ (self,comm1,comm2,base1,base2,directory,depth=0,do_pngs=True,stat_test="KS",test_threshold=.5,black_list=[]):
        """
        comm1,comm2    -- communicators, deep-copied so that each walker
                          owns its own pair
        base1,base2    -- prefixes of the paths of the two trees
        directory      -- object providing name, mother_dir, subdirs and
                          comparisons; it is filled by run()
        depth          -- depth of directory, used for the black list
        do_pngs        -- forwarded to the Comparison objects
        stat_test      -- key of Statistical_Tests
        test_threshold -- argument passed to the selected test
        black_list     -- directories to skip (checked with "in"); the
                          default list is shared among calls but it is never
                          modified here
        """
        Thread.__init__(self)
        self.comm1 = deepcopy(comm1)
        self.comm2 = deepcopy(comm2)
        self.base1,self.base2 = base1,base2
        self.directory = directory
        self.depth=depth
        self.do_pngs=do_pngs
        self.test_threshold=test_threshold
        self.stat_test=stat_test
        self.black_list=black_list
        # name of the thread
        self.name+="_%s" %directory.name

    def run(self):
        """Compare the directory and, recursively, its subdirectories.
        Returns 0 if the directory is black-listed, None otherwise.
        """

        this_dir=DirID(self.directory.name,self.depth)
        if this_dir in self.black_list: 
            print("Skipping %s since blacklisted!" %this_dir)
            return 0 

        # From here on the depth is the one of the content of the directory
        self.depth+=1

        the_test=Statistical_Tests[self.stat_test](self.test_threshold)
        #print "Test %s with threshold %s" %(self.stat_test,self.test_threshold)

        directory1=self.base1+"/"+self.directory.mother_dir+"/"+self.directory.name
        directory2=self.base2+"/"+self.directory.mother_dir+"/"+self.directory.name

        # The two listings are fetched concurrently, one thread each
        fetchers =(DirFetcher(self.comm1,directory1),DirFetcher(self.comm2,directory2))
        for fetcher in fetchers:
            fetcher.start()
        for fetcher in fetchers:  
            fetcher.join()

        contents1 = fetchers[0].contents
        contents2 = fetchers[1].contents
        set1= set(contents1.keys())
        set2= set(contents2.keys())  

        walkers=[]
        self_directory_directories=self.directory.subdirs
        self_directory_comparisons=self.directory.comparisons
        # Only the names present in both listings are considered
        contents_names=list(set1.intersection(set2))

        for name in contents_names:
            # The type and the kind are taken from the first listing only
            content = contents1[name]
            if "dir" in content["type"]:
                #if this_dir not in DirWalker.white_list:continue              
                subdir=Directory(name,join(self.directory.mother_dir,self.directory.name))        
                dirwalker=DirWalkerDB(self.comm1,self.comm2,self.base1,self.base2,subdir,self.depth,
                                      self.do_pngs,self.stat_test,self.test_threshold,self.black_list)
                dirwalker.start()
                walkers.append(dirwalker)
                # Throttling: when more than 5 threads are alive, wait for
                # the walker just started before going on
                n_threads=activeCount()
                if n_threads>5:
                    #print >> stderr, "Threads that are running: %s. Joining them." %(n_threads)    
                    dirwalker.join()
            elif content["kind"]=="ROOT":
                # print directory1,name
                # Both objects are built with the type found in the first listing
                comparison=Comparison(name,
                                      join(self.directory.mother_dir,self.directory.name),
                                      literal2root(content["obj_as_string"],content["type"]),
                                      literal2root(contents2[name]["obj_as_string"],content["type"]),
                                      deepcopy(the_test),
                                      do_pngs=self.do_pngs)
                self_directory_comparisons.append(comparison)


        # Wait for all the walkers and keep the subdirectories with content
        for walker in walkers:
            walker.join()
            walker_directory=walker.directory
            if not walker_directory.is_empty():
                self_directory_directories.append(walker_directory)
0472 
0473 #-------------------------------------------------------------------------------
0474 
class DQMRootFile(object):
    """ Class acting as interface between the user and the harvested DQMRootFile.  
    It skips the directories created by the DQM infrastructure so to provide an
    interface as similar as possible to a real directory structure and to the 
    directory structure provided by the db interface.

    The current and the previous directory are kept in ``rootfilepwd`` and
    ``rootfileprevpwd``.
    """
    def __init__(self,rootfilename):
        """Open rootfilename and enter its "DQMData" directory; if it is not
        there, the file itself is used as current directory.
        """
        dqmdatadir="DQMData"
        self.rootfile=ROOT.TFile(rootfilename)
        self.rootfilepwd=self.rootfile.GetDirectory(dqmdatadir)
        self.rootfileprevpwd=self.rootfile.GetDirectory(dqmdatadir)
        # NOTE(review): "== None" is kept on purpose instead of "is None":
        # presumably a missing directory is a null ROOT object and not the
        # None singleton - confirm before changing it.
        if self.rootfilepwd == None:
            print("Directory %s does not exist: skipping. Is this a custom rootfile?" %dqmdatadir)
            self.rootfilepwd=self.rootfile
            self.rootfileprevpwd=self.rootfile

    def __is_null(self,directory,name):
        """Return True, after a message on stderr, if directory is false."""
        is_null = not directory
        if is_null:
            print("Directory %s does not exist!" %name, file=stderr)
        return is_null

    def ls(self,directory_name=""):
        """Return a dictionary key name -> class name with the content of
        directory_name (current directory if empty). The dictionary is empty
        if the directory does not exist.
        """
        contents={}
        if len(directory_name)==0:
            # No lookup is needed for the current directory
            directory=self.rootfilepwd
        else:
            directory=self.rootfilepwd.GetDirectory(directory_name)
        if self.__is_null(directory,directory_name):
            return contents

        for key in directory.GetListOfKeys():
            contents[key.GetName()]=key.GetClassName()
        return contents

    def cd(self,directory_name):
        """Change the current TDirectoryFile. The familiar "-" and ".." directories 
        can be accessed as well.
        """
        if directory_name=="-":
            # Swap current and previous directory and stop here: a plain
            # "if" for the ".." test below made "-" fall into the final
            # "else" and look for a directory called "-".
            self.rootfilepwd,self.rootfileprevpwd=self.rootfileprevpwd,self.rootfilepwd
        elif directory_name=="..":
            self.rootfileprevpwd=self.rootfilepwd
            mom=self.rootfilepwd.GetMotherDir()
            # The directory which is left is deleted, unless it is a
            # "Run ..." directory
            if "Run " not in self.rootfilepwd.GetName():
                self.rootfilepwd.Delete()
            self.rootfilepwd=mom
        else:
            new_directory=self.rootfilepwd.GetDirectory(directory_name)
            # The state is left untouched if the directory does not exist
            if not self.__is_null(new_directory,directory_name):
                self.rootfileprevpwd=self.rootfilepwd
                self.rootfilepwd=new_directory

    def getObj(self,objname):
        """Get a TObject from the rootfile.
        None is returned, after a message on stderr, if it is not there.
        """
        obj=self.rootfilepwd.Get(objname)
        if not self.__is_null(obj,objname):
            return obj
        return None
0542 
0543 #-------------------------------------------------------------------------------
0544 
class DirWalkerFile(object):
    """Walk two ROOT files in parallel and fill a Directory object with one
    Comparison per histogram (class name starting with "TH" or "TPr") found
    in both files.
    """
    def __init__(self, name, topdirname,rootfilename1, rootfilename2, run=-1, black_list=[], stat_test="KS", test_threshold=.5,draw_success=True,do_pngs=False, black_list_histos=[]):
        """
        name              -- name of the walker
        topdirname        -- name of the top Directory, where the walk starts
        rootfilename1/2   -- the two files to compare
        run               -- run number; if negative, walk() looks for it
        black_list        -- directories to skip (checked with "in")
        stat_test         -- key of Statistical_Tests
        test_threshold    -- argument passed to the selected test
        draw_success      -- stored in the top directory
        do_pngs           -- stored in the top directory
        black_list_histos -- paths of the histograms to mark as skipped
        The default lists are shared among calls, but never modified here.
        """
        self.name=name
        self.dqmrootfile1=DQMRootFile(abspath(rootfilename1))
        self.dqmrootfile2=DQMRootFile(abspath(rootfilename2))
        self.run=run
        self.stat_test=Statistical_Tests[stat_test](test_threshold)
        self.workdir=getcwd()
        self.black_list=black_list
        self.directory=Directory(topdirname)
        self.directory.draw_success=draw_success
        self.directory.do_pngs=do_pngs
        self.black_list_histos = black_list_histos
        self.different_histograms = {}
        # NOTE(review): filename1 is taken from rootfilename2 and vice versa;
        # left as found - confirm that the swap is intended.
        self.filename1 = basename(rootfilename2)
        self.filename2 = basename(rootfilename1)

    def __del__(self):
        # Go back to the directory which was current at construction time
        chdir(self.workdir)

    def cd(self,directory_name, on_disk=False, regexp=False,):
        """Change directory in both files.

        With regexp, directory_name is used only as a switch (nothing is
        done if it is empty): each file enters the last listed directory
        with "Run" in the name, so that the two files can carry different
        run numbers. If a file has no such directory, next_dir is either
        unbound (an exception is raised) or still the one of the other file.

        With on_disk, directory_name is also created on disk.
        NOTE(review): chdir is executed only when the directory has just been
        created - confirm that this is wanted.
        """
        if regexp == True:
            if len(directory_name)!=0:
                if on_disk:
                    if not exists(directory_name):
                        makedirs(directory_name)
                        chdir(directory_name)  
                tmp = self.dqmrootfile2.ls().keys()
                for elem in tmp:
                    if "Run" in elem:
                        next_dir = elem
                self.dqmrootfile2.cd(next_dir)
                tmp = self.dqmrootfile1.ls().keys()
                for elem in tmp:
                    if "Run" in elem:
                        next_dir = elem
                self.dqmrootfile1.cd(next_dir)
        else:
            if len(directory_name)!=0:
                if on_disk:
                    if not exists(directory_name):
                        makedirs(directory_name)
                        chdir(directory_name)
                self.dqmrootfile2.cd(directory_name)
                self.dqmrootfile1.cd(directory_name)

    @staticmethod
    def _directories_and_histos(contents,names):
        """Return the entries of contents, restricted to names, which are
        directories or histograms (class name starting with TH or TPr).
        """
        selected={}
        for name in names:
            obj_type=contents[name]
            if obj_type=="TDirectoryFile" or obj_type[:2]=="TH" or obj_type[:3]=="TPr":
                selected[name]=obj_type
        return selected

    def ls(self,directory_name=""):
        """Return common objects to the 2 files.

        The result maps each common name to its class name in the first
        file. As a side effect different_histograms['file1'] and ['file2']
        are reset; they are then filled with the directories and histograms
        found in one file only, but only if at least one common object has a
        different class name in the two files.
        """
        contents1=self.dqmrootfile1.ls(directory_name)
        contents2=self.dqmrootfile2.ls(directory_name)
        self.different_histograms['file1']= {}
        self.different_histograms['file2']= {}
        # Common names, in the order of the second file
        keys = [key for key in contents2 if key in contents1]

        # The two differences do not depend on the key: they are computed
        # once instead of once per key with mismatching class names.
        if any(contents1[key]!=contents2[key] for key in keys):
            only_in_file1 = set(contents1) - set(contents2)
            only_in_file2 = set(contents2) - set(contents1)
            self.different_histograms['file1'].update(
                self._directories_and_histos(contents1,only_in_file1))
            self.different_histograms['file2'].update(
                self._directories_and_histos(contents2,only_in_file2))

        return {key: contents1[key] for key in keys}

    def getObjs(self,name):
        """Return the pair of objects called name in the two files."""
        h1=self.dqmrootfile1.getObj(name)
        h2=self.dqmrootfile2.getObj(name)
        return h1,h2

    def __fill_single_dir(self,dir_name,directory,mother_name="",depth=0):
        """Enter dir_name, fill directory with the comparisons of its
        histograms and with its non-empty subdirectories, then go back.
        Returns 0 if dir_name is black-listed, None otherwise.
        """
        # see if in black_list
        this_dir=DirID(dir_name,depth)
        if this_dir in self.black_list: 
            return 0        

        depth+=1

        self.cd(dir_name)

        contents=self.ls()
        # Snapshot of what ls() found for THIS directory: the recursive
        # calls below run ls() again and rebind the entries of
        # self.different_histograms to the ones of a subdirectory.
        missing_file1=self.different_histograms['file1']
        missing_file2=self.different_histograms['file2']
        if depth==1:
            n_top_contents=len(contents)

        cont_counter=1
        for name,obj_type in contents.items():
            if obj_type=="TDirectoryFile":        
                #We have a dir, launch recursion!
                #Some feedback on the progress
                if depth==1:
                    print("Studying directory %s, %s/%s" %(name,cont_counter,n_top_contents))
                    cont_counter+=1          

                # ok recursion on!
                subdir=Directory(name)
                subdir.draw_success=directory.draw_success
                subdir.do_pngs=directory.do_pngs
                self.__fill_single_dir(name,subdir,join(mother_name,dir_name),depth)
                if not subdir.is_empty():
                    if depth==1:
                        print(" ->Appending %s..." %name, end=' ')
                    directory.subdirs.append(subdir)
                    if depth==1:
                        print("Appended.")
            elif name != '':
                # We have probably an histo. Let's make the plot and the png.        
                if obj_type[:2]!="TH" and obj_type[:3]!="TPr" :
                    continue
                h1,h2=self.getObjs(name)
                path = join(mother_name,dir_name,name)
                if path in self.black_list_histos:
                    print("  Skipping %s" %(path))
                    directory.comparisons.append(Comparison(name,
                                        join(mother_name,dir_name),
                                        h1,h2,
                                        deepcopy(self.stat_test),
                                        draw_success=directory.draw_success,
                                        do_pngs=directory.do_pngs, skip=True))
                else:
                    directory.comparisons.append(Comparison(name,
                                          join(mother_name,dir_name),
                                          h1,h2,
                                          deepcopy(self.stat_test),
                                          draw_success=directory.draw_success,
                                          do_pngs=directory.do_pngs, skip=False))
                    directory.filename1 = self.filename1
                    directory.filename2 = self.filename2
                    directory.different_histograms['file1'] = missing_file1
                    directory.different_histograms['file2'] = missing_file2

        self.cd("..")

    def walk(self):
        """Enter the run directory, fill the top directory and wait for the
        pending entries of tcanvas_print_processes.
        """
        # Build the top dir in the rootfile first
        rundir=""
        if self.run<0:
            # change dir in the first one...
            first_run_dir = ""
            # "except Exception" and not a bare except: a Ctrl-C or a
            # sys.exit() must not be mistaken for a missing run directory.
            try:
                first_run_dir = list(filter(lambda k: "Run " in k, self.ls().keys()))[0]
            except Exception:
                print("\nRundir not there: Is this a generic rootfile?\n")
            rundir=first_run_dir
            try:
                self.run= int(rundir.split(" ")[1])
            except (IndexError,ValueError):
                print("Setting run number to 0")
                self.run= 0
        else:
            rundir="Run %s"%self.run

        try:
            self.cd(rundir, False, True) #True -> for checking the Rundir in case of different runs
        except Exception:
            print("\nRundir not there: Is this a generic rootfile?\n")

        # Let's rock!
        self.__fill_single_dir(self.directory.name,self.directory)
        print("Finished")
        n_left_threads=len(tcanvas_print_processes)
        if n_left_threads>0:
            print("Waiting for %s threads to finish..." %n_left_threads)
            for p in tcanvas_print_processes:
                p.join()  
0742 
0743 #-------------------------------------------------------------------------------
0744 
class DirWalkerFile_thread_wrapper(Thread):
    """Thread whose only job is to call walk() on the wrapped walker."""
    def __init__(self, walker):
        super(DirWalkerFile_thread_wrapper, self).__init__()
        self.walker = walker

    def run(self):
        wrapped = self.walker
        wrapped.walk()
0751 
0752 #-------------------------------------------------------------------------------
0753 
def string2blacklist(black_list_str):
    """Build a list of DirID out of a string like "name@level,mother/name@level".

    A double underscore in the string stands for a blank. Elements equal to
    one already in the list are dropped.
    """
    black_list = []
    # replace the "__" with " ":
    black_list_str = black_list_str.replace("__", " ")
    if len(black_list_str) == 0:
        return black_list

    for element in black_list_str.split(","):
        dirname, level = element.split("@")
        level = int(level)
        if "/" in dirname:
            # mother/daughter form
            mother, daughter = dirname.split("/")
            dirid = DirID(daughter, level, mother)
        else:
            dirid = DirID(dirname, level)
        if dirid not in black_list:
            black_list.append(dirid)

    return black_list
0772 
0773 #-------------------------------------------------------------------------------
0774