Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:50

0001 #!/usr/bin/env python
0002 """
0003 A module to manipulate files on EOS or on the local file system. Intended to have the same interface as castortools.py.
0004 """
0005 import sys
0006 import os
0007 import re
0008 import pprint
0009 import shutil
0010 
# Absolute path of the eos.select executable. runEOSCommand puts it first in
# every command list it builds.
eos_select = '/afs/cern.ch/project/eos/installation/cms/bin/eos.select'
0012     
def setCAFPath():
    """Hack to get the CAF scripts on the PYTHONPATH.

    Appends the CAF python directory to sys.path unless it is already
    listed there, so calling this function repeatedly is harmless.
    """
    caf_dir = '/afs/cern.ch/cms/caf/python'
    if caf_dir in sys.path:
        # already registered, nothing to do
        return
    sys.path.append(caf_dir)
0019 setCAFPath()
0020 import cmsIO
0021 
def runXRDCommand(path, cmd, *args):
    """Run an xrd command on path.

    path is normalised with eosToLFN, converted to a PFN with lfnToPFN and
    split with cmsIO.splitPFN. The executed command list is:
        ['xrd', <token 1 of the split PFN>, cmd, <token 2 of the split PFN>]
    followed by any extra args.

    Returns whatever cmsIO.cmsFileManip().runCommand returns; callers in
    this module unpack it as a 3-tuple (stdout, stderr, return code).

    NOTE: nothing is checked here, so a failing command does not raise.
    ??? At some point, should return a list of lines instead of a string.
    """
    pfn_tokens = cmsIO.splitPFN(lfnToPFN(eosToLFN(path)))
    xrd_command = ['xrd', pfn_tokens[1], cmd, pfn_tokens[2]] + list(args)
    return cmsIO.cmsFileManip().runCommand(xrd_command)
0037 
def runEOSCommand(path, cmd, *args):
    """Run an eos command on path.

    path is normalised with eosToLFN, converted to a PFN with lfnToPFN and
    split with cmsIO.splitPFN. The executed command list is:
        [eos_select, cmd] + extra args + [<token 2 of the split PFN>]

    Returns whatever cmsIO.cmsFileManip().runCommand returns; callers in
    this module unpack it as a 3-tuple (stdout, stderr, return code).

    NOTE: when the EOS command fails, it passes silently. The return code
    is part of the returned tuple, so raising an exception on failure
    would be possible, but this function does not do it.
    """
    pfn = lfnToPFN(eosToLFN(path))
    target = cmsIO.splitPFN(pfn)[2]
    # the path the command acts on always comes last, after the options
    eos_command = [eos_select, cmd] + list(args) + [target]
    return cmsIO.cmsFileManip().runCommand(eos_command)
0055 
def isLFN( path ):
    """Tell whether path looks like a CMS LFN, i.e. starts with '/store'.

    Only the string prefix is inspected; nothing is looked up on storage.
    """
    lfn_prefix = '/store'
    return path.startswith(lfn_prefix)
0060 
def isEOS( path ):
    """Tell whether path looks like a CMS EOS path.

    True when path starts with '/eos' or with
    'root://eoscms.cern.ch//eos/cms'. Only the string prefix is inspected.
    """
    eos_prefixes = ('/eos', 'root://eoscms.cern.ch//eos/cms')
    return path.startswith(eos_prefixes)
0064 
def eosToLFN( path ):
    """Convert an EOS PFN to an LFN.

    Removes every occurrence of 'root://eoscms.cern.ch/' and then every
    occurrence of '/eos/cms' from path. A path containing neither string
    is returned unchanged.
    ??? Shouldn't we raise an exception instead?
    """
    without_host = path.replace('root://eoscms.cern.ch/', '')
    return without_host.replace('/eos/cms', '')

# alias kept for backwards compatibility
castorToLFN = eosToLFN
0075 
def lfnToPFN( path, tfcProt = 'rfio'):
    """Converts an LFN to a PFN.

    The leading prefix of path is rewritten before the lookup:
        /store/...                  -> root://eoscms.cern.ch//eos/cms/store/...
        /pnfs/psi.ch/cms/trivcat/... -> root://t3se01.psi.ch//...
    Any other path is passed on unchanged. Only the leading occurrence is
    rewritten, so a path that contains '/store/' again further down
    (e.g. /store/user/x/store/y) keeps its inner components intact.

    path    -- LFN (or already a PFN) as a string
    tfcProt -- handed unchanged to cmsIO.cmsFile as its second argument

    Returns the pfn attribute of the cmsIO.cmsFile object built from the
    rewritten path.

    NOTE(review): this function used to contain a castor fallback: for
    tfcProt == 'rfio' and an /eos/cms/ path whose stat() reported
    "Error 3011: Unable to stat", it called pfn.replace(...) to point the
    PFN at castor. The results of those str.replace calls were discarded,
    so the returned PFN was never modified. The branch is removed here and
    the effective return value is preserved. Actually assigning the
    replaced string would redirect every not-yet-existing EOS path (e.g.
    the target of mkdir) to castor. As a consequence entity.stat() is no
    longer called from this function.
    """
    if path.startswith("/store/"):
        # count=1: only the leading prefix must be rewritten
        path = path.replace("/store/", "root://eoscms.cern.ch//eos/cms/store/", 1)
    if path.startswith("/pnfs/psi.ch/cms/trivcat/"):
        path = path.replace("/pnfs/psi.ch/cms/trivcat/", "root://t3se01.psi.ch//", 1)
    entity = cmsIO.cmsFile( path, tfcProt )
    return entity.pfn
0105 
0106 
def lfnToEOS( path ):
    """Convert an LFN to an EOS PFN.

    For an LFN (see isLFN) the result is
    'root://eoscms.cern.ch//eos/cms/' + path, with every '//store'
    collapsed to '/store'. Anything that is not an LFN is returned
    unchanged.
    ??? shouldn't we raise an exception?
    """
    if not isLFN(path):
        return path
    eos_pfn = 'root://eoscms.cern.ch//eos/cms/' + path
    return eos_pfn.replace('//store','/store')

# alias kept for backwards compatibility
lfnToCastor = lfnToEOS
0120 
def isEOSDir( path ):
    """Tell whether path designates something on EOS.

    Returns a true value if path starts with one of:
        /eos
        /store
        root://eoscms.cern.ch//eos/cms/
    and the second token of its split PFN starts with 'eos'
    (case-insensitive). Returns False for a path that exists on the local
    file system or has none of the prefixes above. If cmsIO.splitPFN
    returns something falsy, that value is returned as is.

    WARNING!! This function does not check that the path exists on EOS,
    and returns true also for plain files.
    """
    # a path found on the local file system is never treated as EOS
    if os.path.exists( path ):
        return False
    eos_like_prefixes = ('/eos', '/store', 'root://eoscms.cern.ch//eos/cms/')
    if not path.startswith(eos_like_prefixes):
        # neither an EOS PFN nor an LFN
        return False
    # from here on we have an EOS PFN or an LFN
    pfn_tokens = cmsIO.splitPFN(lfnToPFN(eosToLFN(path)))
    return pfn_tokens and pfn_tokens[1].lower().startswith('eos')

# alias kept for backwards compatibility
isCastorDir = isEOSDir
0150 
0151 
def isEOSFile( path, tfcProt = 'rfio'):
    """Tell whether path is a file or directory that exists on EOS.

    Runs 'ls' on path through runEOSCommand and returns True when the
    return code is 0, False otherwise.

    tfcProt is accepted for interface compatibility but is not used.

    ??? This function does not behave well if passed a path that is not
    on EOS: the underlying command fails and prints many
    'No such file or directory' messages before False is returned, e.g.
    for eostools.isEOSFile('/store/asdfasfd').
    """
    _stdout, _stderr, return_code = runEOSCommand( path, 'ls')
    return return_code == 0

# alias kept for backwards compatibility
isCastorFile = isEOSFile
0195 
0196 
def fileExists( path ):
    """Tell whether path is an existing file or directory, locally or on EOS.

    Paths recognised by isEOSDir are checked with isEOSFile; every other
    path is checked on the local file system with os.path.exists.
    """
    if isEOSDir(path):
        return isEOSFile(path)
    # not an EOS path: look on the local file system
    return os.path.exists(path)
0213 
0214 
def eosDirSize(path):
    '''Returns the size of a directory on EOS in GB.

    Runs 'find --size' on the directory through runEOSCommand and sums,
    over all output lines, the third '='-separated field converted to
    float. The sum is divided by 1024**3 (presumably the reported sizes
    are in bytes -- TODO confirm against the eos documentation).

    Lines that have no third field, or whose third field is not a number,
    are skipped instead of aborting the whole computation.
    '''
    lfn = eosToLFN(path)
    res = runEOSCommand(lfn, 'find', '--size')
    total = 0
    for entry in res[0].split('\n'):
        try:
            total += float(entry.split('=')[2])
        except (IndexError, ValueError):
            # blank or malformed line: ignore it
            continue
    return total/1024/1024/1024
0227 
0228 
def createEOSDir( path ):
    """Make a directory on EOS and return path.

    'mkdir -p' is issued through runEOSCommand unless isEOSFile reports
    that the LFN already exists. Afterwards path must be a directory
    according to isDirectory, otherwise OSError is raised.

    ??? isEOSFile is true for plain files too, so if path already exists
    as a file no mkdir is attempted; the isDirectory check below is what
    decides between returning path and raising.
    """
    lfn = eosToLFN(path)
    already_there = isEOSFile(lfn)
    if not already_there:
        runEOSCommand(lfn,'mkdir','-p')
    if not isDirectory(path):
        raise OSError('cannot create directory '+ path)
    return path

# alias kept for backwards compatibility
createCastorDir = createEOSDir
0249 
def mkdir(path):
    """Create a directory, either on EOS or locally, and return path.

    EOS paths and LFNs (see isEOS, isLFN) go through createEOSDir. Any
    other path is created on the local file system with os.makedirs,
    which also creates missing parent directories.
    """
    on_eos = isEOS( path ) or isLFN(path)
    if on_eos:
        createEOSDir(path)
    else:
        # recursive directory creation (like mkdir -p)
        os.makedirs(path)
    return path
0259 
def isDirectory(path):
    """Tell whether path is an existing directory on EOS.

    Runs the xrd 'existdir' command on path and returns True when its
    standard output contains 'The directory exists'.

    The path is always sent to xrd: unlike isFile, there is no branch for
    the local file system here.
    ??? isFile and isDirectory should behave the same.
    """
    xrd_stdout = runXRDCommand(path,'existdir')[0]
    return 'The directory exists' in xrd_stdout
0272 
def isFile(path):
    """Tell whether path is an existing plain file.

    Paths starting with '/eos' or '/store' are tested with the xrd
    'existfile' command: True when its standard output contains
    'The file exists'. Every other path is tested locally with
    os.path.isfile. Directories give False in the local case.

    ??? This function works with local files, so it is not symmetric with
    isDirectory. isFile and isDirectory should behave the same.
    """
    remote = path.startswith('/eos') or path.startswith('/store')
    if remote:
        xrd_stdout, _, _ = runXRDCommand(path,'existfile')
        return 'The file exists' in xrd_stdout
    return os.path.isfile(path)
0292 
def chmod(path, mode):
    """Run 'chmod -r <mode>' on a file or directory through runEOSCommand.

    mode is converted with str() before being put on the command line.
    Returns the result of runEOSCommand unchanged.
    """
    mode_arg = str(mode)
    return runEOSCommand(path, 'chmod', '-r', mode_arg)
0297 
0298 
def listFiles(path, rec = False, full_info = False):
    """Provide a listing of the specified directory.

    Local directories (os.path.isdir(path) is true):
      - rec False: the entries of path, each prefixed with path + '/'.
      - rec True: all directories of the tree first, followed by all
        files of the tree, in os.walk order.
      full_info is ignored in the local case.

    Anything else is listed with the xrd 'dirlist' command ('dirlistrec'
    when rec is true). Each non-blank output line is split on whitespace;
    the result holds the fifth token of each line, or the complete token
    list when full_info is true.
    """
    # -- listing on the local filesystem --
    if os.path.isdir( path ):
        if not rec:
            return [ '/'.join([path, entry]) for entry in os.listdir( path )]
        found_dirs = []
        found_files = []
        for root, dirs, files in os.walk(path):
            found_dirs += [ '/'.join([root, name]) for name in dirs ]
            found_files += [ '/'.join([root, name]) for name in files ]
        # directories come first, then every file of the tree
        return found_dirs + found_files

    # -- listing on EOS --
    xrd_cmd = 'dirlistrec' if rec else 'dirlist'
    listing, _, _ = runXRDCommand(path, xrd_cmd)
    entries = []
    for line in listing.split('\n'):
        fields = line.split()
        if not fields:
            continue
        #COLIN need same interface for eos and local fs
        entries.append( fields if full_info else fields[4] )
    return entries
0334 
def which(cmd):
    """Run 'which cmd' and return what it printed.

    With one non-empty output line that line is returned; with two, the
    second one is returned. In every other case (no line, or more than
    two) the list of non-empty lines itself is returned.
    """
    stdout, _, _ = cmsIO.cmsFileManip().runCommand(['which', cmd])
    found = [entry for entry in stdout.split('\n') if entry]
    if len(found) in (1, 2):
        # the last line is the answer in both the 1-line and 2-line cases
        return found[-1]
    return found
0347 
def ls(path, rec = False):
    """Provide a simple listing of the specified directory, on EOS or locally.

    Every entry returned by listFiles(path, rec) is passed through
    eosToLFN.
    """
    return list(map(eosToLFN, listFiles(path, rec)))
0351 
def ls_EOS(path, rec = False):
    """Provide a simple listing of the specified directory, EOS only.

    Uses the eos command rather than xrd, and is faster than the xrd
    based ls.
      - rec True: runs 'find -f' and returns each non-empty output line
        passed through eosToLFN.
      - rec False: runs 'ls' and returns each non-empty output line
        joined to the LFN of path with os.path.join.
    The return code of the command is not checked.
    """
    if rec:
        found, _, _ = runEOSCommand(path,'find','-f')
        return [eosToLFN(entry) for entry in found.split('\n') if entry]
    listed, _, _ = runEOSCommand(path,'ls')
    base_lfn = eosToLFN(path)
    return [os.path.join(base_lfn, entry) for entry in listed.split('\n') if entry]
0361 
def rm(path, rec=False):
    """rm, works on EOS and locally.

    path is first converted with lfnToEOS. EOS paths are removed with the
    eos 'rm' command ('rm -r' when rec is true). Existing local paths are
    removed with os.remove, or shutil.rmtree when rec is true.

    Raises ValueError when the path is neither on EOS nor an existing
    local path.

    Colin: should implement a -f mode and a confirmation when deleting
    dirs recursively.
    """
    path = lfnToEOS(path)
    if isEOS(path):
        eos_options = ['-r'] if rec else []
        runEOSCommand(path, 'rm', *eos_options)
        return
    if not os.path.exists(path):
        raise ValueError(path + ' is not EOS and not local... should not happen!')
    if rec:
        shutil.rmtree(path)
    else:
        os.remove( path )
0380 
def remove( files, rec = False):
    """Remove a list of files and directories, possibly recursively.

    Without rec, each path is simply handed to rm. With rec, the content
    of each path is listed recursively with ls, the path itself is added,
    and everything whose name starts with the LFN of the path is removed
    one entry at a time, deepest entries first.

    Colin: Is that obsolete? why not use rm?
    """
    for path in files:
        lfn = eosToLFN(path)
        if not rec:
            rm(path)
            continue
        # this should be used with care
        candidates = ls(path, rec = True)
        candidates.append(lfn)
        # (depth, name) pairs, depth being the number of path components
        by_depth = [
            (len([part for part in name.split('/') if part]), name)
            for name in candidates
            if name and name.startswith(lfn)
        ]
        # deepest first, so that directories are emptied before removal
        by_depth.sort(reverse = True)
        for _depth, name in by_depth:
            rm(name)
0399                 
def cat(path):
    """cat, works on EOS and locally.

    path is first converted with lfnToEOS.

    EOS paths are read with the xrd 'cat' command. Its standard output is
    kept line by line up to the first line containing
    'cat returned <number>'; that marker is stripped from the line and
    everything after it is dropped. If the command wrote anything to
    stderr, both stdout and stderr are echoed to sys.stderr.

    Local paths are read with open(); the file is closed before
    returning.

    Returns the content as a string. Non-empty content always ends with a
    newline.
    """
    path = lfnToEOS(path)
    if isEOS(path):
        out, err, _ = runXRDCommand(path,'cat')
        lines = []
        if out:
            pattern = re.compile('cat returned [0-9]+')
            for line in out.split('\n'):
                match = pattern.search(line)
                if line and match is not None:
                    # trailer written by xrd: strip it and stop reading
                    lines.append(line.replace(match.group(0),''))
                    break
                else:
                    lines.append(line)
        if err:
            print(out, file=sys.stderr)
            print(err, file=sys.stderr)
        allLines = '\n'.join(lines)
        if allLines and not allLines.endswith('\n'):
            allLines += '\n'
        return allLines
    else:
        # the Python 2 builtin file() does not exist in Python 3; open()
        # inside a with-block also guarantees the handle is closed
        with open(path) as local_file:
            content = local_file.read()
        if content and not content.endswith('\n'):
            content += '\n'
        return content
0428     
def xrdcp(src, dest):
    """Does a copy of files using xrd.

    src  -- an existing local path, or a path for which fileExists is
            true. A directory (os.path.isdir locally, isDirectory
            otherwise) triggers a recursive copy.
    dest -- local path, or a path recognised by isEOSDir.

    Single file: src and dest are converted to PFNs where they are on EOS
    and handed to _xrdcpSingleFile.

    Directory: unless the last component of src is '.', a directory named
    after that component is created under dest. Then every entry of
    listFiles(src, rec=True) is either copied with _xrdcpSingleFile (when
    isFile is true for it) or re-created with mkdir.

    Raises ValueError if src does not exist. Returns None; the return
    codes of the individual copies are not propagated.

    Colin: implement a generic cp interface as done for rm, ls, etc?
    """
    recursive = False

    # -- the source --
    pfn_src = src
    if os.path.exists(src):
        # local source, used as is
        if os.path.isdir(src):
            recursive = True
    elif fileExists(src):
        src = eosToLFN(src)
        pfn_src = lfnToPFN(src)
        if isDirectory(src):
            recursive = True
    else:
        raise ValueError(src + ' does not exist.')

    # -- the destination --
    # a destination that is not on EOS is used as is
    pfn_dest = dest
    if isEOSDir(dest):
        dest = eosToLFN(dest)
        pfn_dest = lfnToPFN(dest)
        if isDirectory(dest):
            # rebuild the PFN of an existing directory with a trailing slash
            tokens = cmsIO.splitPFN(pfn_dest)
            pfn_dest = '%s://%s//%s/' % (tokens[0],tokens[1],tokens[2])

    if recursive:
        topDir = src.rstrip('/').split('/')[-1]
        if topDir != '.':
            dest = '/'.join([dest, topDir])
            mkdir( dest )
        files = listFiles(src, rec=True)
        for srcFile in files:
            pfnSrcFile = srcFile
            if isEOSDir(srcFile):
                srcFile = eosToLFN(srcFile)
                pfnSrcFile = lfnToPFN(srcFile)
            # path of the entry relative to src, re-rooted under dest
            destFile = srcFile.replace( src, '' )
            destFile = '/'.join([dest,destFile])
            pfnDestFile = destFile
            if isEOSDir(destFile):
                lfnDestFile = eosToLFN(destFile)
                pfnDestFile = lfnToPFN(lfnDestFile)
            if isFile(srcFile):
                _xrdcpSingleFile(  pfnSrcFile, pfnDestFile )
            else:
                mkdir(destFile)
    else:
        _xrdcpSingleFile( pfn_src, pfn_dest )
0492 
0493 
def _xrdcpSingleFile( pfn_src, pfn_dest):
    """Copy a single file by running 'xrdcp pfn_src pfn_dest'.

    If the command wrote anything to stderr, both its stdout and stderr
    are echoed to sys.stderr. Returns the return code of the command.
    """
    copy_command = ['xrdcp', pfn_src, pfn_dest]
    out, err, ret = cmsIO.cmsFileManip().runCommand(copy_command)
    if err:
        print(out, file=sys.stderr)
        print(err, file=sys.stderr)
    return ret
0509 
def move(src, dest):
    """Move src to dest, both on the same server.

    Both paths are normalised with eosToLFN; the xrd 'mv' command is then
    run on the source LFN with the destination, converted by lfnToEOS, as
    its argument. Nothing is returned.
    """
    source_lfn = eosToLFN(src)
    dest_lfn = eosToLFN(dest)
    runXRDCommand(source_lfn, 'mv', lfnToEOS(dest_lfn))
0517                 
def matchingFiles( path, regexp):
    """Return the entries of path whose base name matches regexp.

    The directory is listed with ls. The regular expression is applied
    with re.match, i.e. anchored at the start of the base name.
    """
    matcher = re.compile( regexp )
    entries = ls(path)
    return [entry for entry in entries
            if matcher.match(os.path.basename(entry)) is not None]
0527 
def datasetNotEmpty( path, regexp ):
    """Return 1 if path holds at least one entry matching regexp, else 0.

    The directory is listed with ls_EOS. The regular expression is
    applied with re.match to the base name of each entry.
    """
    matcher = re.compile( regexp )
    entries = ls_EOS(path)
    has_match = any(
        matcher.match( os.path.basename(entry) ) is not None
        for entry in entries
    )
    return 1 if has_match else 0
0536     
def cmsStage( absDestDir, files, force):
    """Run cmsStage for each file, with LFNs if possible.

    When absDestDir is recognised by isEOSDir, it is created first with
    createEOSDir. For every entry of files the command
        cmsStage [-f] <file> <absDestDir>
    is printed and executed, both paths being passed through eosToLFN and
    '-f' being added only when force is true. The result of the command
    is not checked.
    """
    if isEOSDir(absDestDir):
        createEOSDir( absDestDir )

    for fname in files:
        force_flag = ['-f'] if force else []
        stage_command = ['cmsStage'] + force_flag
        stage_command += [eosToLFN(fname), eosToLFN(absDestDir)]
        print(' '.join(stage_command))
        cmsIO.cmsFileManip().runCommand(stage_command)