Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:32:32

0001 #!/usr/bin/env python
0002 """
0003 A module to manipulate files on EOS or on the local file system. Intended to have the same interface as castortools.py.
0004 """
0005 from __future__ import print_function
0006 import sys
0007 import os
0008 import re
0009 import pprint
0010 import shutil
0011 
# Absolute AFS path of the `eos.select` executable; runEOSCommand uses it as
# the first element of every command line it builds.
eos_select = '/afs/cern.ch/project/eos/installation/cms/bin/eos.select'
0013     
def setCAFPath():
    """Hack: make the CAF python scripts importable.

    Appends the CAF script directory to sys.path, unless that directory is
    already listed there. Returns None.
    """
    cafDir = '/afs/cern.ch/cms/caf/python'
    if cafDir in sys.path:
        return
    sys.path.append(cafDir)

# Executed at import time, presumably so that the `import cmsIO` that follows
# can be resolved from the CAF directory.
setCAFPath()
0021 import cmsIO
0022 
def runXRDCommand(path, cmd, *args):
    """Run an xrd command on an EOS path or LFN.

    path -- EOS path or LFN; it is normalised to an LFN, converted to a PFN
            and split with cmsIO.splitPFN
    cmd  -- xrd sub-command (this module uses e.g. 'existdir', 'dirlist', 'cat')
    args -- extra arguments appended after the path token

    The command executed is: xrd <tokens[1]> <cmd> <tokens[2]> <args...>.
    Returns the result of cmsIO.cmsFileManip().runCommand; callers in this
    module unpack it as a 3-tuple.

    !!! Open question kept from the original author: what happens in case of problem?
    ??? At some point, should return a list of lines instead of a string.
    """
    pfnTokens = cmsIO.splitPFN(lfnToPFN(eosToLFN(path)))
    xrdCall = ['xrd', pfnTokens[1], cmd, pfnTokens[2]] + list(args)
    return cmsIO.cmsFileManip().runCommand(xrdCall)
0038 
def runEOSCommand(path, cmd, *args):
    """Run an eos command on an EOS path or LFN.

    path -- EOS path or LFN; normalised to an LFN, converted to a PFN and
            split with cmsIO.splitPFN
    cmd  -- eos sub-command (this module uses e.g. 'ls', 'mkdir', 'rm', 'find')
    args -- options placed between the sub-command and the path

    The command executed is: <eos_select> <cmd> <args...> <tokens[2]>.
    Returns the result of cmsIO.cmsFileManip().runCommand; callers in this
    module unpack it as (stdout, stderr, return code).

    !!! Nothing is raised here when the EOS command fails. The original
    author suggests raising an exception based on the return code provided
    in the returned tuple.
    """
    pfnTokens = cmsIO.splitPFN(lfnToPFN(eosToLFN(path)))

    # obviously, this is not nice: options first, path last
    eosCall = [eos_select, cmd] + list(args) + [pfnTokens[2]]
    return cmsIO.cmsFileManip().runCommand(eosCall)
0056 
def isLFN( path ):
    """Return True when path looks like a CMS LFN, i.e. its name starts with /store.

    Only the prefix is tested; '/storefoo' is therefore accepted as well.
    """
    lfnPrefix = '/store'
    return path.startswith(lfnPrefix)
0061 
def isEOS( path ):
    """Return True when path looks like a CMS EOS path.

    Accepted prefixes are '/eos' and 'root://eoscms.cern.ch//eos/cms'.
    """
    eosPrefixes = ('/eos', 'root://eoscms.cern.ch//eos/cms')
    return path.startswith(eosPrefixes)
0065 
def eosToLFN( path ):
    """Convert an EOS PFN to an LFN.

    Every occurrence of 'root://eoscms.cern.ch/' and then of '/eos/cms' is
    removed from path. A path containing neither is returned unchanged.
    ??? Shouldn't we raise an exception instead?
    """
    withoutHost = path.replace('root://eoscms.cern.ch/', '')
    return withoutHost.replace('/eos/cms', '')

# alias kept for backwards compatibility with the castortools interface
castorToLFN = eosToLFN
0076 
def lfnToPFN( path, tfcProt = 'rfio'):
    """Converts an LFN to a PFN. For example:
    /store/cmst3/user/cbern/CMG/TauPlusX/Run2011A-03Oct2011-v1/AOD/V2/PAT_CMG_V2_4_0/H2TAUTAU_Nov21
    ->
    root://eoscms//eos/cms/store/cmst3/user/cbern/CMG/TauPlusX/Run2011A-03Oct2011-v1/AOD/V2/PAT_CMG_V2_4_0/H2TAUTAU_Nov21?svcClass=cmst3&stageHost=castorcms

    Paths starting with /store/ are first rewritten to
    root://eoscms.cern.ch//eos/cms/store/..., and paths starting with
    /pnfs/psi.ch/cms/trivcat/ to root://t3se01.psi.ch//... . The result is
    wrapped in a cmsIO.cmsFile and the value of its `pfn` attribute is returned.

    path    -- the LFN (or other path) to convert
    tfcProt -- passed as second argument to cmsIO.cmsFile
               (??? what is tfcprot? -- not documented in this module)

    The original notes say that this function only checks path and does not
    access the storage system.
    NOTE(review): entity.stat() is called below for 'rfio' /eos/cms/ paths,
    which presumably does query the storage system -- confirm.
    If the path is in /store/cmst3, it assumes that the CMST3 svcClass is to be used.
    Otherwise, is uses the default one.
    """

    # str.replace without a count rewrites every occurrence of the pattern,
    # not only the leading one that startswith() just tested.
    if path.startswith("/store/"):
        path = path.replace("/store/","root://eoscms.cern.ch//eos/cms/store/")
    if path.startswith("/pnfs/psi.ch/cms/trivcat/"):
        path = path.replace("/pnfs/psi.ch/cms/trivcat/","root://t3se01.psi.ch//")
    #print "path to cmsFile():", path
    entity = cmsIO.cmsFile( path, tfcProt )
#    tokens = cmsIO.splitPFN(entity.pfn)
    # NOTE(review): this value is overwritten two statements below and never used.
    pfn = '%s://%s//%s/' % (entity.protocol,entity.host,entity.path)
    
    pfn = entity.pfn
    if tfcProt == 'rfio' and \
        entity.path.startswith("/eos/cms/") and \
                str(entity.stat()).startswith("Error 3011: Unable to stat"):

            # NOTE(review): str.replace returns a new string and both results
            # are discarded here, so pfn is NOT changed and this CASTOR fallback
            # is currently a no-op. Do not simply assign the results back:
            # presumably every EOS path that does not exist yet (e.g. a directory
            # about to be created) would then be redirected to CASTOR -- confirm
            # the intended behaviour first.
            pfn.replace("/eos/cms","/castor/cern.ch/cms")
            pfn.replace("eoscms","castorcms")
    return pfn
0106 
0107 
def lfnToEOS( path ):
    """Convert an LFN to an EOS path of the form root://eoscms.cern.ch//eos/cms/store/...

    A path that is not an LFN (see isLFN) is returned unchanged.
    ??? shouldn't we raise an exception?
    """
    if not isLFN(path):
        return path
    eosPath = 'root://eoscms.cern.ch//eos/cms/' + path
    # the concatenation above yields '.../eos/cms//store...': collapse the double slash
    return eosPath.replace('//store', '/store')

# alias kept for backwards compatibility with the castortools interface
lfnToCastor = lfnToEOS
0121 
def isEOSDir( path ):
    """Tell whether path designates something on EOS. Truthy if path is either:
    /store/...
    or
    /eos/...
    or
    root://eoscms.cern.ch//eos/cms/...
    and the host token of the corresponding PFN starts with 'eos'.

    Otherwise, returns False. A path that exists on the local file system is
    never considered to be on EOS.

    WARNING!! This function does not check for path existence on EOS,
    and is true also for plain files.
    !!! Will, is my summary correct?
    """
    if os.path.exists( path ):
        # the path exists on the local file system: treat it as local
        # COLIN: I think this condition could be removed,
        # as it duplicates the following one.
        return False
    eosPrefixes = ('/eos', '/store', 'root://eoscms.cern.ch//eos/cms/')
    if not path.startswith(eosPrefixes):
        # neither an EOS PFN nor an LFN
        return False
    # from here on, path is an EOS PFN or an LFN
    pfnTokens = cmsIO.splitPFN(lfnToPFN(eosToLFN(path)))
    return pfnTokens and pfnTokens[1].lower().startswith('eos')

# alias kept for backwards compatibility with the castortools interface
isCastorDir = isEOSDir
0151 
0152 
def isEOSFile( path, tfcProt = 'rfio'):
    """Return True if path is a file or directory stored on EOS (checks for path existence).

    The check runs `eos ls` on the path through runEOSCommand and succeeds
    when the return code is 0.

    path    -- EOS path or LFN to test
    tfcProt -- accepted for interface compatibility; not used by this function

    ??? This function does not behave well if passed a non EOS path.
    The original author reports a flood of error messages, e.g. for
    eostools.isEOSFile('/store/asdfasfd'):
        Command (['ls', '/', 's', 't', 'o', 'r', 'e', ..., '/store']) failed with return code: 2
        ls: s: No such file or directory
        ls: t: No such file or directory
        ...
        ls: /store: No such file or directory
    before False is finally returned.
    """
    stdout, stderr, returnCode = runEOSCommand( path, 'ls')
    return returnCode == 0

# alias kept for backwards compatibility with the castortools interface
isCastorFile = isEOSFile
0196 
0197 
def fileExists( path ):
    """Return true if path is a file or directory stored locally, or on EOS.

    Paths recognised by isEOSDir are checked on EOS with isEOSFile; any other
    path is checked on the local file system with os.path.exists.
    """
    if isEOSDir(path):
        return isEOSFile(path)
    # not an EOS path: check locally
    return os.path.exists(path)
0214 
0215 
def eosDirSize(path):
    '''Returns the size of a directory on EOS in GB.

    Runs `eos find --size` on the directory and sums, over all output lines,
    the number found in the third '='-separated field
    (presumably lines look like 'path=<file> size=<bytes>' -- TODO confirm).
    Lines without such a field, or whose field is not a number, are skipped.

    path -- EOS path or LFN of the directory
    Returns the summed size divided by 1024**3.
    '''
    lfn = eosToLFN(path)
    stdout = runEOSCommand(lfn, 'find', '--size')[0]
    totalBytes = 0
    for entry in stdout.split('\n'):
        try:
            totalBytes += float(entry.split('=')[2])
        except (IndexError, ValueError):
            # IndexError: fewer than three fields (blank or unrelated line)
            # ValueError: third field is not numeric (e.g. an error message)
            continue
    return totalBytes/1024/1024/1024
0228 
0229 
def createEOSDir( path ):
    """Make a directory on EOS and return path.

    `eos mkdir -p` is run only when isEOSFile reports that nothing exists at
    that location yet. Afterwards the path must be a directory according to
    isDirectory, otherwise OSError('cannot create directory ...') is raised.

    ???Will, I'm quite worried by the fact that if this path already exists, and is
    a file, everything will 'work'. But then we have a file, and not a directory,
    while we expect a dir...
    """
    lfn = eosToLFN(path)
    if not isEOSFile(lfn):
        runEOSCommand(lfn, 'mkdir', '-p')
    if not isDirectory(path):
        raise OSError('cannot create directory '+ path)
    return path

# alias kept for backwards compatibility with the castortools interface
createCastorDir = createEOSDir
0250 
def mkdir(path):
    """Create a directory, either on EOS or locally, and return path.

    EOS paths and LFNs are handled by createEOSDir. Any other path is created
    on the local file system with os.makedirs, which also creates the missing
    parent directories.
    NOTE: unlike `mkdir -p`, os.makedirs as called here raises OSError when
    the directory already exists.
    """
    onEOS = isEOS( path ) or isLFN(path)
    if onEOS:
        createEOSDir(path)
    else:
        os.makedirs(path)
    return path
0260 
def isDirectory(path):
    """Return True if path is a directory on EOS.

    Runs `xrd ... existdir` through runXRDCommand and looks for the text
    'The directory exists' in its output, so the path must exist.
    According to the original notes this returns False for EOS files and
    crashes with local paths.

    ???Will, this function also seems to work for paths like:
    /eos/cms/...
    ??? I think that it should work also for local files, see isFile.
    """
    xrdOutput, _, _ = runXRDCommand(path, 'existdir')
    directoryFound = 'The directory exists' in xrdOutput
    return directoryFound
0273 
def isFile(path):
    """Return True if path is an existing file, False otherwise.

    Paths starting with /eos or /store are tested with `xrd ... existfile`;
    all other paths are tested locally with os.path.isfile.
    Returns False for local directories.

    ???This function works with local files, so not the same as isDirectory...
    isFile and isDirectory should behave the same.
    """
    if path.startswith('/eos') or path.startswith('/store'):
        xrdOutput, _, _ = runXRDCommand(path, 'existfile')
        return 'The file exists' in xrdOutput
    return os.path.isfile(path)
0293 
def chmod(path, mode):
    """Run `eos chmod -r <mode>` on a file or directory.

    mode is converted with str() before being passed on.
    Returns the result of runEOSCommand.
    """
    modeString = str(mode)
    return runEOSCommand(path, 'chmod', '-r', modeString)
0298 
0299 
def listFiles(path, rec = False, full_info = False):
    """Provide a listing of the specified directory, locally or on EOS.

    path      -- directory to list
    rec       -- when true, list the whole directory tree
    full_info -- EOS only: return the complete token list of each output line
                 instead of its fifth token

    Local directory: returns '<path>/<entry>' strings. In recursive mode all
    directories of the tree come first, followed by all files.
    Otherwise: runs `xrd ... dirlist` (or `dirlistrec`) and returns one item
    per non-blank output line.
    """
    # -- listing on the local filesystem --
    if os.path.isdir( path ):
        if not rec:
            return [ '/'.join([path, entry]) for entry in os.listdir( path )]
        dirPaths = []
        filePaths = []
        for root, subDirs, fileNames in os.walk(path):
            for subDir in subDirs:
                dirPaths.append('/'.join([root, subDir]))
            for fileName in fileNames:
                filePaths.append('/'.join([root, fileName]))
        return dirPaths + filePaths

    # -- listing on EOS --
    xrdCmd = 'dirlistrec' if rec else 'dirlist'
    stdout, _, _ = runXRDCommand(path, xrdCmd)
    listing = []
    for line in stdout.split('\n'):
        fields = line.split()
        if not fields:
            continue
        #COLIN need same interface for eos and local fs
        listing.append(fields if full_info else fields[4])
    return listing
0335 
def which(cmd):
    """Run `which <cmd>` and return the location it prints.

    With exactly one non-empty output line that line is returned; with
    exactly two, the second one. In every other case (none, or more than
    two) the list of non-empty lines itself is returned.
    """
    stdout, _, _ = cmsIO.cmsFileManip().runCommand(['which', cmd])
    found = [line for line in stdout.split('\n') if line]
    count = len(found)
    if count == 1:
        return found[0]
    if count == 2:
        return found[1]
    return found
0348 
def ls(path, rec = False):
    """Provide a simple list of the specified directory, works on EOS and locally.

    Every entry returned by listFiles is passed through eosToLFN.
    """
    entries = listFiles(path, rec)
    return list(map(eosToLFN, entries))
0352 
def ls_EOS(path, rec = False):
    """Provide a simple list of the specified directory, works on EOS only, but is faster than the xrd version.

    Non-recursive: runs `eos ls` and joins each output line to the LFN of path.
    Recursive: runs `eos find -f` and converts each output line with eosToLFN.
    The return code of the eos command is not checked.
    """
    if not rec:
        stdout, _, _ = runEOSCommand(path, 'ls')
        lfn = eosToLFN(path)
        return [os.path.join(lfn, name) for name in stdout.split('\n') if name]
    stdout, _, _ = runEOSCommand(path, 'find', '-f')
    return [eosToLFN(name) for name in stdout.split('\n') if name]
0362 
def rm(path, rec=False):
    """rm, works on EOS and locally.

    path -- LFN, EOS path or local path
    rec  -- remove recursively (`eos rm -r` on EOS, shutil.rmtree locally)

    Raises ValueError when the path is neither an EOS path nor an existing
    local path.

    Colin: should implement a -f mode and a confirmation when deleting dirs recursively.
    """
    path = lfnToEOS(path)
    if isEOS(path):
        eosArgs = ['rm', '-r'] if rec else ['rm']
        runEOSCommand(path, *eosArgs)
        return
    if not os.path.exists(path):
        raise ValueError(path + ' is not EOS and not local... should not happen!')
    if rec:
        shutil.rmtree(path)
    else:
        os.remove( path )
0381 
def remove( files, rec = False):
    """Remove a list of files and directories, possibly recursively.

    files -- iterable of paths
    rec   -- when true, each path is listed recursively and every entry below
             it (plus the path itself) is removed one by one with rm,
             deepest entries first

    Colin: Is that obsolete? why not use rm?
    """
    for path in files:
        lfn = eosToLFN(path)
        if not rec:
            rm(path)
            continue

        #this should be used with care
        candidates = ls(path, rec = True)
        candidates.append(lfn)

        # pair every entry located under lfn with its depth (number of
        # non-empty path components) so that the deepest ones go first
        byDepth = []
        for name in candidates:
            if name and name.startswith(lfn):
                depth = len([part for part in name.split('/') if part])
                byDepth.append((depth, name))
        byDepth.sort(reverse = True)

        for _, name in byDepth:
            rm(name)
0400                 
def cat(path):
    """cat, works on EOS and locally.

    path -- LFN, EOS path or local file path
    Returns the file content as a string; a trailing newline is appended to
    non-empty content that lacks one.

    EOS: the content is read with `xrd ... cat`. Output is kept up to the
    first line containing 'cat returned <number>'; that marker is stripped
    from its line and everything after it is dropped. When the command wrote
    to stderr, both its stdout and stderr are echoed to sys.stderr.
    Local: the file is opened and read; open() raises (IOError/OSError) when
    it cannot be read.
    """
    path = lfnToEOS(path)
    if isEOS(path):
        #print "the file to cat is:", path
        out, err, _ = runXRDCommand(path,'cat')
        lines = []
        if out:
            pattern = re.compile('cat returned [0-9]+')
            for line in out.split('\n'):
                match = pattern.search(line)
                if line and match is not None:
                    # status marker reached: keep what precedes it and stop
                    lines.append(line.replace(match.group(0),''))
                    break
                else:
                    lines.append(line)
        if err:
            print(out, file=sys.stderr)
            print(err, file=sys.stderr)
        allLines = '\n'.join(lines)
        if allLines and not allLines.endswith('\n'):
            allLines += '\n'
        return allLines
    else:
        # open() instead of the Python-2-only file() builtin; the context
        # manager closes the handle even if read() fails
        with open(path) as localFile:
            content = localFile.read()
        if content and not content.endswith('\n'):
            content += '\n'
        return content
0429     
def xrdcp(src, dest):
    """Does a copy of files using xrd.

    src  -- local path, EOS path or LFN of the file or directory to copy
    dest -- local path, EOS path or LFN of the destination

    A single file is copied with one xrdcp call. When src is a directory the
    copy is recursive: a directory named after the last component of src is
    created under dest (unless that component is '.'), then every entry
    listed below src is either copied (files) or created (directories).

    Raises ValueError when src exists neither locally nor on EOS.

    Colin: implement a generic cp interface as done for rm, ls, etc?
    """
    recursive = False

    # -- the source: a local path is used as is, an EOS one is turned into a PFN --
    pfn_src = src
    if os.path.exists(src):
        if os.path.isdir(src):
            recursive = True
    elif fileExists(src):
        src = eosToLFN(src)
        pfn_src = lfnToPFN(src)
        if isDirectory(src):
            recursive = True
    else:
        raise ValueError(src + ' does not exist.')

    # -- the destination: anything that is not on EOS is used as is --
    pfn_dest = dest
    if isEOSDir(dest):
        dest = eosToLFN(dest)
        pfn_dest = lfnToPFN(dest)
        if isDirectory(dest):
            # rebuild the PFN from its tokens with a trailing slash
            tokens = cmsIO.splitPFN(pfn_dest)
            pfn_dest = '%s://%s//%s/' % (tokens[0],tokens[1],tokens[2])

    if recursive:
        topDir = src.rstrip('/').split('/')[-1]
        if topDir != '.':
            dest = '/'.join([dest, topDir])
            mkdir( dest )
        files = listFiles(src, rec=True)
        for srcFile in files:
            pfnSrcFile = srcFile
            if isEOSDir(srcFile):
                srcFile = eosToLFN(srcFile)
                pfnSrcFile = lfnToPFN(srcFile)
            # path of the entry relative to src, re-rooted under dest
            destFile = srcFile.replace( src, '' )
            destFile = '/'.join([dest,destFile])
            pfnDestFile = destFile
            if isEOSDir(destFile):
                lfnDestFile = eosToLFN(destFile)
                pfnDestFile = lfnToPFN(lfnDestFile)
            if isFile(srcFile):
                _xrdcpSingleFile(  pfnSrcFile, pfnDestFile )
            else:
                mkdir(destFile)
    else:
        _xrdcpSingleFile( pfn_src, pfn_dest )
0493 
0494 
def _xrdcpSingleFile( pfn_src, pfn_dest):
    """Copy a single file with `xrdcp <pfn_src> <pfn_dest>`.

    When the command wrote anything to stderr, its stdout and stderr are
    echoed to sys.stderr. Returns the return code of the command.
    """
    copyCommand = ['xrdcp', pfn_src, pfn_dest]
    out, err, ret = cmsIO.cmsFileManip().runCommand(copyCommand)
    if err:
        print(out, file=sys.stderr)
        print(err, file=sys.stderr)
    return ret
0510 
def move(src, dest):
    """Move src to dest locally to the same server.

    Both paths are normalised to LFNs; `xrd ... mv` is then run on the source
    with the EOS form of the destination as its argument. Nothing is returned.
    """
    srcLFN = eosToLFN(src)
    destEOS = lfnToEOS(eosToLFN(dest))
    runXRDCommand(srcLFN, 'mv', destEOS)
0518                 
def matchingFiles( path, regexp):
    """Return the entries of directory path whose base name matches regexp.

    The listing comes from ls(path); regexp is applied with re.match, i.e.
    anchored at the start of the base name.
    """
    matcher = re.compile( regexp )
    selected = []
    for name in ls(path):
        if matcher.match(os.path.basename(name)) is not None:
            selected.append(name)
    return selected
0528 
def datasetNotEmpty( path, regexp ):
    """Return 1 if at least one entry of the EOS directory path has a base name matching regexp, else 0.

    The listing comes from ls_EOS(path); regexp is applied with re.match,
    i.e. anchored at the start of the base name.
    """
    matcher = re.compile( regexp )
    names = ls_EOS(path)
    found = any(matcher.match(os.path.basename(name)) is not None for name in names)
    return 1 if found else 0
0537     
def cmsStage( absDestDir, files, force):
    """Run cmsStage with LFNs if possible.

    absDestDir -- destination directory; created first when it is on EOS
    files      -- iterable of files to stage, one cmsStage call per file
    force      -- when true, '-f' is passed to cmsStage

    Each command line is printed before it is run. The outcome of the
    commands is not checked.
    """
    if isEOSDir(absDestDir):
        createEOSDir( absDestDir )

    for fname in files:
        stageCommand = ['cmsStage'] + (['-f'] if force else [])
        stageCommand += [eosToLFN(fname), eosToLFN(absDestDir)]
        print(' '.join(stageCommand))
        cmsIO.cmsFileManip().runCommand(stageCommand)