Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 12:44:50

#!/usr/bin/perl
###############################################################################
# Parallel processing for alignment.
# Similar to alignment.pl, but:
# - runs faster
# - accepts files on disk pools other than the default CASTOR pool
# - saves disk space on output
#
# data.list is the name of a list of ROOT files in the data/ directory.
# It must contain file names in this format:
#     '<file1.root>'
#     '<file2.root>'
#     ...
#     '<fileN.root>'
# NEW: you can choose the number of files processed in each parallel job
# and the STAGE_SVCCLASS location.
###############################################################################
0018 
# determine location
# The site is derived from the host name. Prefer the HOST environment
# variable (the value `echo $HOST` would report) and fall back to the system
# host name when the calling shell does not export HOST.
$host = defined $ENV{HOST} ? $ENV{HOST} : "";
if ( $host eq "" ) {
  require Sys::Hostname;
  $host = eval { Sys::Hostname::hostname() } || "";
}
$location = "";
if ( $host =~ /lxplus/ )     { $location="lxplus"; }
elsif ( $host =~ /lxcms/ )   { $location="lxcmsg1"; }

# job steering ----------------------------------------------------------------

# name of job
$jobname="test-caf";

# cfg file
$steering="alignment_noinput.cfg";

# db output files (to be deleted except for last iteration)
$dbfiles="alignments.db";

# db input file
$sqlitefile="";

# list of data
$datalist="data.list";
# number of files per job
$nfilesperjob=2;
# castor data location
# $thepool="wan";
$thepool="cmscaf";

# number of iterations (excluding initial step)
$iterations=10;

# interactive or lxbatch queue
# $farm="I";
# $farm="8nm"; $resource="";
$farm="dedicated -R cmscaf";

$cmsswvers="CMSSW_1_7_5";
$scramarch="slc4_ia32_gcc345";
# quoted: a bareword here would silently change meaning if a sub of that
# name were ever defined
$scram="scramv1";

# sleep time in seconds between two check cycles
$sleeptime=30;

# site-specific paths etc

if ( $location eq "lxcmsg1" ) {
  $homedir="/afs/cern.ch/user/c/covarell/scratch0/goodalign";
  $basedir="${homedir}/${cmsswvers}";
  $outdir="/data/covarell/joboutput";
}
elsif ( $location eq "lxplus" ) {
  # die "ERROR: Root files for the analysis are on lxcmsg1!\n";
  $homedir="/afs/cern.ch/user/c/covarell/scratch0/goodalign";
  $basedir="${homedir}/${cmsswvers}";
  $outdir="/afs/cern.ch/user/c/covarell/scratch0/joboutput";
}
else {
  die "ERROR: location: $location unknown!\n";
}
0077 
0078 # -----------------------------------------------------------------------------
0079 
$workdir="${basedir}/src/Alignment/CommonAlignmentProducer/test";
$datalistdir="${basedir}/src/Alignment/CommonAlignmentProducer/data/" . ${datalist};

print "\n";
print "-------------------------------------------------------------------------------\n";
print "H I P   A l i g n m e n t \n";
print "-------------------------------------------------------------------------------\n";
print "Location: $location\n";
print "Workdir: ${workdir}\n";
print "Outdir: ${outdir}/${jobname}/${cmsswvers}\n";

# Calculate njobs (= nfiles / nfilesperjob, last files not taken).
# The list is counted directly in Perl: every line containing "root" is one
# input file. No scratch file in the current directory and no shell pipeline
# are needed.
if ($nfilesperjob < 1) {
  die "ERROR: nfilesperjob must be at least 1 (got $nfilesperjob)\n";
}
open(my $listfh, '<', $datalistdir) or die "cannot open $datalistdir: $!\n";
my $nrootfiles = grep { /root/ } <$listfh>;   # grep in scalar context counts matches
close($listfh);
$njobs = int($nrootfiles / $nfilesperjob);

if ($njobs>1) {
  print "Parallel Jobs: ${njobs} with ${nfilesperjob} file(s) per job\n";
}
else {
  # only a few files
  print "Only one job (no parallel): please use the default alignment.pl script.\n";
  exit;
}
print "Iterations: ${iterations}\n";

$odir="$outdir/$jobname";
0110 
0111 # -----------------------------------------------------------------------------
0112 # create sandbox and set up local environment
0113 
# Wipe any previous output area for this job name, create a fresh scram
# project there and copy the compiled libraries of the working release in.
system("
rm -rf $outdir/$jobname;
mkdir $outdir/$jobname;
cd $outdir/$jobname;
$scram project CMSSW $cmsswvers > /dev/null;
cd $cmsswvers;
mkdir -p lib/${scramarch};
mkdir -p bin/${scramarch};
mkdir -p external/${scramarch};
mkdir -p module/${scramarch};
mkdir -p tmp;
cp $basedir/lib/${scramarch}/* lib/${scramarch} > /dev/null;
cp $basedir/external/${scramarch}/* external/${scramarch} > /dev/null;
");

# deal with modified cf? files
# All cfg/cff/cfi files three directory levels below src are shipped to the
# sandbox through a tar archive so their relative paths are preserved.
chdir "$basedir/src" or die "cannot chdir to $basedir/src: $!\n";
@cfgfilelist = glob("*/*/*/*.cf?");
# start from a clean archive: "tar r" appends, so a leftover cfg.tar from an
# interrupted run would otherwise leak stale files into the sandbox
unlink "cfg.tar";
foreach $file ( @cfgfilelist ) {
  # list form: the file name is handed to tar verbatim, no shell involved
  system("tar", "rf", "cfg.tar", $file);
}
if (@cfgfilelist) {
  system("cp cfg.tar $odir/$cmsswvers/src; rm cfg.tar; cd $odir/$cmsswvers/src; tar xf cfg.tar");
}
0136 
0137 
# if one cpu create subjob and submit #########################################

if ($njobs == 1) {

  # make subjob
  $dir="$outdir/$jobname/$cmsswvers/main";
  make_subjob($dir,1);

  # create cfg file
  system("cp ${workdir}/$steering $dir/cfgfile");
  $repl="replace maxEvents.input  = -1 ";
  replace("$dir/cfgfile",$repl);

  if ($farm eq "I") {
    print "Run interactively ...\n";
    system("cd $dir; ./subjob");
  }
  else {
    print "Submit to farm ...\n";
    system("bsub -i /tmp/junk -q $farm < $dir/subjob");
  }

}

# PARALLEL ####################################################################

else {

  # ---------------------------------------------------------------------------
  # create N job directories and set up steering

  $ijob=1;
  $ifile=0;

  open(my $datafh, '<', $datalistdir) or die "cannot open $datalistdir";
  @listdata=<$datafh>;
  close($datafh);

  # Each line containing "root" is one input file. Files are grouped into
  # jobs of $nfilesperjob; a job directory is created once its last file has
  # been seen, so trailing files that do not fill a job are not used.
  foreach $line (@listdata) {
    next unless $line =~ /root/;
    ${ifile}++;

    if (${nfilesperjob} == 1) {   # 1 FILE PER JOB
      # make job dir and copy aux files
      $dir="$outdir/$jobname/$cmsswvers/job$ijob";
      make_subjob($dir,0);

      # create cfg file
      system("cp ${workdir}/$steering $dir/cfgfile");
      if ($dbfiles) {
        $repl= "source = PoolSource { 
        
           untracked vstring fileNames = {
                   $line
                   }
                   untracked uint32 skipEvents = 0  
               }
                   replace maxEvents.input  = -1 
                   replace AlignmentProducer.saveToDB = false
                  ";
      } else {
        $repl= "source = PoolSource { 
        
           untracked vstring fileNames = {
                   $line
                   }
                   untracked uint32 skipEvents = 0  
               }
                   replace maxEvents.input  = -1 
                  ";
      }
      replace("$dir/cfgfile",$repl);
      ${ijob}++;
    } else {   # 2 OR MORE FILES PER JOB
      if ($ifile % ${nfilesperjob} == 0) {
        # last file of this job: close the file list and write the job

        # make job dir and copy aux files
        $dir="$outdir/$jobname/$cmsswvers/job$ijob";
        make_subjob($dir,0);

        # create cfg file
        system("cp ${workdir}/$steering $dir/cfgfile");
        if ($dbfiles) {
          $repl= $repl . "
                       $line
                      }
                      untracked uint32 skipEvents = 0   
               }
                   replace maxEvents.input  = -1 
                   replace AlignmentProducer.saveToDB = false
                  ";
        } else {
          $repl= $repl . "
                       $line
                     }
                      untracked uint32 skipEvents = 0   
               }
                   replace maxEvents.input  = -1 
                  ";
        }
        replace("$dir/cfgfile",$repl);
        ${ijob}++;

      } elsif ($ifile % ${nfilesperjob} == 1) {
        # first file of a new job: start a fresh source block
        $repl= "source = PoolSource { 
        
          untracked vstring fileNames = { 
                  $line,
                   ";
      } else {
        # file in the middle of a job: extend the file list
        $repl= $repl . "$line,";
      }
    }
  }

  # ---------------------------------------------------------------------------
  # create collector job and steering

  $dir="$outdir/$jobname/$cmsswvers/main";
  make_subjob($dir,0);

  # create cfg file
  system("cp ${workdir}/$steering $dir/cfgfile");
  # The collector reads a single event, so one input file is enough. Take
  # the first line that actually names a root file, so a leading blank or
  # comment line in the list cannot end up in the collector configuration.
  my ($collectorfile) = grep { /root/ } @listdata;
  $repl="
        source = PoolSource { 
        
        untracked vstring fileNames = { 
                $collectorfile
                }
        }
        replace maxEvents.input = 1
        replace HIPAlignmentAlgorithm.collectorActive = true
        replace HIPAlignmentAlgorithm.collectorNJobs = $njobs
        replace HIPAlignmentAlgorithm.collectorPath = \"../\"
      ";
  replace("$dir/cfgfile",$repl);


  # ---------------------------------------------------------------------------

  $dir="$outdir/$jobname/$cmsswvers";
  $iteration=0;

  # Publish the current iteration number in iteration.txt, which the
  # parallel subjob scripts read via "cat ../iteration.txt".
  my $write_iteration = sub {
    unlink "$dir/iteration.txt";
    open(my $iterfh, '>', "$dir/iteration.txt")
      or die "cannot write $dir/iteration.txt: $!\n";
    print {$iterfh} "$iteration";
    close($iterfh) or die "cannot close $dir/iteration.txt: $!\n";
  };
  $write_iteration->();

  # enter loop ... ============================================================

  $alldone=0;   # nothing has been submitted yet

  LOOP: {

    # check if all are finished
    if ($iteration > 0) { $alldone=check_finished(); }

    # need to run collector / resubmit for next iteration
    if ( $alldone == 1 or $iteration == 0 ) {

      # run collector
      print "Run collector for iteration $iteration ...\n";
      $iret=run_collector();
      if ($iret != 0) { die "ERROR in collector!\n";}

      # submit jobs for next iteration
      if ($iteration < $iterations) {
        $iteration++;
        $write_iteration->();

        print "New iteration $iteration --------------------------------------------------------------\n";
        print "Submit all jobs for iteration $iteration ...\n";
        $iret=submit_jobs();
        if ($iret != 0) { die "ERROR in submit jobs!\n";}

        redo LOOP;
      }
      # we are done
      else {
        system("
        cd $dir
        rm -rf tmp src logs lib config bin
     ");
        print "Finished!\n";
      }

    }

    # else, just wait ...
    else {
      # print "sleeping for $sleeptime seconds ...\n";
      sleep($sleeptime);
      redo LOOP;
    }

  }

  # save some disk space
  system("cd $dir; rm -rf job*; rm -rf LSFJOB*");

}

exit 0;
0349 
0350 ###############################################################################
0351 
# Poll the job directories job1..job$njobs below $dir for their DONE marker.
# Prints one status flag per job ("1 " done, "0 " not yet) followed by a
# timestamp, and returns 1 when every job has finished, 0 otherwise.
# Uses the globals $dir and $njobs; writes $alldone, $ijob and $timestamp.
sub check_finished
{
    $timestamp = `date +%H:%M:%S_%d/%m/%y`;   # keeps the trailing newline from date
    $alldone   = 1;

    print "Job status: ";

    for ($ijob = 1; $ijob <= $njobs; $ijob++) {
        if ( -e "$dir/job$ijob/DONE" ) {
            print "1 ";
            next;
        }
        # at least one job is still running
        print "0 ";
        $alldone = 0;
    }

    print "at $timestamp";

    return ($alldone eq 1) ? 1 : 0;
}
0374 
0375 ###############################################################################
0376 
# Run the collector job in $dir/main and tidy up per-job event files.
# Uses the globals $dir, $iteration and $njobs.
# Returns 0 on success and 1 when the collector script could not be run or
# exited with a non-zero status, so the caller's error check can trigger.
sub run_collector
{
    # "./subjob": do not depend on "." being in PATH; "&&": do not try to
    # run anything when the collector directory is missing.
    my $rc = system("cd $dir/main && ./subjob");
    return 1 if $rc != 0;

    # after the first iteration, keep job1's event file next to the collector
    if ($iteration > 0) {
        system("cp $dir/job1/HIPAlignmentEvents.root $dir/main");
    }

    # the event files of jobs 2..N are removed
    for my $job (2 .. $njobs) {
        unlink "$dir/job$job/HIPAlignmentEvents.root";
    }

    return 0;
}
0393 
0394 ###############################################################################
0395 
# Start jobs job1..job$njobs for the current iteration.
# For every job the DONE marker is removed and the IO*.root files are copied
# from $dir/main into the job directory. The job script is then either run
# in the background ($farm eq "I") or handed to bsub, retrying the
# submission until bsub succeeds.
# Uses the globals $dir, $njobs and $farm. Always returns 0.
sub submit_jobs
{
    # seconds to wait before retrying a failed bsub; the retry message
    # reports this same value
    my $retry_delay = 5;

    for my $job (1 .. $njobs) {

        system("
    rm -f $dir/job$job/DONE

    cp $dir/main/IOTruePositions.root       $dir/job$job
    cp $dir/main/IOMisalignedPositions.root $dir/job$job
    cp $dir/main/IOAlignedPositions.root    $dir/job$job
    cp $dir/main/IOIteration.root           $dir/job$job

  ");

        if ( $farm eq "I" ) {
            print "Run job $job interactively ...\n";
            system("$dir/job$job/subjob &");
        }
        else {
            print "Submit job $job ... ";
            while (1) {
                my $rc = system("cd $dir ; bsub -o /tmp/junk$job -q $farm < job$job/subjob");
                # my $rc = system("cd $dir ; bsub -q $farm < job$job/subjob");
                last if $rc == 0;
                print "ERROR in submitting job: $rc  .. retrying in ${retry_delay}s ...\n";
                sleep($retry_delay);
            }
        }

        # pause between submissions
        sleep(1);
    }

    return 0;
}
0435 
0436 ###############################################################################
0437 
# Create a job directory and write its executable "subjob" shell script.
#   $dir    - directory to create; the script is written to $dir/subjob
#   $single - 1: stand-alone job that loops over all $iterations itself
#             otherwise: parallel job that runs one iteration, reading the
#             iteration number from ../iteration.txt and touching DONE at
#             the end
# Optional auxiliary files ($authfile, $sqlitefile) are copied from $workdir
# only when the corresponding global is set.
# Uses the globals $workdir, $scram, $thepool, $iterations and $dbfiles.
# Dies when the script file cannot be written.
sub make_subjob {

    my ($dir, $single) = @_;
    my $subjob;

    system("mkdir $dir");
    # without these guards an unset name turns into "cp $workdir/ $dir"
    if ($authfile)   { system("cp ${workdir}/$authfile $dir"); }
    if ($sqlitefile) { system("cp ${workdir}/$sqlitefile $dir"); }
    # if ($condbcatalogfile) { system("cp ${workdir}/$condbcatalogfile $dir"); }

    if ($single == 1) {

        # for single job
        system("touch $dir/timing.log");
        $subjob="#!/bin/zsh -f
#BSUB -J \"ALIGN\"
#BSUB -C 0
cd $dir
eval \`$scram runtime -sh\`
export STAGE_SVCCLASS=${thepool}
rehash
thisiter=1
until ((thisiter > $iterations )); do
rm -f ${dbfiles}
echo \"Iteration \${thisiter} ...\"
date | read starttime
EdmPluginRefresh
cmsRun cfgfile > /dev/null
date | read endtime
rm -f log.\${thisiter}.log
mv alignment.log alignment.\${thisiter}.log
gzip alignment.\${thisiter}.log
echo \"Iteration \${thisiter}: \${starttime} ... \${endtime}\" >> timing.log
let thisiter++
done
";

    }

    else {

        # for parallel
        $subjob="#!/bin/zsh 
#BSUB -J \"ALIGN\" 
#BSUB -C 0
cd $dir
eval \`$scram runtime -sh\`
export STAGE_SVCCLASS=${thepool}
rehash
cat ../iteration.txt | read iter
rm -f ${dbfiles}
EdmPluginRefresh
cmsRun cfgfile > log.\$iter.log
gzip log.\$iter.log
mv alignment.log alignment.\$iter.log
gzip alignment.\$iter.log
touch DONE
";

    }

    open(my $fh, '>', "$dir/subjob") or die "cannot write $dir/subjob: $!\n";
    print {$fh} $subjob;
    close($fh) or die "cannot close $dir/subjob: $!\n";
    system("chmod u+x $dir/subjob");

}
0505 
0506 ###############################################################################
0507 
# Rewrite a file, substituting every line that contains the marker text
# REPLACEME with a replacement text.
#   $infile - path of the file to rewrite in place
#   $repl   - text written instead of each marker line (used verbatim; no
#             newline is added)
# The new content is first written to a scratch file next to $infile and
# then renamed over it, so the current working directory is never touched.
# Dies when the file cannot be read or the new content cannot be written.
# Returns 0.
sub replace {

    my ($infile, $repl) = @_;

    open(my $in, '<', $infile) or die "cannot open $infile";
    my @lines = <$in>;
    close($in);

    # scratch file in the same directory as the target; the process id keeps
    # concurrent runs apart
    my $tmpfile = "$infile.tmp.$$";
    open(my $out, '>', $tmpfile) or die "cannot write $tmpfile: $!";

    foreach my $line (@lines) {
        print {$out} ($line =~ /REPLACEME/ ? $repl : $line);
    }

    close($out) or die "cannot close $tmpfile: $!";
    rename($tmpfile, $infile) or die "cannot rename $tmpfile to $infile: $!";

    return 0;
}