Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:56:17

#!/usr/bin/perl
###############################################################################
# Parallel processing for alignment
#
# This first section only defines configuration: it detects the site from the
# HOST variable, sets the job steering parameters and derives the
# site-specific base/output directories.  All settings are plain package
# globals because the subroutines further down read them directly.
###############################################################################

# determine location
# HOST is read through a child shell, so only an exported HOST is visible.
$host=`echo \$HOST`;
chomp $host;          # drop the newline appended by echo
$location="";         # stays empty when no pattern below matches
if    ( $host =~ /fpslife/ ) { $location="fpslife"; }
elsif ( $host =~ /lxplus/ )  { $location="lxplus"; }
elsif ( $host =~ /cnaf/ )    { $location="cnaf"; }
elsif ( $host =~ /lxcms/ )   { $location="lxplus"; }   # lxcms is treated like lxplus

# job steering ----------------------------------------------------------------

# name of job (also the name of the output sub-directory)
$jobname="test";

# cfg file
$steering="alignment.cfg";

# db output files to be deleted except for last iteration
$dbfiles="Alignments.db condbcatalog.xml";

# db authentication file to be copied to exec dir
$authfile="authentication.xml";

# db input file (empty string = nothing to copy)
# $sqlitefile="CSA06Scenario.db"; $condbcatalogfile="condbcatalog.xml";
$sqlitefile=""; $condbcatalogfile="";

# number of events per job
$nevent=10000;

# first event
$firstev=0;

# number of jobs
$njobs=1;

# number of iterations (excluding initial step)
$iterations=3;

# interactive or lxbatch queue
$farm="I";
# $farm="8nm"; $resource="";
# $farm="dedicated -R cmsalca";

$cmsswvers="CMSSW_1_2_0_pre3";
$scramarch="slc3_ia32_gcc323";
# Quoted: the original relied on an unquoted bareword being taken as a string.
$scram="scramv1";

# sleep time in seconds between two check cycles
$sleeptime=30;

# site-specific paths etc

if ( $location eq "lxplus" ) {
  $homedir="/afs/cern.ch/user/f/fpschill";
  $basedir="${homedir}/w1/cmssw/${cmsswvers}";
  $outdir="${homedir}/scratch0/joboutput";
}
elsif ( $location eq "fpslife" ) {
   die "ERROR: location: $location unsupported!\n";
#  $homedir="/afs/cern.ch/user/f/fpschill";
#  $basedir="${homedir}/cms/${cmsswvers}";
#  $outdir="/x01/usr/fpschill/cms/joboutput";
}
elsif ( $location eq "cnaf" ) {
   die "ERROR: location: $location unsupported!\n";
#  $homedir="/home/CMS/fpschill";
#  $basedir="${homedir}/${cmsswvers}";
#  $outdir="/afs/infn.it/cnaf/user/fpschill/joboutput";
}
else {
  die "ERROR: location: $location unknown!\n";
}
0077 
# -----------------------------------------------------------------------------

$workdir="${basedir}/src/Alignment/CommonAlignmentProducer/test";

print "\n";
print "-------------------------------------------------------------------------------\n";
print "A l i g n m e n t \n";
print "-------------------------------------------------------------------------------\n";
print "Location: $location\n";
print "Workdir: ${workdir}\n";
print "Outdir: ${outdir}\n";
if ($njobs>1) { 
  print "Parallel Jobs: ${njobs} with $nevent events per job\n";
}
else {
  print "Events: $nevent \n";
}
print "Iterations: ${iterations}\n";

$odir="$outdir/$jobname";

# -----------------------------------------------------------------------------
# create sandbox and set up local environment
#
# The job output directory is wiped and recreated, a fresh scram project area
# is created inside it and the libraries/modules of the base release area are
# copied over.  Each 'cd' is guarded so that a failed directory change can
# never let the following commands run in the wrong place, and module/<arch>
# is now created before files are copied into it.

system("
rm -rf $odir;
mkdir -p $odir && cd $odir || exit 1;
$scram project CMSSW $cmsswvers > /dev/null;
cd $cmsswvers || exit 1;
mkdir -p lib/${scramarch};
mkdir -p bin/${scramarch};
mkdir -p config/${scramarch};
mkdir -p module/${scramarch};
cp $basedir/lib/${scramarch}/* lib/${scramarch} > /dev/null;
cp $basedir/module/${scramarch}/* module/${scramarch} > /dev/null;
");

# The exit status above is not used on purpose (the trailing cp commands fail
# harmlessly when there is nothing to copy); the existence of the project
# area is what everything below depends on.
-d "$odir/$cmsswvers" or die "ERROR: could not create sandbox $odir/$cmsswvers\n";

# deal with modified cf? files
# All */*/*/*.cf? files below the base src directory are packed into a tar
# archive which is then unpacked into the src directory of the sandbox.
chdir "$basedir/src" or die "ERROR: cannot chdir to $basedir/src: $!\n";
unlink "cfg.tar";                       # 'tar rf' appends; start from a clean archive
@cfgfilelist = glob("*/*/*/*.cf?");
if (@cfgfilelist) {
  # Append in batches: far fewer tar processes than one per file, while still
  # keeping each command line short.
  my @pending = @cfgfilelist;
  while (my @batch = splice(@pending, 0, 100)) {
    system("tar", "rf", "cfg.tar", @batch);
  }
  system("cp cfg.tar $odir/$cmsswvers/src; rm cfg.tar; cd $odir/$cmsswvers/src && tar xf cfg.tar");
}

0123 
# if one cpu create subjob and submit #########################################
#
# With a single job the whole iteration loop lives inside the generated
# subjob script, so the script is simply run (or submitted) once.

if ($njobs == 1) {

  # make subjob
  $dir="$outdir/$jobname/$cmsswvers/main";
  make_subjob($dir,1);

  # create cfg file
  system("cp ${workdir}/$steering $dir/cfgfile");
  $repl="replace maxEvents  = { untracked int32 input = $nevent }";
  replace("$dir/cfgfile",$repl);

  if ($farm eq "I") {
    print "Run interactively ...\n";
    # '&&' so the subjob is never started from the wrong directory
    system("cd $dir && ./subjob");
  }
  else {
    print "Submit to farm ...\n";
    # -o (job output file), consistent with the parallel submission in
    # submit_jobs; the original passed -i, which names the job's stdin file.
    system("bsub -o /tmp/junk -q $farm < $dir/subjob");
  }

}

# PARALLEL ####################################################################
#
# N worker jobs each process their own slice of events; after every iteration
# a collector job in main/ is run and the workers are resubmitted.

else {

# -----------------------------------------------------------------------------
# create N job directories and set up steering

$ifirst=$firstev;

for ($ijob=1; $ijob <= $njobs; $ijob++) {
  print "Job ${ijob}/${njobs} starting at event $ifirst \n";

  # make job dir and copy aux files
  $dir="$outdir/$jobname/$cmsswvers/job$ijob";
  make_subjob($dir,0);

  # create cfg file
  system("cp ${workdir}/$steering $dir/cfgfile");
  $repl = "\n"
        . "    replace PoolSource.maxEvents  = $nevent \n"
        . "    replace PoolSource.skipEvents = $ifirst \n"
        . "    replace AlignmentProducer.saveToDB = false\n"
        . "  ";
  replace("$dir/cfgfile",$repl);

  # next job starts where this one ends
  $ifirst += $nevent;
}

# -----------------------------------------------------------------------------
# create collector job and steering

$dir="$outdir/$jobname/$cmsswvers/main";
make_subjob($dir,0);

# create cfg file
system("cp ${workdir}/$steering $dir/cfgfile");
$repl = "\n"
      . "  replace PoolSource.maxEvents  = 1\n"
      . "  replace PoolSource.skipEvents = 0\n"
      . "  replace HIPAlignmentAlgorithm.collectorActive = true\n"
      . "  replace HIPAlignmentAlgorithm.collectorNJobs = $njobs\n"
      . "  replace HIPAlignmentAlgorithm.collectorPath = \"../\"\n";
replace("$dir/cfgfile",$repl);


# -----------------------------------------------------------------------------

$dir="$outdir/$jobname/$cmsswvers";
$iteration=0;

# Records the current iteration number in iteration.txt, which the generated
# parallel subjob scripts read to name their log files.  A failure here is
# fatal because the jobs would otherwise run with a stale iteration number.
my $write_iteration = sub {
  my $file = "$dir/iteration.txt";
  unlink $file;
  open(my $fh, '>', $file) or die "ERROR: cannot write $file: $!\n";
  print $fh "$iteration";
  close($fh) or die "ERROR: cannot write $file: $!\n";
};

$write_iteration->();

# enter loop ... ==============================================================

while (1) {

  # check if all are finished (nothing has been submitted before iteration 1)
  if ($iteration > 0) { $alldone = check_finished(); }

  # jobs still running: just wait and poll again
  if ($iteration != 0 and $alldone != 1) {
    # print "sleeping for $sleeptime seconds ...\n";
    sleep($sleeptime);
    next;
  }

  # run collector
  print "Run collector for iteration $iteration ...\n";
  $iret = run_collector();
  if ($iret != 0) { die "ERROR in collector!\n"; }

  # we are done
  if ($iteration >= $iterations) {
    # '&&' guards the rm -rf: it must never run outside the job directory
    system("cd $dir && rm -rf tmp src logs lib config bin");
    print "Finished!\n";
    last;
  }

  # submit jobs for next iteration
  $iteration++;
  $write_iteration->();

  print "New iteration $iteration --------------------------------------------------------------\n";
  print "Submit all jobs for iteration $iteration ...\n";
  $iret = submit_jobs();
  if ($iret != 0) { die "ERROR in submit jobs!\n"; }

}

}

exit 0;
0260 
0261 ###############################################################################
0262 
# Reports whether every parallel job has finished.
#
# Looks for the marker file "$dir/job<N>/DONE" of jobs 1..$njobs, prints one
# status digit per job (1 = marker present, 0 = missing) followed by a time
# stamp, and returns 1 when all markers exist, 0 otherwise.
# Reads the globals $dir and $njobs; sets $alldone, $ijob and $timestamp.
sub check_finished
{
  # the output of `date` keeps its newline, which terminates the status line
  $timestamp = `date +%H:%M:%S_%d/%m/%y`;
  $alldone = 1;

  print "Job status: ";

  for ($ijob = 1; $ijob <= $njobs; $ijob++) {
    if ( -e "$dir/job$ijob/DONE" ) {
      print "1 ";
      next;
    }
    print "0 ";
    $alldone = 0;
  }

  print "at $timestamp";

  return $alldone eq 1 ? 1 : 0;
}
0285 
0286 ###############################################################################
0287 
# Runs the collector subjob in "$dir/main".
#
# Returns 0 on success and 1 when the subjob could not be run or exited with
# a non-zero status.  The original always returned 0, so the caller's
# "ERROR in collector!" check could never trigger.
# Reads the globals $dir and $iteration.
sub run_collector
{
  # '&&' so ./subjob is never looked up in the wrong directory
  my $status = system("cd $dir/main && ./subjob");
  if ($status != 0) {
    print "ERROR: collector subjob in $dir/main failed (status $status)\n";
    return 1;
  }

  # After the first real iteration the events file of job 1 is copied next to
  # the collector output.  The copy stays best-effort, as in the original.
  if ($iteration > 0) {
    system("cp $dir/job1/HIPAlignmentEvents.root $dir/main");
  }

  return 0;
}
0300 
0301 ###############################################################################
0302 
# Starts all parallel jobs for the current iteration.
#
# For every job 1..$njobs the DONE marker is removed, the four IO*.root files
# are copied from "$dir/main" into the job directory, and the job's subjob
# script is then either started in the background ($farm eq "I") or submitted
# with bsub to the queue named by $farm.  A failed bsub is retried without
# limit, as in the original.  Always returns 0.
# Reads the globals $dir, $njobs and $farm.
sub submit_jobs
{
  # Seconds to wait before retrying a failed bsub.  The original message
  # announced 10s while the code slept 5s; both now use this one value.
  my $retry_delay = 5;

  for my $job (1 .. $njobs) {
    my $jobdir = "$dir/job$job";

    system("
      rm -f $jobdir/DONE
      cp $dir/main/IOTruePositions.root $dir/main/IOMisalignedPositions.root $dir/main/IOAlignedPositions.root $dir/main/IOIteration.root $jobdir
    ");

    if ( $farm eq "I" ) {
      print "Run job $job interactively ...\n";
      system("$jobdir/subjob &");
    }
    else {
      print "Submit job $job ... ";
      while (1) {
        # '&&' so bsub never reads a subjob relative to the wrong directory
        my $rc = system("cd $dir && bsub -o /tmp/junk -q $farm < job$job/subjob");
        last if $rc == 0;
        print "ERROR in submitting job: $rc  .. retrying in ${retry_delay}s ...\n";
        sleep($retry_delay);
      }
    }

    # short pause between two job starts
    sleep(1);
  }

  return 0;
}
0341 
0342 ###############################################################################
0343 
# Creates a job directory and writes its executable "subjob" shell script.
#
# Arguments:
#   $_[0]  directory to create
#   $_[1]  1 = single-job script that loops over all iterations itself,
#          anything else = parallel script that runs one iteration and then
#          touches the DONE marker
#
# The authentication file and, when configured, the sqlite and catalog files
# are copied from $workdir into the new directory.
# Reads the globals $workdir, $authfile, $sqlitefile, $condbcatalogfile,
# $scram, $iterations and $dbfiles.
sub make_subjob {

my ($jobdir, $single) = @_;

# list-form system(): paths go to the commands verbatim, no shell involved
system("mkdir", $jobdir);
system("cp", "${workdir}/$authfile", $jobdir);
# String comparison: the original used the numeric '!=', which is false for
# any non-numeric file name, so these files were never copied.
if ($sqlitefile ne "")       { system("cp", "${workdir}/$sqlitefile", $jobdir); }
if ($condbcatalogfile ne "") { system("cp", "${workdir}/$condbcatalogfile", $jobdir); }

my @script;

if ($single == 1) {

  # for single job
  system("touch", "$jobdir/timing.log");
  @script = (
    '#!/bin/zsh -f',
    '#BSUB -J "ALIGN"',
    '#BSUB -C 0',
    "cd $jobdir",
    "eval `$scram runtime -sh`",
    'rehash',
    'thisiter=1',
    "until ((thisiter > $iterations )); do",
    "rm -f ${dbfiles}",
    'echo "Iteration ${thisiter} ..."',
    'date | read starttime',
    'cmsRun cfgfile >& log.${thisiter}.log',
    'date | read endtime',
    'gzip log.${thisiter}.log',
    'mv alignment.log alignment.${thisiter}.log',
    'gzip alignment.${thisiter}.log',
    'echo "Iteration ${thisiter}: ${starttime} ... ${endtime}" >> timing.log',
    'let thisiter++',
    'done',
  );

}
else {

  # for parallel
  # (the trailing blanks on the first two lines are kept from the original)
  @script = (
    '#!/bin/zsh ',
    '#BSUB -J "ALIGN" ',
    '#BSUB -C 0',
    "cd $jobdir",
    "eval `$scram runtime -sh`",
    'rehash',
    'cat ../iteration.txt | read iter',
    "rm -f $dbfiles",
    'cmsRun cfgfile >& log.$iter.log',
    'gzip log.$iter.log',
    'mv alignment.log alignment.$iter.log',
    'gzip alignment.$iter.log',
    'touch DONE',
  );

}

my $scriptfile = "$jobdir/subjob";
open(my $fh, '>', $scriptfile) or die "ERROR: cannot write $scriptfile: $!\n";
print $fh map { "$_\n" } @script;
close($fh) or die "ERROR: cannot write $scriptfile: $!\n";
system("chmod", "u+x", $scriptfile);

}
0407 
0408 ###############################################################################
0409 
# Replaces every line of a file that contains the marker REPLACEME.
#
# Arguments:
#   $_[0]  path of the file to rewrite in place
#   $_[1]  text written instead of each line containing "REPLACEME"
#
# Dies when the file cannot be read or rewritten.  Returns 0.
sub replace {

my ($infile, $repl) = @_;

# Each marker line is replaced as a whole, including its newline.  Non-empty
# replacement text without a final newline would be glued to the following
# line, so one is appended.
$repl .= "\n" if $repl ne "" and $repl !~ /\n\z/;

open(my $in, '<', $infile) or die "cannot open $infile: $!";
my @lines = <$in>;
close($in);

# The scratch file lives next to the target instead of being a fixed "tmp" in
# the current directory: that name fails when "tmp" is a directory or the
# current directory is not writable.
my $tmpfile = "$infile.tmp.$$";
open(my $out, '>', $tmpfile) or die "cannot write $tmpfile: $!";

foreach my $line (@lines) {
  print {$out} ($line =~ /REPLACEME/ ? $repl : $line);
}

close($out) or die "cannot write $tmpfile: $!";
rename($tmpfile, $infile) or die "cannot replace $infile: $!";

return 0;

}