Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:07:43

0001 #!/usr/bin/env perl 
0002 
0003 ###############################################################################
0004 #                                                                             #
0005 #   Script to check the status of DQM Harvesting jobs                         #
0006 #                                                                             #
0007 #   It basically checks whether the root tree is present for all jobs         #
0008 #   If not, it resubmits the failed jobs on request                           #
0009 #                                                                             #
0010 #  Usage: check_harvesting.pl [crab_status_output]                            #
0011 #                                                                             #
0012 #  NOTE: it needs to be run from the directory from which you submitted       #
0013 #        multicrab jobs                                                       #
0014 #                                                                             #
0015 #  Optional argument:                                                         #
0016 #   crab_status_output: file containing output of "multicrab -status" command #
0017 #                       if not given, the script will get it itself           #
0018 #                                                                             #
0019 #  Author:        Vuko Brigljevic, Rudjer Boskovic Institute                  #
0020 #  First Version: January 22, 2010                                            #
0021 #                                                                             #
0022 ###############################################################################
0023 
0024 
0025 
0026 
0027 use strict;
0028 
0029 
0030 
0031 
# Get the crab status: use the file named by the first command line
# argument when it exists, otherwise produce it with "multicrab -status".

my $crabstatus = "";

if (@ARGV > 0) {
    if (-e $ARGV[0]) {
        $crabstatus = $ARGV[0];
    }
    else {
        # The fall-back to running multicrab is kept, but no longer silent
        warn "Status file '$ARGV[0]' not found, running multicrab instead\n";
    }
}

if ($crabstatus eq "") {
    $crabstatus = "multicrabstatus.out";
    print "Getting multicrab jobs status ... \n";

    # The redirection needs the shell; the command line is built only from
    # the constant file name above, so nothing external is interpolated.
    my $rc = system "multicrab -status > $crabstatus ";
    if ($rc != 0) {
        # Keep going (best effort), but make clear that the status file
        # may be empty or incomplete.
        my $reason = ($? == -1) ? "could not be started: $!"
                                : "exited with status " . ($? >> 8);
        warn "multicrab -status $reason; $crabstatus may be incomplete\n";
    }
    print "Done \n";
}
0049 
0050 
0051 
0052 
# Read the multicrab cfg file to extract the list of job ids and, for each
# USER.user_remote_dir setting, check whether the corresponding castor
# output directory contains a root file.
#

my $crabcfg = "multicrab.cfg";

if (! -r $crabcfg ) {
    print "No multicrab.cfg in this directory, quitting... \n";
    exit;
}

# Three-argument open on a lexical handle, with the failure reported
open(my $cfg_fh, '<', $crabcfg)
    or die "Cannot open $crabcfg: $!\n";
my @lines = <$cfg_fh>;
close($cfg_fh);

my $castorbase="/castor/cern.ch";

# Name of the section currently being read (kept at file scope: the later
# parts of the script reuse this variable as their loop variable)
my $job;

my $nsuccess=0;
my $njobs=0;
my $nfailed=0;

my @failedjobs=();

foreach my $line (@lines) {

    # A "[name]" header starts a new section; every section other than
    # [MULTICRAB] is counted as a job.
    # NOTE(review): a [COMMON] section, if present, would also be counted
    # as a job here -- confirm against the cfg layout in use.
    if ( $line =~ /\[(.*?)\]/ ) {
        my $section = $1;
        if ( $section ne "MULTICRAB" ) {
            $job = $section;
            $njobs++;
        }
    }

    next unless $line =~ /USER\.user_remote_dir/;

    # Take everything after the first "=" as the remote directory and strip
    # the line ending and trailing blanks (chop() would eat a real character
    # on a last line without newline).
    my (undef, $remote_dir) = split(/\s*=\s*/, $line, 2);
    $remote_dir = "" unless defined $remote_dir;
    $remote_dir =~ s/\s+\z//;

    if ( $remote_dir eq "" || !defined $job ) {
        warn "Skipping unusable user_remote_dir line in $crabcfg: $line";
        next;
    }

    # multicrab adds the job name to the directory tree of the output
    my $castordir = $castorbase . $remote_dir . "/" . $job;

    # Check if a root file is present in the output directory.  rfdir is
    # run without a shell (list form), so the directory name taken from the
    # cfg file is passed as a single, uninterpreted argument and no
    # temporary file is needed.
    my $nrootfiles = 0;
    if ( open(my $list_fh, '-|', 'rfdir', $castordir) ) {
        while ( my $entry = <$list_fh> ) {
            # Count ".root" entries only; a bare /root/ would also match
            # e.g. a file owner called root
            $nrootfiles++ if $entry =~ /\.root\b/;
        }
        # A non-zero rfdir exit (e.g. missing directory) simply leaves the
        # count at zero, so the result of close() is deliberately ignored
        close($list_fh);
    }
    else {
        warn "Cannot run rfdir on $castordir: $!\n";
    }

    if ($nrootfiles > 0) {
        $nsuccess++;
    }
    else {
        push @failedjobs, $job;
        $nfailed++;
    }
}
0129 
0130 
0131 
# Summary of the output check
print "Number of jobs                               : $njobs \n",
      "Number of successful jobs (root file present : $nsuccess \n",
      "Number of failed or still running jobs       : $nfailed \n";

# CRAB status of every job without root output, keyed by job name
my %jobstatus;

##################################################
# Look up and print the status of each job without root output

for my $name (@failedjobs) {
    $jobstatus{$name} = get_multicrab_job_status($name, $crabstatus);
    print " $name : $jobstatus{$name} \n";
}

# Nothing failed: nothing left to do
exit if $nfailed == 0;
0152 
# Resubmit failed jobs if desired

print "Would you like to resubmit the failed jobs? [y/n] \n";

my $reply = <STDIN>;
# End-of-file on STDIN (e.g. a non-interactive run) is taken as "no"
$reply = "" unless defined $reply;
# Strip the line ending and trailing blanks; unlike chop() this never
# removes a real answer character when the input has no newline
$reply =~ s/\s+\z//;

print "reply: $reply \n";

if ( lc($reply) eq "y" ) {

    print "Resubmitting jobs... \n";

    foreach my $name (@failedjobs) {

        my $status = $jobstatus{$name};
        $status = "" unless defined $status;

        my $retrieve = 0;
        my $resubmit = 0;

        if (   $status eq "Scheduled"
            || $status eq "Pending"
            || $status eq "Running" ) {
            print "$name running: leave it in peace... \n";
        }
        elsif ( $status eq "Aborted" || $status eq "Retrieved" ) {
            $resubmit = 1;
        }
        elsif ( $status eq "Done" ) {
            # Output not fetched yet: get it first, then resubmit
            $retrieve = 1;
            $resubmit = 1;
        }
        # Any other status: neither retrieve nor resubmit

        # crab is called in list form so that the job name is passed as one
        # argument and never interpreted by a shell.
        if ($retrieve) {
            print "retrieving... \n";
            system('crab', '-c', $name, '-getoutput', 'all') == 0
                or warn "crab -getoutput failed for $name\n";
        }
        if ($resubmit) {
            print "resubmitting... \n";
            system('crab', '-c', $name, '-resubmit', 'all') == 0
                or warn "crab -resubmit failed for $name\n";
        }

    }

}
0199 
0200 
0201 
0202 
0203 
0204 ##################################################################################
0205 
sub get_multicrab_job_status {

    #-------------------------------------------------------------------#
    # Aim: Get the CRAB job status of a multicrab job                   #
    #      by parsing the "multicrab -status" output                    #
    #                                                                   #
    #  two input arguments                                              #
    #      $job         : CRAB job name                                 #
    #      $crabstatus  : file with output of "multicrab -status"       #
    #                                                                   #
    #  How the file is read:                                            #
    #      1. find the first line containing the job name               #
    #      2. after it, find the first line containing both "ID" and    #
    #         "STATUS" (the table header)                               #
    #      3. skip one line, then take the second whitespace-separated  #
    #         word of the following line as the status                  #
    #                                                                   #
    #  Returns the status word, "undefined" when it cannot be found     #
    #  (or the file cannot be opened), and "" when called with a wrong  #
    #  number of arguments.                                             #
    #-------------------------------------------------------------------#

    if ( scalar(@_) != 2 ) {
        print "get_multicrab_job_status() called with wrong number of arguments: @_ \n";
        return "";
    }

    my ($job, $crabstatus) = @_;

    my $jobstatus = "undefined";

    # Three-argument open on a lexical handle; an unreadable file is
    # reported instead of silently yielding "undefined"
    my $fh;
    unless ( open($fh, '<', $crabstatus) ) {
        warn "Cannot open $crabstatus: $!\n";
        return $jobstatus;
    }

    my $jobfound   = 0;
    my $rows_to_go = -1;    # negative until the table header has been seen

    while ( my $row = <$fh> ) {

        # Step 1: look for the job name.  \Q..\E makes the name match
        # literally even if it contains regex metacharacters.
        if ( !$jobfound ) {
            $jobfound = 1 if $row =~ /\Q$job\E/;
            next;
        }

        # Step 2: look for the table header
        if ( $rows_to_go < 0 ) {
            $rows_to_go = 2 if $row =~ /ID/ && $row =~ /STATUS/;
            next;
        }

        # Step 3: the status sits on the second line below the header
        next if --$rows_to_go > 0;

        my @words = split(" ", $row);
        # Keep "undefined" when the row has no second word
        $jobstatus = $words[1] if defined $words[1];
        last;
    }

    close($fh);

    return $jobstatus;

}