Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:59

#!/usr/bin/env perl

## Tool to dig out information about the event size in PAT
##
## Please run this giving as argument the root file, and redirecting the output on an HTML file
## Notes:
##    - you must have a correctly initialized environment, and FWLite auto-loading with ROOT
##    - you must put in the same folder of the html also these three files:
##            http://cern.ch/gpetrucc/patsize.css
##            http://cern.ch/gpetrucc/blue-dot.gif
##            http://cern.ch/gpetrucc/red-dot.gif
##      otherwise you will get an unreadable output file
##    - for small files, compression does not work (as you will read from the output html)
##    - per-event provenance is just the GetZipBytes of EventMetaData, EventHistory.
##
## Overview of what the script does:
##    1. writes a temporary ROOT macro that calls Events->Print() and parses the branch listing
##       into %survey (one entry per product, with the on-disk size of every data member);
##    2. writes a second macro that counts the items stored in every array-like product and
##       reads the compressed size of the provenance branches;
##    3. prints an HTML report (summary table + one detail table per product) on STDOUT.

use strict;
use warnings;
use Data::Dumper;
use File::Temp qw/tempfile/;

# Quote a string so that the shell passes it to the command as exactly one word.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Escape the characters that are special in HTML text and attribute values.
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Return $numerator/$denominator, or 0 when the denominator is zero, so that
# an empty product cannot abort the report with a division by zero.
sub ratio {
    my ($numerator, $denominator) = @_;
    return $denominator ? $numerator / $denominator : 0;
}

# Run root.exe in batch mode on $rootfile, execute the macro $macro and return
# everything ROOT printed on its standard output (standard error is discarded).
sub run_root {
    my ($rootfile, $macro) = @_;
    my $command = sprintf('root.exe -b -l %s -q %s 2> /dev/null',
                          shell_quote($rootfile), shell_quote($macro));
    my $output = qx{$command};
    die "Could not run root.exe: $!\n" unless defined $output;
    return $output;
}

my $filename = shift(@ARGV);

if ((!$filename) || ($filename eq "-h") || ($filename eq "--help")) {
    print STDERR "Usage: diskSize.pl filename.root > filename.html\n";
    exit(1);
}

# The macro file name (minus the extension) is reused as the name of the
# function defined inside it; the file is removed automatically at exit.
my ($MACRO, $macrofile) = tempfile( "macroXXXXX", SUFFIX=>'.c' , UNLINK => 1 );
my ($macroname) = ($macrofile =~ m/(macro.....)\.c/);
die "Unexpected temporary macro name '$macrofile'\n" unless defined $macroname;

print STDERR "Getting list of branches ...\n";

print {$MACRO} "void $macroname(){\nEvents->Print();\n}\n";
close($MACRO) or die "Cannot write $macrofile: $!\n";
my $IN = run_root($filename, $macrofile);

# %survey: product name => { type, label, instance, process,
#                            tot   => summed size of all data members (kb),
#                            num   => total number of items over all events,
#                            items => { data member => { siz => kb, ok => compressed? } } }
my %survey = ();
my $obj = undef; my $item = undef;   # data member / product currently being parsed
my $events = 0;
my %arrays = ();                     # products whose listing shows an array-like leaf

foreach my $line (split(/\n/, $IN)) {
    # A new top-level branch header ends the product parsed so far.
    if ($line =~ m/\*Branch\s+:((\w+)_(\w+)_(\w*)_(\w+))\./) {
        $item = undef;
    }
    # The ".obj" sub-branch marks the start of a product: register it.
    if ($line =~ m/\*Br\s+\d+\s+:((\w+)_(\w+)_(\w*)_(\w+))\.obj\s/) {
        $survey{$1} = { 'type'=>$2, 'label'=>$3, 'instance'=>$4, 'process'=>$5, 'tot'=>0, 'num'=> 0, 'items'=>{},  };
        $obj = undef; $item = $1;
    }
    next unless defined $item;
    # Any other sub-branch of a known product is one of its data members.
    if ($line =~ m/\*Br\s+\d+\s+:((\w+)_(\w+)_(\w*)_(\w+))\.(\S+) :/) {
        $obj = $6; $item = $1;
        die "Product $1 not found" unless defined($survey{$1});
    }
    if (($line =~ m/\|\s+\w+\[\S+/) && ($survey{$item}->{'type'} ne 'edmTriggerResults')) { $arrays{$item} = 1;  }
    next unless defined $obj;

    # Size line of the current data member: use the file size when the listing
    # reports one, otherwise fall back to the total size and flag the member
    # as not compressed ('ok' => 0).
    my ($entries, $kb, $compressed);
    if ($line =~ m/Entries\s+:\s*(\d+)\s+:\s+Total\s+Size=\s+(\d+)\s+bytes\s+File\s+Size\s+=\s+(\d+)/) {
        ($entries, $kb, $compressed) = ($1, $3/1024.0, 1);
    } elsif ($line =~ m/Entries\s+:\s*(\d+)\s+:\s+Total\s+Size=\s+(\d+)\s+bytes\s+One basket in memory/) {
        ($entries, $kb, $compressed) = ($1, $2/1024.0, 0);
    } else {
        next;
    }
    die "Mismatching number of events ($events, $entries) " unless (($events == 0) || ($events == $entries));
    $events = $entries;
    $survey{$item}->{'items'}->{$obj} = { 'siz'=>$kb, 'ok'=>$compressed };
    $survey{$item}->{'tot'} += $kb;
}

my ($grandtotal,$provenance) = (0,0);
foreach my $name (keys(%survey)) { $grandtotal += $survey{$name}->{'tot'}; }

print STDERR "Events: $events\n";
# Every per-event figure below divides by $events: stop with a clear message
# instead of an "Illegal division by zero" further down.
die "No events found in '$filename' (is ROOT set up and is the file readable?)\n" if $events == 0;

# Second macro: for each array-like product print "SIZE <product> <total items>",
# then print "PROVENANCE <compressed bytes>".
open($MACRO, '>', $macrofile) or die "Cannot rewrite $macrofile: $!\n";
print {$MACRO} "void $macroname() {\n";
foreach my $coll (sort(keys(%arrays))) {
    print {$MACRO} "   Events->Draw(\"$coll.\@obj.size()>>htmp\");\n";
    print {$MACRO} "   if ( Events->GetSelectedRows()>0) {\n";
    print {$MACRO} "      std::cout << \"SIZE\t$coll\\t\" << (htmp->GetMean()*htmp->GetEntries()) << std::endl;\n";
    print {$MACRO} "      htmp->Delete();\n";
    print {$MACRO} "   } else {\n";
    print {$MACRO} "     Events->Draw(\"$coll.obj.\@obj.size()>>htmp\");\n";
    print {$MACRO} "     if ( Events->GetSelectedRows()>0) std::cout << \"SIZE\t$coll\\t\" << (htmp->GetMean()*htmp->GetEntries()) << std::endl;\n";
    print {$MACRO} "     else std::cout << \"SIZE\t$coll\\t\" << 0 << std::endl;\n";
    print {$MACRO} "   }\n";
}
print {$MACRO} "   std::cout << \"PROVENANCE\t\" << (EventMetaData->GetZipBytes()+EventHistory->GetZipBytes()) << std::endl;\n";
print {$MACRO} "}\n";
close($MACRO) or die "Cannot write $macrofile: $!\n";

print STDERR "Getting items in the collections (it can take a while) ...\n";

my $root = run_root($filename, $macrofile);
my @lines = split(/\n/, $root);
foreach my $line (@lines) {
    next unless $line =~ m/^SIZE\s+(\w+)\s+(\S+)/;
    my ($name, $total) = ($1, $2);
    next unless exists $survey{$name};   # never create entries for unknown products
    $survey{$name}->{'num'} = $total;
}
# Products without a (non-zero) item count are taken to hold one item per event.
foreach my $name (keys(%survey)) { $survey{$name}->{'num'} = $events if $survey{$name}->{'num'} == 0; }

foreach my $line (@lines) {
    $provenance = $1/1024.0 if $line =~ m/^PROVENANCE\s+(\S+)/;
}

my $totalavg = sprintf("%.1f",$grandtotal/$events);
my $title    = html_escape($filename);
print <<"_END_";
<html>
<head>
    <title>$title : PAT Size</title>
    <link rel="stylesheet" type="text/css" href="patsize.css" />
</head>
<body>
<h1>Summary ($totalavg kb/event)</h1>
<table>
_END_
print "<tr class='header'><th>".join("</th><th>", "Collection", "items/event", "kb/event", "kb/item", "plot", "%") . "</th></tr>\n";
# One row per product, largest first.
foreach my $name (sort({$survey{$b}->{'tot'} <=> $survey{$a}->{'tot'} }
                       keys(%survey))) {
    my $entry    = $survey{$name};
    my $fraction = ratio($entry->{'tot'}, $grandtotal);
    print "<tr><th><a href='#$name'>$name</a></th>";
    foreach my $val ($entry->{'num'}/$events, $entry->{'tot'}/$events, ratio($entry->{'tot'}, $entry->{'num'})) {
        printf("<td>%.2f</td>", $val);
    }
    printf("<td class=\"img\"><img src='blue-dot.gif' width='%d' height='%d' /></td>",
           $fraction * 200, 10 );
    printf("<td>%.1f%%</td>", $fraction * 100.0);
    print "</tr>\n";
}
# provenance
my $provenance_fraction = ratio($provenance, $grandtotal);
print "<tr><th>EventMetaData + EventHistory</th>";
foreach my $val (1, $provenance/$events, $provenance/$events) {
    printf("<td>%.2f</td>", $val);
}
printf("<td class=\"img\"><img src='red-dot.gif' width='%d' height='%d' /></td>", $provenance_fraction * 200, 10 );
printf("<td>%.1f%%</td>", $provenance_fraction * 100.0);
print "</tr>\n";

print <<"_END_";
</table>
Note: size percentages are relative to the total size of data only, without the per-event provenance (EventMetaData + EventHistory).
<h1>Detail</h1>
_END_
# One table per product (alphabetical order), data members sorted by size.
foreach my $name (sort(keys(%survey))) {
    my $entry   = $survey{$name};
    my $members = $entry->{'items'};
    my $avg     = sprintf("%.1f",$entry->{'num'}/$events);
    print <<"_END_";
<h2><a name="$name" id="$name">$name</a> ($avg items/event)</h2>
<table>
_END_
    print "<tr class='header'><th>".join("</th><th>", "Datamember", "kb/event", "kb/item", "plot", "%", "compressed") . "</th></tr>\n";
    foreach my $it (sort({$members->{$b}->{'siz'} <=> $members->{$a}->{'siz'}}
                         keys(%{$members}))) {
        my $IT       = $members->{$it};
        my $fraction = ratio($IT->{'siz'}, $entry->{'tot'});
        print "<tr><th>$it</th>";
        foreach my $val ($IT->{'siz'}/$events, ratio($IT->{'siz'}, $entry->{'num'})) {
            printf("<td>%.3f</td>", $val);
        }
        # Red dot / "no" marks members whose size line reported no file size.
        printf("<td class=\"img\"><img src='%s-dot.gif' width='%d' height='%d' /></td>",
               ($IT->{'ok'} ? 'blue' : 'red'), $fraction * 200, 10 );
        printf("<td>%.1f%%</td>", $fraction * 100.0);
        print "<td>". ($IT->{'ok'} ? 'ok' : 'no') . "</td>";
        print "</tr>\n";
    }
    print <<"_END_";
</table>
_END_
}
print <<"_END_";
</body></html>
_END_
# Buffered write errors (e.g. a full disk behind the redirection) surface here.
close(STDOUT) or die "Error while writing the report: $!\n";