Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:18:46

0001 from __future__ import print_function
0002 from HTMLParser import HTMLParser
0003 from urllib2 import urlopen
0004 import cPickle as pickle
0005 import sys
0006 import re
# Pattern used by Page1Parser.check_for_whole_start_tag to find where a start
# tag ends.  It matches "<" plus the tag name, then zero or more attributes
# (whitespace, an attribute name and an optional "= value"), then trailing
# whitespace.  It stops just before the closing ">" or "/>".
# A value may be single-quoted, double-quoted, bare, or of the form
# this.src='...' (the alternative marked "hack" below).
# NOTE(review): the "." in "this.src" is an unescaped regex dot, so it matches
# any single character, not only a literal ".".
locatestarttagend = re.compile(r"""
        <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
        (?:\s+                             # whitespace before attribute name
        (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
        (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
        |\"[^\"]*\"                # LIT-enclosed value
        |this.src='[^']*'          # hack
        |[^'\">\s]+                # bare value
        )
        )?
        )
        )*
        \s*                                # trailing whitespace
        """, re.VERBOSE)

# Matches a tag name; Page1Parser.parse_starttag applies it just after "<".
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Matches one attribute inside a start tag.  Group 1 is the attribute name,
# group 2 the optional "= value" part and group 3 the value itself, which may
# still carry its surrounding quotes (parse_starttag strips them).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
0027 
class Page1Parser(HTMLParser):
    """HTML table/link scraper for run, rate, lumi and prescale pages.

    Feeding a page (see ``_Parse``) fills ``self.table`` with one list of
    cell strings per ``<tr>`` and ``self.hyperlinks`` with the value of the
    first attribute of every ``<a>`` tag.  The ``Parse*`` methods then
    interpret whatever page was fed last and store their results on the
    instance.

    NOTE(review): cells are addressed by position, and an empty ``<td>``
    produces no cell at all, so every column index used below depends on
    the exact layout of the pages -- confirm against the live pages.
    """

    # Prefix put in front of relative servlet links found on the run page.
    _SERVLET_URL = "http://cmswbm/cmsdb/servlet/"

    def __init__(self):
        """Create a parser with empty tables and no page URLs."""
        HTMLParser.__init__(self)

        # State used while a page is being fed.
        self.InRow = 0
        self.InEntry = 0
        self.table = []
        self.tmpRow = []
        self.hyperlinks = []

        # Results filled in by the Parse*/Compute* methods.
        self.RunNumber = 0
        self.TriggerRates = []
        self.Nevts = []
        self.LumiByLS = []
        self.FirstLS = -1
        self.AvLumi = []
        self.PrescaleColumn = []
        self.L1PrescaleTable = []
        self.HLTPrescaleTable = []
        self.TotalPrescaleTable = []
        self.ColumnLumi = []
        self.L1Prescales = []
        self.RunPage = ''
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.TrigModePage = ''
        self.SeedMap = []

    def parse_starttag(self, i):
        """Parse the start tag beginning at ``self.rawdata[i]``.

        Override of the base-class method that uses this module's patterns
        and skips attribute parsing for ``<img>`` tags.

        Returns the index just past the tag, or a negative value when the
        tag is not complete yet.
        """
        # Write the base class's own (name-mangled) attribute.  Spelling it
        # ``self.__starttag_text`` here would be mangled to
        # ``_Page1Parser__starttag_text`` and get_starttag_text() would
        # never see it.
        self._HTMLParser__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self._HTMLParser__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and endpos into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()

        # <img> tags are skipped entirely: no attributes, no handler call.
        if tag == 'img':
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the index just past the start tag that begins at ``i``.

        Returns -1 when the buffered data ends before the tag is complete.
        A tag followed by an unexpected character is reported through
        ``self.error``.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            next_char = rawdata[j:j+1]
            if next_char == ">":
                return j + 1
            if next_char == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                # A "/" without its ">" yet: buffer boundary, wait for more.
                return -1
            if next_char == "":
                # end of input
                return -1
            if next_char in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Download ``url`` and rebuild ``self.table`` / ``self.hyperlinks``.

        Download and parse errors are printed, not raised; whatever was
        parsed before the error stays available.
        """
        # Start from a clean slate so that unparsed input or a half-built
        # row left over from a previous page cannot leak into this one.
        self.reset()
        self.InRow = 0
        self.InEntry = 0
        self.tmpRow = []
        self.table = []
        self.hyperlinks = []
        req = urlopen(url)
        try:
            self.feed(req.read())
            # Flush buffered input so a tag cut off at the end of the page
            # is reported instead of being dropped silently.
            self.close()
        except Exception as inst:
            print(inst)
        finally:
            req.close()

    def handle_starttag(self, tag, attrs):
        """Record links and track entry into table rows and cells."""
        if tag == 'a':
            if attrs:
                # The first attribute's value is taken as the link target;
                # it is None for an attribute written without a value.
                self.hyperlinks.append(attrs[0][1])
        elif tag == 'tr':
            self.InRow = 1
        elif tag == 'td':
            self.InEntry = 1

    def handle_endtag(self, tag):
        """Close the current row or cell."""
        if tag == 'tr':
            if self.InRow == 1:
                self.InRow = 0
                self.table.append(self.tmpRow)
                self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags."""
        pass

    def handle_data(self, data):
        """Append text found inside a ``<td>`` to the row being built."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):
        """Return the link to the run page of the first listed run.

        Uses the first non-empty row of the table.  Sets ``self.RunNumber``
        and ``self.RunPage`` on success.  Returns '' when there is no usable
        row, when the row's trigger mode (column 3) does not contain
        'l1_hlt_collisions', or when no hyperlink mentions the run.
        """
        # Skip empty rows, not exactly sure why they show up
        MostRecent = None
        for line in self.table:
            if line:
                MostRecent = line
                break
        if MostRecent is None or len(MostRecent) < 4:
            return ''
        if 'l1_hlt_collisions' not in MostRecent[3]:
            return ''
        self.RunNumber = MostRecent[0]
        marker = 'RUN=' + self.RunNumber
        for link in self.hyperlinks:
            # link is None for an <a> whose first attribute has no value
            if link and marker in link:
                self.RunPage = link
                return link
        return ''

    def _ToServletUrl(self, link):
        """Prefix ``link`` with the servlet URL unless it already has it."""
        if link.startswith(self._SERVLET_URL):
            return link
        return self._SERVLET_URL + link

    def ParseRunPage(self):
        """Pick the rate, lumi, L1 and trigger-mode page URLs from the links.

        Returns ``[RatePage, LumiPage, L1Page, TrigModePage]``; a page that
        was not found keeps its previous value ('' initially).
        """
        for entry in self.hyperlinks:
            if not entry:
                continue
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = self._ToServletUrl(entry)
            if 'TriggerMode' in entry:
                self.TrigModePage = self._ToServletUrl(entry)
        return [self.RatePage, self.LumiPage, self.L1Page, self.TrigModePage]

    def ParseRunSummaryPage(self):
        """Fill ``self.Nevts`` and ``self.TriggerRates`` from the HLT rows.

        Appends ``[name, n3, n4, n5, seed]`` to ``Nevts`` and
        ``[name, rate, n3/n4, seed]`` to ``TriggerRates``, where n3..n5 are
        columns 3-5, rate is column 6 and seed is column 9.
        """
        for line in self.table:
            # Column 9 (the L1 seed name) is read below, so shorter rows
            # cannot be complete trigger rows.
            if len(line) < 10:
                continue
            if not line[1].startswith('HLT_'):
                continue
            # Format is "HLT_... (####)"; keep only the part before the
            # first space (the whole cell when there is no space).
            TriggerName = line[1].split(' ', 1)[0]
            # Need to remove the ","s, since float()/int() can't parse them
            TriggerRate = float(line[6].replace(',', ''))
            counts = [int(line[col].replace(',', '')) for col in (3, 4, 5)]
            L1Seed = line[9]
            self.Nevts.append([TriggerName] + counts + [L1Seed])
            PS = 0
            if counts[1] > 0:
                PS = float(counts[0]) / float(counts[1])
            self.TriggerRates.append([TriggerName, TriggerRate, PS, L1Seed])

    def ParseLumiPage(self):
        """Fill the per-lumi-section lists and compute ``self.AvLumi``.

        For every data row, column 2 is appended to ``PrescaleColumn`` and
        column 4 to ``LumiByLS``.  The first row whose column 6 is positive
        sets ``FirstLS`` (from column 0) and adds a ``fromLS`` parameter to
        ``RatePage``.

        Raises ZeroDivisionError when there is nothing to average.
        """
        for line in self.table[1:]:
            # Column 6 is read below, so a row needs at least 7 cells.
            if len(line) < 7 or len(line) > 12:
                continue
            self.PrescaleColumn.append(int(line[2]))
            self.LumiByLS.append(float(line[4]))  # Inst lumi is in position 4
            # The first lumi section with a positive value in column 6 is
            # recorded.  NOTE(review): the original comment placed live lumi
            # in position 5 while the code reads column 6 -- confirm.
            if self.FirstLS == -1 and float(line[6]) > 0:
                self.FirstLS = int(line[0])
                self.RatePage = self.RatePage.replace(
                    'HLTSummary?', 'HLTSummary?fromLS='+line[0]+'&toLS=&')
        # NOTE(review): FirstLS is a value read from column 0 but is used
        # here as a list index; when it is still -1 only the last entry is
        # averaged.  Behaviour kept as it was -- confirm the intent.
        selected = self.LumiByLS[self.FirstLS:]
        try:
            self.AvLumi = sum(selected)/len(selected)
        except ZeroDivisionError:
            print("Cannot calculate average lumi -- something is wrong!")
            print(self.table[:10])
            raise

    def ParseL1Page(self):
        """Print every row of the table; nothing is stored yet."""
        for line in self.table:
            print(line)

    def ParseTrigModePage(self):
        """Fill the column-lumi list, the prescale tables and the seed map.

        After the table has been read, the average L1 prescale of every L1
        row over ``PrescaleColumn[FirstLS:]`` is appended to
        ``self.L1Prescales`` as ``[name, average]``.  Nothing is appended
        when that selection is empty.
        """
        ColIndex = 0  ## This is the index of the next column that we look for
        for line in self.table:
            if len(line) < 2:
                continue
            ## get the column usage
            if line[0].isdigit() and len(line) >= 3 and int(line[0]) == ColIndex:
                ColIndex += 1
                StrLumiSplit = line[2].split('E')
                if len(StrLumiSplit) != 2:
                    # Not of the form <mantissa>E<exponent>: a negative
                    # index can never match, so column search stops here.
                    ColIndex = -99999999
                else:
                    lumi = float(StrLumiSplit[0])
                    lumi *= pow(10, int(StrLumiSplit[1])-30)
                    self.ColumnLumi.append(round(lumi, 1))

            ## Get the actual prescale tables
            name = line[1]
            isL1 = name.startswith('L1_')
            if not (isL1 or name.startswith('HLT_')):
                continue
            tmp = [name]
            seedtmp = [name]
            for entry in line[2:]:
                if entry.isdigit():
                    tmp.append(entry)
                if entry.startswith('L1_'):
                    seedtmp.append(entry)

            ## Truncate the list (TT seeds look like prescale entries)
            del tmp[len(self.ColumnLumi)+1:]

            if isL1:
                self.L1PrescaleTable.append(tmp)
            else:
                self.HLTPrescaleTable.append(tmp)
                if len(seedtmp) == 2:
                    self.SeedMap.append(seedtmp)

        # Average the L1 prescales once, after the whole table is read.
        # Doing it per table row would append every L1 row again and again.
        columns = self.PrescaleColumn[self.FirstLS:]
        if not columns:
            return
        for L1Row in self.L1PrescaleTable:
            total = 0.0
            for prescaleThisLS in columns:
                # Row layout is [name, ps_col0, ps_col1, ...]
                total += float(L1Row[prescaleThisLS+1])
            self.L1Prescales.append([L1Row[0], total/len(columns)])

    def ComputeTotalPrescales(self):
        """Append ``[hlt, l1, total_ps...]`` rows to ``TotalPrescaleTable``.

        The total prescale is the HLT prescale times the prescale of the
        path's L1 seed, column by column.  Error rows use -3 when no seed is
        known for the path and -4 when the seed has no prescale row.  Seeds
        containing ' OR ' are not parsed; their L1 prescale is taken as 1.
        Does nothing unless both prescale tables and the seed map are
        filled.
        """
        if not (self.L1PrescaleTable and self.HLTPrescaleTable and self.SeedMap):
            return

        # First occurrence wins, as in a linear search that stops at a hit.
        seedOf = {}
        for hlt, l1 in self.SeedMap:
            seedOf.setdefault(hlt, l1)
        l1RowOf = {}
        for l1Row in self.L1PrescaleTable:
            l1RowOf.setdefault(l1Row[0], l1Row)

        for hltLine in self.HLTPrescaleTable:
            hltName = hltLine[0]
            nColumns = len(hltLine)-1
            l1Name = seedOf.get(hltName, "")

            if l1Name == "":
                ## couldn't figure out the L1 seed (error -3)
                self.TotalPrescaleTable.append([hltName, l1Name]+[-3]*nColumns)
                continue

            if ' OR ' in l1Name:
                ## couldn't parse the ORs !! FOR NOW WE JUST SET THE L1 PRESCALE TO 1
                l1Line = [l1Name]+[1]*nColumns
            else:
                l1Line = l1RowOf.get(l1Name)

            if not l1Line:
                ## we found the L1 name, but there was no prescale info for it (error -4)
                self.TotalPrescaleTable.append([hltName, l1Name]+[-4]*nColumns)
                continue

            totalLine = [hltName, l1Name]
            for hltPS, l1PS in zip(hltLine[1:], l1Line[1:]):
                try:
                    totalLine.append(int(hltPS)*int(l1PS))
                except (TypeError, ValueError):
                    # Show the offending pair before passing the error on.
                    print(hltPS)
                    print(l1PS)
                    raise
            self.TotalPrescaleTable.append(totalLine)

    def Save(self, fileName):
        """Pickle this parser, with everything it has collected, to a file."""
        with open(fileName, 'wb') as out:
            pickle.dump(self, out)

    def Load(self, fileName):
        """Replace this parser's state with the one pickled in ``fileName``.

        SECURITY: unpickling can execute arbitrary code; only load files
        that were written by ``Save`` and come from a trusted place.

        Raises TypeError when the file does not hold a Page1Parser.
        """
        with open(fileName, 'rb') as src:
            loaded = pickle.load(src)
        if not isinstance(loaded, Page1Parser):
            raise TypeError("%s does not contain a Page1Parser" % (fileName,))
        # Assigning to ``self`` would only rebind the local name, so the
        # loaded attributes are copied onto this instance instead.
        self.__dict__.update(loaded.__dict__)

    def ComputePU(self, nBunches):
        """Return ``self.LumiByLS`` scaled by ``71e-27/11.2e3/nBunches``.

        NOTE(review): presumably 71e-27 is a cross-section and 11.2e3 a
        frequency, giving pile-up per bunch -- confirm the units.

        Raises ZeroDivisionError when ``nBunches`` is 0.
        """
        ScaleFactor = 71e-27/11.2e3/nBunches
        return [lumi*ScaleFactor for lumi in self.LumiByLS]
0359