File indexing completed on 2024-04-06 12:18:46
0001 from __future__ import print_function
0002 from HTMLParser import HTMLParser
0003 from urllib2 import urlopen
0004 import cPickle as pickle
0005 import sys
0006 import re
# Pattern used by Page1Parser.check_for_whole_start_tag to find where a start
# tag's name and attributes end.  It matches '<', a tag name, zero or more
# attributes (each with an optional '=value'), and trailing whitespace; the
# closing '>' or '/>' is NOT part of the match.
# Compiled with re.VERBOSE, so whitespace and the '#' remarks inside the
# pattern are ignored by the regex engine.
# The "this.src='...'" alternative is labelled a hack in the pattern itself;
# note its '.' is unescaped and therefore matches any single character.
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|this.src='[^']*' # hack
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)

# Matches a tag name: a letter followed by letters, digits, '-', '.', ':'
# or '_'.  Page1Parser.parse_starttag applies it one character past the '<'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Matches one attribute.  Group 1 is the attribute name, group 2 is the whole
# optional '=value' part, group 3 is the value itself, which is either
# single-quoted, double-quoted, or a bare run of the listed characters (the
# quotes, when present, are included in group 3).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
0027
class Page1Parser(HTMLParser):
    """Scrape tables and hyperlinks out of HTML pages and interpret them.

    Feeding a page (see ``_Parse``) fills two generic containers:

    * ``table``      -- list of rows; each row is the list of text chunks
                        seen inside ``<td>`` cells of one ``<tr>``.
    * ``hyperlinks`` -- for every ``<a>`` tag with attributes, the value of
                        its FIRST attribute (may be ``None``).

    The ``Parse*`` methods then interpret ``table``/``hyperlinks`` for one
    specific kind of page and store the results on the instance
    (``TriggerRates``, ``LumiByLS``, ``L1PrescaleTable``, ...).

    ``parse_starttag`` and ``check_for_whole_start_tag`` override HTMLParser
    internals so that the module-level ``locatestarttagend``, ``tagfind``
    and ``attrfind`` patterns are used for tokenising start tags.
    """

    def __init__(self):
        HTMLParser.__init__(self)

        # State of the row/cell scanner driven by the handle_* callbacks.
        self.InRow = 0
        self.InEntry = 0
        self.table = []
        self.tmpRow = []
        self.hyperlinks = []

        # Results filled by the Parse* / Compute* methods.
        self.RunNumber = 0
        self.TriggerRates = []
        self.Nevts = []
        self.LumiByLS = []
        self.FirstLS = -1          # -1 means "not found yet"
        self.AvLumi = []           # replaced by a number in ParseLumiPage
        self.PrescaleColumn = []
        self.L1PrescaleTable = []
        self.HLTPrescaleTable = []
        self.TotalPrescaleTable = []
        self.ColumnLumi = []
        self.L1Prescales = []
        self.SeedMap = []

        # Page URLs discovered by ParsePage1 / ParseRunPage.
        self.RunPage = ''
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.TrigModePage = ''

    def parse_starttag(self, i):
        """Parse the start tag that begins at ``self.rawdata[i]``.

        Attributes are tokenised with the module-level ``attrfind`` pattern.
        ``<img>`` tags are skipped completely: no handler is called for them.

        Returns the index just past the tag, or the negative value returned
        by ``check_for_whole_start_tag`` when the tag is not complete yet.
        """
        # NOTE: inside this class the double-underscore name is mangled to
        # _Page1Parser__starttag_text, i.e. it is private to this subclass.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        attrs = []
        match = tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        if tag == 'img':
            # Deliberately ignore images, attributes and all.
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute without '=value'.
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                    attrvalue[:1] == '"' == attrvalue[-1:]:
                # Quoted value: strip the quotes, then resolve entities.
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # (The original also computed a line/offset pair here that was
            # never used; that dead computation has been dropped.)
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                # NOTE(review): called without arguments; some HTMLParser
                # versions expect the element name here -- confirm against
                # the interpreter this runs on.
                self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the end index of the start tag at ``rawdata[i]``.

        Returns -1 when the buffer ends before the tag is complete.  Calls
        ``self.error`` for a malformed tag and raises ``AssertionError`` if
        no start tag can be matched at ``i`` at all.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            next_char = rawdata[j:j + 1]

            if next_char == ">":
                return j + 1
            if next_char == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # Always true in this branch (next_char is '/'), so a
                    # lone '/' is treated as "need more data".
                    return -1
                # Unreachable because of the check above; kept as in the
                # original.
                self.updatepos(i, j + 1)
                self.error("malformed empty start tag")
            if next_char == "":
                # Ran out of input.
                return -1
            if next_char in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # Input ends in or before an attribute value.
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Fetch ``url`` and feed its content to the parser.

        ``table`` and ``hyperlinks`` are reset first.  Errors raised while
        parsing are printed, not propagated (best effort); errors raised by
        ``urlopen`` itself do propagate.
        """
        self.table = []
        self.hyperlinks = []
        req = urlopen(url)
        try:
            self.feed(req.read())
        except Exception as inst:
            print(inst)
        finally:
            # The original never released the connection.
            req.close()

    def handle_starttag(self, tag, attrs):
        """Track ``<tr>``/``<td>`` state and collect ``<a>`` targets.

        For ``<a>`` tags the value of the first attribute is appended to
        ``hyperlinks``.  All other tags are ignored.
        """
        if tag not in ('a', 'tr', 'td'):
            return
        try:
            if tag == 'a':
                if attrs:
                    self.hyperlinks.append(attrs[0][1])
            elif tag == 'tr':
                self.InRow = 1
            else:
                self.InEntry = 1
        except Exception:
            # Best effort: report the offending tag and carry on.
            print(tag)
            print(attrs)

    def handle_endtag(self, tag):
        """Close the current row on ``</tr>`` and the current cell on ``</td>``."""
        if tag == 'tr':
            if self.InRow == 1:
                self.InRow = 0
                self.table.append(self.tmpRow)
                self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags."""
        pass

    def handle_data(self, data):
        """Append text found inside a ``<td>`` to the row being built."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):
        """Locate the run page for the first non-empty table row.

        Returns ``''`` when cell 3 of that row does not contain
        ``'l1_hlt_collisions'``.  Otherwise stores cell 0 in ``RunNumber``
        and returns (and stores in ``RunPage``) the first hyperlink that
        contains ``'RUN=<RunNumber>'``; returns ``None`` if there is none.

        Raises ``IndexError`` if ``table`` is empty or the row is too short.
        """
        # Falls back to table[0] when every row is empty (as the original
        # did); evaluating the default also raises on an empty table.
        MostRecent = next((row for row in self.table if row), self.table[0])

        if 'l1_hlt_collisions' not in MostRecent[3]:
            return ''

        self.RunNumber = MostRecent[0]
        wanted = 'RUN=' + self.RunNumber
        for link in self.hyperlinks:
            # hyperlinks may contain None (attribute without a value).
            if link and wanted in link:
                self.RunPage = link
                return link
        return None

    def ParseRunPage(self):
        """Pick the summary-page URLs out of ``hyperlinks``.

        Returns ``[RatePage, LumiPage, L1Page, TrigModePage]`` (also stored
        on the instance).  When several links match the same keyword, the
        last one wins.
        """
        servlet = "http://cmswbm/cmsdb/servlet/"
        for entry in self.hyperlinks:
            if not entry:
                # None or '' cannot match anything below.
                continue
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = servlet + entry
            if 'TriggerMode' in entry:
                # NOTE(review): the assignment is taken to apply to every
                # TriggerMode link, prefixed or not -- confirm.
                if not entry.startswith(servlet):
                    entry = servlet + entry
                self.TrigModePage = entry
        return [self.RatePage, self.LumiPage, self.L1Page, self.TrigModePage]

    def ParseRunSummaryPage(self):
        """Fill ``Nevts`` and ``TriggerRates`` from rows naming an HLT path.

        A row is used when it has at least 10 cells and cell 1 starts with
        ``'HLT_'``.  Appends

        * ``[name, int(c3), int(c4), int(c5), c9]`` to ``Nevts``
        * ``[name, rate, c3/c4 (0 if c4 <= 0), c9]`` to ``TriggerRates``

        where ``name`` is cell 1 up to the first space and ``rate`` is
        cell 6 with thousands separators removed.
        """
        for line in self.table:
            # Cell 9 is read below, so shorter rows cannot be used (the
            # original only required 7 cells and then raised IndexError).
            if len(line) < 10:
                continue
            if not line[1].startswith('HLT_'):
                continue
            # split() keeps the whole name when there is no space; the
            # original slice dropped the last character in that case.
            TriggerName = line[1].split(' ', 1)[0]
            TriggerRate = float(line[6].replace(',', ''))
            self.Nevts.append([TriggerName, int(line[3]), int(line[4]),
                               int(line[5]), line[9]])
            PS = 0
            if int(line[4]) > 0:
                PS = float(line[3]) / float(line[4])
            self.TriggerRates.append([TriggerName, TriggerRate, PS, line[9]])

    def ParseLumiPage(self):
        """Fill ``PrescaleColumn``, ``LumiByLS``, ``FirstLS`` and ``AvLumi``.

        The first table row is skipped.  For every other row with 5 to 12
        cells, cell 2 goes to ``PrescaleColumn`` and cell 4 to ``LumiByLS``.
        The first row whose cell 6 is positive defines ``FirstLS`` (cell 0)
        and narrows ``RatePage`` with a ``fromLS=`` argument.

        Raises ``ZeroDivisionError`` (after printing diagnostics) when there
        is nothing to average.
        """
        for line in self.table[1:]:
            # Cell 4 is read below; the original lower bound of 4 cells let
            # IndexError through after PrescaleColumn had been modified.
            if len(line) < 5 or len(line) > 12:
                continue
            self.PrescaleColumn.append(int(line[2]))
            self.LumiByLS.append(float(line[4]))
            if self.FirstLS == -1 and len(line) > 6 and float(line[6]) > 0:
                self.FirstLS = int(line[0])
                self.RatePage = self.RatePage.replace(
                    'HLTSummary?', 'HLTSummary?fromLS=' + line[0] + '&toLS=&')

        # NOTE(review): FirstLS comes from cell 0 but is used as a list
        # index here -- this assumes the two coincide; confirm.
        selected = self.LumiByLS[self.FirstLS:]
        try:
            self.AvLumi = sum(selected) / len(selected)
        except ZeroDivisionError:
            print("Cannot calculate average lumi -- something is wrong!")
            print(self.table[:10])
            raise

    def ParseL1Page(self):
        """Print every table row.

        Rows with at least 9 cells whose cell 1 starts with ``'L1_'`` are
        recognised, but nothing is extracted from them yet.
        """
        for line in self.table:
            print(line)
            if len(line) < 9:
                continue
            if line[1].startswith('L1_'):
                pass

    def ParseTrigModePage(self):
        """Fill the prescale tables, ``ColumnLumi``, ``SeedMap`` and ``L1Prescales``.

        * Rows whose cell 0 is the next expected column index (0, 1, 2, ...)
          contribute cell 2, written as ``<mantissa>E<exponent>``, to
          ``ColumnLumi`` scaled by 1e-30 and rounded to one decimal.
        * Rows whose cell 1 starts with ``'L1_'`` / ``'HLT_'`` contribute
          ``[name, ps0, ps1, ...]`` to the matching prescale table; HLT rows
          with exactly one ``'L1_'`` cell also go to ``SeedMap``.
        * Finally, if ``PrescaleColumn`` is non-empty, the mean prescale of
          every L1 row over ``PrescaleColumn[FirstLS:]`` is appended to
          ``L1Prescales`` as ``[name, mean]``.
        """
        ColIndex = 0  # index of the next prescale column we expect to see
        for line in self.table:
            if len(line) < 2:
                continue

            if line[0].isdigit() and len(line) >= 3:
                if int(line[0]) == ColIndex:
                    ColIndex += 1
                    StrLumiSplit = line[2].split('E')
                    if len(StrLumiSplit) != 2:
                        # Not in <mantissa>E<exponent> form: make sure no
                        # later row can match ColIndex any more.
                        ColIndex = -99999999
                    else:
                        lumi = float(StrLumiSplit[0])
                        lumi *= pow(10, int(StrLumiSplit[1]) - 30)
                        self.ColumnLumi.append(round(lumi, 1))

            name = line[1]
            isL1 = name.startswith('L1_')
            if not (isL1 or name.startswith('HLT_')):
                continue

            prescales = [name]
            seeds = [name]
            for entry in line[2:]:
                if entry.isdigit():
                    prescales.append(entry)
                if entry.startswith('L1_'):
                    seeds.append(entry)

            # Keep at most one prescale per known column; extra numeric
            # cells are discarded.
            del prescales[len(self.ColumnLumi) + 1:]

            if isL1:
                self.L1PrescaleTable.append(prescales)
            else:
                self.HLTPrescaleTable.append(prescales)
                if len(seeds) == 2:
                    self.SeedMap.append(seeds)

        # NOTE(review): the original guarded this step with
        # "if len(self.PrescaleColumn)==0: continue".  It is run once, after
        # the scan, and only when PrescaleColumn is non-empty, so each L1 row
        # yields exactly one entry and an empty PrescaleColumn cannot cause
        # a division by zero.  Confirm this matches the intended behaviour.
        if not self.PrescaleColumn:
            return
        usedColumns = self.PrescaleColumn[self.FirstLS:]
        for L1Row in self.L1PrescaleTable:
            thisAvPS = 0
            for prescaleThisLS in usedColumns:
                thisAvPS += float(L1Row[prescaleThisLS + 1])
            thisAvPS /= len(usedColumns)
            self.L1Prescales.append([L1Row[0], thisAvPS])

    def ComputeTotalPrescales(self):
        """Fill ``TotalPrescaleTable`` with HLT x L1 prescale products.

        Does nothing unless ``L1PrescaleTable``, ``HLTPrescaleTable`` and
        ``SeedMap`` are all non-empty.  Each output row is
        ``[hltName, l1Name, total0, total1, ...]`` where the totals are

        * ``-3`` when no seed is known for the HLT path,
        * the HLT prescale itself when the seed contains ``' OR '``
          (the L1 prescale is taken as 1),
        * ``-4`` when the seed has no row in ``L1PrescaleTable``,
        * ``int(hlt) * int(l1)`` otherwise.
        """
        if not (self.L1PrescaleTable and self.HLTPrescaleTable
                and self.SeedMap):
            return

        for hltLine in self.HLTPrescaleTable:
            hltName = hltLine[0]
            nColumns = len(hltLine) - 1

            # Look up the L1 seed of this HLT path.
            l1Name = ""
            for hlt, l1 in self.SeedMap:
                if hlt == hltName:
                    l1Name = l1
                    break

            if l1Name == "":
                self.TotalPrescaleTable.append(
                    [hltName, l1Name] + [-3] * nColumns)
                continue

            if ' OR ' in l1Name:
                # Seed logic is not evaluated; use a prescale of 1.
                l1Line = [l1Name] + [1] * nColumns
            else:
                l1Line = []
                for candidate in self.L1PrescaleTable:
                    if candidate[0] == l1Name:
                        l1Line = candidate
                        break

            if len(l1Line) == 0:
                totalLine = [hltName, l1Name] + [-4] * nColumns
            else:
                totalLine = [hltName, l1Name]
                for hltPS, l1PS in zip(hltLine[1:], l1Line[1:]):
                    try:
                        totalLine.append(int(hltPS) * int(l1PS))
                    except (TypeError, ValueError):
                        # Show the offending pair, then let it propagate.
                        print(hltPS)
                        print(l1PS)
                        raise
            self.TotalPrescaleTable.append(totalLine)

    def Save(self, fileName):
        """Pickle this parser to ``fileName``."""
        # Binary mode and a context manager: the original opened the file in
        # text mode and never closed it.
        with open(fileName, 'wb') as handle:
            pickle.dump(self, handle)

    def Load(self, fileName):
        """Restore this parser's state from a file written by ``Save``.

        SECURITY: unpickling executes arbitrary code; only load files from a
        trusted source.
        """
        with open(fileName, 'rb') as handle:
            loaded = pickle.load(handle)
        # Rebinding ``self`` (as the original did) has no effect outside the
        # method; copy the loaded attributes onto this instance instead.
        self.__dict__.update(loaded.__dict__)

    def ComputePU(self, nBunches):
        """Return ``LumiByLS`` scaled by ``71e-27 / 11.2e3 / nBunches``.

        The result is a new list with one entry per element of ``LumiByLS``.
        (The original lacked ``self`` and returned the loop variable.)
        """
        ScaleFactor = 71e-27 / 11.2e3 / nBunches
        return [lumi * ScaleFactor for lumi in self.LumiByLS]
0359