# File indexing completed on 2024-11-26 02:34:21
import cPickle as pickle
import re
import sys
from contextlib import closing
from HTMLParser import HTMLParser
from urllib2 import urlopen
# Patterns used by Page1Parser.parse_starttag and
# Page1Parser.check_for_whole_start_tag to scan HTML start tags.
# NOTE(review): the names and shapes match the private patterns of the
# standard-library HTMLParser module, so these were presumably copied from
# there and then loosened -- confirm before syncing with a newer stdlib.

# Matches a start tag from "<" up to, but not including, the closing ">" or
# "/>".  re.VERBOSE is on, so whitespace and "#" comments inside the pattern
# are ignored by the regex engine.
locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |this.src='[^']*'          # hack: accept an unquoted this.src='...' value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)

# Matches a tag name; applied at the character following "<".
tagfind = re.compile(r'[a-zA-Z][-.a-zA-Z0-9:_]*')

# Matches one attribute: group 1 is the name, group 2 the optional
# "= value" part, group 3 the value itself (quotes still attached).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
0026
class Page1Parser(HTMLParser):
    """Collect table cells and hyperlinks from HTML pages and interpret them.

    While a page is fed through the parser, the text of every ``<td>`` cell
    is gathered row by row into ``self.table`` and the first attribute value
    of every ``<a>`` tag is gathered into ``self.hyperlinks``.  The
    ``Parse*`` methods then turn those raw lists into run, rate, luminosity
    and prescale information stored on the instance.

    ``parse_starttag`` and ``check_for_whole_start_tag`` replace the base
    class tag scanner with variants driven by the module-level patterns
    ``locatestarttagend``, ``tagfind`` and ``attrfind``.
    """

    def __init__(self):
        """Initialise the HTML parser and all result containers."""
        HTMLParser.__init__(self)

        # --- state of the HTML scan ---
        self.InRow = 0        # 1 while inside a <tr> element
        self.InEntry = 0      # 1 while inside a <td> element
        self.table = []       # finished rows, each a list of cell strings
        self.tmpRow = []      # cells of the row currently being read
        self.hyperlinks = []  # first attribute value of each <a> tag

        # --- results filled by the Parse* methods ---
        self.RunNumber = 0           # first cell of the chosen run row (ParsePage1)
        self.TriggerRates = []       # [name, rate, prescale, column 9] per HLT row
        self.Nevts = []              # [name, col 3, col 4, col 5, column 9] per HLT row
        self.LumiByLS = []           # column 4 of each lumi row, as float
        self.FirstLS = -1            # first LS whose column 6 is > 0; -1 until found
        self.AvLumi = []             # replaced by a float in ParseLumiPage
        self.PrescaleColumn = []     # column 2 of each lumi row, as int
        self.L1PrescaleTable = []    # rows [L1 name, prescale strings...]
        self.HLTPrescaleTable = []   # rows [HLT name, prescale strings...]
        self.TotalPrescaleTable = []  # rows [HLT name, L1 name, products...]
        self.ColumnLumi = []         # one value per prescale column (ParseTrigModePage)
        self.L1Prescales = []        # [L1 name, average prescale]

        # --- page URLs discovered along the way ---
        self.RunPage = ''
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.TrigModePage = ''

        self.SeedMap = []            # [HLT name, 'L1_...' entry] pairs

    def parse_starttag(self, i):
        """Parse the start tag that begins at ``self.rawdata[i]``.

        ``<img>`` tags are consumed without being reported to any handler.
        Every other tag is split into a lower-cased name and a list of
        ``(name, value)`` attribute pairs and passed to
        ``handle_startendtag`` (for ``.../>``) or ``handle_starttag``.

        Returns the index just past the tag, or a negative value when the
        tag is not complete in the buffer yet.
        """
        # NOTE: inside this class the attribute is name-mangled to
        # _Page1Parser__starttag_text, so it is distinct from the base
        # class's own private attribute of the same spelling.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        attrs = []
        match = tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        # Image tags are skipped wholesale; their attributes are never parsed.
        if tag == 'img':
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif (attrvalue[:1] == '\'' == attrvalue[-1:] or
                  attrvalue[:1] == '"' == attrvalue[-1:]):
                # Only quoted values are stripped and unescaped.
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                try:
                    # Newer base classes need to know which element opened
                    # the CDATA section.
                    self.set_cdata_mode(tag)
                except TypeError:
                    # Older base classes take no argument.
                    self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the end index of the start tag at ``rawdata[i]``.

        Returns -1 when the tag cannot be delimited with the data buffered
        so far.  Calls ``self.error`` for a malformed tag and raises
        ``AssertionError`` if control gets past that call or if the
        pattern does not match at all.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            nxt = rawdata[j:j + 1]
            if nxt == ">":
                return j + 1
            if nxt == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                # A "/" that is not followed by ">": wait for more data.
                return -1
            if nxt == "":
                # End of the buffered input.
                return -1
            if nxt in ("abcdefghijklmnopqrstuvwxyz=/"
                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # Input ended in or before an attribute value.
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Fetch ``url`` and feed its content through the parser.

        ``self.table`` and ``self.hyperlinks`` are reset first.  Errors
        raised while feeding are printed and swallowed (best effort);
        errors raised while opening the URL propagate.
        """
        self.table = []
        self.hyperlinks = []
        # closing() guarantees the connection is released even on failure.
        with closing(urlopen(url)) as req:
            try:
                self.feed(req.read())
            except Exception as inst:
                print(inst)

    def handle_starttag(self, tag, attrs):
        """Track ``<a>``, ``<tr>`` and ``<td>`` tags; ignore all others.

        For ``<a>`` the value of the FIRST attribute is recorded
        (presumably the href -- verify against the pages being parsed).
        Attributes without a value are not recorded, because every
        consumer of ``self.hyperlinks`` treats the entries as strings.
        """
        if tag == 'a':
            if not attrs:
                return
            try:
                target = attrs[0][1]
            except (IndexError, TypeError):
                # Best effort: report the odd attribute list and move on.
                print(tag)
                print(attrs)
                return
            if target is not None:
                self.hyperlinks.append(target)
        elif tag == 'tr':
            self.InRow = 1
        elif tag == 'td':
            self.InEntry = 1

    def handle_endtag(self, tag):
        """Close the current cell or store the finished row."""
        if tag == 'tr':
            if self.InRow == 1:
                self.InRow = 0
                self.table.append(self.tmpRow)
                self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags."""
        pass

    def handle_data(self, data):
        """Append text found inside a ``<td>`` to the row being built."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):
        """Pick the run page link for the first non-empty table row.

        Returns ``''`` when there is no usable row or when its fourth cell
        does not contain ``'l1_hlt_collisions'``.  Otherwise stores the
        row's first cell in ``self.RunNumber`` and returns (and stores in
        ``self.RunPage``) the first hyperlink containing
        ``'RUN=<RunNumber>'``; returns ``None`` if no hyperlink matches.
        """
        MostRecent = next((row for row in self.table if row), None)
        if MostRecent is None or len(MostRecent) < 4:
            # Nothing usable was parsed; report "no collisions run" instead
            # of failing with an IndexError.
            return ''
        TriggerMode = MostRecent[3]
        if 'l1_hlt_collisions' not in TriggerMode:
            return ''
        self.RunNumber = MostRecent[0]
        wanted = 'RUN=' + self.RunNumber
        for link in self.hyperlinks:
            if wanted in link:
                self.RunPage = link
                return link
        return None

    def ParseRunPage(self):
        """Sort the collected hyperlinks into the four page URLs.

        Returns ``[RatePage, LumiPage, L1Page, TrigModePage]``.  When
        several links match the same keyword the last one wins.
        """
        servlet = "http://cmswbm/cmsdb/servlet/"
        for entry in self.hyperlinks:
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = servlet + entry
            if 'TriggerMode' in entry:
                if not entry.startswith(servlet):
                    entry = servlet + entry
                self.TrigModePage = entry
        return [self.RatePage, self.LumiPage, self.L1Page, self.TrigModePage]

    def ParseRunSummaryPage(self):
        """Fill ``self.Nevts`` and ``self.TriggerRates`` from ``HLT_`` rows.

        Only rows with at least ten cells are used, because column 9 is
        read.  The trigger name is the text of column 1 up to the first
        space; the prescale is column 3 divided by column 4, or 0 when
        column 4 is not positive.
        """
        for line in self.table:
            if len(line) < 10:
                continue
            if not line[1].startswith('HLT_'):
                continue
            # split() keeps the whole name when there is no space at all.
            TriggerName = line[1].split(' ', 1)[0]
            # float() cannot parse thousands separators.
            TriggerRate = float(line[6].replace(',', ''))
            self.Nevts.append([TriggerName, int(line[3]), int(line[4]),
                               int(line[5]), line[9]])
            PS = 0
            if int(line[4]) > 0:
                PS = float(line[3]) / float(line[4])
            self.TriggerRates.append([TriggerName, TriggerRate, PS, line[9]])

    def ParseLumiPage(self):
        """Read per-lumi-section values and compute ``self.AvLumi``.

        The first table row is skipped.  Rows with fewer than 7 or more
        than 12 cells are ignored (column 6 is read).  The first row whose
        column 6 is positive defines ``self.FirstLS`` and is spliced into
        ``self.RatePage`` as a ``fromLS=`` parameter.

        Raises ``ZeroDivisionError`` (after printing a diagnostic) when no
        lumi values remain to average.
        """
        for line in self.table[1:]:
            if len(line) < 7 or len(line) > 12:
                continue
            self.PrescaleColumn.append(int(line[2]))
            self.LumiByLS.append(float(line[4]))
            if self.FirstLS == -1 and float(line[6]) > 0:
                self.FirstLS = int(line[0])
                self.RatePage = self.RatePage.replace(
                    'HLTSummary?', 'HLTSummary?fromLS=' + line[0] + '&toLS=&')
        # NOTE(review): FirstLS is an LS number but is used as a list index
        # here, and the initial -1 selects only the last entry -- confirm
        # that both are intended.
        selected = self.LumiByLS[self.FirstLS:]
        try:
            self.AvLumi = sum(selected) / len(selected)
        except ZeroDivisionError:
            print("Cannot calculate average lumi -- something is wrong!")
            print(self.table[:10])
            raise

    def ParseL1Page(self):
        """Print every table row; no data is extracted yet (stub)."""
        for line in self.table:
            print(line)
            if len(line) < 9:
                continue
            if line[1].startswith('L1_'):
                pass

    def ParseTrigModePage(self):
        """Read prescale columns, prescale tables and seeds from the table.

        Fills ``self.ColumnLumi``, ``self.L1PrescaleTable``,
        ``self.HLTPrescaleTable`` and ``self.SeedMap``.  If
        ``self.PrescaleColumn`` is not empty, ``self.L1Prescales`` then
        receives one ``[name, average prescale]`` entry per L1 row,
        averaged over ``self.PrescaleColumn[self.FirstLS:]``.
        """
        ColIndex = 0  # number of the next prescale-column row expected
        for line in self.table:
            if len(line) < 2:
                continue

            # Prescale-column rows: "<index>, <...>, <mantissa>E<exponent>".
            if line[0].isdigit() and len(line) >= 3:
                if int(line[0]) == ColIndex:
                    ColIndex += 1
                    StrLumiSplit = line[2].split('E')
                    if len(StrLumiSplit) != 2:
                        # Not a lumi value: stop matching column rows.
                        ColIndex = -99999999
                    else:
                        lumi = float(StrLumiSplit[0])
                        lumi *= pow(10, int(StrLumiSplit[1]) - 30)
                        self.ColumnLumi.append(round(lumi, 1))

            # Prescale rows: name followed by one prescale per column.
            if line[1].startswith('L1_') or line[1].startswith('HLT_'):
                tmp = [line[1]]
                seedtmp = [line[1]]
                for entry in line[2:]:
                    if entry.isdigit():
                        tmp.append(entry)
                    if entry.startswith('L1_'):
                        seedtmp.append(entry)

                # Keep one prescale per known column; drop extra digits.
                del tmp[len(self.ColumnLumi) + 1:]

                if line[1].startswith('L1_'):
                    self.L1PrescaleTable.append(tmp)
                else:
                    self.HLTPrescaleTable.append(tmp)
                    if len(seedtmp) == 2:
                        self.SeedMap.append(seedtmp)

        # The averages are computed once, after the whole table has been
        # read; doing it per table row would append the same entries again
        # for every row.
        if not self.PrescaleColumn:
            return
        usedColumns = self.PrescaleColumn[self.FirstLS:]
        for L1Row in self.L1PrescaleTable:
            thisAvPS = 0
            nLS = 0
            for prescaleThisLS in usedColumns:
                thisAvPS += float(L1Row[prescaleThisLS + 1])
                nLS += 1
            thisAvPS /= nLS
            self.L1Prescales.append([L1Row[0], thisAvPS])

    def ComputeTotalPrescales(self):
        """Append one ``[HLT, L1, products...]`` row per HLT table row.

        Does nothing unless the L1 table, the HLT table and the seed map
        are all non-empty.  The per-column value is the HLT prescale times
        the L1 prescale; seeds containing ``' OR '`` use an L1 prescale of
        1.  Columns are filled with -3 when the HLT path has no seed in
        ``self.SeedMap`` and with -4 when the seed is missing from the L1
        table.
        """
        if not (self.L1PrescaleTable and self.HLTPrescaleTable
                and self.SeedMap):
            return

        # Lookup tables; setdefault keeps the first occurrence, exactly
        # like a linear search from the front of the list.
        seedOf = {}
        for hlt, l1 in self.SeedMap:
            seedOf.setdefault(hlt, l1)
        l1Rows = {}
        for row in self.L1PrescaleTable:
            if row:
                l1Rows.setdefault(row[0], row)

        for hltLine in self.HLTPrescaleTable:
            hltName = hltLine[0]
            l1Name = seedOf.get(hltName, "")
            nColumns = len(hltLine) - 1

            if l1Name == "":
                totalLine = [hltName, l1Name] + [-3] * nColumns
            else:
                if ' OR ' in l1Name:
                    l1Line = [l1Name] + [1] * nColumns
                else:
                    l1Line = l1Rows.get(l1Name, [])
                if len(l1Line) == 0:
                    totalLine = [hltName, l1Name] + [-4] * nColumns
                else:
                    totalLine = [hltName, l1Name]
                    for hltPS, l1PS in zip(hltLine[1:], l1Line[1:]):
                        try:
                            totalLine.append(int(hltPS) * int(l1PS))
                        except (TypeError, ValueError):
                            # Show the offending pair, then fail as before.
                            print(hltPS)
                            print(l1PS)
                            raise
            self.TotalPrescaleTable.append(totalLine)

    def Save(self, fileName):
        """Pickle this parser, including all parsed results, to ``fileName``."""
        with open(fileName, 'wb') as handle:
            pickle.dump(self, handle)

    def Load(self, fileName):
        """Restore the state previously written by ``Save`` into this object.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(fileName, 'rb') as handle:
            loaded = pickle.load(handle)
        # Rebinding ``self`` would only change a local name, so copy the
        # stored attributes onto this instance instead.
        self.__dict__.update(loaded.__dict__)

    def ComputePU(self, nBunches):
        """Return ``self.LumiByLS`` scaled by ``71e-27 / 11.2e3 / nBunches``.

        NOTE(review): the constants look like a cross-section and a
        revolution frequency, making the result a per-bunch pile-up
        estimate -- confirm units against the lumi page.
        """
        ScaleFactor = 71e-27 / 11.2e3 / nBunches
        return [lumi * ScaleFactor for lumi in self.LumiByLS]
0358