Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-12-01 23:40:11

0001 # email: cmsdoxy@cern.ch, ali.mehmet.altundag@cern.ch
0002 
0003 # please have a look at the namespaces.html (namespace list) and annotated.html
0004 # (~class list) html files to understand the tags/attributes that we use in
0005 # this script.
0006 
0007 from bs4 import BeautifulSoup
0008 import sys, os, copy
0009 
# Module-level state shared by the helper functions below. Everything except
# tableClassName starts out as None and is assigned by the script entry point.

# path of the input html file
htmlFullPath = None
# directory part of htmlFullPath
htmlFilePath = None
# file name part of htmlFullPath
htmlFileName = None
# html file name template; contains a single '%s' for the page name
fileNameTemplate = None
# parsed (BeautifulSoup) content of the input html file
htmlPage = None
# value of the class attribute of the table that holds the rows to be split
tableClassName = 'directory'
0016 
# load rows from the table in [C]lass and [N]amespace list pages and prepare
# pages in the following structure: pages = {'A' : [...], 'B' : [...]}
def extractPages(configFileFlag = False):
    """Group the rows of the class/namespace table by their first letter.

    The table is looked up in the module-level ``htmlPage`` using the class
    name stored in ``tableClassName``.

    configFileFlag -- when True, only rows whose first link target contains
                      '_cff', '_cfi' or '_cfg' are kept.

    Returns a dict that maps an upper-case letter to the list of table rows
    filed under it. The 'A' page always exists, even when it stays empty.
    """
    # initial page, A
    pages = {'A':[]}
    # hidden rows that show up before any visible row have no parent to take
    # the letter from; file them under the initial page instead of failing.
    firstLetter = 'A'
    # find all class/namespace table rows.
    table = htmlPage.find('table', {'class' : tableClassName})
    if table is None:
        return pages
    for row in table.findAll('tr'):
        # please see the related html file (annotated.html) to understand the
        # approach here. you will see that, only hidden rows have style
        # attribute and these hidden rows must be added to pages of their
        # parents. This is why we need to check whether row has a style
        # attribute or not.
        # NOTE: "'style' in row" would test the children of the tag, not its
        # attributes, so the attribute has to be queried explicitly.
        if not row.has_attr('style'):
            # change the first letter since the row is not a hidden (child) one
            cells = row.findAll('td')
            label = cells[0].text if cells else ''
            if not label:
                # nothing to derive a letter from (e.g. a row without cells)
                continue
            firstLetter = label[0].upper()
        # create the page on first use, even if the row gets filtered out below
        bucket = pages.setdefault(firstLetter, [])
        # insert the row into the related page
        if configFileFlag:
            link = row.find('a')
            url = link.get('href', '') if link is not None else ''
            if '_cff' in url or '_cfi' in url or '_cfg' in url:
                bucket.append(row)
        else:
            bucket.append(row)
    return pages
0045 
# load rows from the package documentation page. output structure:
# pages = {'PackageA' : [..], 'PackageB' : [...]}
def extractPagesForPackage():
    """Group the rows of the package documentation table by package name.

    The table is looked up in the module-level ``htmlPage`` using the class
    name stored in ``tableClassName``. The package name of a row is the part
    of its first cell's text that lies between the first blank and the first
    '/', with surrounding whitespace removed --please have a look at the
    pages.html file.

    Returns a dict that maps a package name to the list of its table rows.
    """
    grouped = {}
    listing = htmlPage.find('table', {'class' : tableClassName})
    for entry in listing.findAll('tr'):
        # first cell contains name of the package...
        label = entry.findAll('td')[0].text
        # cut the package name out of the cell text
        start = label.find(' ')
        stop  = label.find('/')
        package = label[start:stop].strip()
        # the list for a package is created the first time it is seen
        grouped.setdefault(package, []).append(entry)
    return grouped
0061 
# generate alphabetic tab for html pages that will be generated by this script
def generateTab(items, curr, tabClass = 'tabs3'):
    """Build the html snippet of the index tab.

    items    -- page names, in the order they should be displayed
    curr     -- name of the page being generated; its entry is marked with
                class="current"
    tabClass -- value of the class attribute of the wrapping div

    Returns the tab as an html string. The link targets are derived from the
    module-level ``fileNameTemplate``.
    """
    entries = []
    for item in items:
        # generate file name. The script writes the pages to disk with blanks
        # replaced by '_', so the link has to be built the same way.
        fn = fileNameTemplate % item.replace(' ', '_')
        if item == curr:
            entries.append('<li class="current"><a href="%s">%s</a></li>' % (fn, item))
        else:
            entries.append('<li><a href="%s">%s</a></li>' % (fn, item))
    return '<div class="%s"><ul class="tablist">%s</ul></div>' % (tabClass, ''.join(entries))
0070 
# Script entry point: split the table of the given html file into one page per
# key (first letter or package name) plus an 'All' page, and write the pages
# next to the input file.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("not enough parameter!\n")
        sys.exit(1)

    # initialize variables
    htmlFullPath     = sys.argv[1]
    htmlFilePath, htmlFileName = os.path.split(htmlFullPath)
    fileNameTemplate = htmlFileName.replace('.html', '_%s.html')

    # load the html page
    with open(htmlFullPath) as f:
        htmlPage = BeautifulSoup(f.read())

    # please have a look at the pages.html page. You will see that class name
    # of the related tab, which we will use to put 'index tab' by using this
    # tab, is different for pages.html file. For namespaces.html (namespace
    # list) and annotated.html (~class list) files, class names are the same
    # tabs2. this is why we are setting 'the destination tab class name' up
    # differently depending on the html file name.
    if htmlFileName == 'packageDocumentation.html':
        pages = extractPagesForPackage()
        destTabClassName = 'tabs'
    elif htmlFileName == 'configfiles.html':
        pages = extractPages(configFileFlag = True)
        destTabClassName = 'tabs2'
    else:
        pages = extractPages()
        destTabClassName = 'tabs2'

    # sorted() works on both Python 2 and 3; dict.keys() has no sort() method
    # on Python 3.
    pageNames = sorted(pages)
    # the 'All' page holds the rows of every page, in page order
    allRows = []
    for page in pageNames:
        allRows.extend(pages[page])
    pages['All'] = allRows
    pageNames.append('All')

    # prepare the template
    table = htmlPage.find('table', {'class' : tableClassName})
    if table is None:
        sys.stderr.write("table with class '%s' not found!\n" % tableClassName)
        sys.exit(1)
    # generate template (clean whole table content)
    for row in table.findAll('tr'):
        row.extract()

    # generate pages
    for page in pageNames:
        # blank characters are replaced with '_' in the file name; the page
        # name itself is left untouched.
        outName = fileNameTemplate % page.replace(' ', '_')
        print('generating %s...' % outName)
        # work on a fresh copy of the (now empty) template for every page
        temp   = BeautifulSoup(str(htmlPage))
        table  = temp.find('table', {'class' : tableClassName})
        oldTab = temp.find('div', {'class' : destTabClassName})
        if oldTab is None:
            sys.stderr.write("tab with class '%s' not found!\n" % destTabClassName)
            sys.exit(1)
        newTab = generateTab(pageNames, page)
        # keep the existing tab and put the index tab right after it
        oldTab.replaceWith(BeautifulSoup(oldTab.prettify() + newTab))
        for row in pages[page]:
            table.append(row)
        # os.path.join copes with an empty directory part (input file given
        # without a path); plain '%s/%s' would point at the filesystem root.
        with open(os.path.join(htmlFilePath, outName), 'w') as f:
            f.write(str(temp))