File indexing completed on 2024-12-01 23:40:11
0001
0002
0003
0004
0005
0006
0007 from bs4 import BeautifulSoup
0008 import sys, os, copy
0009
0010 htmlFullPath = None  # path of the input HTML file; set from sys.argv[1] by the __main__ block
0011 htmlFilePath = None  # directory part of htmlFullPath (os.path.split(...)[0])
0012 htmlFileName = None  # file-name part of htmlFullPath (os.path.split(...)[1])
0013 fileNameTemplate = None  # htmlFileName with '.html' replaced by '_%s.html'; filled with a page name
0014 htmlPage = None  # BeautifulSoup document parsed from the input file
0015 tableClassName = 'directory'  # class attribute of the <table> whose rows are split into pages
0016
0017
0018
def extractPages(configFileFlag = False):
    """Group the rows of the directory table into pages keyed by a letter.

    Reads the module-level ``htmlPage`` document and ``tableClassName``.
    A row without a ``style`` attribute starts a new group: its page is the
    upper-cased first character of the text of its first ``<td>``.  A row
    that carries a ``style`` attribute is filed on the page of the most
    recent row without one.
    NOTE(review): styled rows are presumably the hidden child rows of an
    expandable tree -- confirm against the generated HTML.

    Args:
        configFileFlag: when true, only rows whose first link has an ``href``
            containing ``_cff``, ``_cfi`` or ``_cfg`` are kept.  Rows without
            a link (or without an ``href``) are treated as non-matching.

    Returns:
        dict mapping a letter to the list of ``<tr>`` tags on that page.
        The ``'A'`` page always exists, and a page is created for every
        letter encountered even if all of its rows were filtered out.
    """
    pages = {'A': []}
    currentLetter = None

    table = htmlPage.find('table', {'class': tableClassName})
    for row in table.findAll('tr'):
        # ``'style' in row`` would test the tag's *children* in bs4, not its
        # attributes, so the attribute has to be queried explicitly.
        isStyledRow = row.has_attr('style')

        # A styled row inherits the letter of the preceding unstyled row.  If
        # there is no such row yet, fall back to the row's own text so that
        # the letter is never undefined.
        if currentLetter is None or not isStyledRow:
            currentLetter = row.findAll('td')[0].text[0].upper()

        bucket = pages.setdefault(currentLetter, [])

        if configFileFlag:
            link = row.find('a')
            url = link.get('href', '') if link is not None else ''
            if not ('_cff' in url or '_cfi' in url or '_cfg' in url):
                continue
        bucket.append(row)
    return pages
0045
0046
0047
def extractPagesForPackage():
    """Group the rows of the directory table by a name cut from the first cell.

    Reads the module-level ``htmlPage`` document and ``tableClassName``.
    For every ``<tr>`` the text of its first ``<td>`` is sliced from the
    first space up to (not including) the first ``/`` and stripped; the
    result is the page name for that row.

    NOTE(review): ``str.find`` returns -1 when the character is absent, so a
    cell text without a space or without a slash is sliced from/to the last
    character -- presumably every cell contains both; verify against the
    input HTML.

    Returns:
        dict mapping each page name to the list of ``<tr>`` tags grouped
        under it, in document order.
    """
    grouped = {}
    directory = htmlPage.find('table', {'class': tableClassName})
    for tr in directory.findAll('tr'):
        label = tr.findAll('td')[0].text
        start = label.find(' ')
        stop = label.find('/')
        key = label[start:stop].strip()
        grouped.setdefault(key, []).append(tr)
    return grouped
0061
0062
def generateTab(items, curr, tabClass = 'tabs3'):
    """Build the HTML tab bar that links to every generated page.

    Uses the module-level ``fileNameTemplate`` to derive each link target.

    Args:
        items: page names, in the order the tabs should appear.
        curr: name of the page being generated; its tab gets
            ``class="current"``.
        tabClass: class attribute of the wrapping ``<div>``.

    Returns:
        A string of the form
        ``<div class="..."><ul class="tablist">...</ul></div>`` with one
        ``<li>`` per item.
    """
    entries = []
    for item in items:
        # Spaces become underscores so the link matches the file name the
        # __main__ block writes for the same page (page.replace(' ', '_')).
        fn = fileNameTemplate % item.replace(' ', '_')
        if item == curr:
            entries.append('<li class="current"><a href="%s">%s</a></li>' % (fn, item))
        else:
            entries.append('<li><a href="%s">%s</a></li>' % (fn, item))
    return '<div class="%s"><ul class="tablist">%s</ul></div>' % (tabClass, ''.join(entries))
0070
if __name__ == "__main__":
    # Split one HTML index page into one file per page name plus an "All"
    # page.  Usage: <script> path/to/page.html
    if len(sys.argv) < 2:
        sys.stderr.write("not enough parameter!\n")
        sys.exit(1)

    # These assignments fill the module-level globals the helper functions
    # read (htmlPage, fileNameTemplate, ...).
    htmlFullPath = sys.argv[1]
    htmlFilePath, htmlFileName = os.path.split(htmlFullPath)
    fileNameTemplate = htmlFileName.replace('.html', '_%s.html')

    with open(htmlFullPath) as f:
        htmlPage = f.read()
        htmlPage = BeautifulSoup(htmlPage)

    # Pick the grouping strategy and the tab bar to extend from the file name.
    if htmlFileName == 'packageDocumentation.html':
        pages = extractPagesForPackage()
        destTabClassName = 'tabs'
    elif htmlFileName == 'configfiles.html':
        pages = extractPages(configFileFlag = True)
        destTabClassName = 'tabs2'
    else:
        pages = extractPages()
        destTabClassName = 'tabs2'

    # sorted() returns a real list on both Python 2 and 3; dict.keys() is a
    # view without .sort()/.append() on Python 3.
    pageNames = sorted(pages)
    allRows = []
    for page in pageNames:
        allRows.extend(pages[page])
    pages['All'] = allRows
    pageNames.append('All')

    # Empty the table in the source document so that every generated page
    # starts from the same row-less skeleton.
    table = htmlPage.find('table', {'class' : tableClassName})
    for row in table.findAll('tr'):
        row.extract()

    for page in pageNames:
        print('generating %s...' % (fileNameTemplate % page))
        # Re-parse the serialized skeleton to get an independent copy.
        temp = BeautifulSoup(str(htmlPage))
        table = temp.find('table', {'class' : tableClassName})
        oldTab = temp.find('div', {'class' : destTabClassName})
        newTab = generateTab(pageNames, page)
        # Keep the existing tab bar and put the new one right after it.
        oldTab.replaceWith(BeautifulSoup(oldTab.prettify() + str(newTab)))
        for row in pages[page]:
            table.append(row)

        page = page.replace(' ', '_')
        # os.path.join copes with an empty directory part; '%s/%s' would
        # have produced a path at the filesystem root in that case.
        outputPath = os.path.join(htmlFilePath, fileNameTemplate % page)
        with open(outputPath, 'w') as f:
            f.write(str(temp))