File indexing completed on 2023-03-17 11:26:51
0001
0002 from bs4 import BeautifulSoup
0003 import sys
0004 import os
def _row_key(cells):
    """Return the de-duplication key for one table row.

    The key is the concatenated markup of the cells at index 2, 3 and 4, so
    two rows count as duplicates only when those three cells are identical.
    """
    return str(cells[2]) + str(cells[3]) + str(cells[4])


def _add_count_header(soup, table):
    """Add a 'Num reports' cell to the last header row of *table*.

    Also overwrites the text of the first <span> in that header row.
    """
    header_row = table.find('thead').find_all('tr')[-1]
    heading = soup.new_tag('td')
    heading.string = 'Num reports'
    heading['class'] = 'Q'
    # Tag.insert() positions are indexes into the row's child nodes (text
    # nodes included), not into its <td> cells.
    header_row.insert(7, heading)
    header_row.find_all('span')[0].string.replace_with(' ▾')


def _remove_duplicate_rows(table, report_dir):
    """Drop repeated body rows of *table* and count rows per key.

    The first row seen for a key is kept. Every later row with the same key
    is removed from the document; if its cell at index 6 contains a link
    whose target (without the '#fragment') starts with "report-", that file
    is deleted from *report_dir*.

    Returns a dict mapping each row key to the number of rows that had it.
    """
    counts = {}
    for row in table.find('tbody').find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            continue
        key = _row_key(cells)
        counts[key] = counts.get(key, 0) + 1
        if counts[key] == 1:
            # First occurrence: keep the row and its report file.
            continue
        link = cells[6].find('a', href=True)
        if link:
            report = link['href'].split("#")[0]
            if report.startswith("report-"):
                # Attempt the removal directly instead of checking for
                # existence first; a file that is already gone is fine.
                try:
                    os.remove(os.path.join(report_dir, report))
                except FileNotFoundError:
                    pass
        # NOTE(review): the listing this was recovered from had lost its
        # indentation; the duplicate row is assumed to be removed whether or
        # not a report file was deleted -- confirm against the original.
        row.decompose()
    return counts


def _add_count_cells(soup, table, counts):
    """Insert a cell holding the occurrence count into each body row."""
    for row in table.find('tbody').find_all('tr'):
        cells = row.find_all('td')
        if cells:
            cell = soup.new_tag('td')
            cell.string = str(counts[_row_key(cells)])
            cell['class'] = 'Q'
            # Child-node position, as in _add_count_header.
            row.insert(3, cell)


def main(argv=None):
    """De-duplicate the third table of an HTML file and print the result.

    argv: command-line arguments without the program name; defaults to
        ``sys.argv[1:]``. The first argument is the path of the HTML file.

    Duplicate rows are removed, linked "report-*" files next to the HTML
    file are deleted, a 'Num reports' column is added, and the modified
    document is written to standard output.

    Exits with an error message when no path is given or the document has
    fewer than three tables.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: {} HTML_FILE".format(sys.argv[0]))

    url = os.path.abspath(args[0])
    report_dir = os.path.dirname(url)
    # The context manager closes the file even if reading or parsing fails.
    with open(url) as page:
        soup = BeautifulSoup(page.read(), 'html.parser')

    tables = soup.find_all('table', recursive=True)
    if len(tables) < 3:
        sys.exit("{}: expected at least 3 tables, found {}".format(
            url, len(tables)))
    table = tables[2]

    # The header is updated before any report file is deleted, so a document
    # without the expected header fails before anything is removed.
    _add_count_header(soup, table)
    counts = _remove_duplicate_rows(table, report_dir)
    _add_count_cells(soup, table, counts)
    print(soup.prettify(formatter=None))


if __name__ == "__main__":
    main()