File indexing completed on 2023-03-17 11:26:50
0001
0002 """
0003 The script compares two releases and generates an SQLite3 database file with
0004 the release comparison information.
0005
0006 Author: Albertas Gimbutas, Vilnius University (LT)
0007 e-mail: albertasgim@gmail.com
0008
0009 Note: default Pool size for file comparison is 7.
0010 Note: the static HTML generation implementation is NOT finished.
0011 """
0012 from __future__ import print_function
0013 import sqlite3
0014 from datetime import datetime
0015 from multiprocessing import Pool, Queue, Process
0016 from subprocess import call
0017 from optparse import OptionParser, OptionGroup
0018 from os import makedirs, remove
0019 from os.path import basename, join, exists
0020
0021 from Utilities.RelMon.utils_v2 import *
0022 from compare_using_files_v2 import RootFileComparison
0023
0024
0025
# Command-line interface. The releases and their filename filters are added
# directly to the parser; every other switch goes into the "Optional" group.
parser = OptionParser(
    usage=('Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
           '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]'))

# Releases to compare.
parser.add_option(
    '--re1', dest='release1', action='store', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', dest='release2', action='store', default=None,
    help='Second CMSSW release for release comparison.')

# Filename fragment filters, one per release.
parser.add_option(
    '--f1', dest='fragments1', action='store', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE1 filenames. For "not include" use `!` before fragment, '
          'e.g. `--f1 FullSim,!2012`.'))
parser.add_option(
    '--f2', dest='fragments2', action='store', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE2 filenames. For "not include" use `!` before fragment.'))

optional_group = OptionGroup(parser, 'Optional')

# Statistical tests and presentation.
optional_group.add_option(
    '--st', dest='st_tests', action='store', default='KS',
    help='Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', dest='title', action='store', default=None,
    help='Release comparison title.')

# Where the ROOT files come from and where they are stored.
optional_group.add_option(
    '--dir', dest='dir', action='store', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', dest='url', action='store', default=None,
    help=('URL to fetch ROOT files from. File search is recursive '
          'for links in given URL.'))
optional_group.add_option(
    '--no-url', dest='no_url', action='store_true', default=False,
    help=('Search for files in DIR (specified by --dir option), '
          'do NOT browse for files online.'))

# Database handling.
optional_group.add_option(
    '--db', dest='db_name', action='store', default=None,
    help='SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', dest='clear_db', action='store_true', default=False,
    help='Clean DB before comparison.')

# Run modes.
optional_group.add_option(
    '--dry', dest='dry', action='store_true', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', dest='html', action='store_true', default=False,
    help='Generate static html. Default: %default.')

parser.add_option_group(optional_group)
0062
0063
def call_compare_using_files(args):
    """Run ``./compare_using_files_v2.py`` on one file pair in a subprocess.

    ``args`` is one 5-item sequence ``(file1, file2, work_path, db_name,
    clear_db)``. Both file names are joined onto ``work_path``; ``db_name``
    is forwarded through ``--db`` and ``--cl`` is added when ``clear_db`` is
    true. Returns the value of ``subprocess.call`` for the command.
    """
    first_file, second_file, directory, database, wipe_db = args
    extra_flags = ['--cl'] if wipe_db else []
    return call([
        './compare_using_files_v2.py',
        join(directory, first_file),
        join(directory, second_file),
        '--db',
        database,
    ] + extra_flags)
0070
def partial_db_name(db_name, i):
    """Generates temporary database name.

    A trailing ``.db`` extension is removed from ``db_name`` (when present)
    and ``___<i + 1>.db`` is appended, e.g. ``('cmp.db', 0)`` gives
    ``'cmp___1.db'``.
    """
    extension = '.db'
    # str.strip('.db') removes any of the characters '.', 'd' and 'b' from
    # BOTH ends ('bad.db' -> 'a'), so the extension is cut off explicitly.
    if db_name.endswith(extension):
        db_name = db_name[:-len(extension)]
    return '%s___%d.db' % (db_name, i + 1)
0074
def merge_dbs(main_db, partial_db):
    """Append the rows of ``partial_db`` to ``main_db``.

    The ReleaseComparison, RootFileComparison, Directory and
    HistogramComparison rows of ``partial_db`` are copied into ``main_db``.
    Ids that reference rows of other tables are shifted so they keep pointing
    at the copied rows:

    * release comparison ids by the number of ReleaseComparison rows,
    * directory ids by the largest Directory id,
    * histogram ids by the largest HistogramComparison id

    already stored in ``main_db``. All shifts are 0 while ``main_db`` holds
    no Directory rows.

    Returns the largest ``RootFileComparison.id`` in ``main_db`` after the
    merge (``None`` when that table is empty).
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        # The shifts only matter once the main DB already contains data.
        rel_cmp_offset = directory_offset = hist_cmp_offset = 0
        c.execute('''SELECT * FROM Directory limit 1;''')
        if c.fetchall():
            c.execute('''SELECT count(*) FROM ReleaseComparison;''')
            rel_cmp_offset = c.fetchone()[0]
            # max() is NULL (None) for an empty table; formatting None into
            # the script below would produce invalid SQL, so fall back to 0.
            c.execute('''SELECT max(id) FROM Directory;''')
            directory_offset = c.fetchone()[0] or 0
            c.execute('''SELECT max(id) FROM HistogramComparison;''')
            hist_cmp_offset = c.fetchone()[0] or 0

        c.executescript("""
            ATTACH '{partial}' AS partial;
            BEGIN;

            INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
            SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;

            INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
            SELECT filename1, filename2, release_comparison_id+{rel}, directory_id+{directory} FROM partial.RootFileComparison;

            INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
            SELECT id+{directory}, name, parent_id+{directory}, from_histogram_id+{hist}, till_histogram_id+{hist} FROM partial.Directory;

            INSERT INTO HistogramComparison (name, p_value, directory_id)
            SELECT name, p_value, directory_id+{directory} FROM partial.HistogramComparison;

            COMMIT;""".format(
                # ATTACH takes a string literal: double any single quote in
                # the path so it cannot terminate the literal early.
                partial=partial_db.replace("'", "''"),
                rel=rel_cmp_offset,
                directory=directory_offset,
                hist=hist_cmp_offset))

        c.execute('''SELECT max(id) FROM RootFileComparison;''')
        return c.fetchone()[0]
    finally:
        # Release the database file even when one of the statements fails.
        conn.close()
0119
0120
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file."""

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False,
                 no_url=False, use_external=False, pool_size=7):
        """Store the comparison settings.

        work_path    -- directory the files are downloaded to and compared in;
                        when empty, ``compare`` derives it from the versions
                        of the first file pair.
        db_name      -- SQLite3 file name, joined onto ``work_path``; when
                        empty, ``compare`` generates one.
        clear_db     -- truncate the database file before comparing.
        dry          -- exit right after the file search.
        no_url       -- search for files in ``work_path`` instead of online.
        use_external -- compare each file pair with a separate
                        ``compare_using_files_v2.py`` process and merge the
                        partial databases afterwards.
        pool_size    -- number of worker processes used for the external
                        comparison.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external
        self.pool_size = pool_size

    def was_compared(self, release1, release2, st_test_name):
        """Return the id of the stored comparison of the two releases.

        Looks up ReleaseComparison in ``self.db_name`` by both release names
        and the statistical test name. Returns ``False`` when no row matches.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            # Do not keep the database file open when the query fails.
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, fetch and compare the files of two releases.

        rel1, rel2     -- release names handed to the file search.
        frags1, frags2 -- filename fragment filters handed to the file search.
        st_tests       -- comma separated statistical test names; each one is
                          looked up in ``tests`` and processed in turn.
        url            -- start URL for the online file search.
        title          -- title stored with a new ReleaseComparison row;
                          defaults to ``<release1>__VS__<release2>``.

        Exits the interpreter after the file search when ``self.dry`` is set.
        Files are downloaded only in online mode. ``self.work_path`` and
        ``self.db_name`` are updated to the values that were actually used.
        """
        print('\n################# Searching for files ###################')
        paired_files_with_urls = []
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            files_with_urls1.update(files_with_urls2)
            files1, files2 = list(zip(*file_pairs))
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

        if self.dry:
            print('DRY: nothing to do. Exiting.')
            # exit() is a helper of the ``site`` module meant for interactive
            # sessions; raising SystemExit terminates the same way.
            raise SystemExit(0)

        release1 = get_version(file_pairs[0][0])
        release2 = get_version(file_pairs[0][1])

        # Derive the working directory from the releases when none was given.
        # The database name is NOT joined onto it here: that happens exactly
        # once below (joining twice produced "<dir>/<dir>/<db>").
        if not self.work_path:
            self.work_path = '%s___VS___%s' % (release1, release2)

        if not exists(self.work_path):
            print('\n################### Preparing directory ###################')
            print('Creating working directory: %s ...' % self.work_path, end=' ')
            makedirs(self.work_path)
            print('Done.')

        # Only the online search yields URLs to download from.
        if not self.no_url:
            self._download_files(paired_files_with_urls)

        print('\n################# Preparing Database ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (release1, release2)
        db_path = join(self.work_path, self.db_name)

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name, end=' ')
            open(db_path, 'w').close()
            print('Done.')

        self.db_name = init_database(db_path)

        for st_test_name in st_tests.split(','):
            print('\n################# Comparing Releases (%s) ###################' % st_test_name)
            st_test = tests[st_test_name]()

            if self.use_external_script_to_compare_files:
                file_comparison_ids = self._compare_with_external_script(file_pairs, st_test_name)
            else:
                file_comparison_ids = self._compare_in_process(file_pairs, st_test_name, st_test)

            # Nothing new was compared: leave the release description alone.
            if file_comparison_ids:
                self._store_release_comparison(release1, release2, st_test_name,
                                               title, file_comparison_ids)

    def _download_files(self, paired_files_with_urls):
        """Download the files that are still missing in the working directory."""
        print('\n################# Downloading the files ###################')
        total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
        check_disk_for_space(self.work_path, total_size)

        # The helpers receive their shared state through function attributes,
        # which have to be set before the worker processes are created.
        q = Queue()
        show_status_bar.q = q
        auth_download_file.q = q
        auth_download_file.work_dir = self.work_path

        Process(target=show_status_bar, args=(total_size,)).start()
        pool = Pool(2)
        try:
            pool.map(auth_download_file, files_to_download)
        finally:
            # Shut the workers down instead of leaving them to the GC.
            pool.close()
            pool.join()
        if total_size:
            print("Done.")

    def _compare_with_external_script(self, file_pairs, st_test_name):
        """Compare every pair in its own process and merge the partial DBs.

        Returns the ids reported by ``merge_dbs``, one per file pair.
        NOTE(review): the statistical test name is only used for the progress
        message here; it is not passed on to the external script.
        """
        arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i), self.clear_db]
                    for i, pair in enumerate(file_pairs)]
        pool = Pool(self.pool_size)
        try:
            pool.map(call_compare_using_files, arg_list)
        finally:
            pool.close()
            pool.join()

        print('\n################# Merging DBs (%s) ###################' % st_test_name)
        file_comparison_ids = []
        for i in range(len(file_pairs)):
            tmp_db = partial_db_name(self.db_name, i)
            print('Merging %s...' % (basename(tmp_db),), end=' ')
            file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
            remove(tmp_db)
            print('Done.')
        return file_comparison_ids

    def _compare_in_process(self, file_pairs, st_test_name, st_test):
        """Compare the not yet compared pairs in this process; return their ids."""
        file_comparison = RootFileComparison(self.db_name)
        file_comparison_ids = []
        for file1, file2 in file_pairs:
            if file_comparison.was_compared(file1, file2, st_test_name):
                print("Already compared:\n%s\n%s\n" % (file1, file2))
                continue
            print("Comparing:\n%s\n%s\n" % (file1, file2))
            file_comparison_ids.append(file_comparison.compare(
                join(self.work_path, file1), join(self.work_path, file2), st_test))
        return file_comparison_ids

    def _store_release_comparison(self, release1, release2, st_test_name, title, file_comparison_ids):
        """Link the given file comparisons to their ReleaseComparison row.

        The row is inserted first when the releases were not compared with
        this statistical test before.
        """
        release_comparison_id = self.was_compared(release1, release2, st_test_name)
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            if not release_comparison_id:
                print('Inserting release "%s VS %s" description.\n' % (release1, release2))
                if not title:
                    title = "%s__VS__%s" % (release1, release2)
                c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                               statistical_test) VALUES (?, ?, ?, ?)''', (title,
                               release1, release2, st_test_name))
                release_comparison_id = c.lastrowid
            c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                    WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
            conn.commit()
        finally:
            conn.close()
0259
0260
if __name__ == '__main__':
    # Script entry point: parse the options, run the release comparison,
    # optionally render static HTML and report the elapsed time.
    start = datetime.now()
    opts, args = parser.parse_args()

    # Both releases are mandatory.
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    rel_cmp = ReleaseComparison(
        work_path=opts.dir,
        db_name=opts.db_name,
        clear_db=opts.clear_db,
        dry=opts.dry,
        no_url=opts.no_url,
        use_external=True,
    )
    rel_cmp.compare(opts.release1, opts.fragments1,
                    opts.release2, opts.fragments2,
                    opts.st_tests, opts.url, opts.title)

    if opts.html:
        print('\n################# Generating static HTML #################')
        print('\n Warrning!!! Did NOT finished the implementation. \n')
        # Imported only when HTML output was requested.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - start
    print('################# Execution time: %s #################\n' % (elapsed,))
0275 print('################# Execution time: %s #################\n' % (datetime.now() - start,))