File indexing completed on 2024-11-26 02:34:35
0001
0002 """
0003 The script compares two releases and generates an SQLite3 database file with the
0004 release comparison information.
0005
0006 Author: Albertas Gimbutas, Vilnius University (LT)
0007 e-mail: albertasgim@gmail.com
0008
0009 Note: default Pool size for file comparison is 7.
0010 Note: static HTML generation is NOT fully implemented yet.
0011 """
0012 import sqlite3
0013 from datetime import datetime
0014 from multiprocessing import Pool, Queue, Process
0015 from subprocess import call
0016 from optparse import OptionParser, OptionGroup
0017 from os import makedirs, remove
0018 from os.path import basename, join, exists
0019
0020 from Utilities.RelMon.utils_v2 import *
0021 from compare_using_files_v2 import RootFileComparison
0022
0023
0024
# Command-line interface.  The two release names are declared as ordinary
# options here; their presence is enforced in the ``__main__`` section.
parser = OptionParser(
    usage=('Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
           '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]'))

# Releases to compare.
parser.add_option(
    '--re1', action='store', dest='release1', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', action='store', dest='release2', default=None,
    help='Second CMSSW release for release comparison.')

# Per-release filename fragment filters (comma separated, `!` negates).
parser.add_option(
    '--f1', action='store', dest='fragments1', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE1 filenames. For "not include" use `!` before fragment, '
          'e.g. `--f1 FullSim,!2012`.'))
parser.add_option(
    '--f2', action='store', dest='fragments2', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE2 filenames. For "not include" use `!` before fragment.'))

# Everything else is grouped under an "Optional" heading in --help output.
optional_group = OptionGroup(parser, 'Optional')
optional_group.add_option(
    '--st', action='store', dest='st_tests', default='KS',
    help='Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', action='store', dest='title', default=None,
    help='Release comparison title.')
optional_group.add_option(
    '--dir', action='store', dest='dir', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', action='store', dest='url', default=None,
    help=('URL to fetch ROOT files from. File search is recursive '
          'for links in given URL.'))
optional_group.add_option(
    '--no-url', action='store_true', dest='no_url', default=False,
    help=('Search for files in DIR (specified by --dir option), '
          'do NOT browse for files online.'))
optional_group.add_option(
    '--db', action='store', dest='db_name', default=None,
    help='SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', action='store_true', dest='clear_db', default=False,
    help='Clean DB before comparison.')
optional_group.add_option(
    '--dry', action='store_true', dest='dry', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', action='store_true', dest='html', default=False,
    help='Generate static html. Default: %default.')
parser.add_option_group(optional_group)
0061
0062
def call_compare_using_files(args):
    """Run ``./compare_using_files_v2.py`` on one pair of files.

    Takes a single packed argument so it can be used with ``Pool.map``.

    :param args: 5-item sequence ``(file1, file2, work_path, db_name,
        clear_db)``; both file names are joined onto ``work_path``.
    :returns: whatever ``subprocess.call`` returns for the command.
    """
    first, second, directory, database, wipe = args
    # '--cl' is forwarded only when the caller asked for a clean database.
    clear_flag = ['--cl'] if wipe else []
    return call(['./compare_using_files_v2.py',
                 join(directory, first),
                 join(directory, second),
                 '--db', database] + clear_flag)
0069
def partial_db_name(db_name, i):
    """Generates temporary database name.

    The result is ``<db_name without a trailing '.db'>___<i + 1>.db``.

    :param db_name: name or path of the main database file.
    :param i: zero-based index of the partial database.
    :returns: the partial database name as a string.
    """
    # str.strip('.db') would remove any of the characters '.', 'd', 'b' from
    # BOTH ends (e.g. './dbs/x.db' -> '/dbs/x'), so cut only the suffix.
    suffix = '.db'
    stem = db_name[:-len(suffix)] if db_name.endswith(suffix) else db_name
    return '%s___%d.db' % (stem, i + 1)
0073
def merge_dbs(main_db, partial_db):
    """Append the contents of ``partial_db`` to ``main_db``.

    Rows of the ``ReleaseComparison``, ``RootFileComparison``, ``Directory``
    and ``HistogramComparison`` tables are copied from the partial database.
    Ids that refer to other rows are shifted by offsets taken from the current
    contents of the main database, so they do not collide with existing rows.

    :param main_db: path of the database that receives the rows.
    :param partial_db: path of the database to copy the rows from.
    :returns: ``max(id)`` of ``RootFileComparison`` in the main database after
        the merge (``None`` if that table is still empty).
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        # Offsets stay 0 while the main database has no Directory rows yet.
        c.execute('''SELECT * FROM Directory limit 1;''')
        main_has_rows = bool(c.fetchall())

        rel_cmp_offset = directory_offset = hist_cmp_offset = 0
        if main_has_rows:
            # NOTE(review): this offset is a row count, not max(id); the two
            # agree only while ReleaseComparison ids are dense -- confirm.
            c.execute('''SELECT count(*) FROM ReleaseComparison;''')
            rel_cmp_offset = c.fetchone()[0]
            # max() yields NULL on an empty table; treat that as "no offset"
            # instead of formatting None into the SQL below.
            c.execute('''SELECT max(id) FROM Directory;''')
            directory_offset = c.fetchone()[0] or 0
            c.execute('''SELECT max(id) FROM HistogramComparison;''')
            hist_cmp_offset = c.fetchone()[0] or 0

        # Bind the path as a parameter so quotes in it cannot break the SQL.
        # ATTACH has to run outside a transaction; none is open at this point.
        c.execute('ATTACH DATABASE ? AS partial;', (partial_db,))

        # All four copies are committed together, or rolled back on error.
        with conn:
            c.execute('''
                INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
                SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;''')

            c.execute('''
                INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
                SELECT filename1, filename2, release_comparison_id+?, directory_id+? FROM partial.RootFileComparison;''',
                      (rel_cmp_offset, directory_offset))

            c.execute('''
                INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
                SELECT id+?, name, parent_id+?, from_histogram_id+?, till_histogram_id+? FROM partial.Directory;''',
                      (directory_offset, directory_offset, hist_cmp_offset, hist_cmp_offset))

            c.execute('''
                INSERT INTO HistogramComparison (name, p_value, directory_id)
                SELECT name, p_value, directory_id+? FROM partial.HistogramComparison;''',
                      (directory_offset,))

        c.execute('''SELECT max(id) FROM RootFileComparison;''')
        return c.fetchone()[0]
    finally:
        # Closing the connection also detaches the partial database.
        conn.close()
0118
0119
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file.

    The helpers ``search_on_disk``, ``recursive_search_online``,
    ``make_file_pairs``, ``get_version``, ``get_size_to_download``,
    ``check_disk_for_space``, ``show_status_bar``, ``auth_download_file``,
    ``init_database`` and ``tests`` are not defined in this file; presumably
    they come from the wildcard import of ``Utilities.RelMon.utils_v2``.
    """

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        """Store the comparison settings.

        :param work_path: directory holding the files and the database; when
            empty it is derived from the first file pair on the online path.
        :param db_name: database file name; when empty it is derived from the
            first file pair.
        :param clear_db: truncate the database file before use.
        :param dry: stop after the file search.
        :param no_url: search for files in ``work_path`` instead of online.
        :param use_external: compare each file pair through
            ``call_compare_using_files`` and merge the partial databases,
            instead of using ``RootFileComparison`` in this process.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Look up an existing ``ReleaseComparison`` row.

        :returns: the row id for the given releases and statistical test, or
            ``False`` when there is no such row.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            # Close the connection even if the query fails.
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def _prepare_work_dir(self, file_pairs):
        """Pick a default working directory if needed and create it."""
        if not self.work_path:
            self.work_path = '%s___VS___%s' % (get_version(file_pairs[0][0]),
                                               get_version(file_pairs[0][1]))
            # self.db_name is deliberately NOT joined onto work_path here:
            # compare() joins it when the database is prepared, and joining
            # twice would point at <work_path>/<work_path>/<db_name>.

        if not exists(self.work_path):
            print('\n################### Preparing directory ###################')
            print('Creating working directory: %s ...' % self.work_path, end=' ')
            makedirs(self.work_path)
            print('Done.')

    def _download_files(self, paired_files_with_urls):
        """Download the files that ``get_size_to_download`` reports as needed."""
        print('\n################# Downloading the files ###################')
        total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
        check_disk_for_space(self.work_path, total_size)

        # The queue and target directory are handed to the helper functions
        # through function attributes.
        q = Queue()
        show_status_bar.q = q
        auth_download_file.q = q
        auth_download_file.work_dir = self.work_path

        Process(target=show_status_bar, args=(total_size,)).start()
        # The context manager releases the worker processes once map() is done.
        with Pool(2) as pool:
            pool.map(auth_download_file, files_to_download)
        if total_size:
            print("Done.")

    def _store_release_comparison(self, release1, release2, st_test_name, title, file_comparison_ids):
        """Insert the release description if missing and link file comparisons to it."""
        release_comparison_id = self.was_compared(release1, release2, st_test_name)
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            if not release_comparison_id:
                print('Inserting release "%s VS %s" description.\n' % (release1, release2))
                if not title:
                    title = "%s__VS__%s" % (release1, release2)
                c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                                   statistical_test) VALUES (?, ?, ?, ?)''', (title,
                                release1, release2, st_test_name))
                release_comparison_id = c.lastrowid
            c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                    WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
            conn.commit()
        finally:
            conn.close()

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, optionally download, and compare the files of two releases.

        :param rel1: first release name, passed to the search helper.
        :param frags1: filename fragments for the first release.
        :param rel2: second release name.
        :param frags2: filename fragments for the second release.
        :param st_tests: comma separated names of statistical tests; each name
            is looked up in ``tests``.
        :param url: URL passed to the online search (unused with ``no_url``).
        :param title: title stored with a newly inserted release comparison;
            defaults to ``<release1>__VS__<release2>``.
        :raises ValueError: when no file pairs were found.
        :raises SystemExit: in dry mode, once the search has finished.
        """
        print('\n################# Searching for files ###################')
        files_with_urls = None
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls, files_with_urls2)
            files_with_urls.update(files_with_urls2)

        # Fail with a clear message instead of an unpacking/index error below.
        if not file_pairs:
            raise ValueError('No matching file pairs found for releases %s and %s.' % (rel1, rel2))

        if self.dry:
            print('DRY: nothing to do. Exiting.')
            # Same effect as exit(), without relying on the site module.
            raise SystemExit

        # URLs exist only for files found online, so the directory set-up and
        # the download apply to that path only.
        if not self.no_url:
            files1, files2 = list(zip(*file_pairs))
            paired_files_with_urls = [(name, files_with_urls[name]) for name in files1 + files2]
            self._prepare_work_dir(file_pairs)
            self._download_files(paired_files_with_urls)

        print('\n################# Preparing Database ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name, end=' ')
            # Opening for writing truncates the file.
            open(join(self.work_path, self.db_name), 'w').close()
            print('Done.')

        # From here on self.db_name is whatever init_database returns.
        self.db_name = init_database(join(self.work_path, self.db_name))

        release1 = get_version(file_pairs[0][0])
        release2 = get_version(file_pairs[0][1])

        for st_test_name in st_tests.split(','):
            print('\n################# Comparing Releases (%s) ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external_script_to_compare_files:
                # One external process per file pair, each writing to its own
                # partial database.
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                          self.clear_db] for i, pair in enumerate(file_pairs)]
                with Pool(7) as pool:
                    pool.map(call_compare_using_files, arg_list)

                print('\n################# Merging DBs (%s) ###################' % st_test_name)
                for i, pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    print('Merging %s...' % (basename(tmp_db),), end=' ')
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            if some_files_compared:
                self._store_release_comparison(release1, release2, st_test_name,
                                               title, file_comparison_ids)
0258
0259
if __name__ == '__main__':
    # Remember when the run started; the elapsed time is printed at the end.
    started_at = datetime.now()

    opts, args = parser.parse_args()
    # Both release names are required.
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    # The command-line entry point always compares through the external script.
    rel_cmp = ReleaseComparison(
        work_path=opts.dir,
        db_name=opts.db_name,
        clear_db=opts.clear_db,
        dry=opts.dry,
        no_url=opts.no_url,
        use_external=True,
    )
    rel_cmp.compare(
        opts.release1, opts.fragments1,
        opts.release2, opts.fragments2,
        opts.st_tests, opts.url, opts.title,
    )

    if opts.html:
        print('\n################# Generating static HTML #################')
        print('\n Warrning!!! Did NOT finished the implementation. \n')
        # Imported lazily: only needed when --html is given.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - started_at
    print('################# Execution time: %s #################\n' % (elapsed,))
0274 print('################# Execution time: %s #################\n' % (datetime.now() - start,))