Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:35

0001 #! /usr/bin/env python3
0002 """
0003 The script compares two releases, generates SQLite3 database file with release
0004 comparison information.
0005 
0006 Author:  Albertas Gimbutas,  Vilnius University (LT)
0007 e-mail:  albertasgim@gmail.com
0008 
0009 Note: default Pool size for file comparison is 7.
0010 Note: did NOT finish static HTML generation implementation.
0011 """
0012 import sqlite3
0013 from datetime import datetime
0014 from multiprocessing import Pool, Queue, Process
0015 from subprocess import call
0016 from optparse import OptionParser, OptionGroup
0017 from os import makedirs, remove
0018 from os.path import basename, join, exists
0019 
0020 from Utilities.RelMon.utils_v2 import *
0021 from compare_using_files_v2 import RootFileComparison
0022 
0023 
##  Parse options
def _build_option_parser():
    """Create the command line parser and its 'Optional' option group.

    Returns a ``(parser, optional_group)`` tuple. Options are declared in
    tables of ``(flag, action, dest, default, help)`` and registered in the
    order they are listed, so ``--help`` output keeps a stable ordering.
    """
    cli = OptionParser(usage='Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
                             '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]')

    # Release selection and filename filters (registered on the parser itself).
    main_specs = (
        ('--re1', 'store', 'release1', None,
         'First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.'),
        ('--re2', 'store', 'release2', None,
         'Second CMSSW release for release comparison.'),
        ('--f1', 'store', 'fragments1', '',
         'Comma separated filename fragments that have or have not to be '
         'in RELEASE1 filenames. For "not include" use `!` before fragment, '
         'e.g. `--f1 FullSim,!2012`.'),
        ('--f2', 'store', 'fragments2', '',
         'Comma separated filename fragments that have or have not to be '
         'in RELEASE2 filenames. For "not include" use `!` before fragment.'),
    )

    # Everything else lives in the "Optional" group.
    optional_specs = (
        ('--st', 'store', 'st_tests', 'KS',
         'Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.'),
        ('--title', 'store', 'title', None,
         'Release comparison title.'),
        ('--dir', 'store', 'dir', None,
         'Directory to download and compare files in.'),
        ('--url', 'store', 'url', None,
         'URL to fetch ROOT files from. File search is recursive '
         'for links in given URL.'),
        ('--no-url', 'store_true', 'no_url', False,
         'Search for files in DIR (specified by --dir option), '
         'do NOT browse for files online.'),
        ('--db', 'store', 'db_name', None,
         'SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.'),
        ('--cl', 'store_true', 'clear_db', False,
         'Clean DB before comparison.'),
        ('--dry', 'store_true', 'dry', False,
         'Do not download or compare files, just show the progress.'),
        ('--html', 'store_true', 'html', False,
         'Generate static html. Default: %default.'),
    )

    for flag, action, dest, default, help_text in main_specs:
        cli.add_option(flag, action=action, dest=dest, default=default, help=help_text)

    extras = OptionGroup(cli, 'Optional')
    for flag, action, dest, default, help_text in optional_specs:
        extras.add_option(flag, action=action, dest=dest, default=default, help=help_text)
    cli.add_option_group(extras)
    return cli, extras


parser, optional_group = _build_option_parser()
0061 
0062 
def call_compare_using_files(args):
    """Run ``./compare_using_files_v2.py`` on one pair of files.

    ``args`` is a single 5-item sequence
    ``(file1, file2, work_path, db_name, clear_db)``; both file names are
    joined onto ``work_path`` before being passed to the script, and
    ``--cl`` is appended when ``clear_db`` is truthy.

    Returns the value returned by ``subprocess.call`` (the script's exit code).
    """
    first_file, second_file, directory, database, wipe_database = args
    cmd = [
        './compare_using_files_v2.py',
        join(directory, first_file),
        join(directory, second_file),
        '--db',
        database,
    ]
    if wipe_database:
        cmd += ['--cl']
    return call(cmd)
0069 
def partial_db_name(db_name, i):
    """Generates temporary database name.

    The name is ``<db_name without a trailing '.db'>___<i + 1>.db``.

    Only a literal ``.db`` suffix is removed. ``str.strip('.db')`` must not
    be used here: it strips any of the characters ``.``, ``d`` and ``b``
    from BOTH ends, which corrupts names such as ``build.db`` or paths
    starting with ``./``.
    """
    suffix = '.db'
    if db_name.endswith(suffix):
        base = db_name[:-len(suffix)]
    else:
        base = db_name
    return '%s___%d.db' % (base, i + 1)
0073 
def merge_dbs(main_db, partial_db):
    """Append all rows of the ``partial_db`` SQLite file to ``main_db``.

    Rows of ReleaseComparison, RootFileComparison, Directory and
    HistogramComparison are copied from the partial database. Ids that
    reference other tables are shifted by offsets taken from the current
    content of ``main_db`` so that they do not collide with existing rows.

    Returns the largest ``RootFileComparison.id`` in ``main_db`` after the
    merge (``None`` if that table is still empty).
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        ## Test if database is empty
        c.execute('SELECT * FROM Directory LIMIT 1;')
        directory_row = c.fetchall()

        ## Select offsets
        rel_cmp_offset, directory_offset, hist_cmp_offset = 0, 0, 0
        if directory_row:
            c.execute('SELECT count(*) FROM ReleaseComparison;')
            rel_cmp_offset = c.fetchone()[0]
            # max(id) is NULL for an empty table; coalesce keeps the offset
            # numeric instead of injecting "None" into the arithmetic below.
            c.execute('SELECT coalesce(max(id), 0) FROM Directory;')
            directory_offset = c.fetchone()[0]
            c.execute('SELECT coalesce(max(id), 0) FROM HistogramComparison;')
            hist_cmp_offset = c.fetchone()[0]

        ## Merge DBs
        # The file name is bound as a parameter so that paths containing
        # quotes cannot break (or alter) the statement.
        c.execute('ATTACH DATABASE ? AS partial;', (partial_db,))

        # `with conn` commits when all inserts succeed and rolls back otherwise.
        with conn:
            c.execute('''
                INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
                SELECT title, release1, release2, statistical_test
                FROM partial.ReleaseComparison;''')

            c.execute('''
                INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
                SELECT filename1, filename2, release_comparison_id + ?, directory_id + ?
                FROM partial.RootFileComparison;''',
                      (rel_cmp_offset, directory_offset))

            c.execute('''
                INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
                SELECT id + ?, name, parent_id + ?, from_histogram_id + ?, till_histogram_id + ?
                FROM partial.Directory;''',
                      (directory_offset, directory_offset, hist_cmp_offset, hist_cmp_offset))

            c.execute('''
                INSERT INTO HistogramComparison (name, p_value, directory_id)
                SELECT name, p_value, directory_id + ?
                FROM partial.HistogramComparison;''',
                      (directory_offset,))

        ## Select Last RootFileComparison ID
        c.execute('SELECT max(id) FROM RootFileComparison;')
        return c.fetchone()[0]
    finally:
        # Closing the connection also releases the attached partial database.
        conn.close()
0118 
0119 
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file."""

    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        """Store the comparison settings.

        work_path    -- directory the ROOT files are searched in / downloaded to.
        db_name      -- SQLite3 file name; auto-generated in compare() when empty.
        clear_db     -- truncate the database file before comparing.
        dry          -- in online mode, stop after the file search.
        no_url       -- search files on disk in ``work_path`` instead of online.
        use_external -- compare each file pair with the external
                        ``compare_using_files_v2.py`` script instead of in-process.
        """
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Return the id of a stored release comparison, or False if none exists.

        Looks up ReleaseComparison in ``self.db_name`` for a row matching both
        release names and the statistical test name.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            # Release the connection even when the query fails.
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, (optionally) download and compare the files of two releases.

        rel1, rel2     -- release names handed to the file search helpers.
        frags1, frags2 -- filename fragment filters handed to the search helpers.
        st_tests       -- comma separated statistical test names; each one is
                          looked up in ``tests``.
        url            -- start URL for the online search (ignored with no_url).
        title          -- title stored with a newly inserted ReleaseComparison
                          row; defaults to "<release1>__VS__<release2>".

        The search, download and database helpers used here come from the
        star import of ``Utilities.RelMon.utils_v2`` (not visible in this
        file), so their exact contracts are assumed from usage.
        """
        print('\n#################     Searching for files     ###################')
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            files_with_urls1.update(files_with_urls2)
            files1, files2 = list(zip(*file_pairs))
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

            if self.dry:
                print('DRY: nothing to do. Exiting.')
                # `exit()` is a helper of the interactive `site` module and is
                # not guaranteed to exist; raise SystemExit directly instead.
                raise SystemExit

            ## Create working directory if not given.
            if not self.work_path:
                self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))
                # db_name is intentionally NOT joined with work_path here: it is
                # joined exactly once below. Joining it in both places produced
                # "<work_path>/<work_path>/<db_name>" for a relative work_path.

            if not exists(self.work_path):
                print('\n###################      Preparing directory     ###################')
                print('Creating working directory: %s ...' % self.work_path, end=' ')
                makedirs(self.work_path)
                print('Done.')

            print('\n#################     Downloading the files     ###################')
            total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
            check_disk_for_space(self.work_path, total_size)

            ## Download needed files.
            q = Queue()
            show_status_bar.q = q
            auth_download_file.q = q
            auth_download_file.work_dir = self.work_path

            Process(target=show_status_bar, args=(total_size,)).start()
            download_pool = Pool(2)
            try:
                download_pool.map(auth_download_file, files_to_download)
            finally:
                # close() + join() lets the workers exit normally instead of
                # leaving the pool processes behind.
                download_pool.close()
                download_pool.join()
            if total_size:
                print("Done.")

        ## Create database
        print('\n#################     Preparing Database     ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name, end=' ')
            # Opening in 'w' mode truncates (or creates) the database file.
            open(join(self.work_path, self.db_name), 'w').close()
            print('Done.')

        ## Compare file pairs.
        self.db_name = init_database(join(self.work_path, self.db_name))

        # TODO: Use multiprocessing for this task.
        for st_test_name in st_tests.split(','):
            print('\n#################     Comparing Releases (%s)     ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external_script_to_compare_files:
                # Compare files using compare_using_files_v2.py
                # Each pair writes into its own partial database file.
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                          self.clear_db] for i, pair in enumerate(file_pairs)]
                pool = Pool(7)
                try:
                    pool.map(call_compare_using_files, arg_list)
                finally:
                    pool.close()
                    pool.join()

                # Merge databases
                print('\n#################     Merging DBs (%s)     ###################' % st_test_name)
                for i, pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    print('Merging %s...' % (basename(tmp_db),), end=' ')
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    # TODO: If files are not found display nice message.
                    # TODO: Maybe subprocess would control the unwanted reports of RootFileComparison.compare()
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            ## Calculate statistics for the release.
            release1 = get_version(file_pairs[0][0])
            release2 = get_version(file_pairs[0][1])
            if some_files_compared:
                release_comparison_id = self.was_compared(release1, release2, st_test_name)
                conn = sqlite3.connect(self.db_name)
                try:
                    c = conn.cursor()
                    if not release_comparison_id:
                        print('Inserting release "%s  VS  %s" description.\n' % (release1, release2))
                        if not title:
                            title = "%s__VS__%s" % (release1, release2)
                        c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                                       statistical_test) VALUES (?, ?, ?, ?)''', (title,
                                    release1, release2, st_test_name))
                        release_comparison_id = c.lastrowid
                    # Link every file comparison made in this pass to the release row.
                    c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                            WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
                    conn.commit()
                finally:
                    conn.close()
0258 
0259 
if __name__ == '__main__':
    # Script entry point: parse the command line, run the release comparison,
    # optionally render static HTML, then report the elapsed wall-clock time.
    start = datetime.now()
    opts, args = parser.parse_args()

    # Both releases are mandatory; parser.error() prints usage and exits.
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    rel_cmp = ReleaseComparison(
        work_path=opts.dir,
        db_name=opts.db_name,
        clear_db=opts.clear_db,
        dry=opts.dry,
        no_url=opts.no_url,
        use_external=True,
    )
    rel_cmp.compare(
        opts.release1,
        opts.fragments1,
        opts.release2,
        opts.fragments2,
        opts.st_tests,
        url=opts.url,
        title=opts.title,
    )

    if opts.html:
        print('\n#################     Generating static HTML    #################')
        print('\n  Warrning!!!  Did NOT finished the implementation. \n')
        # Imported lazily so the HTML module is only needed when --html is given.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - start
    print('#################     Execution time: %s    #################\n' % (elapsed,))