Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:31:49

0001 #! /usr/bin/env python3
0002 """
0003 The script compares two releases, generates SQLite3 database file with release
0004 comparison information.
0005 
0006 Author:  Albertas Gimbutas,  Vilnius University (LT)
0007 e-mail:  albertasgim@gmail.com
0008 
0009 Note: default Pool size for file comparison is 7.
0010 Note: did NOT finish static HTML generation implementation.
0011 """
0012 from __future__ import print_function
0013 import sqlite3
0014 from datetime import datetime
0015 from multiprocessing import Pool, Queue, Process
0016 from subprocess import call
0017 from optparse import OptionParser, OptionGroup
0018 from os import makedirs, remove
0019 from os.path import basename, join, exists
0020 
0021 from Utilities.RelMon.utils_v2 import *
0022 from compare_using_files_v2 import RootFileComparison
0023 
0024 
##  Command-line interface
# Mandatory part: the two releases to compare and the optional filename
# fragment filters applied to each of them.
parser = OptionParser(
    usage=('Usage: %prog --re1 RELEASE1 [--f1 FR,FR,..] '
           '--re2 RELEASE2 [--f2 FR,FR,..] [--st ST_TESTS] [options]'))
parser.add_option(
    '--re1', action='store', dest='release1', default=None,
    help='First CMSSW release for release comparison, e.g. CMSSW_5_3_2_pre7.')
parser.add_option(
    '--re2', action='store', dest='release2', default=None,
    help='Second CMSSW release for release comparison.')
parser.add_option(
    '--f1', action='store', dest='fragments1', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE1 filenames. For "not include" use `!` before fragment, '
          'e.g. `--f1 FullSim,!2012`.'))
parser.add_option(
    '--f2', action='store', dest='fragments2', default='',
    help=('Comma separated filename fragments that have or have not to be '
          'in RELEASE2 filenames. For "not include" use `!` before fragment.'))

# Everything else is grouped under the "Optional" heading in --help output.
optional_group = OptionGroup(parser, 'Optional')
optional_group.add_option(
    '--st', action='store', dest='st_tests', default='KS',
    help='Comma separated statistical tests to use. \nAvailable: KS, Chi2. Default: %default.')
optional_group.add_option(
    '--title', action='store', dest='title', default=None,
    help='Release comparison title.')
optional_group.add_option(
    '--dir', action='store', dest='dir', default=None,
    help='Directory to download and compare files in.')
optional_group.add_option(
    '--url', action='store', dest='url', default=None,
    help=('URL to fetch ROOT files from. File search is recursive '
          'for links in given URL.'))
optional_group.add_option(
    '--no-url', action='store_true', dest='no_url', default=False,
    help=('Search for files in DIR (specified by --dir option), '
          'do NOT browse for files online.'))
optional_group.add_option(
    '--db', action='store', dest='db_name', default=None,
    help='SQLite3 .db filename to use for the comparison. Default: auto-generated SQLite3 .db file.')
optional_group.add_option(
    '--cl', action='store_true', dest='clear_db', default=False,
    help='Clean DB before comparison.')
optional_group.add_option(
    '--dry', action='store_true', dest='dry', default=False,
    help='Do not download or compare files, just show the progress.')
optional_group.add_option(
    '--html', action='store_true', dest='html', default=False,
    help='Generate static html. Default: %default.')
parser.add_option_group(optional_group)
0062 
0063 
def call_compare_using_files(args):
    """Run ``./compare_using_files_v2.py`` on one file pair in a subprocess.

    ``args`` is a single 5-item sequence (so the function can be used with
    ``Pool.map``): ``(file1, file2, work_path, db_name, clear_db)``.  Both
    file names are joined onto ``work_path``; ``--cl`` is passed on only
    when ``clear_db`` is truthy.

    Returns whatever ``subprocess.call`` returns for the command.
    """
    first, second, directory, database, wipe = args
    cmd = ['./compare_using_files_v2.py']
    cmd.extend(join(directory, name) for name in (first, second))
    cmd.extend(['--db', database])
    if wipe:
        cmd.append('--cl')
    return call(cmd)
0070 
def partial_db_name(db_name, i):
    """Generates temporary database name.

    The name is ``<db_name without a trailing '.db'>___<i + 1>.db``.

    Only a literal ``.db`` suffix is removed.  ``str.strip('.db')`` must not
    be used here: it strips any of the characters ``.``, ``d`` and ``b``
    from *both* ends, which mangles names such as ``./out.db`` or ``bad.db``.
    """
    suffix = '.db'
    stem = db_name[:-len(suffix)] if db_name.endswith(suffix) else db_name
    return '%s___%d.db' % (stem, i + 1)
0074 
def merge_dbs(main_db, partial_db):
    """Append the rows of ``partial_db`` to ``main_db``.

    The rows of ReleaseComparison, RootFileComparison, Directory and
    HistogramComparison are copied from the partial database into the main
    one.  Foreign-key style columns are shifted by offsets taken from the
    current content of the main database so that they keep pointing at the
    copied rows.

    Parameters:
        main_db: path of the SQLite3 database that receives the rows.
        partial_db: path of the SQLite3 database whose rows are copied.

    Returns:
        The largest ``id`` in ``main_db``'s RootFileComparison table after
        the merge (``None`` if that table is empty).
    """
    conn = sqlite3.connect(main_db)
    try:
        c = conn.cursor()

        ## Test if database is empty
        c.execute('''SELECT * FROM Directory limit 1;''')
        directory_row = c.fetchall()

        ## Select offsets
        rel_cmp_offset, directory_offset, hist_cmp_offset = 0, 0, 0
        if directory_row:
            c.execute('''SELECT count(*) FROM ReleaseComparison;''')
            rel_cmp_offset = c.fetchone()[0]
            # max(id) is NULL for an empty table; treat that as offset 0
            # instead of formatting ``None`` into the SQL below.
            c.execute('''SELECT max(id) FROM Directory;''')
            directory_offset = c.fetchone()[0] or 0
            c.execute('''SELECT max(id) FROM HistogramComparison;''')
            hist_cmp_offset = c.fetchone()[0] or 0

        ## Merge DBs
        # The path is bound as a parameter so that quotes in it cannot break
        # (or inject into) the statement.
        c.execute('ATTACH DATABASE ? AS partial;', (partial_db,))
        c.executescript("""
        BEGIN;

        INSERT INTO ReleaseComparison (title, release1, release2, statistical_test)
        SELECT title, release1, release2, statistical_test FROM partial.ReleaseComparison;

        INSERT INTO RootFileComparison (filename1, filename2, release_comparison_id, directory_id)
        SELECT filename1, filename2, release_comparison_id+{rel}, directory_id+{dir} FROM partial.RootFileComparison;

        INSERT INTO Directory (id, name, parent_id, from_histogram_id, till_histogram_id)
        SELECT id+{dir}, name, parent_id+{dir}, from_histogram_id+{hist}, till_histogram_id+{hist} FROM partial.Directory;

        INSERT INTO HistogramComparison (name, p_value, directory_id)
        SELECT name, p_value, directory_id+{dir} FROM partial.HistogramComparison;

        COMMIT;""".format(rel=int(rel_cmp_offset), dir=int(directory_offset),
                          hist=int(hist_cmp_offset)))

        ## Select Last RootFileComparison ID
        c.execute('''SELECT max(id) FROM RootFileComparison;''')
        return c.fetchone()[0]
    finally:
        # Always release the connection (and with it the attached DB),
        # also when one of the statements above fails.
        conn.close()
0119 
0120 
class ReleaseComparison(object):
    """Generates release comparison information and stores it on SQLite3 .db file.

    Constructor arguments:
        work_path: directory the ROOT files are searched in / downloaded to.
            When empty and files are fetched online, a directory name is
            derived from the versions of the first file pair.
        db_name: SQLite3 file name, interpreted relative to ``work_path``.
            When empty, a name is derived from the first file pair.
        clear_db: truncate the database file before comparing.
        dry: in online mode, stop after the file search.
        no_url: search files on disk in ``work_path`` instead of online.
        use_external: compare every file pair with the external
            ``compare_using_files_v2.py`` script (in a process pool) and
            merge the resulting partial databases afterwards.
    """
    def __init__(self, work_path=None, db_name=None, clear_db=False, dry=False, no_url=False, use_external=False):
        self.work_path = work_path
        self.db_name = db_name
        self.clear_db = clear_db
        self.dry = dry
        self.no_url = no_url
        self.use_external_script_to_compare_files = use_external

    def was_compared(self, release1, release2, st_test_name):
        """Return the ReleaseComparison id stored for the given releases and
        statistical test, or ``False`` when there is no such row."""
        conn = sqlite3.connect(self.db_name)
        try:
            c = conn.cursor()
            c.execute('''SELECT id FROM ReleaseComparison WHERE release1=? AND
                    release2=? AND statistical_test=?''', (release1, release2, st_test_name))
            release_comparison_id = c.fetchone()
        finally:
            # Do not leak the connection if the query fails.
            conn.close()
        if release_comparison_id:
            return release_comparison_id[0]
        return False

    def compare(self, rel1, frags1, rel2, frags2, st_tests, url=None, title=None):
        """Find, (optionally) download and compare the files of two releases.

        Parameters:
            rel1, rel2: release names handed to the file search helpers.
            frags1, frags2: filename fragment filters handed to the file
                search helpers.
            st_tests: comma separated statistical test names; each name is
                looked up in ``tests``.
            url: start URL for the online search (unused with ``no_url``).
            title: title stored for a newly inserted release comparison;
                defaults to ``"<release1>__VS__<release2>"``.

        Raises:
            ValueError: when the search yields no file pairs to compare.
        """
        print('\n#################     Searching for files     ###################')
        if self.no_url:
            print('Searching for files on disk at %s' % (self.work_path))
            files1, files2 = search_on_disk(self.work_path, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files1, files2)
            if not file_pairs:
                raise ValueError('No matching file pairs found for %s and %s.' % (rel1, rel2))
        else:
            print('Searching for files online at:')
            files_with_urls1, files_with_urls2 = recursive_search_online(url, rel1, frags1, rel2, frags2)
            file_pairs = make_file_pairs(files_with_urls1, files_with_urls2)
            if not file_pairs:
                # Without this check the unpacking below fails with an
                # unhelpful "not enough values to unpack" error.
                raise ValueError('No matching file pairs found for %s and %s.' % (rel1, rel2))
            files_with_urls1.update(files_with_urls2)
            files1, files2 = list(zip(*file_pairs))
            paired_files_with_urls = [(name, files_with_urls1[name]) for name in files1 + files2]

            if self.dry:
                print('DRY: nothing to do. Exiting.')
                exit()

            ## Create working directory if not given.
            # NOTE: db_name is deliberately NOT joined with work_path here.
            # It is joined below (when clearing and when initialising the
            # database); joining it here as well produced a nested
            # "<work_path>/<work_path>/<db_name>" path that does not exist.
            if not self.work_path:
                self.work_path = '%s___VS___%s' % (get_version(files1[0]), get_version(files2[0]))

            if not exists(self.work_path):
                print('\n###################      Preparing directory     ###################')
                print('Creating working directory: %s ...' % self.work_path, end=' ')
                makedirs(self.work_path)
                print('Done.')

            print('\n#################     Downloading the files     ###################')
            total_size, files_to_download = get_size_to_download(self.work_path, paired_files_with_urls)
            check_disk_for_space(self.work_path, total_size)

            ## Download needed files.
            # The queue and target directory are handed to the helpers as
            # function attributes; they must be set before the worker
            # processes are created.
            q = Queue()
            show_status_bar.q = q
            auth_download_file.q = q
            auth_download_file.work_dir = self.work_path

            Process(target=show_status_bar, args=(total_size,)).start()
            download_pool = Pool(2)
            try:
                download_pool.map(auth_download_file, files_to_download)
            finally:
                # Release the worker processes once the downloads are done.
                download_pool.close()
                download_pool.join()
            if total_size:
                print("Done.")

        ## Create database
        print('\n#################     Preparing Database     ###################')
        if not self.db_name:
            self.db_name = '%s___VS___%s.db' % (get_version(file_pairs[0][0]), get_version(file_pairs[0][1]))

        if self.clear_db:
            print('Clearing DB: %s...' % self.db_name, end=' ')
            # Opening for writing truncates the file.
            open(join(self.work_path, self.db_name), 'w').close()
            print('Done.')

        ## Compare file pairs.
        self.db_name = init_database(join(self.work_path, self.db_name))

        # TODO: Use multiprocessing for this task.
        for st_test_name in st_tests.split(','):
            print('\n#################     Comparing Releases (%s)     ###################' % st_test_name)
            st_test = tests[st_test_name]()

            some_files_compared = False
            file_comparison_ids = []
            if self.use_external_script_to_compare_files:
                # Compare files using compare_using_files_v2.py
                arg_list = [list(pair) + [self.work_path, partial_db_name(self.db_name, i),
                                                self.clear_db] for i, pair in enumerate(file_pairs)]
                pool = Pool(7)
                try:
                    pool.map(call_compare_using_files, arg_list)
                finally:
                    # A new pool is created per statistical test; release
                    # its workers before moving on.
                    pool.close()
                    pool.join()

                # Merge databases
                print('\n#################     Merging DBs (%s)     ###################' % st_test_name)
                for i, pair in enumerate(file_pairs):
                    tmp_db = partial_db_name(self.db_name, i)
                    print('Merging %s...' % (basename(tmp_db),), end=' ')
                    file_comparison_ids.append(merge_dbs(self.db_name, tmp_db))
                    remove(tmp_db)
                    print('Done.')
                    some_files_compared = True
            else:
                file_comparison = RootFileComparison(self.db_name)

                for file1, file2 in file_pairs:
                    # TODO: If files are not found display nice message.
                    # TODO: Maybe subprocess would control the unwanted reports of RootFileComparison.compare()
                    file1_path = join(self.work_path, file1)
                    file2_path = join(self.work_path, file2)

                    if not file_comparison.was_compared(file1, file2, st_test_name):
                        print("Comparing:\n%s\n%s\n" % (file1, file2))
                        file_comparison_id = file_comparison.compare(file1_path, file2_path, st_test)
                        file_comparison_ids.append(file_comparison_id)
                        some_files_compared = True
                    else:
                        print("Already compared:\n%s\n%s\n" % (file1, file2))

            ## Calculate statistics for the release.
            release1 = get_version(file_pairs[0][0])
            release2 = get_version(file_pairs[0][1])
            if some_files_compared:
                release_comparison_id = self.was_compared(release1, release2, st_test_name)
                conn = sqlite3.connect(self.db_name)
                try:
                    c = conn.cursor()
                    if not release_comparison_id:
                        print('Inserting release "%s  VS  %s" description.\n' % (release1, release2))
                        if not title:
                            title = "%s__VS__%s" % (release1, release2)
                        c.execute('''INSERT INTO ReleaseComparison(title, release1, release2,
                                       statistical_test) VALUES (?, ?, ?, ?)''', (title,
                                    release1, release2, st_test_name))
                        release_comparison_id = c.lastrowid
                    # Link every file comparison made above to the release row.
                    c.executemany('''UPDATE RootFileComparison SET release_comparison_id = ?
                            WHERE id == ?''', [(release_comparison_id, fid) for fid in file_comparison_ids])
                    conn.commit()
                finally:
                    conn.close()
0259 
0260 
if __name__ == '__main__':
    # Script entry point: parse the options, run the comparison and report
    # how long the whole run took.
    started_at = datetime.now()
    opts, args = parser.parse_args()
    if not (opts.release1 and opts.release2):
        parser.error('Not all releases specified. Please check --re1 and --re2 options.')

    # The command-line tool always compares via the external script.
    rel_cmp = ReleaseComparison(work_path=opts.dir,
                                db_name=opts.db_name,
                                clear_db=opts.clear_db,
                                dry=opts.dry,
                                no_url=opts.no_url,
                                use_external=True)
    rel_cmp.compare(opts.release1, opts.fragments1,
                    opts.release2, opts.fragments2,
                    opts.st_tests, opts.url, opts.title)

    if opts.html:
        print('\n#################     Generating static HTML    #################')
        print('\n  Warrning!!!  Did NOT finished the implementation. \n')
        # Imported lazily: only needed when --html is requested.
        from Utilities.RelMon.web.dbfile2html import dbfile2html
        dbfile2html(rel_cmp.db_name, opts.dir)

    elapsed = datetime.now() - started_at
    print('#################     Execution time: %s    #################\n' % (elapsed,))