Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:10:08

0001 #!/usr/bin/env python3
0002 from __future__ import print_function
0003 import re
0004 import json
0005 import ROOT
0006 import sqlite3
0007 import argparse
0008 
# Command-line interface: one positional ROOT input file and an optional
# SQLite output path (defaults to 'root.sqlite').
parser = argparse.ArgumentParser(description="Convert arbitrary ROOT file to SQLite database, mapping TTrees to tables and converting TObjects to JSON.")

parser.add_argument('inputfile', help='ROOT file to read')
parser.add_argument('-o', '--output', help='SQLite file to write', default='root.sqlite')
args = parser.parse_args()

f = ROOT.TFile.Open(args.inputfile)
# Fail early with a clear message, and before the output database is created,
# when the input could not be opened (null or zombie file handle).
if not f or f.IsZombie():
    parser.error("cannot open ROOT file: %s" % args.inputfile)
db = sqlite3.connect(args.output)

# Objects that are not TTrees, keyed by their path inside the ROOT file;
# filled by the main loop and written out as one key/value table.
basic_objects = {}
0019 
# Patterns locating bare `inf` / `nan` tokens in ROOT's JSON output. A token
# only matches when preceded by '-', ' ' or '[' and followed by ',', '}' or
# ']'; both neighbours are captured so a substitution can put them back.
# Raw strings keep `\[` and `\]` from being read as (invalid) string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0022 
def tosqlite(x):
    """Convert a value read from ROOT into something sqlite3 can store.

    Args:
        x: a branch value or object read from the ROOT file.

    Returns:
        - for a ``ROOT.string``: its content as text, or the raw bytes when
          the content is bytes that are not valid UTF-8;
        - for an ``int`` or ``float``: the value itself;
        - for anything else: a JSON string produced from
          ``ROOT.TBufferJSON.ConvertToJSON``. If that conversion fails, a JSON
          object with the keys ``root2sqlite_error`` and
          ``root2sqlite_object`` is returned instead of raising.
    """
    if isinstance(x, ROOT.string):
        raw = x.data()
        if isinstance(raw, bytes):
            try:
                return raw.decode("utf-8")
            except UnicodeDecodeError:
                # Not text: hand the bytes to sqlite3 unchanged (stored as BLOB).
                return raw
        return str(raw)
    if isinstance(x, (int, float)):
        return x
    try:
        rootobj = str(ROOT.TBufferJSON.ConvertToJSON(x))
        # turns out ROOT does not generate valid JSON for NaN/inf
        clean = nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', rootobj))
        # Round-trip through the json module so only strictly valid JSON
        # (allow_nan=False) ends up in the database.
        obj = json.loads(clean)
        return json.dumps(obj, allow_nan=False)
    except Exception as e:
        # Deliberately best-effort: one unconvertible object must not abort
        # the whole conversion, so the failure is recorded in its place.
        return json.dumps({"root2sqlite_error": repr(e), "root2sqlite_object": repr(x)})
0045 
def columnescape(s):
    """Return `s` adjusted so it can be used as a column name.

    A name that matches a reserved word (compared case-insensitively) gets a
    trailing underscore; any other name is returned unchanged.
    """
    # add whatever is not a valid column name here
    reserved = ("index",)
    return s + "_" if s.lower() in reserved else s
0053 
def _quote_identifier(identifier):
    """Return `identifier` as a double-quoted SQL identifier ('"' doubled)."""
    return '"%s"' % identifier.replace('"', '""')


def treetotable(ttree, name):
    """Copy a TTree into a new SQLite table with one column per branch.

    Args:
        ttree: the tree to read; its branches define the columns.
        name: path of the tree inside the ROOT file. '/' is replaced by '_'
            to form the table name.

    The CREATE TABLE and INSERT statements are printed before being executed
    on the module-level `db` connection. Table and column names are quoted so
    that names containing spaces, dots or other punctuation stay valid SQL.
    """
    table = _quote_identifier(name.replace("/", "_"))
    branches = [b.GetName() for b in ttree.GetListOfBranches()]
    colnames = ", ".join(_quote_identifier(columnescape(b)) for b in branches)
    create = "CREATE TABLE %s(%s);" % (table, colnames)
    print(create)
    db.execute(create)

    def rows():
        # Convert one entry at a time so the whole tree never has to be held
        # in memory before it is inserted.
        for i in range(ttree.GetEntries()):
            ttree.GetEntry(i)
            yield tuple(tosqlite(getattr(ttree, b)) for b in branches)

    placeholders = ",".join(["?"] * len(branches))
    insert = "INSERT INTO %s(%s) VALUES (%s);" % (table, colnames, placeholders)
    print(insert)
    db.executemany(insert, rows())
    # Commit here so the rows do not depend on a later commit elsewhere.
    db.commit()
0069 
def read_objects_root(rootfile):
    """Walk a ROOT file recursively and yield every entry stored in it.

    Args:
        rootfile: object offering ``GetDirectory(path)`` and ``Get(path)``;
            directories must offer ``GetListOfKeys()`` and each key
            ``GetName()`` / ``GetClassName()``.

    Yields:
        ``(path, value, classname)`` tuples. For entries that `Get` can
        retrieve, `value` is the retrieved object. For entries it cannot
        retrieve, the key name itself is parsed as ``<name>type=value</name>``
        and `value` is the text after the '='.

    Raises:
        Exception: if an entry cannot be retrieved and its key name does not
            have the ``<name>type=value</name>`` form.
    """
    xml_re = re.compile(r"^<(.+)>(.+)=(.+)<\/\1>$")

    def parse_directory(di):
        directory = rootfile.GetDirectory(di)
        for key in directory.GetListOfKeys():
            entry = key.GetName()
            rtype = key.GetClassName()
            fullpath = "%s/%s" % (di, entry) if di != "" else entry
            if rtype == "TDirectoryFile":
                yield from parse_directory(fullpath)
                continue
            obj = rootfile.Get(fullpath)
            if obj:
                yield (fullpath, obj, rtype)
                continue
            # special case to parse the xml abomination
            m = xml_re.search(entry)
            if not m:
                raise Exception("Invalid xml:" + entry)
            # group(2) is the type code and is not used. Unlike `fullpath`,
            # this path keeps a leading '/' for entries in the top directory.
            yield ("%s/%s" % (di, m.group(1)), m.group(3), rtype)

    yield from parse_directory("")
0099 
def save_keyvalue(dictionary, name):
    """Store a dictionary as a two-column (key, value) SQLite table.

    Args:
        dictionary: mapping to store. Keys are converted with `str`, values
            with `tosqlite`.
        name: table name; '/' is replaced by '_'.

    The CREATE TABLE and INSERT statements are printed before being executed
    on the module-level `db` connection, which is committed at the end.
    """
    name = name.replace("/", "_")
    create = "CREATE TABLE %s(key, value);" % name
    print(create)
    db.execute(create)
    # dict.items() and str() replace the Python 2-only iteritems()/unicode(),
    # which raise under the Python 3 interpreter this script declares.
    data = [(str(k), tosqlite(v)) for k, v in dictionary.items()]
    insert = "INSERT INTO %s(key, value) VALUES (?,?);" % name
    print(insert)
    db.executemany(insert, data)
    db.commit()
0113 
0114 
# Main driver: every TTree becomes its own table; all other objects are
# collected and written together into the "TDirectory" key/value table.
try:
    for name, obj, rtype in read_objects_root(f):
        if rtype == "TTree":
            treetotable(obj, name)
        else:
            basic_objects[name] = obj

    save_keyvalue(basic_objects, "TDirectory")
finally:
    # Release the database connection and the input file even when the
    # conversion fails part-way.
    db.close()
    if f:
        f.Close()