Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:12

0001 #!/usr/bin/env python3
0002 import re
0003 import json
0004 import ROOT
0005 import sqlite3
0006 import argparse
0007 
# Command-line interface: one positional ROOT input file and an optional
# output path for the SQLite database.
parser = argparse.ArgumentParser(description="Convert arbitrary ROOT file to SQLite database, mapping TTrees to tables and converting TObjects to JSON.")

parser.add_argument('inputfile', help='ROOT file to read')
parser.add_argument('-o', '--output', help='SQLite file to write', default='root.sqlite')
args = parser.parse_args()

# Input ROOT file and output database, shared by the functions below.
f = ROOT.TFile.Open(args.inputfile)
db = sqlite3.connect(args.output)

# Every object that is not a TTree, keyed by its path inside the ROOT file.
basic_objects = {}

# Match the bare tokens "inf" / "nan" as they appear in ROOT's JSON output:
# preceded by '-', ' ' or '[' and followed by ',', '}' or ']'.
# Raw strings keep "\[" and "\]" from being parsed as (invalid) string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0021 
def tosqlite(x):
    """Convert a value read from ROOT into something sqlite3 can store.

    * ``ROOT.string`` values are returned as text; if the payload is bytes
      that are not valid UTF-8, the raw bytes are returned instead.
    * ``int`` and ``float`` values are returned unchanged.
    * Anything else is serialised with ``ROOT.TBufferJSON.ConvertToJSON``,
      cleaned of ``nan``/``inf`` tokens and returned as a JSON string.

    Conversion failures in the JSON path never raise: a JSON object with the
    keys ``root2sqlite_error`` and ``root2sqlite_object`` is returned instead.
    """
    if isinstance(x, ROOT.string):
        raw = x.data()
        # NOTE(review): data() is handled for both str and bytes results, since
        # which one PyROOT returns is not visible from this file.
        if isinstance(raw, bytes):
            try:
                return raw.decode("utf-8")
            except UnicodeDecodeError:
                # Not text: hand the raw bytes to sqlite3 as-is.
                return raw
        return str(raw)
    if isinstance(x, (int, float)):
        return x
    try:
        rootobj = str(ROOT.TBufferJSON.ConvertToJSON(x))
        # turns out ROOT does not generate valid JSON for NaN/inf
        clean = nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', rootobj))
        obj = json.loads(clean)
        return json.dumps(obj, allow_nan=False)
    except Exception as e:
        # Deliberate best effort: record the failure instead of aborting.
        return json.dumps({"root2sqlite_error": e.__repr__(), "root2sqlite_object": x.__repr__()})
0044 
def columnescape(s):
    """Return *s* as a usable column name.

    Names that match a reserved word (compared case-insensitively) get a
    trailing underscore; every other name is returned unchanged.
    """
    # add whatever is not a valid column name here
    reserved = ("index",)
    return s + "_" if s.lower() in reserved else s
0052 
def treetotable(ttree, name):
    """Create a table for *ttree* and insert one row per tree entry.

    The table is named after *name* with "/" replaced by "_"; there is one
    column per branch, named after the branch (passed through columnescape).
    Table and column names are double-quoted in the generated SQL so that
    names which are not bare SQL identifiers still produce valid statements.
    Each value is converted with tosqlite(). The generated statements are
    printed before they are executed.

    This function does not commit; the caller is responsible for that.
    """
    def quote(identifier):
        # SQL identifier quoting: wrap in double quotes, double embedded ones.
        return '"%s"' % identifier.replace('"', '""')

    table = quote(name.replace("/", "_"))
    branches = [b.GetName() for b in ttree.GetListOfBranches()]
    colnames = ", ".join(quote(columnescape(b)) for b in branches)
    create = "CREATE TABLE %s(%s);" % (table, colnames)
    print(create)
    db.execute(create)

    def rows():
        # Stream the entries instead of holding the whole tree in memory.
        for i in range(ttree.GetEntries()):
            # GetEntry loads entry i so the branch attributes reflect it.
            ttree.GetEntry(i)
            yield tuple(tosqlite(getattr(ttree, b)) for b in branches)

    insert = "INSERT INTO %s(%s) VALUES (%s);" % (table, colnames, ",".join(["?"] * len(branches)))
    print(insert)
    db.executemany(insert, rows())
0068 
def read_objects_root(rootfile):
    """Yield ``(path, object, class_name)`` for every key in *rootfile*.

    Directories (class name "TDirectoryFile") are descended into recursively
    and are not yielded themselves. Paths are "/"-joined key names without a
    leading slash.

    When ``rootfile.Get(path)`` returns a falsy object, the key name itself is
    expected to have the form ``<name>typecode=value</name>``; in that case
    ``(parent/name, value, class_name)`` is yielded with *value* as a string.

    Raises:
        Exception: if such a key name does not match the expected form.
    """
    xml_re = re.compile(r"^<(.+)>(.+)=(.+)<\/\1>$")

    def join(parent, child):
        # Top-level entries have an empty parent and get no leading "/".
        return "%s/%s" % (parent, child) if parent != "" else child

    def parse_directory(di):
        directory = rootfile.GetDirectory(di)
        for key in directory.GetListOfKeys():
            entry = key.GetName()
            rtype = key.GetClassName()
            fullpath = join(di, entry)
            if rtype == "TDirectoryFile":
                yield from parse_directory(fullpath)
                continue
            obj = rootfile.Get(fullpath)
            if obj:
                yield (fullpath, obj, rtype)
                continue
            # special case to parse the xml abomination
            m = xml_re.search(entry)
            if not m:
                raise Exception("Invalid xml:" + entry)
            # group 1 is the name, group 3 the value; the type code in
            # group 2 is not used.
            yield (join(di, m.group(1)), m.group(3), rtype)

    yield from parse_directory("")
0098 
def save_keyvalue(dictionary, name):
    """Store *dictionary* as a two-column (key, value) table and commit.

    The table is named after *name* with "/" replaced by "_". Keys are stored
    as strings and values are converted with tosqlite(). The generated
    statements are printed before they are executed. The final commit also
    persists any earlier uncommitted work on the shared connection.
    """
    name = name.replace("/", "_")
    create = "CREATE TABLE %s(key, value);" % name
    print(create)
    db.execute(create)
    # dict.items() and str() replace the Python 2 only iteritems()/unicode().
    data = [(str(k), tosqlite(v)) for k, v in dictionary.items()]
    insert = "INSERT INTO %s(key, value) VALUES (?,?);" % name
    print(insert)
    db.executemany(insert, data)
    db.commit()
0112 
0113 
# Walk the whole input file: every TTree becomes its own table, everything
# else is collected and written to a single key/value table at the end.
for name, obj, rtype in read_objects_root(f):
    if rtype != "TTree":
        basic_objects[name] = obj
    else:
        treetotable(obj, name)

# save_keyvalue commits, which also persists the tables created above.
save_keyvalue(basic_objects, "TDirectory")