Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:01:53

0001 """
0002 
0003 This file contains the base DataSource class, and all sub classes that implement their own methods for parsing data.
0004 
0005 """
0006 
0007 import json
0008 
0009 # data_source will extend this
class node(object):
    """Minimal tree node: one payload plus an ordered list of child nodes."""

    # class-level placeholders; real values are assigned per instance in __init__
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        """Store `data` as the payload and start with no children."""
        self._child_nodes = []
        self._data = data

    def data(self):
        """Return the payload held by this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap `node_data` in a new node and append it as the last child."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self._child_nodes

    def child(self, index):
        """Return the child at position `index`."""
        kids = self.children()
        return kids[index]

    def left_child(self):
        """Return the first child."""
        kids = self.children()
        return kids[0]

    def right_child(self):
        """Return the second child."""
        kids = self.children()
        return kids[1]

    def is_leaf(self):
        """Return True when this node has no children."""
        return not len(self.children())

    def __str__(self):
        payload = self.data()
        kids_text = str(self.children())
        return "<node data='%s' children=%s>" % (payload, kids_text)
0042 
class data_source(node):
    """Base class for data sources; it is also a tree node."""

    def __init__(self):
        # Run the node initialiser so every instance owns its own child list.
        # Without this, _child_nodes stays at the class-level None and the
        # inherited add_child()/is_leaf() fail on a bare data_source.
        super(data_source, self).__init__()

    def get_data(self):
        """Return the data of this source; the base class has none, so an empty list."""
        return []

    def __repr__(self):
        return "<data_source>"
0053 
0054 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
class json_file(data_source):
    """Data source that parses a JSON file once, at construction time."""

    # _sub_data is the current subtree of the json data and is meant for
    # chaining navigation methods.
    # _data already exists through node; it is repeated here for convenience.
    _data = None
    _sub_data = None
    _file_name = None

    def __init__(self, json_file_name):
        """Read `json_file_name` and keep the parsed document.

        Both _data and _sub_data start out referring to the whole document.
        """
        self._file_name = json_file_name
        with open(self._file_name, "r") as handle:
            parsed = json.loads(handle.read())
        self._data = parsed
        self._sub_data = parsed

    def data(self):
        """Return the parsed document wrapped in the matching json_data_node type."""
        wrapped = json_data_node.make(self._data)
        return wrapped

    def raw(self):
        """Return the parsed document as plain Python objects."""
        return self._data

    def __str__(self):
        # the string form is deliberately the same as the repr
        return self.__repr__()
0078 
class sqlite_schema(data_source):
    """Data source that loads the contents of a SQLite file into JSON nodes.

    With no query object, every table listed in sqlite_master is read in full
    and exposed as {table name: [ {column name: str(value), ...}, ... ]}.
    """

    _data, _sub_data, _file_name = None, None, None

    def __init__(self, sqlite_file_name, query_object=None):
        """Open the database file and load its tables.

        Args:
            sqlite_file_name: path of the SQLite database file.
            query_object: optional object exposing to_sql(). The original code
                referenced this name without defining it, which raised
                NameError on every construction; it is now an optional
                argument. When it is given, to_sql() is called but the query
                is not executed, so data() returns None in that case.
        """
        self._file_name = sqlite_file_name
        # import sqlite3 and connect to the database file
        import sqlite3
        connection = sqlite3.connect(self._file_name)
        try:
            cursor = connection.cursor()
            if query_object is None:
                self._data = json_data_node.make(self._dump_tables(cursor))
            else:
                # NOTE(review): the generated SQL is never run and its result
                # is never stored - confirm what this branch should do.
                query_object.to_sql()
        finally:
            # always release the file handle, even if a query failed
            connection.close()

    @staticmethod
    def _quote_identifier(name):
        """Return `name` as a double-quoted SQL identifier."""
        return '"%s"' % str(name).replace('"', '""')

    @classmethod
    def _dump_tables(cls, cursor):
        """Read every table through `cursor`; return {table: [row dicts]}."""
        listing = cursor.execute("select name from sqlite_master where type = 'table'")
        table_names = [row[0] for row in listing.fetchall()]

        table_to_data = {}
        for table in table_names:
            quoted_table = cls._quote_identifier(table)

            # the second field of each table_info row is the column name
            info = cursor.execute("pragma table_info(%s)" % quoted_table)
            columns = [str(column[1]) for column in info.fetchall()]

            # query with all columns; identifiers are quoted so that names
            # containing spaces or keywords do not break the statement
            column_string = ",".join(cls._quote_identifier(c) for c in columns)
            sql_query = "select %s from %s" % (column_string, quoted_table)
            rows = cursor.execute(sql_query).fetchall()

            # every value is stringified, as before
            table_to_data[str(table)] = [dict(zip(columns, map(str, row))) for row in rows]
        return table_to_data

    def data(self):
        """Return the loaded tables as a json_data_node (None if nothing was loaded)."""
        return self._data
0116 
0117 # used for chaining json-navigation methods
0118 # when a method is called initially on the data, an object of this class is returned,
0119 # then the methods on that object return an object of this class again.
class json_data_node(object):
    """Wrapper around a piece of JSON-like data, used to chain navigation calls.

    Navigation methods return another wrapper, so calls can be chained.
    """

    _data = None

    def __init__(self, data=None):
        self._data = data

    # use this instead of having to decide on which kind of json node should
    # be created in code that shouldn't be doing it.
    @staticmethod
    def make(data):
        """Wrap `data` in json_dict, json_list or json_basic by its exact type."""
        kind = type(data)
        if kind == dict:
            return json_dict(data)
        if kind == list:
            return json_list(data)
        return json_basic(data)

    def data(self):
        """Return the wrapped value."""
        return self._data

    def raw(self):
        """Return the wrapped value (same as data())."""
        return self._data

    def get(self, *args):
        """Follow the keys/indices in `args` one after another.

        With no arguments the node itself is returned; with one argument the
        wrapped value is indexed once and the result is wrapped again.
        """
        if len(args) == 1:
            selected = self.data()[args[0]]
            return json_data_node.make(selected)
        cursor = self
        for step in args:
            cursor = cursor.get(step)
        return cursor

    def set(self, data):
        """Replace the wrapped value and return this node."""
        self._data = data
        return self

    def find(self, type_name):
        """Collect, depth first, every value in the structure whose type equals `type_name`.

        The wrapped value itself comes first when it matches; lists and dicts
        are then searched recursively.
        """
        payload = self._data
        found = [payload] if type(payload) == type_name else []

        if type(payload) == list:
            nested = payload
        elif type(payload) == dict:
            nested = [payload[key] for key in payload]
        else:
            nested = []

        for value in nested:
            found.extend(json_data_node.make(value).find(type_name))
        return found

    def __str__(self):
        text = str(self._data)
        return "<json_data_node data='%s'>" % text
0172 
class json_list(json_data_node):
    """List-valued JSON node with positional access, iteration and table output."""

    # cursor used by the manual __next__()/reset() protocol
    iterator_index = None

    def __init__(self, data=None):
        self._data = data if data is not None else []
        self.iterator_index = 0

    def first(self):
        """Return the first element, wrapped as a json node."""
        return self.get(0)

    def last(self):
        """Return the last element, wrapped as a json node."""
        return self.get(len(self.data()) - 1)

    def add_child(self, data):
        """Append `data` to the list, unwrapping it first if it is a json node."""
        if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
            data = data.data()
        self._data.append(data)

    # iterator methods

    def __iter__(self):
        # Hand out an independent iterator over the raw items. Returning self
        # shared one cursor between all loops, so a nested loop over the same
        # list never terminated and a `break` left a stale position behind.
        return iter(self._data)

    def __next__(self):
        """Return the next raw item of the manual cursor; rewind when exhausted."""
        if self.iterator_index > len(self._data) - 1:
            self.reset()
            raise StopIteration
        else:
            self.iterator_index += 1
            return self._data[self.iterator_index - 1]

    def reset(self):
        """Rewind the manual cursor used by __next__()."""
        self.iterator_index = 0

    # misc methods

    def indices(self, *indices):
        """Return a json node holding the items found at the given positions.

        Positions that do not exist are skipped. If any argument cannot be
        converted to int, None is returned.
        """
        final_list = []
        for index in indices:
            try:
                index = int(index)
            except (TypeError, ValueError, OverflowError):
                return None
            try:
                final_list.append(self.get(index).data())
            except (LookupError, TypeError):
                # index didn't exist
                pass
        return json_data_node.make(final_list)

    def get_members(self, member_name):
        """Return a json node with attribute `member_name` of every item.

        Returns None when the items are not all of the same class, and an
        empty list node when there are no items.

        Raises:
            TypeError: if `member_name` is not a string.
        """
        if not isinstance(member_name, str):
            raise TypeError("Value given for member name must be a string.")
        items = self.data()
        if not items:
            # nothing to inspect; previously this raised IndexError
            return json_data_node.make([])
        type_of_first_item = items[0].__class__
        if any(item.__class__ != type_of_first_item for item in items):
            return None
        return json_data_node.make([getattr(item, member_name) for item in items])

    # format methods

    def as_dicts(self, convert_timestamps=False):
        """Return the as_dicts() result of every item, for lists of ORM objects.

        Prints a message and returns None when the list is empty or its first
        item is not one of the recognised ORM classes.
        """
        items = self.data()
        if len(items) == 0:
            print("\nNo data to convert to dictionaries.\n")
            return None

        # NOTE(review): as_table() also accepts "GlobalTagMapRequest"; confirm
        # whether it is left out here on purpose.
        if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "Tag", "IOV", "Payload"]:
            return [item.as_dicts(convert_timestamps=convert_timestamps) for item in items]

        print("Data in json_list was not the correct type.")
        return None

    @staticmethod
    def _terminal_columns():
        """Return the terminal width in characters.

        `stty size` is asked first, as before; if that fails (for example
        because stdin is not a terminal) shutil.get_terminal_size() is used.
        """
        import subprocess
        try:
            size = subprocess.check_output([b'stty', b'size'], stderr=subprocess.DEVNULL)
            return int(size.split(b' ')[1])
        except (OSError, subprocess.CalledProcessError, IndexError, ValueError):
            import shutil
            return shutil.get_terminal_size(fallback=(80, 24)).columns

    @staticmethod
    def _max_width_of_column(column, data):
        """Return the widest of the header text and all values of `column`."""
        return max(len(column), max(len(str(item[column])) for item in data))

    @staticmethod
    def _cell(content, col_width, fit):
        """Format one table cell; non-fitted cells are cut and marked with '...'."""
        text = str(content)
        if fit:
            col_width_with_padding = col_width + 2
            col_width_substring = len(text)
        else:
            col_width_with_padding = col_width - 2 if col_width - 2 > 0 else 1
            col_width_substring = col_width - 5 if col_width - 7 > 0 else 1
        suffix = "..." if not fit and col_width_substring < len(text) else ""
        shown = text[0:col_width_substring].replace("\n", "") + suffix
        return ("| {:<%s} " % (col_width_with_padding)).format(shown)

    # return ascii version of data
    # expects array of dicts
    # fit is a list of columns that should be kept at their full size
    # col_width is the column width to be used as a guide
    def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False):
        """Print the list as an ASCII table; returns None.

        Args:
            fit: columns shown at full width; ["all"] (never mutated here)
                means every column.
            columns: headers to show instead of the ones found in the data.
                The caller's list is not modified.
            hide: headers to remove from the table.
            col_width: guide width for columns that are not fitted; derived
                from the terminal width when None.
            row_nums: when True, a leading "row" column with the row index is
                added (on copies of the rows, not on the stored data).
        """
        if len(self.data()) == 0:
            print("\nNo data to draw table with.\n")
            return

        from . import models
        models_dict = models.generate()

        # if the list contains ORM objects, then convert them all to dictionaries,
        # otherwise, leave the list as it is - assume it is already a list of dictionaries
        first_item = self.get(0).data()
        if first_item.__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:

            from .data_formats import _objects_to_dicts
            data = _objects_to_dicts(self.data()).data()

            # NOTE(review): `connection` is not used below; the import is kept
            # in case loading the module is relied upon - confirm and remove.
            from .querying import connection
            table_name = models.class_name_to_column(first_item.__class__).upper()
            # copy the headers of the ORM model, so that hiding columns below
            # does not change the ORM class itself
            headers = list(models_dict[first_item.__class__.__name__.lower()].headers)
        else:
            table_name = None
            data = self.data()
            # gets headers stored in first dictionary
            headers = list(data[0].keys())

        if columns is not None:
            # work on a copy: `hide` removes entries from headers further down
            headers = list(columns)

        if row_nums:
            headers = ["row"] + headers
            # add the row number on copies, so the stored rows do not gain a
            # permanent "row" key that would show up in later tables
            data = [dict(item, row=str(i)) for i, item in enumerate(data)]

        if fit == ["all"]:
            # deliberately the same list object as headers: columns removed by
            # `hide` below then disappear from `fit` as well
            fit = headers

        if col_width is None:
            table_width = int(0.95 * self._terminal_columns())
            col_width = table_width // max(len(headers), 1)
        else:
            # widths are used as slice bounds and in format specs
            col_width = int(col_width)

        if hide is not None:
            for hidden in hide:
                headers.remove(hidden)

        column_to_width = {}

        if fit != headers:

            # get the column widths of fitted columns
            surplus_width = 0
            for column in fit:

                if column not in headers:
                    print(("'%s' is not a valid column." % column))
                    return

                column_to_width[column] = self._max_width_of_column(column, data)
                surplus_width += column_to_width[column] - col_width

            non_fitted = set(headers) - set(fit)
            if len(non_fitted) != 0:
                # floor division keeps the widths integral; true division
                # produced floats, which break slicing and the format spec
                non_fited_width_surplus = surplus_width // len(non_fitted)
            else:
                non_fited_width_surplus = 0

            for column in headers:
                if column not in fit:
                    column_to_width[column] = col_width - non_fited_width_surplus
        else:
            for column in headers:
                column_to_width[column] = self._max_width_of_column(column, data)

        horizontal_border = "\n"
        pieces = ["\n%s\n\n" % table_name if table_name is not None else "\n"]
        pieces.extend(self._cell(header, column_to_width[header], header in fit) for header in headers)
        pieces.append("\n")
        pieces.append(horizontal_border)
        for item in data:
            pieces.extend(self._cell(item[header], column_to_width[header], header in fit) for header in headers)
            pieces.append("\n")
        pieces.append(horizontal_border)
        pieces.append("Showing %d rows\n\n" % len(data))
        print("".join(pieces))
0369 
class json_dict(json_data_node):
    """Dictionary-valued JSON node."""

    def __init__(self, data=None):
        # identity check: only a missing value is replaced by an empty dict
        self._data = data if data is not None else {}

    def add_key(self, data, key):
        """Store `data` under `key`, unwrapping it first if it is a json node.

        Note the argument order: the value comes first, then the key.
        """
        if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
            data = data.data()
        self._data[key] = data
0379 
0380 # for strings, integers, etc
class json_basic(json_data_node):
    """JSON node for values that are neither list nor dict (strings, integers, etc)."""

    def __init__(self, data=None):
        # Identity check instead of `!= None`: the wrapped value can be any
        # object, and `!=` would call its own comparison method, which may
        # raise or return a non-boolean. A missing value becomes "".
        self._data = data if data is not None else ""