Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:13:01

0001 from __future__ import print_function
0002 ## Original version of code heavily based on recipe written by Wai Yip
0003 ## Tung, released under PSF license.
0004 ## http://code.activestate.com/recipes/534109/
0005 
0006 import re
0007 import os
0008 import xml.sax.handler
0009 import pprint
0010 
class DataNode (object):
    '''Python representation of a single XML element.

    XML attributes and child elements are stored together, by name, in
    one dictionary.  They can be read with attribute access (node.name)
    or string indexing (node['name']); a name that is not present gives
    None instead of raising.  A name that is added more than once is
    stored as a list of all its values.  The text content of the element,
    if any, is kept separately in _data.

    A single node also acts as a one element sequence: len (node) is 1
    and node[0] is the node itself, so code that expects "one or many"
    children can index either form the same way.
    '''

    # extra indentation applied after a 'name: ' label in _outputValues
    spaces = 4

    def __init__ (self, **kwargs):
        '''Creates an empty node.

        nameChangeDict - optional dictionary mapping incoming names to
                         the names they should be stored under.'''
        self._attrs = {}     # XML attributes and child elements
        self._data  = None   # child text data
        self._ncDict = kwargs.get ('nameChangeDict', {})


    def __len__ (self):
        # treat single element as a list of 1
        return 1


    def __getitem__ (self, key):
        '''String keys look up an attribute / child (None if missing);
        any other key indexes the one element list [self].'''
        if isinstance (key, str):
            return self._attrs.get (key, None)
        return [self][key]


    def __contains__ (self, name):
        return name in self._attrs


    def __nonzero__ (self):
        '''A node is true when it holds attributes, children or text.'''
        return bool (self._attrs or self._data)

    # Python 3 spells the truth hook __bool__.  Without this alias it
    # falls back to __len__, which is always 1, so every node is true.
    __bool__ = __nonzero__


    def __getattr__ (self, name):
        '''Returns the attribute / child called name, or None if missing.

        Only called when normal attribute lookup fails.'''
        if name.startswith ('__'):
            # Special method probes (copy, pickle, ...) must see a real
            # AttributeError instead of None.
            raise AttributeError (name)
        # Go through __dict__ so that a node whose __init__ has not run
        # (e.g. while being copied or unpickled) raises AttributeError
        # instead of recursing forever looking for self._attrs.
        attrs = self.__dict__.get ('_attrs')
        if attrs is None:
            raise AttributeError (name)
        return attrs.get (name, None)


    def _add_xml_attr (self, name, value):
        '''Stores value under name, applying the name change dictionary.

        The first value for a name is stored as is; later values for the
        same name turn the entry into a list and are appended to it.'''
        change = self._ncDict.get (name)
        if change:
            name = change
        if name in self._attrs:
            # multiple attribute of the same name are represented by a list
            children = self._attrs[name]
            if not isinstance (children, list):
                children = [children]
                self._attrs[name] = children
            children.append (value)
        else:
            self._attrs[name] = value


    def __str__ (self):
        return self.stringify()


    def __repr__ (self):
        items = sorted (self._attrs.items())
        if self._data:
            items.append (('data', self._data))
        return u'{%s}' % ', '.join ([u'%s:%s' % (k, repr (v)) for k, v in items])


    def attributes (self):
        '''Returns the dictionary of attributes / children itself (not a
        copy).'''
        return self._attrs


    @staticmethod
    def isiterable (obj):
        '''Returns obj.__iter__ if obj has one, otherwise False.

        NOTE: on Python 3 plain strings have __iter__ and so count as
        iterable here.'''
        return getattr (obj, '__iter__', False)


    @staticmethod
    def _outputValues (obj, name, offset):
        '''Formats obj as text indented by offset spaces.

        obj    - value to format; lists are written one item per line
                 inside brackets, anything else goes through pprint
        name   - optional label written as 'name: ' before the value
        offset - number of leading spaces

        Returns the formatted string.'''
        retval = ' ' * offset
        if name:
            retval += '%s: ' % name
            offset += len (name) + DataNode.spaces
        # if this is a list
        if isinstance (obj, list):
            for index, value in enumerate (obj):
                # opening bracket before the first item, comma otherwise
                retval += ('[\n ' if index == 0 else ',\n ') + ' ' * offset
                if isinstance (value, DataNode):
                    retval += value.stringify (offset=offset)
                elif DataNode.isiterable (value):
                    retval += DataNode._outputValues (value, '', offset)
                else:
                    retval += "%s" % value
            retval += '\n' + ' ' * (offset - 2) + ']\n'
            return retval
        retval += pprint.pformat (obj,
                                  indent=offset,
                                  width=1)
        return retval


    def stringify (self, name = '', offset = 0):
        '''Returns a multi-line text rendering of this node.

        name   - optional label to print in front of the node
        offset - number of spaces to indent by'''
        # is this just data and nothing below
        if self._data and not self._attrs:
            return DataNode._outputValues (self._data, name, offset)
        # this has attributes
        retval = ''
        if name:
            retval += '\n' + ' ' * offset
            retval += '%s: ' % name
        tempspace = offset + 3
        first = True
        for key, value in sorted (self._attrs.items()):
            retval += '{ \n' if first else ',\n'
            first = False
            if isinstance (value, DataNode):
                retval += value.stringify (key, tempspace)
            else:
                retval += DataNode._outputValues (value, key, tempspace)
        # this has data too
        if self._data:
            retval += ',\n'
            retval += DataNode._outputValues (self._data, name, tempspace)
        retval += '\n ' + ' ' * offset + '}'
        return retval
0157         
0158 
0159 
class TreeBuilder (xml.sax.handler.ContentHandler):
    '''SAX content handler that turns the parse events into DataNodes.

    Every element gets its own DataNode while it is open.  When it
    closes, the element is attached to its parent under its (mangled)
    tag name: as the DataNode if it ended up with attributes or children,
    otherwise as its stripped text.'''

    # anything that can not be part of a Python identifier
    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def __init__ (self, **kwargs):
        '''nameChangeDict - optional dictionary of names to change,
        handed on to every DataNode that is created.'''
        nameChanges = kwargs.get ('nameChangeDict', {})
        self._ncDict = nameChanges
        self._root = DataNode (nameChangeDict = nameChanges)
        self.current = self._root
        self._stack = []        # (node, text parts) of the open parents
        self._text_parts = []   # text pieces seen for the open element

    def startElement (self, name, attrs):
        # remember where we were so endElement can come back to it
        parentState = (self.current, self._text_parts)
        self._stack.append (parentState)
        node = DataNode (nameChangeDict = self._ncDict)
        self.current = node
        self._text_parts = []
        # xml attributes --> python attributes
        for attrName, attrValue in attrs.items():
            node._add_xml_attr (TreeBuilder._name_mangle (attrName),
                                attrValue)

    def endElement (self, name):
        finished = self.current
        text = ''.join (self._text_parts).strip()
        if text:
            finished._data = text
        # a text only node is simply represented by the string
        obj = finished if finished.attributes() else (text or '')
        self.current, self._text_parts = self._stack.pop()
        self.current._add_xml_attr (TreeBuilder._name_mangle (name), obj)

    def characters (self, content):
        # text may arrive in several pieces; they are joined in endElement
        self._text_parts.append (content)

    def root (self):
        '''Returns the root DataNode that holds the top level object'''
        return self._root

    def topLevel (self):
        '''Returns top level object'''
        topObjects = list (self._root.attributes().values())
        return topObjects[0]


    @staticmethod
    def _name_mangle (name):
        '''Replaces every non identifier character in name with "_"'''
        return re.sub (TreeBuilder.non_id_char, '_', name)
0205 
0206 
# (pattern, replacement) pairs applied, in this order, to the text inside
# a double-quoted attribute value.  The ampersand rule has to come first
# so that it does not re-escape the '&' of the entities written by the
# rules after it.  It also leaves alone any '&' that already starts an
# entity or character reference (&name; &#39; &#x27;) so that input which
# is already legal is not escaped twice.
regexList = [ (re.compile (r'&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)'),
                                  '&amp;'  ),
              (re.compile (r'<'), '&lt;'   ),
              (re.compile (r'>'), '&gt;'   ),
              # quoteRE never captures a '"', this rule is kept for
              # anybody using regexList directly
              (re.compile (r'"'), '&quot;' ),
              (re.compile (r"'"), '&#39;'  )
              ]

# group 1: end of the attribute name, the '=' and the opening quote
# group 2: the (non empty) attribute value up to the closing quote
quoteRE = re.compile (r'(\w\s*=\s*")([^"]+)"')

def fixQuoteValue (match):
    '''Escapes the illegal XML characters inside one quoted value.

    match - match object produced by quoteRE

    Returns the replacement text: the untouched prefix (group 1), the
    escaped value and the closing double quote.  Meant to be used as the
    replacement function of quoteRE.sub().'''
    quote = match.group (2)
    for regex, replacement in regexList:
        quote = regex.sub (replacement, quote)
    return match.group (1) + quote + '"'
0222 
0223 
def xml2obj (**kwargs):
    ''' Converts XML data into native Python object.

    input source:  Exactly one of the three following is needed
    filehandle     - input from file handle
    contents       - input from string
    filename       - input from filename

    options:
    filtering      - boolean value telling code whether or not to filter
                     the input: illegal XML characters found inside
                     double quoted attribute values are escaped before
                     parsing.  Off by default, in which case the input
                     is parsed exactly as given.
    nameChangeDict - dictionary of names to change in python object

    Returns the object built for the top level XML element.

    Raises RuntimeError if not exactly one input source is given or if
    'filename' can not be opened.  Errors raised by the XML parser are
    not caught here.'''

    # make sure we have exactly 1 input source
    filehandle = kwargs.get ('filehandle')
    contents   = kwargs.get ('contents')
    filename   = kwargs.get ('filename')
    numSources = len ([src for src in (filehandle, contents, filename)
                       if src])
    if numSources == 0:
        raise RuntimeError("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    # A file named by the caller is opened here, so it is also our job to
    # close it again, whatever happens while parsing.
    openedHere = False
    if filename:
        try:
            filehandle = open (filename, 'r')
        except (IOError, OSError):
            raise RuntimeError("Failed to open '%s'" % filename)
        openedHere = True

    try:
        parseAsString = bool (contents)
        # are we filtering?
        if kwargs.get ('filtering'):
            # if we are filtering, we need to read in the contents to
            # modify them
            if not contents:
                contents = filehandle.read()
                # This has always closed the handle after reading it,
                # even one supplied by the caller; kept for callers that
                # rely on it.
                filehandle.close()
            contents = quoteRE.sub (fixQuoteValue, contents)
            # Even if the file turned out to be empty: the handle is used
            # up, so let the parser complain about the empty string.
            parseAsString = True

        builder = TreeBuilder (nameChangeDict =
                               kwargs.get ('nameChangeDict', {}))
        if parseAsString:
            xml.sax.parseString (contents, builder)
        else:
            xml.sax.parse (filehandle, builder)
    finally:
        if openedHere:
            filehandle.close()
    return builder.topLevel()