Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-12-01 23:40:20

0001 ## Original version of code heavily based on recipe written by Wai Yip
0002 ## Tung, released under PSF license.
0003 ## http://code.activestate.com/recipes/534109/
0004 
0005 import re
0006 import os
0007 import xml.sax.handler
0008 import pprint
0009 
class DataNode (object):
    '''Container for one parsed XML element.

    XML attributes and child elements are stored by name in _attrs and
    can be read as Python attributes (node.child) or by string key
    (node['child']); a missing name yields None.  When the same name is
    added more than once the values are collected in a list.  The text
    content of the element, if any, is kept in _data.

    keyword arguments:
    nameChangeDict - dictionary mapping incoming names to the names
                     under which they should be stored'''

    # extra indentation added after a name when printing its value
    spaces = 4

    def __init__ (self, **kwargs):
        self._attrs = {}     # XML attributes and child elements
        self._data  = None   # child text data
        self._ncDict = kwargs.get ('nameChangeDict', {})


    def __len__ (self):
        # treat single element as a list of 1
        return 1


    def __getitem__ (self, key):
        '''Looks up a stored name for string keys; any other key indexes
        the one-element list [self].'''
        if isinstance (key, str):
            return self._attrs.get(key,None)
        else:
            return [self][key]


    def __contains__ (self, name):
        return name in self._attrs


    def __bool__ (self):
        '''A node is true when it holds attributes, children or text.'''
        return bool (self._attrs or self._data)

    # Python 2 spelling of the truth hook.  Without __bool__, Python 3
    # falls back on __len__ and treats every node as true.
    __nonzero__ = __bool__


    def __getattr__ (self, name):
        '''Returns the stored value for name, or None if there is none.'''
        # Special methods must fail normally.  '_attrs' itself is only
        # looked up here when __init__ has not run (e.g. while copying
        # or unpickling); answering from self._attrs would recurse
        # forever.
        if name.startswith('__') or name == '_attrs':
            raise AttributeError (name)
        return self._attrs.get (name, None)


    def _add_xml_attr (self, name, value):
        '''Stores value under name, after applying the name change
        dictionary.  Repeated names accumulate into a list.'''
        change = self._ncDict.get (name)
        if change:
            name = change
        if name in self._attrs:
            # multiple attribute of the same name are represented by a list
            children = self._attrs[name]
            if not isinstance(children, list):
                children = [children]
                self._attrs[name] = children
            children.append(value)
        else:
            self._attrs[name] = value


    def __str__ (self):
        return self.stringify()


    def __repr__ (self):
        items = sorted (self._attrs.items())
        if self._data:
            items.append(('data', self._data))
        return u'{%s}' % ', '.join([u'%s:%s' % (k,repr(v)) for k,v in items])


    def attributes (self):
        '''Returns the dictionary of stored attributes and children.'''
        return self._attrs


    @staticmethod
    def isiterable (obj):
        '''Returns a true value (the object's __iter__) if obj defines
        __iter__, False otherwise.'''
        return getattr (obj, '__iter__', False)


    @staticmethod
    def _outputValues (obj, name, offset):
        '''Formats obj, optionally labelled with name, indented by offset
        spaces.  Lists are written one element per line between
        brackets; anything else is handed to pprint.'''
        retval = ' ' * offset
        if name:
            retval += '%s: ' % name
            offset += len (name) + DataNode.spaces
        # if this is a list
        if isinstance (obj, list):
            separator = '\n ' + ' ' * offset
            pieces = []
            for value in obj:
                if isinstance (value, DataNode):
                    pieces.append (value.stringify (offset=offset))
                elif DataNode.isiterable (value):
                    pieces.append (DataNode._outputValues (value, '', offset))
                else:
                    pieces.append ("%s" % value)
            # always open the bracket so that an empty list stays balanced
            retval += '['
            if pieces:
                retval += separator + (',' + separator).join (pieces)
            retval += '\n' + ' ' * (offset - 2) +']\n'
            return retval
        retval += pprint.pformat(obj,
                                 indent= offset,
                                 width=1)
        return retval


    def stringify (self, name = '', offset = 0):
        '''Returns a printable, indented representation of this node.

        name   - label to print in front of the node
        offset - number of spaces of indentation'''
        # is this just data and nothing below
        if self._data and not self._attrs:
            return DataNode._outputValues (self._data, name, offset)
        # this has attributes
        retval = ''
        if name:
            retval += '\n' + ' ' * offset
            retval += '%s: ' % name
        tempspace = offset + 3
        first = True
        for key, value in sorted (self._attrs.items()):
            if first:
                retval += '{ \n'
                first = False
            else:
                retval += ',\n'
            if isinstance (value, DataNode):
                retval += value.stringify (key, tempspace)
            else:
                retval += DataNode._outputValues (value, key, tempspace)
        # this has data too
        if self._data:
            retval += ',\n'
            retval += DataNode._outputValues (self._data, name, tempspace)
        retval += '\n ' + ' ' * offset + '}'
        return retval
0156         
0157 
0158 
class TreeBuilder (xml.sax.handler.ContentHandler):
    '''SAX content handler that turns the stream of parser events into a
    tree of DataNode objects hanging off a synthetic root node.'''

    # every character that may not appear in a Python identifier
    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def __init__ (self, **kwargs):
        self._ncDict = kwargs.get ('nameChangeDict', {})
        self._root = DataNode (nameChangeDict = self._ncDict)
        self.current = self._root
        # saved (node, text parts) pairs of the elements still open
        self._stack = []
        self._text_parts = []

    def startElement (self, name, attrs):
        '''Opens a fresh node for the element and copies its XML
        attributes onto it.'''
        # remember where we were so endElement can come back to it
        enclosing = (self.current, self._text_parts)
        self._stack.append (enclosing)
        node = DataNode (nameChangeDict = self._ncDict)
        self.current = node
        self._text_parts = []
        # xml attributes --> python attributes
        for attrName, attrValue in attrs.items():
            node._add_xml_attr (TreeBuilder._name_mangle (attrName), attrValue)

    def endElement (self, name):
        '''Closes the current element and attaches it to its parent.'''
        finished = self.current
        text = ''.join (self._text_parts).strip()
        if text:
            finished._data = text
        # a text only node is simply represented by the string
        obj = finished if finished.attributes() else (text or '')
        parent, parentText = self._stack.pop()
        self.current = parent
        self._text_parts = parentText
        parent._add_xml_attr (TreeBuilder._name_mangle (name), obj)

    def characters (self, content):
        '''Collects a piece of character data for the current element.'''
        self._text_parts.append (content)

    def root (self):
        '''Returns the synthetic root node.'''
        return self._root

    def topLevel (self):
        '''Returns top level object'''
        children = list (self._root.attributes().values())
        return children[0]

    @staticmethod
    def _name_mangle (name):
        '''Replaces every non-identifier character of name by "_".'''
        return re.sub (TreeBuilder.non_id_char, '_', name)
0204 
0205 
# Replacements applied, in this order, to the text found between double
# quotes.  The ampersand rule has to come first so that the entities
# produced by the later rules are not escaped a second time; it leaves
# ampersands alone that already start an entity or character reference
# (such as &lt; or &#39;).
regexList = [ (re.compile (r'&(?!(?:[A-Za-z_][\w.-]*|#[0-9]+|#x[0-9A-Fa-f]+);)'),
                                  '&amp;'   ),
              (re.compile (r'<'), '&lt;'    ),
              (re.compile (r'>'), '&gt;'    ),
              (re.compile (r'"'), '&quot;'  ),
              (re.compile (r"'"), '&#39;'   )
              ]

# group 1: end of the name, '=' and the opening quote; group 2: the value
quoteRE = re.compile (r'(\w\s*=\s*")([^"]+)"')

def fixQuoteValue (match):
    '''Replacement function for quoteRE: returns the matched text with
    the characters of the quoted value (group 2) escaped according to
    regexList.'''
    quote = match.group(2)
    for regex, replacement in regexList:
        quote = regex.sub (replacement, quote)
    return match.group(1) + quote + '"'
0221 
0222 
def xml2obj (**kwargs):
    ''' Converts XML data into native Python object.  Takes a file
    handle, a string or a file name as input.  Illegal characters are
    only fixed when filtering is requested.

    input source:  Exactly one of the three following is needed
    filehandle     - input from file handle
    contents       - input from string
    filename       - input from filename

    options:
    filtering      - boolean value telling code whether or not to filter
                     the input, escaping illegal XML characters inside
                     double-quoted values.  When filtering, a supplied
                     filehandle is read completely and then closed.
    nameChangeDict - dictionary of names to change in python object

    Raises RuntimeError if not exactly one input source is given, if the
    file cannot be opened, or if filtered input turns out to be empty.
    Errors raised by the XML parser are passed on unchanged.'''

    # make sure we have exactly 1 input source
    filehandle = kwargs.get ('filehandle')
    contents   = kwargs.get ('contents')
    filename   = kwargs.get ('filename')
    numSources = len ([src for src in (filehandle, contents, filename) if src])
    if not numSources:
        raise RuntimeError("You must provide 'filehandle', 'contents', or 'filename'")
    if numSources > 1:
        raise RuntimeError("You must provide only ONE of 'filehandle', 'contents', or 'filename'")

    # a file opened here is ours to close; a handle passed in by the
    # caller is left open unless filtering consumes it (see below)
    openedHere = False
    if filename:
        try:
            filehandle = open (filename, 'r')
        except (IOError, OSError, TypeError, ValueError):
            # unusable names (wrong type, embedded NUL) are reported the
            # same way as files that cannot be opened
            raise RuntimeError("Failed to open '%s'" % filename)
        openedHere = True

    try:
        # are we filtering?
        if kwargs.get ('filtering'):
            # if we are filtering, we need to read in the contents to
            # modify them
            if not contents:
                try:
                    contents = ''.join (filehandle)
                finally:
                    filehandle.close()
                openedHere = False
            contents = quoteRE.sub (fixQuoteValue, contents)
            if not contents:
                raise RuntimeError("No XML content found in input")

        ncDict = kwargs.get ('nameChangeDict', {})
        builder = TreeBuilder (nameChangeDict = ncDict)
        if contents:
            xml.sax.parseString (contents, builder)
        else:
            xml.sax.parse (filehandle, builder)
    finally:
        if openedHere:
            filehandle.close()
    return builder.topLevel()