Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-08-17 23:10:44

0001 #ifndef __SIMPLE_SAX_PARSER_H_
0002 #define __SIMPLE_SAX_PARSER_H_
0003 /*  A simple SAX-like parser. 
0004 
0005     And yes, I know the S in SAX stands for Simple.
0006         
0007     Licensed under GPLv3 license.
0008     
0009     TODO: incomplete support for entities.
0010     TODO: no support for DTD nor <?xml> preamble.
0011  */
0012 
0013 #include <string>
0014 #include <cstdio>
0015 #include <cstdlib>
0016 #include <cassert>
0017 #include <cstring>
0018 #include <iostream>
0019 #include <algorithm>
0020 #include <vector>
0021 
/** Tokenizer helper used by SimpleSAXParser::getToken(); defined elsewhere.

    NOTE(review): the implementation is not visible from this header. Judging
    by the signature and by how getToken() uses it, it presumably reads
    characters from `in` into `*buffer` (growing it and updating `*maxSize` as
    needed) until one of `separators` is met, and reports the character it
    stopped on through `*firstChar` -- confirm against the definition.
 */
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar);

/** A simple SAX parser which is able to parse the configuration.

    Usage: derive from this class, override startElement(), endElement() and
    data(), construct the parser on an input stream and call parse().

    State machine for the parser can be drawn by cut and pasting the following
    to graphviz:
  
    digraph {
    IN_DOCUMENT->IN_BEGIN_TAG [label="nextChar == '<'"];
    IN_DOCUMENT->IN_DATA [label="nextChar != '<'"];
    
    IN_BEGIN_TAG->IN_BEGIN_ELEMENT [label="nextChar >= 'a' && nextChar < 'Z'"];
    IN_BEGIN_TAG->IN_END_ELEMENT [label= "nextChar == '/'"];
    
    IN_BEGIN_ELEMENT->IN_END_ELEMENT [label="nextChar == '/'"];
    IN_BEGIN_ELEMENT->IN_ELEMENT_WHITESPACE [label="nextChar == ' '"];
    IN_BEGIN_ELEMENT->IN_END_TAG [label="nextChar == '>'"];
    
    IN_ELEMENT_WHITESPACE->IN_ELEMENT_WHITESPACE [ label = "nextChar == \"\\ \\t\\n\""]
    IN_ELEMENT_WHITESPACE->IN_ATTRIBUTE_KEY [ label = "nextChar >= 'a' && nextChar < 'Z'"]
    IN_ELEMENT_WHITESPACE->IN_END_ELEMENT [label="nextChar == '/'"]
    
    IN_END_ELEMENT->IN_END_TAG [label = "nextChar == '>'"];
    
    IN_END_TAG->IN_BEGIN_TAG [label="nextChar == '<'"];
    IN_END_TAG->IN_DATA [label="nextChar != '<'"]
    
    IN_DATA->IN_BEGIN_TAG [label="nextChar == '<'"];
    IN_DATA->IN_DATA_ENTITY [label="nextChar == '&'"];
    IN_DATA->IN_DONE [label = "nextChar == EOF"];
    
    IN_DATA_ENTITY->IN_DATA [label="nextChar == ';'"];
    
    IN_ATTRIBUTE_KEY->IN_BEGIN_ATTRIBUTE_VALUE [label = "nextChar == '='"]
    
    IN_BEGIN_ATTRIBUTE_VALUE->IN_STRING [label = "nextChar == '\"' || nextChar == '\'' "]
    
    IN_STRING->IN_END_ATTRIBUTE_VALUE [label = "nextChar == quote"]
    IN_STRING->IN_STRING_ENTITY [label = "nextChar == '&'"]
    
    IN_END_ATTRIBUTE_VALUE->IN_ELEMENT_WHITESPACE [label = "nextChar == ' '"]
    IN_END_ATTRIBUTE_VALUE->IN_END_ELEMENT [label = "nextChar == '/'"]
    IN_END_ATTRIBUTE_VALUE->IN_END_TAG [label = "nextChar == '>'"]
    
    IN_STRING_ENTITY->IN_STRING [label = "nextChar == ';'"]
    }    

    NOTE(review): the edge label "nextChar >= 'a' && nextChar < 'Z'" can never
    hold as written ('a' is 97 and 'Z' is 90 in ASCII). It presumably stands
    for "nextChar is a letter" -- confirm against the parse() implementation.
    */
class SimpleSAXParser {
public:
  /** One key="value" pair found in an element's opening tag. */
  struct Attribute {
    std::string key;
    std::string value;

    Attribute(const std::string &iKey, const std::string &iValue) : key(iKey), value(iValue) {}

    // Copy/move construction and assignment are deliberately left to the
    // compiler: a hand-written copy constructor would suppress the implicit
    // move operations and force string copies whenever an Attributes vector
    // is reallocated or sorted.

    /** Orders attributes by key only; the value takes no part in the comparison. */
    bool operator<(const Attribute &attribute) const { return key < attribute.key; }
  };

  /** Attribute list handed to startElement(). */
  using Attributes = std::vector<Attribute>;

  /** Error object carrying a human readable message. */
  class ParserError {
  public:
    ParserError(const std::string &error) : m_error(error) {}

    /** @return the message as a C string; valid for as long as this object lives.
        Const-qualified so it can be called on an error caught by const reference. */
    const char *error() const { return m_error.c_str(); }

  private:
    std::string m_error;
  };

  /** States of the parser; see the graph in the class description. */
  enum PARSER_STATES {
    IN_DOCUMENT,
    IN_BEGIN_TAG,
    IN_DONE,
    IN_BEGIN_ELEMENT,
    IN_ELEMENT_WHITESPACE,
    IN_END_ELEMENT,
    IN_ATTRIBUTE_KEY,
    IN_END_TAG,
    IN_DATA,
    IN_BEGIN_ATTRIBUTE_VALUE,
    IN_STRING,
    IN_END_ATTRIBUTE_VALUE,
    IN_STRING_ENTITY,
    IN_DATA_ENTITY
  };

  /** Binds the parser to the stream @a f and reads its first character as
      look-ahead. The stream is held by reference, so it must outlive the
      parser.

      m_buffer is sized from m_bufferSize in the initializer list; this relies
      on m_bufferSize being declared before m_buffer below. */
  SimpleSAXParser(std::istream &f)
      : m_in(f), m_bufferSize(1024), m_buffer(new char[m_bufferSize]), m_nextChar(m_in.get()) {}

  // Defined out of line. NOTE(review): presumably releases m_buffer -- confirm.
  virtual ~SimpleSAXParser();

  /** Runs the parser over the stream; defined out of line. */
  void parse();

  /** Callbacks meant to be overridden by subclasses. The defaults do nothing. */
  virtual void startElement(const std::string & /*name*/, Attributes & /*attributes*/) {}
  virtual void endElement(const std::string & /*name*/) {}
  virtual void data(const std::string & /*data*/) {}

  // Non-copyable: the parser owns a raw buffer and refers to an external stream.
  SimpleSAXParser(const SimpleSAXParser &) = delete;                   // stop default
  const SimpleSAXParser &operator=(const SimpleSAXParser &) = delete;  // stop default

private:
  std::string parseEntity(const std::string &entity);

  /** Calls fgettoken() with the separator set @a delim and returns a copy of
      the internal buffer as a std::string. The look-ahead character is
      whatever fgettoken() stores in m_nextChar; its bool result is ignored. */
  std::string getToken(const char *delim) {
    fgettoken(m_in, &m_buffer, &m_bufferSize, delim, &m_nextChar);
    return m_buffer;
  }

  /** Same as above for a single separator character, except that one further
      character is then read from the stream into the look-ahead, replacing
      the value fgettoken() stored there.
      NOTE(review): presumably this steps over the delimiter itself -- confirm. */
  std::string getToken(const char delim) {
    const char separators[2] = {delim, 0};
    fgettoken(m_in, &m_buffer, &m_bufferSize, separators, &m_nextChar);
    m_nextChar = m_in.get();
    return m_buffer;
  }

  /** If the look-ahead equals @a c, replaces it with the next character from
      the stream and returns true; otherwise changes nothing and returns false. */
  bool skipChar(int c) {
    if (m_nextChar != c)
      return false;
    m_nextChar = m_in.get();
    return true;
  }

  /** @return the current look-ahead character without consuming it. */
  int nextChar() const { return m_nextChar; }

  std::istream &m_in;                      // input stream, not owned
  size_t m_bufferSize;                     // capacity of m_buffer; must stay declared before it
  char *m_buffer;                          // token buffer handed to fgettoken()
  int m_nextChar;                          // one character of look-ahead, as returned by istream::get()
  std::vector<std::string> m_elementTags;  // NOTE(review): presumably the stack of open element names
  Attributes m_attributes;                 // NOTE(review): presumably attributes of the tag being parsed
};
0154 
0155 // NOTE: put in a .cc if this file is used in more than one place.
0156 #endif  // __SIMPLE_SAX_PARSER_H_