File indexing completed on 2021-08-17 23:10:44
0001 #ifndef __SIMPLE_SAX_PARSER_H_
0002 #define __SIMPLE_SAX_PARSER_H_
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <string>
0014 #include <cstdio>
0015 #include <cstdlib>
0016 #include <cassert>
0017 #include <cstring>
0018 #include <iostream>
0019 #include <algorithm>
0020 #include <vector>
0021
// Tokenizer helper used by SimpleSAXParser::getToken(). Only the declaration
// lives in this header; the definition is not visible here.
// NOTE(review): the parameter notes below are inferred from the signature and
// from the call sites in this header -- confirm against the definition.
//   in         - stream the token is read from.
//   buffer     - in/out pointer to the character buffer receiving the token;
//                callers convert *buffer to std::string afterwards, so it is
//                presumably NUL-terminated on return.
//   maxSize    - in/out capacity of *buffer; being passed by pointer suggests
//                the buffer may be regrown by the callee -- TODO confirm.
//   separators - C string of delimiter characters.
//   firstChar  - in/out look-ahead character shared with the caller.
// The meaning of the bool result is not visible here; callers in this header
// ignore it.
0022 bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar);
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069 class SimpleSAXParser {
0070 public:
0071 struct Attribute {
0072 std::string key;
0073 std::string value;
0074
0075 Attribute(const std::string &iKey, const std::string &iValue) : key(iKey), value(iValue) {}
0076
0077 Attribute(const Attribute &attr) : key(attr.key), value(attr.value) {}
0078
0079 bool operator<(const Attribute &attribute) const { return this->key < attribute.key; }
0080 };
0081
0082 typedef std::vector<Attribute> Attributes;
0083 class ParserError {
0084 public:
0085 ParserError(const std::string &error) : m_error(error) {}
0086
0087 const char *error() { return m_error.c_str(); }
0088
0089 private:
0090 std::string m_error;
0091 };
0092
0093 enum PARSER_STATES {
0094 IN_DOCUMENT,
0095 IN_BEGIN_TAG,
0096 IN_DONE,
0097 IN_BEGIN_ELEMENT,
0098 IN_ELEMENT_WHITESPACE,
0099 IN_END_ELEMENT,
0100 IN_ATTRIBUTE_KEY,
0101 IN_END_TAG,
0102 IN_DATA,
0103 IN_BEGIN_ATTRIBUTE_VALUE,
0104 IN_STRING,
0105 IN_END_ATTRIBUTE_VALUE,
0106 IN_STRING_ENTITY,
0107 IN_DATA_ENTITY
0108 };
0109
0110 SimpleSAXParser(std::istream &f)
0111 : m_in(f), m_bufferSize(1024), m_buffer(new char[m_bufferSize]), m_nextChar(m_in.get()) {}
0112
0113 virtual ~SimpleSAXParser();
0114
0115 void parse(void);
0116
0117 virtual void startElement(const std::string & , Attributes & ) {}
0118 virtual void endElement(const std::string & ) {}
0119 virtual void data(const std::string & ) {}
0120
0121 SimpleSAXParser(const SimpleSAXParser &) = delete;
0122 const SimpleSAXParser &operator=(const SimpleSAXParser &) = delete;
0123
0124 private:
0125 std::string parseEntity(const std::string &entity);
0126 std::string getToken(const char *delim) {
0127 fgettoken(m_in, &m_buffer, &m_bufferSize, delim, &m_nextChar);
0128 return m_buffer;
0129 }
0130
0131 std::string getToken(const char delim) {
0132 char buf[2] = {delim, 0};
0133 fgettoken(m_in, &m_buffer, &m_bufferSize, buf, &m_nextChar);
0134 m_nextChar = m_in.get();
0135 return m_buffer;
0136 }
0137
0138 bool skipChar(int c) {
0139 if (m_nextChar != c)
0140 return false;
0141 m_nextChar = m_in.get();
0142 return true;
0143 }
0144
0145 int nextChar(void) { return m_nextChar; }
0146
0147 std::istream &m_in;
0148 size_t m_bufferSize;
0149 char *m_buffer;
0150 int m_nextChar;
0151 std::vector<std::string> m_elementTags;
0152 Attributes m_attributes;
0153 };
0154
0155
0156 #endif