File indexing completed on 2024-04-06 12:11:33
0001 #ifndef __SIMPLE_SAX_PARSER_H_
0002 #define __SIMPLE_SAX_PARSER_H_
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <string>
0014 #include <cstdio>
0015 #include <cstdlib>
0016 #include <cassert>
0017 #include <cstring>
0018 #include <iostream>
0019 #include <algorithm>
0020 #include <vector>
0021
/**
 * Tokenizer primitive used by SimpleSAXParser::getToken(); only declared here,
 * the definition lives elsewhere.
 *
 * NOTE(review): judging from the signature and its call sites below, this
 * presumably reads characters from `in` into `*buffer` (a NUL-terminated C
 * string that may be re-allocated, with `*maxSize` tracking its capacity)
 * until one of the characters in `separators` is met, and maintains the
 * one-character lookahead in `*firstChar` -- confirm against the definition.
 */
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar);

/**
 * Minimal callback-based (SAX-style) parser front end.
 *
 * Clients derive from this class and override startElement(), endElement()
 * and data(); the default implementations do nothing. parse(), parseEntity()
 * and the destructor are defined out of line, outside this header.
 *
 * The parser is non-copyable: it holds a reference to the input stream and a
 * raw pointer to a heap-allocated token buffer.
 */
class SimpleSAXParser {
public:
    /** A single key/value attribute pair. */
    struct Attribute {
        std::string key;
        std::string value;

        Attribute(const std::string &iKey, const std::string &iValue) : key(iKey), value(iValue) {}

        /** Orders attributes by key only; the value does not take part. */
        bool operator<(const Attribute &attribute) const { return this->key < attribute.key; }
    };

    /** Attribute list handed to startElement(). */
    using Attributes = std::vector<Attribute>;

    /** Error object carrying a human-readable message. */
    class ParserError {
    public:
        ParserError(const std::string &error) : m_error(error) {}

        /**
         * Returns the stored message as a C string, valid for as long as this
         * object lives. Const-qualified so that it can be called on errors
         * caught by const reference.
         */
        const char *error() const { return m_error.c_str(); }

    private:
        std::string m_error;
    };

    /**
     * Parser state identifiers.
     * NOTE(review): presumably consumed by parse(), which is not visible in
     * this header. Enumerator order (and thus numeric value) is preserved.
     */
    enum PARSER_STATES {
        IN_DOCUMENT,
        IN_BEGIN_TAG,
        IN_DONE,
        IN_BEGIN_ELEMENT,
        IN_ELEMENT_WHITESPACE,
        IN_END_ELEMENT,
        IN_ATTRIBUTE_KEY,
        IN_END_TAG,
        IN_DATA,
        IN_BEGIN_ATTRIBUTE_VALUE,
        IN_STRING,
        IN_END_ATTRIBUTE_VALUE,
        IN_STRING_ENTITY,
        IN_DATA_ENTITY
    };

    /**
     * Binds the parser to the stream `f` and primes the lookahead.
     *
     * The stream is stored by reference, so it must outlive the parser.
     * A 1024-byte token buffer is allocated and the first character is read
     * from the stream immediately.
     *
     * The initializer list relies on the member declaration order below:
     * m_bufferSize must be initialized before m_buffer, and m_in before
     * m_nextChar.
     */
    SimpleSAXParser(std::istream &f)
        : m_in(f), m_bufferSize(1024), m_buffer(new char[m_bufferSize]), m_nextChar(m_in.get()) {}

    virtual ~SimpleSAXParser();

    /** Runs the parser; defined out of line. */
    void parse(void);

    // Event callbacks. The defaults are no-ops; override the ones of interest.
    // NOTE(review): the string arguments are presumably the element name
    // (startElement / endElement) and the character data (data) -- confirm
    // against parse().
    virtual void startElement(const std::string & /* name */, Attributes & /* attributes */) {}
    virtual void endElement(const std::string & /* name */) {}
    virtual void data(const std::string & /* data */) {}

    SimpleSAXParser(const SimpleSAXParser &) = delete;
    const SimpleSAXParser &operator=(const SimpleSAXParser &) = delete;

private:
    /** Entity handling; defined out of line. */
    std::string parseEntity(const std::string &entity);

    /**
     * Reads a token using the separator set `delim` and returns a copy of
     * the token buffer as a std::string.
     *
     * NOTE(review): the bool result of fgettoken() is ignored, so a failed
     * read is indistinguishable from a successful one here -- verify that
     * the buffer holds a valid string in that case.
     */
    std::string getToken(const char *delim) {
        fgettoken(m_in, &m_buffer, &m_bufferSize, delim, &m_nextChar);
        return m_buffer;
    }

    /**
     * Single-separator overload. After the token has been read, the
     * lookahead is replaced by the next character taken from the stream,
     * discarding whatever fgettoken() left there (presumably the separator
     * itself, which is thereby skipped).
     */
    std::string getToken(const char delim) {
        const char separators[2] = {delim, 0};
        fgettoken(m_in, &m_buffer, &m_bufferSize, separators, &m_nextChar);
        m_nextChar = m_in.get();
        return m_buffer;
    }

    /**
     * Consumes the lookahead character if it equals `c`.
     * @return true when the character matched and the lookahead was
     *         advanced; false (nothing consumed) otherwise.
     */
    bool skipChar(int c) {
        if (m_nextChar != c)
            return false;
        m_nextChar = m_in.get();
        return true;
    }

    /** Returns the current lookahead character without consuming it. */
    int nextChar(void) const { return m_nextChar; }

    std::istream &m_in;                     // input stream (not owned)
    size_t m_bufferSize;                    // capacity of m_buffer, passed to fgettoken() by pointer
    char *m_buffer;                         // token buffer allocated with new[]
    int m_nextChar;                         // one-character lookahead, as returned by istream::get()
    std::vector<std::string> m_elementTags; // NOTE(review): presumably the stack of open element names
    Attributes m_attributes;                // NOTE(review): presumably the attributes of the current element
};
0152
0153
0154 #endif