File indexing completed on 2024-04-06 12:11:44
0001 #include "Fireworks/Core/interface/SimpleSAXParser.h"
0002
0003
0004
0005
0006 std::string SimpleSAXParser::parseEntity(const std::string &entity) {
0007 if (entity == "quot")
0008 return "\"";
0009 else if (entity == "amp")
0010 return "&";
0011 else if (entity == "lt")
0012 return "<";
0013 else if (entity == "gt")
0014 return ">";
0015 throw ParserError("Unknown entity " + entity);
0016 }
0017
0018 void debug_state_machine(enum SimpleSAXParser::PARSER_STATES state) {
0019 #ifdef SIMPLE_SAX_PARSER_DEBUG
0020 static char *debug_states[] = {"IN_DOCUMENT",
0021 "IN_BEGIN_TAG",
0022 "IN_DONE",
0023 "IN_BEGIN_ELEMENT",
0024 "IN_ELEMENT_WHITESPACE",
0025 "IN_END_ELEMENT",
0026 "IN_ATTRIBUTE_KEY",
0027 "IN_END_TAG",
0028 "IN_DATA",
0029 "IN_BEGIN_ATTRIBUTE_VALUE",
0030 "IN_STRING",
0031 "IN_END_ATTRIBUTE_VALUE",
0032 "IN_STRING_ENTITY",
0033 "IN_DATA_ENTITY"};
0034
0035 std::cerr << debug_states[state] << std::endl;
0036 #endif
0037 }
0038
0039
0040
0041
0042
0043
0044
0045
0046 void SimpleSAXParser::parse(void) {
0047 enum PARSER_STATES state = IN_DOCUMENT;
0048
0049 char stringDelims[] = "\"&";
0050 std::string attributeName;
0051 std::string attributeValue;
0052 std::string tmp;
0053 std::string currentData;
0054
0055 while (state != IN_DONE) {
0056 debug_state_machine(state);
0057
0058 switch (state) {
0059
0060 case IN_DOCUMENT:
0061 state = IN_DATA;
0062 if (skipChar('<'))
0063 state = IN_BEGIN_TAG;
0064 break;
0065
0066 case IN_BEGIN_TAG:
0067 if (nextChar() >= 'A' && nextChar() <= 'z')
0068 state = IN_BEGIN_ELEMENT;
0069 else if (skipChar('/'))
0070 state = IN_END_ELEMENT;
0071 else
0072 throw ParserError("Bad tag");
0073 break;
0074
0075 case IN_BEGIN_ELEMENT:
0076 m_attributes.clear();
0077 m_elementTags.push_back(getToken(" />"));
0078 if (nextChar() == ' ')
0079 state = IN_ELEMENT_WHITESPACE;
0080 else if (skipChar('/'))
0081 state = IN_END_ELEMENT;
0082 else if (skipChar('>')) {
0083 startElement(m_elementTags.back(), m_attributes);
0084 state = IN_END_TAG;
0085 } else
0086 throw ParserError("Bad element.");
0087 break;
0088
0089 case IN_ELEMENT_WHITESPACE:
0090 while (skipChar(' ') || skipChar('\n') || skipChar('\t')) {
0091 }
0092
0093 if (nextChar() >= 'A' && nextChar() <= 'z')
0094 state = IN_ATTRIBUTE_KEY;
0095 else if (nextChar() == '/')
0096 state = IN_END_ELEMENT;
0097 else
0098 throw ParserError("Syntax error in element" + m_elementTags.back());
0099 break;
0100
0101 case IN_ATTRIBUTE_KEY:
0102 attributeName = getToken('=');
0103 state = IN_BEGIN_ATTRIBUTE_VALUE;
0104 break;
0105
0106 case IN_BEGIN_ATTRIBUTE_VALUE:
0107 if (skipChar('"')) {
0108 state = IN_STRING;
0109 attributeValue.clear();
0110 stringDelims[0] = '\"';
0111 } else if (skipChar('\'')) {
0112 state = IN_STRING;
0113 attributeValue.clear();
0114 stringDelims[0] = '\'';
0115 } else
0116 throw ParserError("Expecting quotes.");
0117 break;
0118
0119 case IN_STRING:
0120 attributeValue += getToken(stringDelims);
0121 if (skipChar(stringDelims[0])) {
0122
0123
0124 Attribute attr(attributeName, attributeValue);
0125 Attributes::iterator i = std::lower_bound(m_attributes.begin(), m_attributes.end(), attr);
0126 if (i != m_attributes.end() && i->key == attr.key)
0127 throw ParserError("Attribute " + i->key + " defined more than once");
0128 m_attributes.insert(i, attr);
0129 state = IN_END_ATTRIBUTE_VALUE;
0130 } else if (skipChar(stringDelims[1]))
0131 state = IN_STRING_ENTITY;
0132 else
0133 throw ParserError("Unexpected end of input at " + attributeValue);
0134 break;
0135
0136 case IN_END_ATTRIBUTE_VALUE:
0137 getToken(" />");
0138 if (nextChar() == ' ')
0139 state = IN_ELEMENT_WHITESPACE;
0140 else if (skipChar('/'))
0141 state = IN_END_ELEMENT;
0142 else if (skipChar('>')) {
0143 startElement(m_elementTags.back(), m_attributes);
0144 state = IN_END_TAG;
0145 }
0146 break;
0147
0148 case IN_END_ELEMENT:
0149 tmp = getToken('>');
0150 if (!tmp.empty() && tmp != m_elementTags.back())
0151 throw ParserError("Non-matching closing element " + tmp + " for " + attributeValue);
0152 endElement(tmp);
0153 m_elementTags.pop_back();
0154 state = IN_END_TAG;
0155 break;
0156
0157 case IN_END_TAG:
0158 if (nextChar() == EOF)
0159 return;
0160 else if (skipChar('<'))
0161 state = IN_BEGIN_TAG;
0162 else
0163 state = IN_DATA;
0164 break;
0165
0166 case IN_DATA:
0167 currentData += getToken("<&");
0168 if (skipChar('&'))
0169 state = IN_DATA_ENTITY;
0170 else if (skipChar('<')) {
0171 data(currentData);
0172 currentData.clear();
0173 state = IN_BEGIN_TAG;
0174 } else if (nextChar() == EOF) {
0175 data(currentData);
0176 return;
0177 } else
0178 throw ParserError("Unexpected end of input in element " + m_elementTags.back() + currentData);
0179 break;
0180
0181 case IN_DATA_ENTITY:
0182 currentData += parseEntity(getToken(';'));
0183 state = IN_DATA;
0184 break;
0185
0186 case IN_STRING_ENTITY:
0187 attributeValue += parseEntity(getToken(';'));
0188 state = IN_STRING;
0189 break;
0190
0191 case IN_DONE:
0192 return;
0193 }
0194 }
0195 }
0196
0197 SimpleSAXParser::~SimpleSAXParser() { delete[] m_buffer; }
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
/** Tells whether @a c ends a token for the separator list @a separators.

    The first separator is compared directly and the rest through strchr().
    As a consequence a NUL character in the input always ends a token when
    the list is not empty. An empty list is never searched past its
    terminator.
  */
static bool isTokenSeparator(const char *separators, int c) {
  if (static_cast<int>(separators[0]) == c)
    return true;
  return separators[0] != '\0' && strchr(separators + 1, c) != 0;
}

/** Reads one token from @a in into @a *buffer.

    The token starts with the pending character @a *firstChar and extends up
    to, but not including, the first separator read from @a in. If the
    pending character is itself EOF or a separator, the token is empty and
    nothing is read from the stream.

    @param in         the stream to read from.

    @param buffer     in/out: array that receives the NUL-terminated token.
                      When the token does not fit, a larger array is obtained
                      with new[], the old one is released with delete[] and
                      @a *buffer is updated. The array passed in must
                      therefore come from new[] (this matches the delete[] in
                      ~SimpleSAXParser).

    @param maxSize    in/out: number of chars @a *buffer can hold, assumed to
                      be at least 1; grows in steps of 1024.

    @param separators NUL-terminated list of the characters that end a token.

    @param firstChar  in: the pending look-ahead character. out: the
                      separator (or EOF) that ended the token; untouched when
                      the token is empty.

    @return true if the token was ended by a separator (or was empty), false
            if the stream ran out while the token was being read.

    @throws std::bad_alloc if the buffer cannot be enlarged; @a *buffer and
            @a *maxSize are left unchanged in that case.
  */
bool fgettoken(std::istream &in, char **buffer, size_t *maxSize, const char *separators, int *firstChar) {
  // The pending character already terminates the token: return an empty
  // string and leave the stream alone.
  if (*firstChar == EOF || isTokenSeparator(separators, *firstChar)) {
    (*buffer)[0] = 0;
    return true;
  }
  (*buffer)[0] = static_cast<char>(*firstChar);

  size_t i = 1;

  while (true) {
    // Make room for index i before anything (character or terminator) is
    // written there.
    if (i >= *maxSize) {
      // Grow with new[]/delete[] rather than realloc(): the buffer is
      // released with delete[] by its owner, and the two allocation families
      // must not be mixed. Allocating before releasing also keeps the old
      // buffer valid should the allocation fail.
      const size_t grownSize = *maxSize + 1024;
      char *grown = new char[grownSize];
      memcpy(grown, *buffer, i);
      delete[] *buffer;
      *buffer = grown;
      *maxSize = grownSize;
    }

    const int c = in.get();

    if (c == EOF) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return false;
    }

    if (isTokenSeparator(separators, c)) {
      (*buffer)[i] = 0;
      *firstChar = c;
      return true;
    }

    (*buffer)[i++] = static_cast<char>(c);
  }
}
0260 }