File indexing completed on 2024-05-20 22:39:47
#include "EventFilter/Utilities/interface/reader.h"
#include "EventFilter/Utilities/interface/value.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <utility>
0009
0010 #if _MSC_VER >= 1400
0011 #pragma warning(disable : 4996)
0012 #endif
0013
0014 namespace jsoncollector {
0015 namespace Json {
0016
0017
0018
0019
0020 Features::Features() : allowComments_(true), strictRoot_(false) {}
0021
0022 Features Features::all() { return Features(); }
0023
0024 Features Features::strictMode() {
0025 Features features;
0026 features.allowComments_ = false;
0027 features.strictRoot_ = true;
0028 return features;
0029 }
0030
0031
0032
0033
0034 static inline bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4) {
0035 return c == c1 || c == c2 || c == c3 || c == c4;
0036 }
0037
0038 static inline bool in(
0039 Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5) {
0040 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
0041 }
0042
0043 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
0044 for (; begin < end; ++begin)
0045 if (*begin == '\n' || *begin == '\r')
0046 return true;
0047 return false;
0048 }
0049
/// Encodes a Unicode code point as a UTF-8 byte sequence of 1 to 4 bytes.
///
/// @param cp code point to encode.
/// @return the encoded bytes, or an empty string when `cp` is above 0x10FFFF.
static std::string codePointToUTF8(unsigned int cp) {
  // Continuation byte: marker bits 10xxxxxx plus six payload bits of `cp` taken at `shift`.
  const auto trail = [cp](unsigned int shift) { return static_cast<char>(0x80 | ((cp >> shift) & 0x3f)); };

  if (cp <= 0x7f)
    return std::string(1, static_cast<char>(cp));

  if (cp <= 0x7FF) {
    const char lead = static_cast<char>(0xC0 | ((cp >> 6) & 0x1f));
    return std::string{lead, trail(0)};
  }

  if (cp <= 0xFFFF) {
    const char lead = static_cast<char>(0xE0 | ((cp >> 12) & 0xf));
    return std::string{lead, trail(6), trail(0)};
  }

  if (cp <= 0x10FFFF) {
    const char lead = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
    return std::string{lead, trail(12), trail(6), trail(0)};
  }

  // Out of the Unicode range: nothing is emitted.
  return std::string();
}
0077
0078
0079
0080
0081 Reader::Reader() : features_(Features::all()) {}
0082
0083 Reader::Reader(const Features &features) : features_(features) {}
0084
0085 bool Reader::parse(const std::string &document, Value &root, bool collectComments) {
0086 document_ = document;
0087 const char *begin = document_.c_str();
0088 const char *end = begin + document_.length();
0089 return parse(begin, end, root, collectComments);
0090 }
0091
0092 bool Reader::parse(std::istream &sin, Value &root, bool collectComments) {
0093
0094
0095
0096
0097
0098
0099
0100 std::string doc;
0101 std::getline(sin, doc, (char)EOF);
0102 return parse(doc, root, collectComments);
0103 }
0104
0105 bool Reader::parse(const char *beginDoc, const char *endDoc, Value &root, bool collectComments) {
0106 if (!features_.allowComments_) {
0107 collectComments = false;
0108 }
0109
0110 begin_ = beginDoc;
0111 end_ = endDoc;
0112 collectComments_ = collectComments;
0113 current_ = begin_;
0114 lastValueEnd_ = nullptr;
0115 lastValue_ = nullptr;
0116 commentsBefore_ = "";
0117 errors_.clear();
0118 while (!nodes_.empty())
0119 nodes_.pop();
0120 nodes_.push(&root);
0121
0122 bool successful = readValue();
0123 Token token;
0124 skipCommentTokens(token);
0125 if (collectComments_ && !commentsBefore_.empty())
0126 root.setComment(commentsBefore_, commentAfter);
0127 if (features_.strictRoot_) {
0128 if (!root.isArray() && !root.isObject()) {
0129
0130 token.type_ = tokenError;
0131 token.start_ = beginDoc;
0132 token.end_ = endDoc;
0133 addError("A valid JSON document must be either an array or an object value.", token);
0134 return false;
0135 }
0136 }
0137 return successful;
0138 }
0139
0140 bool Reader::readValue() {
0141 Token token;
0142 skipCommentTokens(token);
0143 bool successful = true;
0144
0145 if (collectComments_ && !commentsBefore_.empty()) {
0146 currentValue().setComment(commentsBefore_, commentBefore);
0147 commentsBefore_ = "";
0148 }
0149
0150 switch (token.type_) {
0151 case tokenObjectBegin:
0152 successful = readObject(token);
0153 break;
0154 case tokenArrayBegin:
0155 successful = readArray(token);
0156 break;
0157 case tokenNumber:
0158 successful = decodeNumber(token);
0159 break;
0160 case tokenString:
0161 successful = decodeString(token);
0162 break;
0163 case tokenTrue:
0164 currentValue() = true;
0165 break;
0166 case tokenFalse:
0167 currentValue() = false;
0168 break;
0169 case tokenNull:
0170 currentValue() = Value();
0171 break;
0172 default:
0173 return addError("Syntax error: value, object or array expected.", token);
0174 }
0175
0176 if (collectComments_) {
0177 lastValueEnd_ = current_;
0178 lastValue_ = ¤tValue();
0179 }
0180
0181 return successful;
0182 }
0183
0184 void Reader::skipCommentTokens(Token &token) {
0185 if (features_.allowComments_) {
0186 do {
0187 readToken(token);
0188 } while (token.type_ == tokenComment);
0189 } else {
0190 readToken(token);
0191 }
0192 }
0193
0194 bool Reader::expectToken(TokenType type, Token &token, const char *message) {
0195 readToken(token);
0196 if (token.type_ != type)
0197 return addError(message, token);
0198 return true;
0199 }
0200
0201 bool Reader::readToken(Token &token) {
0202 skipSpaces();
0203 token.start_ = current_;
0204 Char c = getNextChar();
0205 bool ok = true;
0206 switch (c) {
0207 case '{':
0208 token.type_ = tokenObjectBegin;
0209 break;
0210 case '}':
0211 token.type_ = tokenObjectEnd;
0212 break;
0213 case '[':
0214 token.type_ = tokenArrayBegin;
0215 break;
0216 case ']':
0217 token.type_ = tokenArrayEnd;
0218 break;
0219 case '"':
0220 token.type_ = tokenString;
0221 ok = readString();
0222 break;
0223 case '/':
0224 token.type_ = tokenComment;
0225 ok = readComment();
0226 break;
0227 case '0':
0228 case '1':
0229 case '2':
0230 case '3':
0231 case '4':
0232 case '5':
0233 case '6':
0234 case '7':
0235 case '8':
0236 case '9':
0237 case '-':
0238 token.type_ = tokenNumber;
0239 readNumber();
0240 break;
0241 case 't':
0242 token.type_ = tokenTrue;
0243 ok = match("rue", 3);
0244 break;
0245 case 'f':
0246 token.type_ = tokenFalse;
0247 ok = match("alse", 4);
0248 break;
0249 case 'n':
0250 token.type_ = tokenNull;
0251 ok = match("ull", 3);
0252 break;
0253 case ',':
0254 token.type_ = tokenArraySeparator;
0255 break;
0256 case ':':
0257 token.type_ = tokenMemberSeparator;
0258 break;
0259 case 0:
0260 token.type_ = tokenEndOfStream;
0261 break;
0262 default:
0263 ok = false;
0264 break;
0265 }
0266 if (!ok)
0267 token.type_ = tokenError;
0268 token.end_ = current_;
0269 return true;
0270 }
0271
0272 void Reader::skipSpaces() {
0273 while (current_ != end_) {
0274 Char c = *current_;
0275 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
0276 ++current_;
0277 else
0278 break;
0279 }
0280 }
0281
0282 bool Reader::match(Location pattern, int patternLength) {
0283 if (end_ - current_ < patternLength)
0284 return false;
0285 int index = patternLength;
0286 while (index--)
0287 if (current_[index] != pattern[index])
0288 return false;
0289 current_ += patternLength;
0290 return true;
0291 }
0292
0293 bool Reader::readComment() {
0294 Location commentBegin = current_ - 1;
0295 Char c = getNextChar();
0296 bool successful = false;
0297 if (c == '*')
0298 successful = readCStyleComment();
0299 else if (c == '/')
0300 successful = readCppStyleComment();
0301 if (!successful)
0302 return false;
0303
0304 if (collectComments_) {
0305 CommentPlacement placement = commentBefore;
0306 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
0307 if (c != '*' || !containsNewLine(commentBegin, current_))
0308 placement = commentAfterOnSameLine;
0309 }
0310
0311 addComment(commentBegin, current_, placement);
0312 }
0313 return true;
0314 }
0315
0316 void Reader::addComment(Location begin, Location end, CommentPlacement placement) {
0317 assert(collectComments_);
0318 if (placement == commentAfterOnSameLine) {
0319 assert(lastValue_ != nullptr);
0320 lastValue_->setComment(std::string(begin, end), placement);
0321 } else {
0322 if (!commentsBefore_.empty())
0323 commentsBefore_ += "\n";
0324 commentsBefore_ += std::string(begin, end);
0325 }
0326 }
0327
0328 bool Reader::readCStyleComment() {
0329 while (current_ != end_) {
0330 Char c = getNextChar();
0331 if (c == '*' && *current_ == '/')
0332 break;
0333 }
0334 return getNextChar() == '/';
0335 }
0336
0337 bool Reader::readCppStyleComment() {
0338 while (current_ != end_) {
0339 Char c = getNextChar();
0340 if (c == '\r' || c == '\n')
0341 break;
0342 }
0343 return true;
0344 }
0345
0346 void Reader::readNumber() {
0347 while (current_ != end_) {
0348 if (!(*current_ >= '0' && *current_ <= '9') && !in(*current_, '.', 'e', 'E', '+', '-'))
0349 break;
0350 ++current_;
0351 }
0352 }
0353
0354 bool Reader::readString() {
0355 Char c = 0;
0356 while (current_ != end_) {
0357 c = getNextChar();
0358 if (c == '\\')
0359 getNextChar();
0360 else if (c == '"')
0361 break;
0362 }
0363 return c == '"';
0364 }
0365
0366 bool Reader::readObject(Token &tokenStart) {
0367 Token tokenName;
0368 std::string name;
0369 currentValue() = Value(objectValue);
0370 while (readToken(tokenName)) {
0371 bool initialTokenOk = true;
0372 while (tokenName.type_ == tokenComment && initialTokenOk)
0373 initialTokenOk = readToken(tokenName);
0374 if (!initialTokenOk)
0375 break;
0376 if (tokenName.type_ == tokenObjectEnd && name.empty())
0377 return true;
0378 if (tokenName.type_ != tokenString)
0379 break;
0380
0381 name = "";
0382 if (!decodeString(tokenName, name))
0383 return recoverFromError(tokenObjectEnd);
0384
0385 Token colon;
0386 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
0387 return addErrorAndRecover("Missing ':' after object member name", colon, tokenObjectEnd);
0388 }
0389 Value &value = currentValue()[name];
0390 nodes_.push(&value);
0391 bool ok = readValue();
0392 nodes_.pop();
0393 if (!ok)
0394 return recoverFromError(tokenObjectEnd);
0395
0396 Token comma;
0397 if (!readToken(comma) ||
0398 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) {
0399 return addErrorAndRecover("Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
0400 }
0401 bool finalizeTokenOk = true;
0402 while (comma.type_ == tokenComment && finalizeTokenOk)
0403 finalizeTokenOk = readToken(comma);
0404 if (comma.type_ == tokenObjectEnd)
0405 return true;
0406 }
0407 return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
0408 }
0409
0410 bool Reader::readArray(Token &tokenStart) {
0411 currentValue() = Value(arrayValue);
0412 skipSpaces();
0413 if (*current_ == ']')
0414 {
0415 Token endArray;
0416 readToken(endArray);
0417 return true;
0418 }
0419 int index = 0;
0420 while (true) {
0421 Value &value = currentValue()[index++];
0422 nodes_.push(&value);
0423 bool ok = readValue();
0424 nodes_.pop();
0425 if (!ok)
0426 return recoverFromError(tokenArrayEnd);
0427
0428 Token token;
0429
0430 ok = readToken(token);
0431 while (token.type_ == tokenComment && ok) {
0432 ok = readToken(token);
0433 }
0434 bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
0435 if (!ok || badTokenType) {
0436 return addErrorAndRecover("Missing ',' or ']' in array declaration", token, tokenArrayEnd);
0437 }
0438 if (token.type_ == tokenArrayEnd)
0439 break;
0440 }
0441 return true;
0442 }
0443
0444 bool Reader::decodeNumber(Token &token) {
0445 bool isDouble = false;
0446 for (Location inspect = token.start_; inspect != token.end_; ++inspect) {
0447 isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') || (*inspect == '-' && inspect != token.start_);
0448 }
0449 if (isDouble)
0450 return decodeDouble(token);
0451 Location current = token.start_;
0452 bool isNegative = *current == '-';
0453 if (isNegative)
0454 ++current;
0455 Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt) : Value::maxUInt) / 10;
0456 Value::UInt value = 0;
0457 while (current < token.end_) {
0458 Char c = *current++;
0459 if (c < '0' || c > '9')
0460 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0461 if (value >= threshold)
0462 return decodeDouble(token);
0463 value = value * 10 + Value::UInt(c - '0');
0464 }
0465 if (isNegative)
0466 currentValue() = -Value::Int(value);
0467 else if (value <= Value::UInt(Value::maxInt))
0468 currentValue() = Value::Int(value);
0469 else
0470 currentValue() = value;
0471 return true;
0472 }
0473
0474 bool Reader::decodeDouble(Token &token) {
0475 double value = 0;
0476 const int bufferSize = 32;
0477 int count;
0478 int length = int(token.end_ - token.start_);
0479 if (length <= bufferSize) {
0480 Char buffer[bufferSize];
0481 memcpy(buffer, token.start_, length);
0482 buffer[length] = 0;
0483 count = sscanf(buffer, "%lf", &value);
0484 } else {
0485 std::string buffer(token.start_, token.end_);
0486 count = sscanf(buffer.c_str(), "%lf", &value);
0487 }
0488
0489 if (count != 1)
0490 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0491 currentValue() = value;
0492 return true;
0493 }
0494
0495 bool Reader::decodeString(Token &token) {
0496 std::string decoded;
0497 if (!decodeString(token, decoded))
0498 return false;
0499 currentValue() = decoded;
0500 return true;
0501 }
0502
0503 bool Reader::decodeString(Token &token, std::string &decoded) {
0504 decoded.reserve(token.end_ - token.start_ - 2);
0505 Location current = token.start_ + 1;
0506 Location end = token.end_ - 1;
0507 while (current != end) {
0508 Char c = *current++;
0509 if (c == '"')
0510 break;
0511 else if (c == '\\') {
0512 if (current == end)
0513 return addError("Empty escape sequence in string", token, current);
0514 Char escape = *current++;
0515 switch (escape) {
0516 case '"':
0517 decoded += '"';
0518 break;
0519 case '/':
0520 decoded += '/';
0521 break;
0522 case '\\':
0523 decoded += '\\';
0524 break;
0525 case 'b':
0526 decoded += '\b';
0527 break;
0528 case 'f':
0529 decoded += '\f';
0530 break;
0531 case 'n':
0532 decoded += '\n';
0533 break;
0534 case 'r':
0535 decoded += '\r';
0536 break;
0537 case 't':
0538 decoded += '\t';
0539 break;
0540 case 'u': {
0541 unsigned int unicode;
0542 if (!decodeUnicodeCodePoint(token, current, end, unicode))
0543 return false;
0544 decoded += codePointToUTF8(unicode);
0545 } break;
0546 default:
0547 return addError("Bad escape sequence in string", token, current);
0548 }
0549 } else {
0550 decoded += c;
0551 }
0552 }
0553 return true;
0554 }
0555
0556 bool Reader::decodeUnicodeCodePoint(Token &token, Location ¤t, Location end, unsigned int &unicode) {
0557 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
0558 return false;
0559 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
0560
0561 if (end - current < 6)
0562 return addError("additional six characters expected to parse unicode surrogate pair.", token, current);
0563 unsigned int surrogatePair;
0564 if (*(current++) == '\\' && *(current++) == 'u') {
0565 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
0566 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
0567 } else
0568 return false;
0569 } else
0570 return addError(
0571 "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current);
0572 }
0573 return true;
0574 }
0575
0576 bool Reader::decodeUnicodeEscapeSequence(Token &token, Location ¤t, Location end, unsigned int &unicode) {
0577 if (end - current < 4)
0578 return addError("Bad unicode escape sequence in string: four digits expected.", token, current);
0579 unicode = 0;
0580 for (int index = 0; index < 4; ++index) {
0581 Char c = *current++;
0582 unicode *= 16;
0583 if (c >= '0' && c <= '9')
0584 unicode += c - '0';
0585 else if (c >= 'a' && c <= 'f')
0586 unicode += c - 'a' + 10;
0587 else if (c >= 'A' && c <= 'F')
0588 unicode += c - 'A' + 10;
0589 else
0590 return addError("Bad unicode escape sequence in string: hexadecimal digit expected.", token, current);
0591 }
0592 return true;
0593 }
0594
0595 bool Reader::addError(const std::string &message, Token &token, Location extra) {
0596 ErrorInfo info;
0597 info.token_ = token;
0598 info.message_ = message;
0599 info.extra_ = extra;
0600 errors_.push_back(info);
0601 return false;
0602 }
0603
0604 bool Reader::recoverFromError(TokenType skipUntilToken) {
0605 int errorCount = int(errors_.size());
0606 Token skip;
0607 while (true) {
0608 if (!readToken(skip))
0609 errors_.resize(errorCount);
0610 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
0611 break;
0612 }
0613 errors_.resize(errorCount);
0614 return false;
0615 }
0616
0617 bool Reader::addErrorAndRecover(const std::string &message, Token &token, TokenType skipUntilToken) {
0618 addError(message, token);
0619 return recoverFromError(skipUntilToken);
0620 }
0621
0622 Value &Reader::currentValue() { return *(nodes_.top()); }
0623
0624 Reader::Char Reader::getNextChar() {
0625 if (current_ == end_)
0626 return 0;
0627 return *current_++;
0628 }
0629
0630 void Reader::getLocationLineAndColumn(Location location, int &line, int &column) const {
0631 Location current = begin_;
0632 Location lastLineStart = current;
0633 line = 0;
0634 while (current < location && current != end_) {
0635 Char c = *current++;
0636 if (c == '\r') {
0637 if (*current == '\n')
0638 ++current;
0639 lastLineStart = current;
0640 ++line;
0641 } else if (c == '\n') {
0642 lastLineStart = current;
0643 ++line;
0644 }
0645 }
0646
0647 column = int(location - lastLineStart) + 1;
0648 ++line;
0649 }
0650
0651 std::string Reader::getLocationLineAndColumn(Location location) const {
0652 int line, column;
0653 getLocationLineAndColumn(location, line, column);
0654 char buffer[18 + 16 + 16 + 1];
0655 sprintf(buffer, "Line %d, Column %d", line, column);
0656 return buffer;
0657 }
0658
0659 std::string Reader::getFormatedErrorMessages() const {
0660 std::string formattedMessage;
0661 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) {
0662 const ErrorInfo &error = *itError;
0663 formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
0664 formattedMessage += " " + error.message_ + "\n";
0665 if (error.extra_)
0666 formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
0667 }
0668 return formattedMessage;
0669 }
0670
0671 std::istream &operator>>(std::istream &sin, Value &root) {
0672 Json::Reader reader;
0673 bool ok = reader.parse(sin, root, true);
0674
0675 if (!ok)
0676 throw std::runtime_error(reader.getFormatedErrorMessages());
0677 return sin;
0678 }
0679
0680 }
0681 }