// File indexing completed on 2021-02-14 13:26:30
#include "EventFilter/Utilities/interface/reader.h"
#include "EventFilter/Utilities/interface/value.h"
#include <utility>
#include <cstdio>
#include <cassert>
#include <cstring>
#include <iostream>
#include <iterator>
#include <stdexcept>
0009
0010 #if _MSC_VER >= 1400
0011 #pragma warning(disable : 4996)
0012 #endif
0013
0014 namespace Json {
0015
0016
0017
0018
0019 Features::Features() : allowComments_(true), strictRoot_(false) {}
0020
0021 Features Features::all() { return Features(); }
0022
0023 Features Features::strictMode() {
0024 Features features;
0025 features.allowComments_ = false;
0026 features.strictRoot_ = true;
0027 return features;
0028 }
0029
0030
0031
0032
0033 static inline bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4) {
0034 return c == c1 || c == c2 || c == c3 || c == c4;
0035 }
0036
0037 static inline bool in(
0038 Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5) {
0039 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
0040 }
0041
0042 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
0043 for (; begin < end; ++begin)
0044 if (*begin == '\n' || *begin == '\r')
0045 return true;
0046 return false;
0047 }
0048
// Encodes a Unicode code point as a UTF-8 byte sequence.
//
// cp      : code point to encode.
// returns : 1 to 4 bytes for cp <= 0x10FFFF; an empty string for anything
//           larger (no error is reported for out-of-range input).
//
// Every byte is assembled as an unsigned value and converted to char in one
// explicit cast, so all branches build their bytes the same way.
static std::string codePointToUTF8(unsigned int cp) {
  std::string result;

  if (cp <= 0x7f) {
    // 0xxxxxxx
    result.push_back(static_cast<char>(cp));
  } else if (cp <= 0x7FF) {
    // 110xxxxx 10xxxxxx
    result.push_back(static_cast<char>(0xC0 | (0x1f & (cp >> 6))));
    result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
  } else if (cp <= 0xFFFF) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    result.push_back(static_cast<char>(0xE0 | (0xf & (cp >> 12))));
    result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 6))));
    result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
  } else if (cp <= 0x10FFFF) {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    result.push_back(static_cast<char>(0xF0 | (0x7 & (cp >> 18))));
    result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 12))));
    result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 6))));
    result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
  }

  return result;
}
0076
0077
0078
0079
0080 Reader::Reader() : features_(Features::all()) {}
0081
0082 Reader::Reader(const Features &features) : features_(features) {}
0083
0084 bool Reader::parse(const std::string &document, Value &root, bool collectComments) {
0085 document_ = document;
0086 const char *begin = document_.c_str();
0087 const char *end = begin + document_.length();
0088 return parse(begin, end, root, collectComments);
0089 }
0090
0091 bool Reader::parse(std::istream &sin, Value &root, bool collectComments) {
0092
0093
0094
0095
0096
0097
0098
0099 std::string doc;
0100 std::getline(sin, doc, (char)EOF);
0101 return parse(doc, root, collectComments);
0102 }
0103
0104 bool Reader::parse(const char *beginDoc, const char *endDoc, Value &root, bool collectComments) {
0105 if (!features_.allowComments_) {
0106 collectComments = false;
0107 }
0108
0109 begin_ = beginDoc;
0110 end_ = endDoc;
0111 collectComments_ = collectComments;
0112 current_ = begin_;
0113 lastValueEnd_ = nullptr;
0114 lastValue_ = nullptr;
0115 commentsBefore_ = "";
0116 errors_.clear();
0117 while (!nodes_.empty())
0118 nodes_.pop();
0119 nodes_.push(&root);
0120
0121 bool successful = readValue();
0122 Token token;
0123 skipCommentTokens(token);
0124 if (collectComments_ && !commentsBefore_.empty())
0125 root.setComment(commentsBefore_, commentAfter);
0126 if (features_.strictRoot_) {
0127 if (!root.isArray() && !root.isObject()) {
0128
0129 token.type_ = tokenError;
0130 token.start_ = beginDoc;
0131 token.end_ = endDoc;
0132 addError("A valid JSON document must be either an array or an object value.", token);
0133 return false;
0134 }
0135 }
0136 return successful;
0137 }
0138
0139 bool Reader::readValue() {
0140 Token token;
0141 skipCommentTokens(token);
0142 bool successful = true;
0143
0144 if (collectComments_ && !commentsBefore_.empty()) {
0145 currentValue().setComment(commentsBefore_, commentBefore);
0146 commentsBefore_ = "";
0147 }
0148
0149 switch (token.type_) {
0150 case tokenObjectBegin:
0151 successful = readObject(token);
0152 break;
0153 case tokenArrayBegin:
0154 successful = readArray(token);
0155 break;
0156 case tokenNumber:
0157 successful = decodeNumber(token);
0158 break;
0159 case tokenString:
0160 successful = decodeString(token);
0161 break;
0162 case tokenTrue:
0163 currentValue() = true;
0164 break;
0165 case tokenFalse:
0166 currentValue() = false;
0167 break;
0168 case tokenNull:
0169 currentValue() = Value();
0170 break;
0171 default:
0172 return addError("Syntax error: value, object or array expected.", token);
0173 }
0174
0175 if (collectComments_) {
0176 lastValueEnd_ = current_;
0177 lastValue_ = ¤tValue();
0178 }
0179
0180 return successful;
0181 }
0182
0183 void Reader::skipCommentTokens(Token &token) {
0184 if (features_.allowComments_) {
0185 do {
0186 readToken(token);
0187 } while (token.type_ == tokenComment);
0188 } else {
0189 readToken(token);
0190 }
0191 }
0192
0193 bool Reader::expectToken(TokenType type, Token &token, const char *message) {
0194 readToken(token);
0195 if (token.type_ != type)
0196 return addError(message, token);
0197 return true;
0198 }
0199
0200 bool Reader::readToken(Token &token) {
0201 skipSpaces();
0202 token.start_ = current_;
0203 Char c = getNextChar();
0204 bool ok = true;
0205 switch (c) {
0206 case '{':
0207 token.type_ = tokenObjectBegin;
0208 break;
0209 case '}':
0210 token.type_ = tokenObjectEnd;
0211 break;
0212 case '[':
0213 token.type_ = tokenArrayBegin;
0214 break;
0215 case ']':
0216 token.type_ = tokenArrayEnd;
0217 break;
0218 case '"':
0219 token.type_ = tokenString;
0220 ok = readString();
0221 break;
0222 case '/':
0223 token.type_ = tokenComment;
0224 ok = readComment();
0225 break;
0226 case '0':
0227 case '1':
0228 case '2':
0229 case '3':
0230 case '4':
0231 case '5':
0232 case '6':
0233 case '7':
0234 case '8':
0235 case '9':
0236 case '-':
0237 token.type_ = tokenNumber;
0238 readNumber();
0239 break;
0240 case 't':
0241 token.type_ = tokenTrue;
0242 ok = match("rue", 3);
0243 break;
0244 case 'f':
0245 token.type_ = tokenFalse;
0246 ok = match("alse", 4);
0247 break;
0248 case 'n':
0249 token.type_ = tokenNull;
0250 ok = match("ull", 3);
0251 break;
0252 case ',':
0253 token.type_ = tokenArraySeparator;
0254 break;
0255 case ':':
0256 token.type_ = tokenMemberSeparator;
0257 break;
0258 case 0:
0259 token.type_ = tokenEndOfStream;
0260 break;
0261 default:
0262 ok = false;
0263 break;
0264 }
0265 if (!ok)
0266 token.type_ = tokenError;
0267 token.end_ = current_;
0268 return true;
0269 }
0270
0271 void Reader::skipSpaces() {
0272 while (current_ != end_) {
0273 Char c = *current_;
0274 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
0275 ++current_;
0276 else
0277 break;
0278 }
0279 }
0280
0281 bool Reader::match(Location pattern, int patternLength) {
0282 if (end_ - current_ < patternLength)
0283 return false;
0284 int index = patternLength;
0285 while (index--)
0286 if (current_[index] != pattern[index])
0287 return false;
0288 current_ += patternLength;
0289 return true;
0290 }
0291
0292 bool Reader::readComment() {
0293 Location commentBegin = current_ - 1;
0294 Char c = getNextChar();
0295 bool successful = false;
0296 if (c == '*')
0297 successful = readCStyleComment();
0298 else if (c == '/')
0299 successful = readCppStyleComment();
0300 if (!successful)
0301 return false;
0302
0303 if (collectComments_) {
0304 CommentPlacement placement = commentBefore;
0305 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
0306 if (c != '*' || !containsNewLine(commentBegin, current_))
0307 placement = commentAfterOnSameLine;
0308 }
0309
0310 addComment(commentBegin, current_, placement);
0311 }
0312 return true;
0313 }
0314
0315 void Reader::addComment(Location begin, Location end, CommentPlacement placement) {
0316 assert(collectComments_);
0317 if (placement == commentAfterOnSameLine) {
0318 assert(lastValue_ != nullptr);
0319 lastValue_->setComment(std::string(begin, end), placement);
0320 } else {
0321 if (!commentsBefore_.empty())
0322 commentsBefore_ += "\n";
0323 commentsBefore_ += std::string(begin, end);
0324 }
0325 }
0326
0327 bool Reader::readCStyleComment() {
0328 while (current_ != end_) {
0329 Char c = getNextChar();
0330 if (c == '*' && *current_ == '/')
0331 break;
0332 }
0333 return getNextChar() == '/';
0334 }
0335
0336 bool Reader::readCppStyleComment() {
0337 while (current_ != end_) {
0338 Char c = getNextChar();
0339 if (c == '\r' || c == '\n')
0340 break;
0341 }
0342 return true;
0343 }
0344
0345 void Reader::readNumber() {
0346 while (current_ != end_) {
0347 if (!(*current_ >= '0' && *current_ <= '9') && !in(*current_, '.', 'e', 'E', '+', '-'))
0348 break;
0349 ++current_;
0350 }
0351 }
0352
0353 bool Reader::readString() {
0354 Char c = 0;
0355 while (current_ != end_) {
0356 c = getNextChar();
0357 if (c == '\\')
0358 getNextChar();
0359 else if (c == '"')
0360 break;
0361 }
0362 return c == '"';
0363 }
0364
0365 bool Reader::readObject(Token &tokenStart) {
0366 Token tokenName;
0367 std::string name;
0368 currentValue() = Value(objectValue);
0369 while (readToken(tokenName)) {
0370 bool initialTokenOk = true;
0371 while (tokenName.type_ == tokenComment && initialTokenOk)
0372 initialTokenOk = readToken(tokenName);
0373 if (!initialTokenOk)
0374 break;
0375 if (tokenName.type_ == tokenObjectEnd && name.empty())
0376 return true;
0377 if (tokenName.type_ != tokenString)
0378 break;
0379
0380 name = "";
0381 if (!decodeString(tokenName, name))
0382 return recoverFromError(tokenObjectEnd);
0383
0384 Token colon;
0385 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
0386 return addErrorAndRecover("Missing ':' after object member name", colon, tokenObjectEnd);
0387 }
0388 Value &value = currentValue()[name];
0389 nodes_.push(&value);
0390 bool ok = readValue();
0391 nodes_.pop();
0392 if (!ok)
0393 return recoverFromError(tokenObjectEnd);
0394
0395 Token comma;
0396 if (!readToken(comma) ||
0397 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) {
0398 return addErrorAndRecover("Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
0399 }
0400 bool finalizeTokenOk = true;
0401 while (comma.type_ == tokenComment && finalizeTokenOk)
0402 finalizeTokenOk = readToken(comma);
0403 if (comma.type_ == tokenObjectEnd)
0404 return true;
0405 }
0406 return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
0407 }
0408
0409 bool Reader::readArray(Token &tokenStart) {
0410 currentValue() = Value(arrayValue);
0411 skipSpaces();
0412 if (*current_ == ']')
0413 {
0414 Token endArray;
0415 readToken(endArray);
0416 return true;
0417 }
0418 int index = 0;
0419 while (true) {
0420 Value &value = currentValue()[index++];
0421 nodes_.push(&value);
0422 bool ok = readValue();
0423 nodes_.pop();
0424 if (!ok)
0425 return recoverFromError(tokenArrayEnd);
0426
0427 Token token;
0428
0429 ok = readToken(token);
0430 while (token.type_ == tokenComment && ok) {
0431 ok = readToken(token);
0432 }
0433 bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
0434 if (!ok || badTokenType) {
0435 return addErrorAndRecover("Missing ',' or ']' in array declaration", token, tokenArrayEnd);
0436 }
0437 if (token.type_ == tokenArrayEnd)
0438 break;
0439 }
0440 return true;
0441 }
0442
0443 bool Reader::decodeNumber(Token &token) {
0444 bool isDouble = false;
0445 for (Location inspect = token.start_; inspect != token.end_; ++inspect) {
0446 isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') || (*inspect == '-' && inspect != token.start_);
0447 }
0448 if (isDouble)
0449 return decodeDouble(token);
0450 Location current = token.start_;
0451 bool isNegative = *current == '-';
0452 if (isNegative)
0453 ++current;
0454 Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt) : Value::maxUInt) / 10;
0455 Value::UInt value = 0;
0456 while (current < token.end_) {
0457 Char c = *current++;
0458 if (c < '0' || c > '9')
0459 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0460 if (value >= threshold)
0461 return decodeDouble(token);
0462 value = value * 10 + Value::UInt(c - '0');
0463 }
0464 if (isNegative)
0465 currentValue() = -Value::Int(value);
0466 else if (value <= Value::UInt(Value::maxInt))
0467 currentValue() = Value::Int(value);
0468 else
0469 currentValue() = value;
0470 return true;
0471 }
0472
0473 bool Reader::decodeDouble(Token &token) {
0474 double value = 0;
0475 const int bufferSize = 32;
0476 int count;
0477 int length = int(token.end_ - token.start_);
0478 if (length <= bufferSize) {
0479 Char buffer[bufferSize];
0480 memcpy(buffer, token.start_, length);
0481 buffer[length] = 0;
0482 count = sscanf(buffer, "%lf", &value);
0483 } else {
0484 std::string buffer(token.start_, token.end_);
0485 count = sscanf(buffer.c_str(), "%lf", &value);
0486 }
0487
0488 if (count != 1)
0489 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0490 currentValue() = value;
0491 return true;
0492 }
0493
0494 bool Reader::decodeString(Token &token) {
0495 std::string decoded;
0496 if (!decodeString(token, decoded))
0497 return false;
0498 currentValue() = decoded;
0499 return true;
0500 }
0501
0502 bool Reader::decodeString(Token &token, std::string &decoded) {
0503 decoded.reserve(token.end_ - token.start_ - 2);
0504 Location current = token.start_ + 1;
0505 Location end = token.end_ - 1;
0506 while (current != end) {
0507 Char c = *current++;
0508 if (c == '"')
0509 break;
0510 else if (c == '\\') {
0511 if (current == end)
0512 return addError("Empty escape sequence in string", token, current);
0513 Char escape = *current++;
0514 switch (escape) {
0515 case '"':
0516 decoded += '"';
0517 break;
0518 case '/':
0519 decoded += '/';
0520 break;
0521 case '\\':
0522 decoded += '\\';
0523 break;
0524 case 'b':
0525 decoded += '\b';
0526 break;
0527 case 'f':
0528 decoded += '\f';
0529 break;
0530 case 'n':
0531 decoded += '\n';
0532 break;
0533 case 'r':
0534 decoded += '\r';
0535 break;
0536 case 't':
0537 decoded += '\t';
0538 break;
0539 case 'u': {
0540 unsigned int unicode;
0541 if (!decodeUnicodeCodePoint(token, current, end, unicode))
0542 return false;
0543 decoded += codePointToUTF8(unicode);
0544 } break;
0545 default:
0546 return addError("Bad escape sequence in string", token, current);
0547 }
0548 } else {
0549 decoded += c;
0550 }
0551 }
0552 return true;
0553 }
0554
0555 bool Reader::decodeUnicodeCodePoint(Token &token, Location ¤t, Location end, unsigned int &unicode) {
0556 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
0557 return false;
0558 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
0559
0560 if (end - current < 6)
0561 return addError("additional six characters expected to parse unicode surrogate pair.", token, current);
0562 unsigned int surrogatePair;
0563 if (*(current++) == '\\' && *(current++) == 'u') {
0564 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
0565 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
0566 } else
0567 return false;
0568 } else
0569 return addError(
0570 "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current);
0571 }
0572 return true;
0573 }
0574
0575 bool Reader::decodeUnicodeEscapeSequence(Token &token, Location ¤t, Location end, unsigned int &unicode) {
0576 if (end - current < 4)
0577 return addError("Bad unicode escape sequence in string: four digits expected.", token, current);
0578 unicode = 0;
0579 for (int index = 0; index < 4; ++index) {
0580 Char c = *current++;
0581 unicode *= 16;
0582 if (c >= '0' && c <= '9')
0583 unicode += c - '0';
0584 else if (c >= 'a' && c <= 'f')
0585 unicode += c - 'a' + 10;
0586 else if (c >= 'A' && c <= 'F')
0587 unicode += c - 'A' + 10;
0588 else
0589 return addError("Bad unicode escape sequence in string: hexadecimal digit expected.", token, current);
0590 }
0591 return true;
0592 }
0593
0594 bool Reader::addError(const std::string &message, Token &token, Location extra) {
0595 ErrorInfo info;
0596 info.token_ = token;
0597 info.message_ = message;
0598 info.extra_ = extra;
0599 errors_.push_back(info);
0600 return false;
0601 }
0602
0603 bool Reader::recoverFromError(TokenType skipUntilToken) {
0604 int errorCount = int(errors_.size());
0605 Token skip;
0606 while (true) {
0607 if (!readToken(skip))
0608 errors_.resize(errorCount);
0609 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
0610 break;
0611 }
0612 errors_.resize(errorCount);
0613 return false;
0614 }
0615
0616 bool Reader::addErrorAndRecover(const std::string &message, Token &token, TokenType skipUntilToken) {
0617 addError(message, token);
0618 return recoverFromError(skipUntilToken);
0619 }
0620
0621 Value &Reader::currentValue() { return *(nodes_.top()); }
0622
0623 Reader::Char Reader::getNextChar() {
0624 if (current_ == end_)
0625 return 0;
0626 return *current_++;
0627 }
0628
0629 void Reader::getLocationLineAndColumn(Location location, int &line, int &column) const {
0630 Location current = begin_;
0631 Location lastLineStart = current;
0632 line = 0;
0633 while (current < location && current != end_) {
0634 Char c = *current++;
0635 if (c == '\r') {
0636 if (*current == '\n')
0637 ++current;
0638 lastLineStart = current;
0639 ++line;
0640 } else if (c == '\n') {
0641 lastLineStart = current;
0642 ++line;
0643 }
0644 }
0645
0646 column = int(location - lastLineStart) + 1;
0647 ++line;
0648 }
0649
0650 std::string Reader::getLocationLineAndColumn(Location location) const {
0651 int line, column;
0652 getLocationLineAndColumn(location, line, column);
0653 char buffer[18 + 16 + 16 + 1];
0654 sprintf(buffer, "Line %d, Column %d", line, column);
0655 return buffer;
0656 }
0657
0658 std::string Reader::getFormatedErrorMessages() const {
0659 std::string formattedMessage;
0660 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) {
0661 const ErrorInfo &error = *itError;
0662 formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
0663 formattedMessage += " " + error.message_ + "\n";
0664 if (error.extra_)
0665 formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
0666 }
0667 return formattedMessage;
0668 }
0669
0670 std::istream &operator>>(std::istream &sin, Value &root) {
0671 Json::Reader reader;
0672 bool ok = reader.parse(sin, root, true);
0673
0674 if (!ok)
0675 throw std::runtime_error(reader.getFormatedErrorMessages());
0676 return sin;
0677 }
0678
0679 }