Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:26:30

#include "EventFilter/Utilities/interface/reader.h"
#include "EventFilter/Utilities/interface/value.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
0009 
0010 #if _MSC_VER >= 1400             // VC++ 8.0
0011 #pragma warning(disable : 4996)  // disable warning about strdup being deprecated.
0012 #endif
0013 
0014 namespace Json {
0015 
0016   // Implementation of class Features
0017   // ////////////////////////////////
0018 
0019   Features::Features() : allowComments_(true), strictRoot_(false) {}
0020 
0021   Features Features::all() { return Features(); }
0022 
0023   Features Features::strictMode() {
0024     Features features;
0025     features.allowComments_ = false;
0026     features.strictRoot_ = true;
0027     return features;
0028   }
0029 
0030   // Implementation of class Reader
0031   // ////////////////////////////////
0032 
0033   static inline bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4) {
0034     return c == c1 || c == c2 || c == c3 || c == c4;
0035   }
0036 
0037   static inline bool in(
0038       Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5) {
0039     return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
0040   }
0041 
0042   static bool containsNewLine(Reader::Location begin, Reader::Location end) {
0043     for (; begin < end; ++begin)
0044       if (*begin == '\n' || *begin == '\r')
0045         return true;
0046     return false;
0047   }
0048 
0049   static std::string codePointToUTF8(unsigned int cp) {
0050     std::string result;
0051 
0052     // based on description from http://en.wikipedia.org/wiki/UTF-8
0053 
0054     if (cp <= 0x7f) {
0055       result.resize(1);
0056       result[0] = static_cast<char>(cp);
0057     } else if (cp <= 0x7FF) {
0058       result.resize(2);
0059       result[1] = static_cast<char>(0x80 | (0x3f & cp));
0060       result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
0061     } else if (cp <= 0xFFFF) {
0062       result.resize(3);
0063       result[2] = static_cast<char>(0x80 | (0x3f & cp));
0064       result[1] = 0x80 | static_cast<char>((0x3f & (cp >> 6)));
0065       result[0] = 0xE0 | static_cast<char>((0xf & (cp >> 12)));
0066     } else if (cp <= 0x10FFFF) {
0067       result.resize(4);
0068       result[3] = static_cast<char>(0x80 | (0x3f & cp));
0069       result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
0070       result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
0071       result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
0072     }
0073 
0074     return result;
0075   }
0076 
0077   // Class Reader
0078   // //////////////////////////////////////////////////////////////////
0079 
0080   Reader::Reader() : features_(Features::all()) {}
0081 
0082   Reader::Reader(const Features &features) : features_(features) {}
0083 
0084   bool Reader::parse(const std::string &document, Value &root, bool collectComments) {
0085     document_ = document;
0086     const char *begin = document_.c_str();
0087     const char *end = begin + document_.length();
0088     return parse(begin, end, root, collectComments);
0089   }
0090 
0091   bool Reader::parse(std::istream &sin, Value &root, bool collectComments) {
0092     //std::istream_iterator<char> begin(sin);
0093     //std::istream_iterator<char> end;
0094     // Those would allow streamed input from a file, if parse() were a
0095     // template function.
0096 
0097     // Since std::string is reference-counted, this at least does not
0098     // create an extra copy.
0099     std::string doc;
0100     std::getline(sin, doc, (char)EOF);
0101     return parse(doc, root, collectComments);
0102   }
0103 
0104   bool Reader::parse(const char *beginDoc, const char *endDoc, Value &root, bool collectComments) {
0105     if (!features_.allowComments_) {
0106       collectComments = false;
0107     }
0108 
0109     begin_ = beginDoc;
0110     end_ = endDoc;
0111     collectComments_ = collectComments;
0112     current_ = begin_;
0113     lastValueEnd_ = nullptr;
0114     lastValue_ = nullptr;
0115     commentsBefore_ = "";
0116     errors_.clear();
0117     while (!nodes_.empty())
0118       nodes_.pop();
0119     nodes_.push(&root);
0120 
0121     bool successful = readValue();
0122     Token token;
0123     skipCommentTokens(token);
0124     if (collectComments_ && !commentsBefore_.empty())
0125       root.setComment(commentsBefore_, commentAfter);
0126     if (features_.strictRoot_) {
0127       if (!root.isArray() && !root.isObject()) {
0128         // Set error location to start of doc, ideally should be first token found in doc
0129         token.type_ = tokenError;
0130         token.start_ = beginDoc;
0131         token.end_ = endDoc;
0132         addError("A valid JSON document must be either an array or an object value.", token);
0133         return false;
0134       }
0135     }
0136     return successful;
0137   }
0138 
0139   bool Reader::readValue() {
0140     Token token;
0141     skipCommentTokens(token);
0142     bool successful = true;
0143 
0144     if (collectComments_ && !commentsBefore_.empty()) {
0145       currentValue().setComment(commentsBefore_, commentBefore);
0146       commentsBefore_ = "";
0147     }
0148 
0149     switch (token.type_) {
0150       case tokenObjectBegin:
0151         successful = readObject(token);
0152         break;
0153       case tokenArrayBegin:
0154         successful = readArray(token);
0155         break;
0156       case tokenNumber:
0157         successful = decodeNumber(token);
0158         break;
0159       case tokenString:
0160         successful = decodeString(token);
0161         break;
0162       case tokenTrue:
0163         currentValue() = true;
0164         break;
0165       case tokenFalse:
0166         currentValue() = false;
0167         break;
0168       case tokenNull:
0169         currentValue() = Value();
0170         break;
0171       default:
0172         return addError("Syntax error: value, object or array expected.", token);
0173     }
0174 
0175     if (collectComments_) {
0176       lastValueEnd_ = current_;
0177       lastValue_ = &currentValue();
0178     }
0179 
0180     return successful;
0181   }
0182 
0183   void Reader::skipCommentTokens(Token &token) {
0184     if (features_.allowComments_) {
0185       do {
0186         readToken(token);
0187       } while (token.type_ == tokenComment);
0188     } else {
0189       readToken(token);
0190     }
0191   }
0192 
0193   bool Reader::expectToken(TokenType type, Token &token, const char *message) {
0194     readToken(token);
0195     if (token.type_ != type)
0196       return addError(message, token);
0197     return true;
0198   }
0199 
0200   bool Reader::readToken(Token &token) {
0201     skipSpaces();
0202     token.start_ = current_;
0203     Char c = getNextChar();
0204     bool ok = true;
0205     switch (c) {
0206       case '{':
0207         token.type_ = tokenObjectBegin;
0208         break;
0209       case '}':
0210         token.type_ = tokenObjectEnd;
0211         break;
0212       case '[':
0213         token.type_ = tokenArrayBegin;
0214         break;
0215       case ']':
0216         token.type_ = tokenArrayEnd;
0217         break;
0218       case '"':
0219         token.type_ = tokenString;
0220         ok = readString();
0221         break;
0222       case '/':
0223         token.type_ = tokenComment;
0224         ok = readComment();
0225         break;
0226       case '0':
0227       case '1':
0228       case '2':
0229       case '3':
0230       case '4':
0231       case '5':
0232       case '6':
0233       case '7':
0234       case '8':
0235       case '9':
0236       case '-':
0237         token.type_ = tokenNumber;
0238         readNumber();
0239         break;
0240       case 't':
0241         token.type_ = tokenTrue;
0242         ok = match("rue", 3);
0243         break;
0244       case 'f':
0245         token.type_ = tokenFalse;
0246         ok = match("alse", 4);
0247         break;
0248       case 'n':
0249         token.type_ = tokenNull;
0250         ok = match("ull", 3);
0251         break;
0252       case ',':
0253         token.type_ = tokenArraySeparator;
0254         break;
0255       case ':':
0256         token.type_ = tokenMemberSeparator;
0257         break;
0258       case 0:
0259         token.type_ = tokenEndOfStream;
0260         break;
0261       default:
0262         ok = false;
0263         break;
0264     }
0265     if (!ok)
0266       token.type_ = tokenError;
0267     token.end_ = current_;
0268     return true;
0269   }
0270 
0271   void Reader::skipSpaces() {
0272     while (current_ != end_) {
0273       Char c = *current_;
0274       if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
0275         ++current_;
0276       else
0277         break;
0278     }
0279   }
0280 
0281   bool Reader::match(Location pattern, int patternLength) {
0282     if (end_ - current_ < patternLength)
0283       return false;
0284     int index = patternLength;
0285     while (index--)
0286       if (current_[index] != pattern[index])
0287         return false;
0288     current_ += patternLength;
0289     return true;
0290   }
0291 
0292   bool Reader::readComment() {
0293     Location commentBegin = current_ - 1;
0294     Char c = getNextChar();
0295     bool successful = false;
0296     if (c == '*')
0297       successful = readCStyleComment();
0298     else if (c == '/')
0299       successful = readCppStyleComment();
0300     if (!successful)
0301       return false;
0302 
0303     if (collectComments_) {
0304       CommentPlacement placement = commentBefore;
0305       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
0306         if (c != '*' || !containsNewLine(commentBegin, current_))
0307           placement = commentAfterOnSameLine;
0308       }
0309 
0310       addComment(commentBegin, current_, placement);
0311     }
0312     return true;
0313   }
0314 
0315   void Reader::addComment(Location begin, Location end, CommentPlacement placement) {
0316     assert(collectComments_);
0317     if (placement == commentAfterOnSameLine) {
0318       assert(lastValue_ != nullptr);
0319       lastValue_->setComment(std::string(begin, end), placement);
0320     } else {
0321       if (!commentsBefore_.empty())
0322         commentsBefore_ += "\n";
0323       commentsBefore_ += std::string(begin, end);
0324     }
0325   }
0326 
0327   bool Reader::readCStyleComment() {
0328     while (current_ != end_) {
0329       Char c = getNextChar();
0330       if (c == '*' && *current_ == '/')
0331         break;
0332     }
0333     return getNextChar() == '/';
0334   }
0335 
0336   bool Reader::readCppStyleComment() {
0337     while (current_ != end_) {
0338       Char c = getNextChar();
0339       if (c == '\r' || c == '\n')
0340         break;
0341     }
0342     return true;
0343   }
0344 
0345   void Reader::readNumber() {
0346     while (current_ != end_) {
0347       if (!(*current_ >= '0' && *current_ <= '9') && !in(*current_, '.', 'e', 'E', '+', '-'))
0348         break;
0349       ++current_;
0350     }
0351   }
0352 
0353   bool Reader::readString() {
0354     Char c = 0;
0355     while (current_ != end_) {
0356       c = getNextChar();
0357       if (c == '\\')
0358         getNextChar();
0359       else if (c == '"')
0360         break;
0361     }
0362     return c == '"';
0363   }
0364 
0365   bool Reader::readObject(Token &tokenStart) {
0366     Token tokenName;
0367     std::string name;
0368     currentValue() = Value(objectValue);
0369     while (readToken(tokenName)) {
0370       bool initialTokenOk = true;
0371       while (tokenName.type_ == tokenComment && initialTokenOk)
0372         initialTokenOk = readToken(tokenName);
0373       if (!initialTokenOk)
0374         break;
0375       if (tokenName.type_ == tokenObjectEnd && name.empty())  // empty object
0376         return true;
0377       if (tokenName.type_ != tokenString)
0378         break;
0379 
0380       name = "";
0381       if (!decodeString(tokenName, name))
0382         return recoverFromError(tokenObjectEnd);
0383 
0384       Token colon;
0385       if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
0386         return addErrorAndRecover("Missing ':' after object member name", colon, tokenObjectEnd);
0387       }
0388       Value &value = currentValue()[name];
0389       nodes_.push(&value);
0390       bool ok = readValue();
0391       nodes_.pop();
0392       if (!ok)  // error already set
0393         return recoverFromError(tokenObjectEnd);
0394 
0395       Token comma;
0396       if (!readToken(comma) ||
0397           (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) {
0398         return addErrorAndRecover("Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
0399       }
0400       bool finalizeTokenOk = true;
0401       while (comma.type_ == tokenComment && finalizeTokenOk)
0402         finalizeTokenOk = readToken(comma);
0403       if (comma.type_ == tokenObjectEnd)
0404         return true;
0405     }
0406     return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
0407   }
0408 
0409   bool Reader::readArray(Token &tokenStart) {
0410     currentValue() = Value(arrayValue);
0411     skipSpaces();
0412     if (*current_ == ']')  // empty array
0413     {
0414       Token endArray;
0415       readToken(endArray);
0416       return true;
0417     }
0418     int index = 0;
0419     while (true) {
0420       Value &value = currentValue()[index++];
0421       nodes_.push(&value);
0422       bool ok = readValue();
0423       nodes_.pop();
0424       if (!ok)  // error already set
0425         return recoverFromError(tokenArrayEnd);
0426 
0427       Token token;
0428       // Accept Comment after last item in the array.
0429       ok = readToken(token);
0430       while (token.type_ == tokenComment && ok) {
0431         ok = readToken(token);
0432       }
0433       bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
0434       if (!ok || badTokenType) {
0435         return addErrorAndRecover("Missing ',' or ']' in array declaration", token, tokenArrayEnd);
0436       }
0437       if (token.type_ == tokenArrayEnd)
0438         break;
0439     }
0440     return true;
0441   }
0442 
0443   bool Reader::decodeNumber(Token &token) {
0444     bool isDouble = false;
0445     for (Location inspect = token.start_; inspect != token.end_; ++inspect) {
0446       isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') || (*inspect == '-' && inspect != token.start_);
0447     }
0448     if (isDouble)
0449       return decodeDouble(token);
0450     Location current = token.start_;
0451     bool isNegative = *current == '-';
0452     if (isNegative)
0453       ++current;
0454     Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt) : Value::maxUInt) / 10;
0455     Value::UInt value = 0;
0456     while (current < token.end_) {
0457       Char c = *current++;
0458       if (c < '0' || c > '9')
0459         return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0460       if (value >= threshold)
0461         return decodeDouble(token);
0462       value = value * 10 + Value::UInt(c - '0');
0463     }
0464     if (isNegative)
0465       currentValue() = -Value::Int(value);
0466     else if (value <= Value::UInt(Value::maxInt))
0467       currentValue() = Value::Int(value);
0468     else
0469       currentValue() = value;
0470     return true;
0471   }
0472 
0473   bool Reader::decodeDouble(Token &token) {
0474     double value = 0;
0475     const int bufferSize = 32;
0476     int count;
0477     int length = int(token.end_ - token.start_);
0478     if (length <= bufferSize) {
0479       Char buffer[bufferSize];
0480       memcpy(buffer, token.start_, length);
0481       buffer[length] = 0;
0482       count = sscanf(buffer, "%lf", &value);
0483     } else {
0484       std::string buffer(token.start_, token.end_);
0485       count = sscanf(buffer.c_str(), "%lf", &value);
0486     }
0487 
0488     if (count != 1)
0489       return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
0490     currentValue() = value;
0491     return true;
0492   }
0493 
0494   bool Reader::decodeString(Token &token) {
0495     std::string decoded;
0496     if (!decodeString(token, decoded))
0497       return false;
0498     currentValue() = decoded;
0499     return true;
0500   }
0501 
0502   bool Reader::decodeString(Token &token, std::string &decoded) {
0503     decoded.reserve(token.end_ - token.start_ - 2);
0504     Location current = token.start_ + 1;  // skip '"'
0505     Location end = token.end_ - 1;        // do not include '"'
0506     while (current != end) {
0507       Char c = *current++;
0508       if (c == '"')
0509         break;
0510       else if (c == '\\') {
0511         if (current == end)
0512           return addError("Empty escape sequence in string", token, current);
0513         Char escape = *current++;
0514         switch (escape) {
0515           case '"':
0516             decoded += '"';
0517             break;
0518           case '/':
0519             decoded += '/';
0520             break;
0521           case '\\':
0522             decoded += '\\';
0523             break;
0524           case 'b':
0525             decoded += '\b';
0526             break;
0527           case 'f':
0528             decoded += '\f';
0529             break;
0530           case 'n':
0531             decoded += '\n';
0532             break;
0533           case 'r':
0534             decoded += '\r';
0535             break;
0536           case 't':
0537             decoded += '\t';
0538             break;
0539           case 'u': {
0540             unsigned int unicode;
0541             if (!decodeUnicodeCodePoint(token, current, end, unicode))
0542               return false;
0543             decoded += codePointToUTF8(unicode);
0544           } break;
0545           default:
0546             return addError("Bad escape sequence in string", token, current);
0547         }
0548       } else {
0549         decoded += c;
0550       }
0551     }
0552     return true;
0553   }
0554 
0555   bool Reader::decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode) {
0556     if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
0557       return false;
0558     if (unicode >= 0xD800 && unicode <= 0xDBFF) {
0559       // surrogate pairs
0560       if (end - current < 6)
0561         return addError("additional six characters expected to parse unicode surrogate pair.", token, current);
0562       unsigned int surrogatePair;
0563       if (*(current++) == '\\' && *(current++) == 'u') {
0564         if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
0565           unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
0566         } else
0567           return false;
0568       } else
0569         return addError(
0570             "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current);
0571     }
0572     return true;
0573   }
0574 
0575   bool Reader::decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode) {
0576     if (end - current < 4)
0577       return addError("Bad unicode escape sequence in string: four digits expected.", token, current);
0578     unicode = 0;
0579     for (int index = 0; index < 4; ++index) {
0580       Char c = *current++;
0581       unicode *= 16;
0582       if (c >= '0' && c <= '9')
0583         unicode += c - '0';
0584       else if (c >= 'a' && c <= 'f')
0585         unicode += c - 'a' + 10;
0586       else if (c >= 'A' && c <= 'F')
0587         unicode += c - 'A' + 10;
0588       else
0589         return addError("Bad unicode escape sequence in string: hexadecimal digit expected.", token, current);
0590     }
0591     return true;
0592   }
0593 
0594   bool Reader::addError(const std::string &message, Token &token, Location extra) {
0595     ErrorInfo info;
0596     info.token_ = token;
0597     info.message_ = message;
0598     info.extra_ = extra;
0599     errors_.push_back(info);
0600     return false;
0601   }
0602 
0603   bool Reader::recoverFromError(TokenType skipUntilToken) {
0604     int errorCount = int(errors_.size());
0605     Token skip;
0606     while (true) {
0607       if (!readToken(skip))
0608         errors_.resize(errorCount);  // discard errors caused by recovery
0609       if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
0610         break;
0611     }
0612     errors_.resize(errorCount);
0613     return false;
0614   }
0615 
0616   bool Reader::addErrorAndRecover(const std::string &message, Token &token, TokenType skipUntilToken) {
0617     addError(message, token);
0618     return recoverFromError(skipUntilToken);
0619   }
0620 
0621   Value &Reader::currentValue() { return *(nodes_.top()); }
0622 
0623   Reader::Char Reader::getNextChar() {
0624     if (current_ == end_)
0625       return 0;
0626     return *current_++;
0627   }
0628 
0629   void Reader::getLocationLineAndColumn(Location location, int &line, int &column) const {
0630     Location current = begin_;
0631     Location lastLineStart = current;
0632     line = 0;
0633     while (current < location && current != end_) {
0634       Char c = *current++;
0635       if (c == '\r') {
0636         if (*current == '\n')
0637           ++current;
0638         lastLineStart = current;
0639         ++line;
0640       } else if (c == '\n') {
0641         lastLineStart = current;
0642         ++line;
0643       }
0644     }
0645     // column & line start at 1
0646     column = int(location - lastLineStart) + 1;
0647     ++line;
0648   }
0649 
0650   std::string Reader::getLocationLineAndColumn(Location location) const {
0651     int line, column;
0652     getLocationLineAndColumn(location, line, column);
0653     char buffer[18 + 16 + 16 + 1];
0654     sprintf(buffer, "Line %d, Column %d", line, column);
0655     return buffer;
0656   }
0657 
0658   std::string Reader::getFormatedErrorMessages() const {
0659     std::string formattedMessage;
0660     for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) {
0661       const ErrorInfo &error = *itError;
0662       formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
0663       formattedMessage += "  " + error.message_ + "\n";
0664       if (error.extra_)
0665         formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
0666     }
0667     return formattedMessage;
0668   }
0669 
0670   std::istream &operator>>(std::istream &sin, Value &root) {
0671     Json::Reader reader;
0672     bool ok = reader.parse(sin, root, true);
0673     //JSON_ASSERT( ok );
0674     if (!ok)
0675       throw std::runtime_error(reader.getFormatedErrorMessages());
0676     return sin;
0677   }
0678 
0679 }  // namespace Json