Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:05:34

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 #include <xercesc/util/PlatformUtils.hpp>
0019 #include <xercesc/parsers/AbstractDOMParser.hpp>
0020 #include <xercesc/dom/DOMImplementation.hpp>
0021 #include <xercesc/dom/DOMImplementationLS.hpp>
0022 #include <xercesc/dom/DOMImplementationRegistry.hpp>
0023 #include <xercesc/dom/DOMLSParser.hpp>
0024 #include <xercesc/dom/DOMException.hpp>
0025 #include <xercesc/dom/DOMDocument.hpp>
0026 #include <xercesc/dom/DOMNodeList.hpp>
0027 #include <xercesc/dom/DOMError.hpp>
0028 #include <xercesc/dom/DOMLocator.hpp>
0029 #include <xercesc/dom/DOMNamedNodeMap.hpp>
0030 #include <xercesc/dom/DOMAttr.hpp>
0031 #include "DOMCount.hpp"
0032 #include <cstring>
0033 #include <cstdlib>
0034 
0035 #if defined(XERCES_NEW_IOSTREAMS)
0036 #include <fstream>
0037 #else
0038 #include <fstream.h>
0039 #endif
0040 
0041 // ---------------------------------------------------------------------------
0042 //  This is a simple program which invokes the DOMParser to build a DOM
0043 //  tree for the specified input file. It then walks the tree and counts
0044 //  the number of elements. The element count is then printed.
0045 // ---------------------------------------------------------------------------
0046 static void usage() {
0047   XERCES_STD_QUALIFIER cout << "\nUsage:\n"
0048                                "    DOMCount [options] <XML file | List file>\n\n"
0049                                "This program invokes the DOMLSParser, builds the DOM tree,\n"
0050                                "and then prints the number of elements found in each XML file.\n\n"
0051                                "Options:\n"
0052                                "    -l          Indicate the input file is a List File that has a list of xml files.\n"
0053                                "                Default to off (Input file is an XML file).\n"
0054                                "    -v=xxx      Validation scheme [always | never | auto*].\n"
0055                                "    -n          Enable namespace processing. Defaults to off.\n"
0056                                "    -s          Enable schema processing. Defaults to off.\n"
0057                                "    -f          Enable full schema constraint checking. Defaults to off.\n"
0058                                "    -locale=ll_CC specify the locale, default: en_US.\n"
0059                                "    -p          Print out names of elements and attributes encountered.\n"
0060                                "    -?          Show this help.\n\n"
0061                                "  * = Default if not provided explicitly.\n"
0062                             << XERCES_STD_QUALIFIER endl;
0063 }
0064 
0065 // ---------------------------------------------------------------------------
0066 //
0067 //  Recursively Count up the total number of child Elements under the specified Node.
0068 //  Process attributes of the node, if any.
0069 //
0070 // ---------------------------------------------------------------------------
0071 static int countChildElements(DOMNode *n, bool printOutEncounteredEles) {
0072   DOMNode *child;
0073   int count = 0;
0074   if (n) {
0075     if (n->getNodeType() == DOMNode::ELEMENT_NODE) {
0076       if (printOutEncounteredEles) {
0077         char *name = XMLString::transcode(n->getNodeName());
0078         XERCES_STD_QUALIFIER cout << "----------------------------------------------------------"
0079                                   << XERCES_STD_QUALIFIER endl;
0080         XERCES_STD_QUALIFIER cout << "Encountered Element : " << name << XERCES_STD_QUALIFIER endl;
0081 
0082         XMLString::release(&name);
0083 
0084         if (n->hasAttributes()) {
0085           // get all the attributes of the node
0086           DOMNamedNodeMap *pAttributes = n->getAttributes();
0087           const XMLSize_t nSize = pAttributes->getLength();
0088           XERCES_STD_QUALIFIER cout << "\tAttributes" << XERCES_STD_QUALIFIER endl;
0089           XERCES_STD_QUALIFIER cout << "\t----------" << XERCES_STD_QUALIFIER endl;
0090           for (XMLSize_t i = 0; i < nSize; ++i) {
0091             DOMAttr *pAttributeNode = (DOMAttr *)pAttributes->item(i);
0092             // get attribute name
0093             char *name = XMLString::transcode(pAttributeNode->getName());
0094 
0095             XERCES_STD_QUALIFIER cout << "\t" << name << "=";
0096             XMLString::release(&name);
0097 
0098             // get attribute type
0099             name = XMLString::transcode(pAttributeNode->getValue());
0100             XERCES_STD_QUALIFIER cout << name << XERCES_STD_QUALIFIER endl;
0101             XMLString::release(&name);
0102           }
0103         }
0104       }
0105       ++count;
0106     }
0107     for (child = n->getFirstChild(); child != nullptr; child = child->getNextSibling())
0108       count += countChildElements(child, printOutEncounteredEles);
0109   }
0110   return count;
0111 }
0112 
0113 // ---------------------------------------------------------------------------
0114 //
0115 //   main
0116 //
0117 // ---------------------------------------------------------------------------
0118 int main(int argC, char *argV[]) {
0119   // Check command line and extract arguments.
0120   if (argC < 2) {
0121     usage();
0122     return 1;
0123   }
0124 
0125   const char *xmlFile = nullptr;
0126   AbstractDOMParser::ValSchemes valScheme = AbstractDOMParser::Val_Auto;
0127   bool doNamespaces = false;
0128   bool doSchema = false;
0129   bool schemaFullChecking = false;
0130   bool doList = false;
0131   bool errorOccurred = false;
0132   bool recognizeNEL = false;
0133   bool printOutEncounteredEles = false;
0134   char localeStr[64];
0135   memset(localeStr, 0, sizeof localeStr);
0136 
0137   int argInd;
0138   for (argInd = 1; argInd < argC; argInd++) {
0139     // Break out on first parm not starting with a dash
0140     if (argV[argInd][0] != '-')
0141       break;
0142 
0143     // Watch for special case help request
0144     if (!strcmp(argV[argInd], "-?")) {
0145       usage();
0146       return 2;
0147     } else if (!strncmp(argV[argInd], "-v=", 3) || !strncmp(argV[argInd], "-V=", 3)) {
0148       const char *const parm = &argV[argInd][3];
0149 
0150       if (!strcmp(parm, "never"))
0151         valScheme = AbstractDOMParser::Val_Never;
0152       else if (!strcmp(parm, "auto"))
0153         valScheme = AbstractDOMParser::Val_Auto;
0154       else if (!strcmp(parm, "always"))
0155         valScheme = AbstractDOMParser::Val_Always;
0156       else {
0157         XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
0158         return 2;
0159       }
0160     } else if (!strcmp(argV[argInd], "-n") || !strcmp(argV[argInd], "-N")) {
0161       doNamespaces = true;
0162     } else if (!strcmp(argV[argInd], "-s") || !strcmp(argV[argInd], "-S")) {
0163       doSchema = true;
0164     } else if (!strcmp(argV[argInd], "-f") || !strcmp(argV[argInd], "-F")) {
0165       schemaFullChecking = true;
0166     } else if (!strcmp(argV[argInd], "-l") || !strcmp(argV[argInd], "-L")) {
0167       doList = true;
0168     } else if (!strcmp(argV[argInd], "-special:nel")) {
0169       // turning this on will lead to non-standard compliance behaviour
0170       // it will recognize the unicode character 0x85 as new line character
0171       // instead of regular character as specified in XML 1.0
0172       // do not turn this on unless really necessary
0173 
0174       recognizeNEL = true;
0175     } else if (!strcmp(argV[argInd], "-p") || !strcmp(argV[argInd], "-P")) {
0176       printOutEncounteredEles = true;
0177     } else if (!strncmp(argV[argInd], "-locale=", 8)) {
0178       // Get out the end of line
0179       strcpy(localeStr, &(argV[argInd][8]));
0180     } else {
0181       XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd] << "', ignoring it\n"
0182                                 << XERCES_STD_QUALIFIER endl;
0183     }
0184   }
0185 
0186   //
0187   //  There should be only one and only one parameter left, and that
0188   //  should be the file name.
0189   //
0190   if (argInd != argC - 1) {
0191     usage();
0192     return 1;
0193   }
0194 
0195   // Initialize the XML4C system
0196   try {
0197     if (strlen(localeStr)) {
0198       XMLPlatformUtils::Initialize(localeStr);
0199     } else {
0200       XMLPlatformUtils::Initialize();
0201     }
0202 
0203     if (recognizeNEL) {
0204       XMLPlatformUtils::recognizeNEL(recognizeNEL);
0205     }
0206   }
0207 
0208   catch (const XMLException &toCatch) {
0209     XERCES_STD_QUALIFIER cerr << "Error during initialization! :\n"
0210                               << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
0211     return 1;
0212   }
0213 
0214   // Instantiate the DOM parser.
0215   static const XMLCh gLS[] = {chLatin_L, chLatin_S, chNull};
0216   DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(gLS);
0217   DOMLSParser *parser = ((DOMImplementationLS *)impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, nullptr);
0218   DOMConfiguration *config = parser->getDomConfig();
0219 
0220   config->setParameter(XMLUni::fgDOMNamespaces, doNamespaces);
0221   config->setParameter(XMLUni::fgXercesSchema, doSchema);
0222   config->setParameter(XMLUni::fgXercesHandleMultipleImports, true);
0223   config->setParameter(XMLUni::fgXercesSchemaFullChecking, schemaFullChecking);
0224 
0225   if (valScheme == AbstractDOMParser::Val_Auto) {
0226     config->setParameter(XMLUni::fgDOMValidateIfSchema, true);
0227   } else if (valScheme == AbstractDOMParser::Val_Never) {
0228     config->setParameter(XMLUni::fgDOMValidate, false);
0229   } else if (valScheme == AbstractDOMParser::Val_Always) {
0230     config->setParameter(XMLUni::fgDOMValidate, true);
0231   }
0232 
0233   // enable datatype normalization - default is off
0234   config->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
0235 
0236   // And create our error handler and install it
0237   DOMCountErrorHandler errorHandler;
0238   config->setParameter(XMLUni::fgDOMErrorHandler, &errorHandler);
0239 
0240   //
0241   //  Get the starting time and kick off the parse of the indicated
0242   //  file. Catch any exceptions that might propogate out of it.
0243   //
0244   unsigned long duration;
0245 
0246   bool more = true;
0247   XERCES_STD_QUALIFIER ifstream fin;
0248 
0249   // the input is a list file
0250   if (doList)
0251     fin.open(argV[argInd]);
0252 
0253   if (fin.fail()) {
0254     XERCES_STD_QUALIFIER cerr << "Cannot open the list file: " << argV[argInd] << XERCES_STD_QUALIFIER endl;
0255     return 2;
0256   }
0257 
0258   while (more) {
0259     char fURI[1000];
0260     //initialize the array to zeros
0261     memset(fURI, 0, sizeof(fURI));
0262 
0263     if (doList) {
0264       if (!fin.eof()) {
0265         fin.getline(fURI, sizeof(fURI));
0266         if (!*fURI)
0267           continue;
0268         else {
0269           xmlFile = fURI;
0270           XERCES_STD_QUALIFIER cerr << "==Parsing== " << xmlFile << XERCES_STD_QUALIFIER endl;
0271         }
0272       } else
0273         break;
0274     } else {
0275       xmlFile = argV[argInd];
0276       more = false;
0277     }
0278 
0279     //reset error count first
0280     errorHandler.resetErrors();
0281 
0282     XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument *doc = nullptr;
0283 
0284     try {
0285       // reset document pool
0286       parser->resetDocumentPool();
0287 
0288       const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
0289       doc = parser->parseURI(xmlFile);
0290       const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
0291       duration = endMillis - startMillis;
0292     }
0293 
0294     catch (const XMLException &toCatch) {
0295       XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << xmlFile << "'\n"
0296                                 << "Exception message is:  \n"
0297                                 << StrX(toCatch.getMessage()) << "\n"
0298                                 << XERCES_STD_QUALIFIER endl;
0299       errorOccurred = true;
0300       continue;
0301     } catch (const DOMException &toCatch) {
0302       const unsigned int maxChars = 2047;
0303       XMLCh errText[maxChars + 1];
0304 
0305       XERCES_STD_QUALIFIER cerr << "\nDOM Error during parsing: '" << xmlFile << "'\n"
0306                                 << "DOMException code is:  " << toCatch.code << XERCES_STD_QUALIFIER endl;
0307 
0308       if (DOMImplementation::loadDOMExceptionMsg(toCatch.code, errText, maxChars))
0309         XERCES_STD_QUALIFIER cerr << "Message is: " << StrX(errText) << XERCES_STD_QUALIFIER endl;
0310 
0311       errorOccurred = true;
0312       continue;
0313     } catch (...) {
0314       XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
0315       errorOccurred = true;
0316       continue;
0317     }
0318 
0319     //
0320     //  Extract the DOM tree, get the list of all the elements and report the
0321     //  length as the count of elements.
0322     //
0323     if (errorHandler.getSawErrors()) {
0324       XERCES_STD_QUALIFIER cout << "\nErrors occurred, no output available\n" << XERCES_STD_QUALIFIER endl;
0325       errorOccurred = true;
0326     } else {
0327       unsigned int elementCount = 0;
0328       if (doc) {
0329         elementCount = countChildElements((DOMNode *)doc->getDocumentElement(), printOutEncounteredEles);
0330         // test getElementsByTagName and getLength
0331         XMLCh xa[] = {chAsterisk, chNull};
0332         if (elementCount != doc->getElementsByTagName(xa)->getLength()) {
0333           XERCES_STD_QUALIFIER cout << "\nErrors occurred, element count is wrong\n" << XERCES_STD_QUALIFIER endl;
0334           errorOccurred = true;
0335         }
0336       }
0337 
0338       // Print out the stats that we collected and time taken.
0339       XERCES_STD_QUALIFIER cout << xmlFile << ": " << duration << " ms (" << elementCount << " elems)."
0340                                 << XERCES_STD_QUALIFIER endl;
0341     }
0342   }
0343 
0344   //
0345   //  Delete the parser itself.  Must be done prior to calling Terminate, below.
0346   //
0347   parser->release();
0348 
0349   // And call the termination method
0350   XMLPlatformUtils::Terminate();
0351 
0352   if (doList)
0353     fin.close();
0354 
0355   if (errorOccurred)
0356     return 4;
0357   else
0358     return 0;
0359 }
0360 
0361 DOMCountErrorHandler::DOMCountErrorHandler()
0362     :
0363 
0364       fSawErrors(false) {}
0365 
0366 DOMCountErrorHandler::~DOMCountErrorHandler() {}
0367 
0368 // ---------------------------------------------------------------------------
0369 //  DOMCountHandlers: Overrides of the DOM ErrorHandler interface
0370 // ---------------------------------------------------------------------------
0371 bool DOMCountErrorHandler::handleError(const DOMError &domError) {
0372   fSawErrors = true;
0373   if (domError.getSeverity() == DOMError::DOM_SEVERITY_WARNING)
0374     XERCES_STD_QUALIFIER cerr << "\nWarning at file ";
0375   else if (domError.getSeverity() == DOMError::DOM_SEVERITY_ERROR)
0376     XERCES_STD_QUALIFIER cerr << "\nError at file ";
0377   else
0378     XERCES_STD_QUALIFIER cerr << "\nFatal Error at file ";
0379 
0380   XERCES_STD_QUALIFIER cerr << StrX(domError.getLocation()->getURI()) << ", line "
0381                             << domError.getLocation()->getLineNumber() << ", char "
0382                             << domError.getLocation()->getColumnNumber()
0383                             << "\n  Message: " << StrX(domError.getMessage()) << XERCES_STD_QUALIFIER endl;
0384 
0385   return true;
0386 }
0387 
0388 void DOMCountErrorHandler::resetErrors() { fSawErrors = false; }