Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:05:35

0001 #include <cstdlib>
0002 #include <cstring>
0003 #include <xercesc/sax2/SAX2XMLReader.hpp>
0004 #include <xercesc/sax2/XMLReaderFactory.hpp>
0005 #include <fstream>
0006 #include <map>
0007 #include <string>
0008 #include <utility>
0009 #include <vector>
0010 
0011 #include "DetectorDescription/RegressionTest/src/SaxToDom.h"
0012 #include "DetectorDescription/RegressionTest/src/TagName.h"
0013 #include "DetectorDescription/RegressionTest/src/TinyDom.h"
0014 #include "DetectorDescription/RegressionTest/src/TinyDomTest.h"
0015 #include "Utilities/Xerces/interface/Xerces.h"
0016 #include "xercesc/util/PlatformUtils.hpp"
0017 #include "xercesc/util/XMLException.hpp"
0018 #include "xercesc/util/XMLUni.hpp"
0019 
0020 using namespace std;
0021 using namespace xercesc;
0022 
// Empty placeholder type. Nothing in the visible part of this file refers to
// it. NOTE(review): appears unused here -- confirm before removing.
class ADummy {};
0024 
0025 // ---------------------------------------------------------------------------
0026 //  Local helper methods
0027 // ---------------------------------------------------------------------------
// Writes the command-line help text to standard output and flushes the stream.
//
// NOTE(review): the wording still names "SAX2Count" and describes counting
// elements/attributes, while main() in this file installs a SaxToDom handler;
// the "-special:nel" switch accepted by main() is also not listed. Confirm
// whether the text should be refreshed.
void usage() {
  static const char* const helpText =
      "\nUsage:\n"
      "    SAX2Count [options] <XML file | List file>\n\n"
      "This program invokes the SAX2XMLReader, and then prints the\n"
      "number of elements, attributes, spaces and characters found\n"
      "in each XML file, using SAX2 API.\n\n"
      "Options:\n"
      "    -l          Indicate the input file is a List File that has a list of xml files.\n"
      "                Default to off (Input file is an XML file).\n"
      "    -v=xxx      Validation scheme [always | never | auto*].\n"
      "    -f          Enable full schema constraint checking processing. Defaults to off.\n"
      "    -p          Enable namespace-prefixes feature. Defaults to off.\n"
      "    -n          Disable namespace processing. Defaults to on.\n"
      "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES.\n"
      "    -s          Disable schema processing. Defaults to on.\n"
      "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES.\n"
      "    -?          Show this help.\n\n"
      "  * = Default if not provided explicitly.\n";

  cout << helpText;
  // Trailing blank line plus flush, exactly as a single chained insertion would do.
  cout << endl;
}
0048 
0049 // ---------------------------------------------------------------------------
0050 //  Program entry point
0051 // ---------------------------------------------------------------------------
0052 int main(int argC, char* argV[]) {
0053   // Initialize the XML4C2 system
0054   try {
0055     cms::concurrency::xercesInitialize();
0056   }
0057 
0058   catch (const XMLException& toCatch) {
0059     char* message = XMLString::transcode(toCatch.getMessage());
0060     cerr << "Error during initialization! Message:\n" << message << endl;
0061     XMLString::release(&message);
0062     return 1;
0063   }
0064 
0065   // Check command line and extract arguments.
0066   if (argC < 2) {
0067     usage();
0068     cms::concurrency::xercesTerminate();
0069     return 1;
0070   }
0071 
0072   const char* xmlFile = nullptr;
0073   SAX2XMLReader::ValSchemes valScheme = SAX2XMLReader::Val_Auto;
0074   bool doNamespaces = true;
0075   bool doSchema = true;
0076   bool schemaFullChecking = false;
0077   bool doList = false;
0078   bool errorOccurred = false;
0079   bool namespacePrefixes = false;
0080 
0081   int argInd;
0082   for (argInd = 1; argInd < argC; ++argInd) {
0083     // Break out on first parm not starting with a dash
0084     if (argV[argInd][0] != '-')
0085       break;
0086 
0087     // Watch for special case help request
0088     if (!strcmp(argV[argInd], "-?")) {
0089       usage();
0090       cms::concurrency::xercesTerminate();
0091       return 2;
0092     } else if (!strncmp(argV[argInd], "-v=", 3) || !strncmp(argV[argInd], "-V=", 3)) {
0093       const char* const parm = &argV[argInd][3];
0094 
0095       if (!strcmp(parm, "never"))
0096         valScheme = SAX2XMLReader::Val_Never;
0097       else if (!strcmp(parm, "auto"))
0098         valScheme = SAX2XMLReader::Val_Auto;
0099       else if (!strcmp(parm, "always"))
0100         valScheme = SAX2XMLReader::Val_Always;
0101       else {
0102         cerr << "Unknown -v= value: " << parm << endl;
0103         cms::concurrency::xercesTerminate();
0104         return 2;
0105       }
0106     } else if (!strcmp(argV[argInd], "-n") || !strcmp(argV[argInd], "-N")) {
0107       doNamespaces = false;
0108     } else if (!strcmp(argV[argInd], "-s") || !strcmp(argV[argInd], "-S")) {
0109       doSchema = false;
0110     } else if (!strcmp(argV[argInd], "-f") || !strcmp(argV[argInd], "-F")) {
0111       schemaFullChecking = true;
0112     } else if (!strcmp(argV[argInd], "-l") || !strcmp(argV[argInd], "-L")) {
0113       doList = true;
0114     } else if (!strcmp(argV[argInd], "-p") || !strcmp(argV[argInd], "-P")) {
0115       namespacePrefixes = true;
0116     } else if (!strcmp(argV[argInd], "-special:nel")) {
0117       // turning this on will lead to non-standard compliance behaviour
0118       // it will recognize the unicode character 0x85 as new line character
0119       // instead of regular character as specified in XML 1.0
0120       // do not turn this on unless really necessary
0121       XMLPlatformUtils::recognizeNEL(true);
0122     } else {
0123       cerr << "Unknown option '" << argV[argInd] << "', ignoring it\n" << endl;
0124     }
0125   }
0126 
0127   //
0128   //  There should be only one and only one parameter left, and that
0129   //  should be the file name.
0130   //
0131   if (argInd != argC - 1) {
0132     usage();
0133     cms::concurrency::xercesTerminate();
0134     return 1;
0135   }
0136 
0137   //
0138   //  Create a SAX parser object. Then, according to what we were told on
0139   //  the command line, set it to validate or not.
0140   //
0141   SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
0142   parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, doNamespaces);
0143   parser->setFeature(XMLUni::fgXercesSchema, doSchema);
0144   parser->setFeature(XMLUni::fgXercesSchemaFullChecking, schemaFullChecking);
0145   parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, namespacePrefixes);
0146 
0147   if (valScheme == SAX2XMLReader::Val_Auto) {
0148     parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
0149     parser->setFeature(XMLUni::fgXercesDynamic, true);
0150   }
0151   if (valScheme == SAX2XMLReader::Val_Never) {
0152     parser->setFeature(XMLUni::fgSAX2CoreValidation, false);
0153   }
0154   if (valScheme == SAX2XMLReader::Val_Always) {
0155     parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
0156     parser->setFeature(XMLUni::fgXercesDynamic, false);
0157   }
0158 
0159   //
0160   //  Create our SAX handler object and install it on the parser, as the
0161   //  document and error handler.
0162   //
0163   SaxToDom handler;
0164   parser->setContentHandler(&handler);
0165   parser->setErrorHandler(&handler);
0166 
0167   //
0168   //  Get the starting time and kick off the parse of the indicated
0169   //  file. Catch any exceptions that might propogate out of it.
0170   //
0171   unsigned long duration;
0172 
0173   bool more = true;
0174   ifstream fin;
0175 
0176   // the input is a list file
0177   if (doList)
0178     fin.open(argV[argInd]);
0179 
0180   while (more) {
0181     char fURI[1000];
0182     //initialize the array to zeros
0183     memset(fURI, 0, sizeof(fURI));
0184 
0185     if (doList) {
0186       if (!fin.eof()) {
0187         fin.getline(fURI, sizeof(fURI));
0188         if (!*fURI)
0189           continue;
0190         else {
0191           xmlFile = fURI;
0192           cerr << "==Parsing== " << xmlFile << endl;
0193         }
0194       } else
0195         break;
0196     } else {
0197       xmlFile = argV[argInd];
0198       more = false;
0199     }
0200 
0201     //reset error count first
0202     handler.resetErrors();
0203 
0204     try {
0205       const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
0206       cout << "start parsing:" << xmlFile << endl;
0207       parser->parse(xmlFile);
0208       cout << "parsing ended" << endl;
0209       const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
0210       duration = endMillis - startMillis;
0211     }
0212 
0213     catch (const XMLException& e) {
0214       char* message = XMLString::transcode(e.getMessage());
0215       cerr << "\nError during parsing: '" << xmlFile << "'\n"
0216            << "Exception message is:  \n"
0217            << message << "\n"
0218            << endl;
0219       errorOccurred = true;
0220       XMLString::release(&message);
0221       continue;
0222     }
0223 
0224     catch (...) {
0225       cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
0226       errorOccurred = true;
0227       continue;
0228     }
0229 
0230     // Print out the stats that we collected and time taken
0231     if (true && getenv("DOTEST")) {
0232       TinyDomTest test(handler.dom());
0233       vector<const AttList*> atts;
0234       test.allNodes(NodeName("Box"), atts);
0235       unsigned int i = 0;
0236       for (; i < atts.size(); ++i) {
0237         const AttList& a = *(atts[i]);
0238         AttList::const_iterator it = a.begin();
0239         for (; it != a.end(); ++it) {
0240           cout << it->first.str() << '=' << it->second.str() << ' ';
0241         }
0242         cout << endl;
0243       }
0244       cout << "dom-size=" << handler.dom().size() << "duration " << duration << endl;
0245       /*
0246        TinyDomWalker walker(handler.dom());
0247        bool go = true;
0248        TagName name("Box");
0249        while (go) {
0250           if (name.sameName(walker.current().first)) {
0251             cout << walker.current().first.str() << endl;
0252           }
0253           go = walker.next();
0254        }
0255        */
0256 
0257     } else
0258       errorOccurred = true;
0259   }
0260 
0261   if (doList)
0262     fin.close();
0263 
0264   //
0265   //  Delete the parser itself.  Must be done prior to calling Terminate, below.
0266   //
0267   delete parser;
0268 
0269   // And call the termination method
0270   cms::concurrency::xercesTerminate();
0271 
0272   if (errorOccurred)
0273     return 4;
0274   else
0275     return 0;
0276 }