1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
|
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/parsers/AbstractDOMParser.hpp>
#include <xercesc/dom/DOMImplementation.hpp>
#include <xercesc/dom/DOMImplementationLS.hpp>
#include <xercesc/dom/DOMImplementationRegistry.hpp>
#include <xercesc/dom/DOMLSParser.hpp>
#include <xercesc/dom/DOMException.hpp>
#include <xercesc/dom/DOMDocument.hpp>
#include <xercesc/dom/DOMNodeList.hpp>
#include <xercesc/dom/DOMError.hpp>
#include <xercesc/dom/DOMLocator.hpp>
#include <xercesc/dom/DOMNamedNodeMap.hpp>
#include <xercesc/dom/DOMAttr.hpp>
#include "DOMCount.hpp"
#include <cstring>
#include <cstdlib>
#if defined(XERCES_NEW_IOSTREAMS)
#include <fstream>
#else
#include <fstream.h>
#endif
// ---------------------------------------------------------------------------
//  This is a simple program which invokes the DOMLSParser to build a DOM
//  tree for the specified input file (or for each file named in a list
//  file). It then walks the tree and counts the number of elements. The
//  element count and the parse time are then printed.
// ---------------------------------------------------------------------------
// Print the command-line help text for DOMCount to standard output.
static void usage()
{
    // The whole help message is kept in one constant so it is emitted by a
    // single stream insertion.
    static const char *const helpText =
        "\nUsage:\n"
        " DOMCount [options] <XML file | List file>\n\n"
        "This program invokes the DOMLSParser, builds the DOM tree,\n"
        "and then prints the number of elements found in each XML file.\n\n"
        "Options:\n"
        " -l Indicate the input file is a List File that has a list of xml files.\n"
        " Default to off (Input file is an XML file).\n"
        " -v=xxx Validation scheme [always | never | auto*].\n"
        " -n Enable namespace processing. Defaults to off.\n"
        " -s Enable schema processing. Defaults to off.\n"
        " -f Enable full schema constraint checking. Defaults to off.\n"
        " -locale=ll_CC specify the locale, default: en_US.\n"
        " -p Print out names of elements and attributes encountered.\n"
        " -? Show this help.\n\n"
        " * = Default if not provided explicitly.\n";

    XERCES_STD_QUALIFIER cout << helpText << XERCES_STD_QUALIFIER endl;
}
// ---------------------------------------------------------------------------
//
//  Recursively count the Elements in the subtree rooted at the specified
//  Node, including the Node itself when it is an Element. When requested,
//  print the name and the attributes of each Element encountered.
//
// ---------------------------------------------------------------------------
// Count the element nodes in the subtree rooted at `n`.
//
//   n                       - root of the subtree to walk; a null pointer
//                             yields a count of 0.
//   printOutEncounteredEles - when true, print the name of every element
//                             visited, followed by its attributes as
//                             name=value pairs, to standard output.
//
// Returns the number of ELEMENT_NODE nodes found, including `n` itself when
// it is an element.
static int countChildElements(DOMNode *n, bool printOutEncounteredEles)
{
    if (!n)
        return 0;

    int total = 0;

    if (n->getNodeType() == DOMNode::ELEMENT_NODE) {
        total = 1;

        if (printOutEncounteredEles) {
            char *elementName = XMLString::transcode(n->getNodeName());
            XERCES_STD_QUALIFIER cout << "----------------------------------------------------------"
                                      << XERCES_STD_QUALIFIER endl;
            XERCES_STD_QUALIFIER cout << "Encountered Element : " << elementName << XERCES_STD_QUALIFIER endl;
            XMLString::release(&elementName);

            if (n->hasAttributes()) {
                // Walk every attribute attached to this element.
                DOMNamedNodeMap *attrs = n->getAttributes();
                const XMLSize_t attrCount = attrs->getLength();

                XERCES_STD_QUALIFIER cout << "\tAttributes" << XERCES_STD_QUALIFIER endl;
                XERCES_STD_QUALIFIER cout << "\t----------" << XERCES_STD_QUALIFIER endl;

                XMLSize_t idx = 0;
                while (idx < attrCount) {
                    DOMAttr *attr = static_cast<DOMAttr *>(attrs->item(idx));

                    // Attribute name.
                    char *attrName = XMLString::transcode(attr->getName());
                    XERCES_STD_QUALIFIER cout << "\t" << attrName << "=";
                    XMLString::release(&attrName);

                    // Attribute value.
                    char *attrValue = XMLString::transcode(attr->getValue());
                    XERCES_STD_QUALIFIER cout << attrValue << XERCES_STD_QUALIFIER endl;
                    XMLString::release(&attrValue);

                    ++idx;
                }
            }
        }
    }

    // Recurse into each direct child and accumulate its element count.
    DOMNode *kid = n->getFirstChild();
    while (kid != nullptr) {
        total += countChildElements(kid, printOutEncounteredEles);
        kid = kid->getNextSibling();
    }

    return total;
}
// ---------------------------------------------------------------------------
//
// main
//
// ---------------------------------------------------------------------------
// Program entry point.
//
// Parses the command-line options, initializes Xerces, creates a DOMLSParser
// configured from those options, and then parses either the single XML file
// given on the command line or (with -l) every file named in a list file.
// For each file parsed without errors it prints the parse time and the number
// of elements found.
//
// Exit codes:
//   0 - all files were parsed without errors
//   1 - wrong number of arguments, or Xerces initialization failed
//   2 - help requested, invalid option value, or the list file could not
//       be opened
//   4 - at least one error occurred while reading the list file or parsing
int main(int argC, char *argV[])
{
    // Check command line and extract arguments.
    if (argC < 2) {
        usage();
        return 1;
    }

    const char *xmlFile = nullptr;
    AbstractDOMParser::ValSchemes valScheme = AbstractDOMParser::Val_Auto;
    bool doNamespaces = false;
    bool doSchema = false;
    bool schemaFullChecking = false;
    bool doList = false;
    bool errorOccurred = false;
    bool recognizeNEL = false;
    bool printOutEncounteredEles = false;
    char localeStr[64];
    memset(localeStr, 0, sizeof localeStr);

    int argInd;
    for (argInd = 1; argInd < argC; argInd++) {
        // Break out on first parm not starting with a dash
        if (argV[argInd][0] != '-')
            break;

        // Watch for special case help request
        if (!strcmp(argV[argInd], "-?")) {
            usage();
            return 2;
        } else if (!strncmp(argV[argInd], "-v=", 3) || !strncmp(argV[argInd], "-V=", 3)) {
            const char *const parm = &argV[argInd][3];
            if (!strcmp(parm, "never"))
                valScheme = AbstractDOMParser::Val_Never;
            else if (!strcmp(parm, "auto"))
                valScheme = AbstractDOMParser::Val_Auto;
            else if (!strcmp(parm, "always"))
                valScheme = AbstractDOMParser::Val_Always;
            else {
                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
                return 2;
            }
        } else if (!strcmp(argV[argInd], "-n") || !strcmp(argV[argInd], "-N")) {
            doNamespaces = true;
        } else if (!strcmp(argV[argInd], "-s") || !strcmp(argV[argInd], "-S")) {
            doSchema = true;
        } else if (!strcmp(argV[argInd], "-f") || !strcmp(argV[argInd], "-F")) {
            schemaFullChecking = true;
        } else if (!strcmp(argV[argInd], "-l") || !strcmp(argV[argInd], "-L")) {
            doList = true;
        } else if (!strcmp(argV[argInd], "-special:nel")) {
            // turning this on will lead to non-standard compliance behaviour
            // it will recognize the unicode character 0x85 as new line character
            // instead of regular character as specified in XML 1.0
            // do not turn this on unless really necessary
            recognizeNEL = true;
        } else if (!strcmp(argV[argInd], "-p") || !strcmp(argV[argInd], "-P")) {
            printOutEncounteredEles = true;
        } else if (!strncmp(argV[argInd], "-locale=", 8)) {
            // Everything after "-locale=" is the locale name. It comes
            // straight from the command line, so make sure it fits in
            // localeStr (including the terminating NUL) before copying.
            const char *const localeArg = &argV[argInd][8];
            if (strlen(localeArg) >= sizeof(localeStr)) {
                XERCES_STD_QUALIFIER cerr << "Locale name too long: " << localeArg << XERCES_STD_QUALIFIER endl;
                return 2;
            }
            strcpy(localeStr, localeArg);
        } else {
            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd] << "', ignoring it\n"
                                      << XERCES_STD_QUALIFIER endl;
        }
    }

    //
    // There should be one and only one parameter left, and that
    // should be the file name.
    //
    if (argInd != argC - 1) {
        usage();
        return 1;
    }

    // Initialize the XML4C system
    try {
        if (strlen(localeStr)) {
            XMLPlatformUtils::Initialize(localeStr);
        } else {
            XMLPlatformUtils::Initialize();
        }
        if (recognizeNEL) {
            XMLPlatformUtils::recognizeNEL(recognizeNEL);
        }
    }
    catch (const XMLException &toCatch) {
        XERCES_STD_QUALIFIER cerr << "Error during initialization! :\n"
                                  << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
        return 1;
    }

    // Instantiate the DOM parser.
    static const XMLCh gLS[] = {chLatin_L, chLatin_S, chNull};
    DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(gLS);
    DOMLSParser *parser = ((DOMImplementationLS *)impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, nullptr);
    DOMConfiguration *config = parser->getDomConfig();

    config->setParameter(XMLUni::fgDOMNamespaces, doNamespaces);
    config->setParameter(XMLUni::fgXercesSchema, doSchema);
    config->setParameter(XMLUni::fgXercesHandleMultipleImports, true);
    config->setParameter(XMLUni::fgXercesSchemaFullChecking, schemaFullChecking);

    if (valScheme == AbstractDOMParser::Val_Auto) {
        config->setParameter(XMLUni::fgDOMValidateIfSchema, true);
    } else if (valScheme == AbstractDOMParser::Val_Never) {
        config->setParameter(XMLUni::fgDOMValidate, false);
    } else if (valScheme == AbstractDOMParser::Val_Always) {
        config->setParameter(XMLUni::fgDOMValidate, true);
    }

    // enable datatype normalization - default is off
    config->setParameter(XMLUni::fgDOMDatatypeNormalization, true);

    // And create our error handler and install it
    DOMCountErrorHandler errorHandler;
    config->setParameter(XMLUni::fgDOMErrorHandler, &errorHandler);

    //
    // Get the starting time and kick off the parse of the indicated
    // file. Catch any exceptions that might propagate out of it.
    //
    unsigned long duration = 0;
    bool more = true;
    XERCES_STD_QUALIFIER ifstream fin;

    // the input is a list file
    if (doList) {
        fin.open(argV[argInd]);
        if (fin.fail()) {
            XERCES_STD_QUALIFIER cerr << "Cannot open the list file: " << argV[argInd] << XERCES_STD_QUALIFIER endl;
            // The parser already exists, so shut down the same way the
            // normal exit path does before bailing out.
            parser->release();
            XMLPlatformUtils::Terminate();
            return 2;
        }
    }

    while (more) {
        char fURI[1000];
        //initialize the array to zeros
        memset(fURI, 0, sizeof(fURI));

        if (doList) {
            if (fin.eof())
                break;

            fin.getline(fURI, sizeof(fURI));

            // getline() sets failbit without eofbit when an entry does not
            // fit in fURI (or on a read error). The stream then stays failed
            // and never reaches EOF, so stop here instead of spinning forever
            // on empty reads.
            if (fin.bad() || (fin.fail() && !fin.eof())) {
                XERCES_STD_QUALIFIER cerr << "Error reading the list file (entry too long or read failure): "
                                          << argV[argInd] << XERCES_STD_QUALIFIER endl;
                errorOccurred = true;
                break;
            }

            // Skip blank lines.
            if (!*fURI)
                continue;

            xmlFile = fURI;
            XERCES_STD_QUALIFIER cerr << "==Parsing== " << xmlFile << XERCES_STD_QUALIFIER endl;
        } else {
            xmlFile = argV[argInd];
            more = false;
        }

        //reset error count first
        errorHandler.resetErrors();

        XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument *doc = nullptr;

        try {
            // reset document pool
            parser->resetDocumentPool();

            const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
            doc = parser->parseURI(xmlFile);
            const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
            duration = endMillis - startMillis;
        }
        catch (const XMLException &toCatch) {
            XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << xmlFile << "'\n"
                                      << "Exception message is: \n"
                                      << StrX(toCatch.getMessage()) << "\n"
                                      << XERCES_STD_QUALIFIER endl;
            errorOccurred = true;
            continue;
        } catch (const DOMException &toCatch) {
            const unsigned int maxChars = 2047;
            XMLCh errText[maxChars + 1];

            XERCES_STD_QUALIFIER cerr << "\nDOM Error during parsing: '" << xmlFile << "'\n"
                                      << "DOMException code is: " << toCatch.code << XERCES_STD_QUALIFIER endl;

            if (DOMImplementation::loadDOMExceptionMsg(toCatch.code, errText, maxChars))
                XERCES_STD_QUALIFIER cerr << "Message is: " << StrX(errText) << XERCES_STD_QUALIFIER endl;

            errorOccurred = true;
            continue;
        } catch (...) {
            XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
            errorOccurred = true;
            continue;
        }

        //
        // Extract the DOM tree, get the list of all the elements and report the
        // length as the count of elements.
        //
        if (errorHandler.getSawErrors()) {
            XERCES_STD_QUALIFIER cout << "\nErrors occurred, no output available\n" << XERCES_STD_QUALIFIER endl;
            errorOccurred = true;
        } else {
            unsigned int elementCount = 0;
            if (doc) {
                elementCount = countChildElements((DOMNode *)doc->getDocumentElement(), printOutEncounteredEles);

                // test getElementsByTagName and getLength: the hand-rolled
                // count must agree with the "*" wildcard query.
                XMLCh xa[] = {chAsterisk, chNull};
                if (elementCount != doc->getElementsByTagName(xa)->getLength()) {
                    XERCES_STD_QUALIFIER cout << "\nErrors occurred, element count is wrong\n" << XERCES_STD_QUALIFIER endl;
                    errorOccurred = true;
                }
            }

            // Print out the stats that we collected and time taken.
            XERCES_STD_QUALIFIER cout << xmlFile << ": " << duration << " ms (" << elementCount << " elems)."
                                      << XERCES_STD_QUALIFIER endl;
        }
    }

    //
    // Delete the parser itself. Must be done prior to calling Terminate, below.
    //
    parser->release();

    // And call the termination method
    XMLPlatformUtils::Terminate();

    if (doList)
        fin.close();

    if (errorOccurred)
        return 4;
    else
        return 0;
}
// Construct the handler with its error flag cleared.
DOMCountErrorHandler::DOMCountErrorHandler() : fSawErrors(false)
{
}
// Destructor: the body is empty, nothing is released here.
DOMCountErrorHandler::~DOMCountErrorHandler()
{
}
// ---------------------------------------------------------------------------
// DOMCountHandlers: Overrides of the DOM ErrorHandler interface
// ---------------------------------------------------------------------------
bool DOMCountErrorHandler::handleError(const DOMError &domError) {
fSawErrors = true;
if (domError.getSeverity() == DOMError::DOM_SEVERITY_WARNING)
XERCES_STD_QUALIFIER cerr << "\nWarning at file ";
else if (domError.getSeverity() == DOMError::DOM_SEVERITY_ERROR)
XERCES_STD_QUALIFIER cerr << "\nError at file ";
else
XERCES_STD_QUALIFIER cerr << "\nFatal Error at file ";
XERCES_STD_QUALIFIER cerr << StrX(domError.getLocation()->getURI()) << ", line "
<< domError.getLocation()->getLineNumber() << ", char "
<< domError.getLocation()->getColumnNumber()
<< "\n Message: " << StrX(domError.getMessage()) << XERCES_STD_QUALIFIER endl;
return true;
}
// Clear the flag that handleError() sets when an error is reported.
void DOMCountErrorHandler::resetErrors()
{
    fSawErrors = false;
}
|