File indexing completed on 2024-11-26 02:34:07
0001 import xml.dom.minidom as dom
0002 import sys, os, optparse
0003
class OptionParser(optparse.OptionParser):
    """
    Command-line parser for the histogram analysis script.

    Registers ``--src`` (string), ``--min``, ``--max`` and ``--cid``
    (integers) and the ``--xsd`` flag on top of the stock optparse parser,
    which is configured with the ``"resolve"`` conflict handler.
    """

    def __init__(self):
        super().__init__(
            usage="%prog --help or %prog [options] file",
            version="%prog 0.0.1",
            conflict_handler="resolve",
        )
        # One row per option, listed in the order they appear in --help.
        option_table = (
            ("--src", dict(action="store", type="string", dest="src",
                           help="specify source XML file")),
            ("--min", dict(action="store", type="int", dest="min",
                           help="Minimum length to measure")),
            ("--max", dict(action="store", type="int", dest="max",
                           help="Maximum length to measure")),
            ("--cid", dict(action="store", type="int", dest="cid",
                           help="Apply combination ID")),
            ("--xsd", dict(action="store_true", dest="xsd",
                           help="Create XML Schema fragment")),
        )
        for flag, settings in option_table:
            self.add_option(flag, **settings)
0015
def _infer_xsd_type(value, current):
    """Return the XSD type for an element after seeing *value*.

    *current* is the type inferred so far ('' when nothing was inferred
    yet). An integer value only sets the type when none is known, a float
    value widens '' / 'xs:integer' to 'xs:double', and any other value
    forces 'xs:string'.
    """
    try:
        int(value)
    except ValueError:
        pass
    else:
        return 'xs:integer' if current == '' else current
    try:
        float(value)
    except ValueError:
        return 'xs:string'
    return 'xs:double' if current in ('', 'xs:integer') else current


def read_data():
    """
    Collect keys, element statistics and histograms from the source document.

    Reads the module-level ``srcdoc`` and updates the module-level
    ``elements``, ``keys`` and ``histograms`` containers in place:

    * ``elements[name]`` holds the occurrence ``count`` of each child
      element name and its inferred XSD ``type``.
    * ``keys[id]`` holds every distinct (name, value) pair together with
      its occurrence ``count``; ids are handed out sequentially from 0.
    * ``histograms`` receives, for every ``Histogram`` element, the sorted
      list of key ids of its child elements.

    The value of a child element is the ``nodeValue`` of its first child
    node. Elements without children, or whose first child has no
    ``nodeValue``, are given the empty string as value instead of raising.
    """
    print("Reading histogram file")
    next_id = 0
    for histo in srcdoc.getElementsByTagName("Histogram"):
        key_ids = []
        for node in histo.childNodes:
            if node.nodeType != node.ELEMENT_NODE:
                continue
            name = node.localName
            first = node.firstChild
            value = first.nodeValue if first is not None else None
            if value is None:
                # Empty element (<x/>) or non-text first child: no usable text.
                value = ''

            stats = elements.setdefault(name, {'type': '', 'count': 0})
            stats['count'] += 1
            stats['type'] = _infer_xsd_type(value, stats['type'])

            # Reuse the id of an already known (name, value) pair, if any.
            for key_id, entry in keys.items():
                if entry['name'] == name and entry['value'] == value:
                    entry['count'] += 1
                    key_ids.append(key_id)
                    break
            else:
                keys[next_id] = {'name': name, 'value': value, 'count': 1}
                key_ids.append(next_id)
                next_id += 1
        key_ids.sort()
        histograms.append(key_ids)
0056
def create_xsd():
    """
    Build the ``HistogramType`` XML Schema fragment inside ``resdoc``.

    Appends an ``xs:complexType`` named ``HistogramType`` to the
    module-level ``resdoc``. It contains a single ``xs:all`` group with one
    ``xs:element`` per entry of the module-level ``elements``, ordered by
    element name and typed with the inferred ``type``. An element whose
    occurrence count is lower than the number of collected ``histograms``
    is marked optional.
    """
    root = resdoc.createElement("xs:complexType")
    root.setAttribute("name", "HistogramType")
    resdoc.appendChild(root)
    group = resdoc.createElement("xs:all")
    root.appendChild(group)

    total = len(histograms)
    for name in sorted(elements):
        info = elements[name]
        el = resdoc.createElement("xs:element")
        el.setAttribute("name", name)
        el.setAttribute("type", info['type'])
        if info['count'] < total:
            el.setAttribute("minOccurs", '0')
            # NOTE(review): the indexed listing lost its indentation; maxOccurs
            # is assumed to belong to this optional branch - confirm against
            # the original file.
            el.setAttribute("maxOccurs", '1')
        group.appendChild(el)
0074
def create_declaration(cid):
    """
    Print the combination stored under *cid* followed by its key/value pairs.

    Looks the combination up in the module-level ``comb`` and resolves each
    of its key ids through the module-level ``keys``.
    """
    selected = comb[cid]
    print("Declaration to apply:", selected)
    for key_id in selected:
        entry = keys[key_id]
        print(entry['name'], '=', entry['value'])
0080
def cexists(s, c):
    """
    Return True when every item of *c* is contained in *s*.

    Membership is tested per item, so repeated values in *s* or *c* cannot
    distort the answer. An empty *c* is contained in any *s*.
    """
    return all(item in s for item in c)
0088
def ccopy(a):
    """
    Return a new list holding the items of the iterable *a* (shallow copy).
    """
    return list(a)
0094
def kpermutation(vfrom, vto, min, max):
    """
    Yield every run of consecutive integers within ``vfrom..vto``.

    Both bounds are inclusive. For each start value the runs
    ``[start]``, ``[start, start + 1]``, ... are produced in growing order,
    and only those whose length lies in ``min..max`` (inclusive) are
    yielded. Every yielded run is a fresh list, so callers may keep or
    modify it.

    The parameter names ``min`` and ``max`` shadow the builtins inside this
    function; they are kept unchanged for compatibility with callers.
    """
    stop = vto + 1
    for start in range(vfrom, stop):
        for end in range(start, stop):
            length = end - start + 1
            if length > max:
                # Longer runs from this start can only exceed the limit too.
                break
            if length >= min:
                yield list(range(start, end + 1))
0104
def compute(min, max):
    """
    Find the key-id runs that occur in at least one histogram.

    Every run produced by ``kpermutation`` over the existing key ids, with
    a length between *min* and *max*, is tested against each entry of the
    module-level ``histograms`` with ``cexists``. A run contained in at
    least one histogram gets the next free combination id (the current
    size of ``comb``); ``comb[id]`` stores a copy of the run and
    ``results[id]`` the list of histograms containing it.

    The parameter names shadow the builtins ``min`` and ``max``; they are
    kept unchanged for compatibility with callers.
    """
    print("Computing permutations")
    # kpermutation treats its upper bound as inclusive, so the last valid
    # key id is len(keys) - 1.
    for run in kpermutation(0, len(keys) - 1, min, max):
        matching = [h for h in histograms if cexists(h, run)]
        if matching:
            ci = len(comb)
            comb[ci] = list(run)
            results[ci] = matching
0117
def priorities():
    """
    Group combination ids by the number of histograms they matched.

    For every id in the module-level ``comb`` the size of ``results[id]``
    is used as the bucket in the module-level ``prior``; combinations
    matched by exactly one histogram are left out.
    """
    for comb_id in comb:
        hits = len(results[comb_id])
        if hits != 1:
            prior.setdefault(hits, []).append(comb_id)
0127
if __name__ == "__main__":
    # Script entry point: parse the options, load the source XML and either
    # print an XML Schema fragment (--xsd) or analyse key combinations.

    optManager = OptionParser()
    (opts, args) = optManager.parse_args()
    opts = vars(opts)

    if opts['src'] in ('', None):
        print("You must specify a valid source xml file")
        # A missing source file is a usage error, so report failure.
        sys.exit(1)

    resdoc = dom.Document()
    srcdoc = dom.parse(opts['src'])

    # Module-level state shared by the functions above.
    histograms = []
    keys = {}
    results = {}
    comb = {}
    prior = {}
    elements = {}

    read_data()

    if opts['xsd'] is not None:

        create_xsd()
        print(resdoc.toprettyxml())

    else:

        # Shortest / longest histogram; the defaults apply when there are none.
        len_min = min((len(h) for h in histograms), default=1000000)
        len_max = max((len(h) for h in histograms), default=0)
        print("Computed len: min = ", len_min, ", max = ", len_max)

        # 0 or a missing option selects the default bound.
        len_from = opts['min'] or 2
        len_to = opts['max'] or len_max
        print("Computing lens from", len_from, " to ", len_to)

        compute(len_from, len_to)
        priorities()

        for pi in sorted(prior):
            print(pi, "=", prior[pi])

        cid = opts['cid']
        if cid is not None:
            if cid not in comb:
                # Avoid a KeyError traceback for an id that was never computed.
                print("Unknown combination ID:", cid)
                sys.exit(1)
            create_declaration(cid)