1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
import h5py
import zlib
import lzma
import numpy as np
# The file structure
#
# "file_format" - Attribute, says which version of the file format was used
# "default_payload_compressor" - Attribute, name of the compressor used for the payloads
# "null_payload" - DataSet, empty; referenced from "payload" where the payload name is None
#
# "Records" - Group
#   <Record> - Group, name is the EventSetup record name
#     "DataProducts" - Group
#       <data product> - Group, name is the '<type>@<label>' combination
#         "type" - Attribute, the C++ canonical type name
#         "Payloads" - Group
#           <payload> - DataSet, name is the hash used in the DB
#             "memsize" - Attribute, bytes needed after decompression
#             "type" - Attribute, the actual type stored (for polymorphism)
#     "Tags" - Group
#       <tag> - Group, name is
#                 the same as in the DB if only one data product is in the tag, or
#                 a hybrid name formed from the different DB tags it merged
#         "products" - Attribute, list of the data products used, in the order they appear in "payload"
#         "time_type" - Attribute, either 'run_lumi' or 'time'
#         "db_tags" - Attribute, the list of DB tags that were combined
#         "record" - Attribute, name of the record to which the tag is associated (optimizes readback)
#         "first" - DataSet, holds the beginning IOVSyncValue for the IOVs
#         "last" - DataSet, holds the end IOVSyncValue for the IOVs
#         "payload" - DataSet, references to the payloads for this IOV for each data product
#
# "GlobalTags" - Group
#   <global tag> - Group, name is the global tag name
#     "Tags" - DataSet, holds references to the tags
def writeTagImpl(tagsGroup, name, recName, time_type, IOV_payloads, payloadToRefs, productNames, originalTagNames):
    """Create the group for one tag under tagsGroup and return a reference to it.

    The new group is called `name`.  It receives the attributes "time_type",
    "db_tags", "record" and "products" and the datasets "first", "last" and
    "payload".

    Each entry of IOV_payloads is indexed as
      [0] -> first sync value of the IOV (object with .high and .low),
      [1] -> last sync value of the IOV (object with .high and .low),
      [2] -> iterable of payload names, each looked up in payloadToRefs.

    All string arguments are stored ASCII-encoded.
    """
    def asAscii(strings):
        return [s.encode("ascii") for s in strings]

    # A sync value is stored as a pair of unsigned 32 bit words.
    syncValueType = np.dtype([("high", np.uint32), ("low", np.uint32)])

    def toSyncArray(values):
        packed = np.empty(shape=(len(values),), dtype=syncValueType)
        packed['high'] = [v.high for v in values]
        packed['low'] = [v.low for v in values]
        return packed

    tagGroup = tagsGroup.create_group(name)
    attributes = tagGroup.attrs
    attributes["time_type"] = time_type.encode("ascii")
    attributes["db_tags"] = asAscii(originalTagNames)
    attributes["record"] = recName.encode("ascii")
    attributes['products'] = asAscii(productNames)

    iovStarts = [entry[0] for entry in IOV_payloads]
    iovEnds = [entry[1] for entry in IOV_payloads]
    first_np = toSyncArray(iovStarts)
    last_np = toSyncArray(iovEnds)

    # One row per IOV holding one payload reference per data product.
    payloadRefs = [[payloadToRefs[key] for key in entry[2]] for entry in IOV_payloads]

    # Only tags with more than 100 IOVs are gzip compressed.
    compression = 'gzip' if len(first_np) > 100 else None
    for datasetName, values in (("first", first_np), ("last", last_np)):
        tagGroup.create_dataset(datasetName, data=values, compression=compression)
    tagGroup.create_dataset("payload", data=payloadRefs, dtype=h5py.ref_dtype, compression=compression)
    return tagGroup.ref
def writeTag(tagsGroup, time_type, IOV_payloads, payloadToRefs, originalTagNames, recName, productNames):
    """Pick the group name for a tag and write it through writeTagImpl.

    The group is named after the first entry of originalTagNames; when the
    list does not hold exactly one name, "@joined" is appended to it.
    Returns whatever writeTagImpl returns.
    """
    firstTagName = originalTagNames[0]
    groupName = firstTagName if len(originalTagNames) == 1 else firstTagName + "@joined"
    return writeTagImpl(
        tagsGroup,
        groupName,
        recName,
        time_type,
        IOV_payloads,
        payloadToRefs,
        productNames,
        originalTagNames,
    )
def _compressPayload(compressor, raw):
    """Return the bytes to store for one payload.

    compressor is an object providing compress() (the zlib or lzma module
    here), or None to store the payload unchanged.  The raw bytes are kept
    whenever compressing does not make the payload strictly smaller.
    """
    if not compressor:
        return raw
    packed = compressor.compress(raw)
    if len(packed) >= len(raw):
        #compressing isn't helping
        return raw
    return packed


def writeH5File(fileName, globalTags, excludeRecords, includeRecords, tagReader, compressorName):
    """Write the conditions of the requested global tags into a new HDF5 file.

    fileName       -- path handed to h5py.File, opened with mode 'w'.
    globalTags     -- iterable of global tag names; each one is passed to
                      tagReader and becomes a group under "GlobalTags".
    excludeRecords -- container of record names that are skipped.
    includeRecords -- when non-empty, only records it contains are written.
    tagReader      -- callable taking a global tag name and returning an
                      object whose tags() yields the tags to write.
    compressorName -- "zlib" or "lzma" selects that module to compress the
                      payloads; any other value stores them uncompressed.
                      The name itself is always recorded in the file
                      attribute "default_payload_compressor".

    Progress information is printed to stdout.  Nothing is returned.
    """
    #what are key lists??? They seem to hold objects of type 'cond::persistency::KeyList'
    # and have their own proxy type
    # Records listed here are never written.
    keyListRecords = {"ExDwarfListRcd", "DTKeyedConfigListRcd", "DTKeyedConfigContainerRcd"}

    print(compressorName)
    compressor = {"zlib": zlib, "lzma": lzma}.get(compressorName)

    with h5py.File(fileName, 'w') as h5file:
        h5file.attrs["file_format"] = 1
        h5file.attrs["default_payload_compressor"] = compressorName.encode("ascii")
        recordsGroup = h5file.create_group("Records")
        globalTagsGroup = h5file.create_group("GlobalTags")
        # Shared empty dataset that stands in for a payload name of None.
        null_dataset = h5file.create_dataset("null_payload", data=np.array([], dtype='b'))

        for name in globalTags:
            # Start a fresh list for every global tag so that its "Tags"
            # dataset references only the tags written for this global tag.
            tagGroupRefs = []
            gt = tagReader(name)
            for tag in gt.tags():
                rcd = tag.record()
                if rcd in keyListRecords or rcd in excludeRecords:
                    continue
                if includeRecords and rcd not in includeRecords:
                    continue
                recordDataSize = 0
                payloadToRefs = {None: null_dataset.ref}

                # NOTE(review): create_group is expected to fail if the same
                # record name shows up twice (e.g. in two global tags) - confirm.
                recordGroup = recordsGroup.create_group(rcd)
                tagsGroup = recordGroup.create_group("Tags")
                dataProductsGroup = recordGroup.create_group("DataProducts")
                print("record: %s"%rcd)
                productNames = []
                for dataProduct in tag.dataProducts():
                    productName = dataProduct.name()
                    productNames.append(productName)
                    dataProductGroup = dataProductsGroup.create_group(productName)
                    dataProductGroup.attrs["type"] = dataProduct.objtype().encode("ascii")
                    payloadsGroup = dataProductGroup.create_group("Payloads")
                    print(" product: %s"%productName)
                    for p_index, payload in enumerate(dataProduct.payloads()):
                        # Fetch name and data once per payload instead of on every use.
                        payloadName = payload.name()
                        raw = payload.data()
                        rawSize = len(raw)
                        print(" %i payload: %s size: %i"%(p_index,payloadName,rawSize))
                        recordDataSize += rawSize
                        stored = _compressPayload(compressor, raw)
                        pl = payloadsGroup.create_dataset(payloadName, data=np.frombuffer(stored,dtype='b'))
                        # "memsize" is always the uncompressed size.
                        pl.attrs["memsize"] = rawSize
                        # NOTE(review): stored as returned, not ASCII-encoded
                        # like the data product "type" attribute - confirm intended.
                        pl.attrs["type"] = payload.actualType()
                        payloadToRefs[payloadName] = pl.ref

                tagGroupRefs.append(writeTag(tagsGroup, tag.time_type(), tag.iovsNPayloadNames(), payloadToRefs, tag.originalTagNames(), rcd, productNames))
                print(" total size:",recordDataSize)

            globalTagGroup = globalTagsGroup.create_group(name)
            globalTagGroup.create_dataset("Tags", data=tagGroupRefs, dtype=h5py.ref_dtype)
|