1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
#ifndef GENERS_BINARYARCHIVEBASE_HH_
#define GENERS_BINARYARCHIVEBASE_HH_
#include <fstream>
#include <sstream>
#include "Alignment/Geners/interface/AbsArchive.hh"
#include "Alignment/Geners/interface/AbsCatalog.hh"
#include "Alignment/Geners/interface/CStringStream.hh"
namespace gs {
class BinaryArchiveBase : public AbsArchive {
public:
// The "mode" argument is a string which can have one or more
// sections, separated by ":". The first section has the same
// meaning as in the "fopen" call (see "man 3 fopen"). Additional
// sections can specify other aspects of the archive behavior
// using the format "option=value" if the default settings are
// not suitable. The available options are:
//
// "z" -- compression type. Possible option values are
// "n" -- no compression (default)
// "z" -- compress with zlib
// "b" -- compress with bzlib2
//
// "cl" -- compression level (an integer between -1 and 9,
// -1 is default. Meanigful for zlib compression
// only. See zlib documentation for details.
//
// "cb" -- compression buffer size in bytes (unsigned integer).
// Default is 1 MB.
//
// "cm" -- minimum object size in bytes to compress (unsigned
// integer). Objects whose serialized size is below
// this limit are not compressed. Default is 1 KB.
//
// "cat" -- if the value is set to "i" (which means "internal"
// or "injected"), the catalog data will be injected
// into the data stream in addition to having it in
// a separate catalog file. This allows for catalog
// recovery from the data stream in cases of program
// failure but also increases the data file size.
// The default value of this option is "s" which means
// that the catalog data will be stored separately.
// This option is meaningful for new archives only,
// for existing archives the value of this option is
// taken from the archive header record.
//
// Example: "w+:z=z:cl=9:cm=2048:cat=s". This will compress
// objects with 2 KB or larger size using level 9 zlib compression
// (the best compression ratio possible in zlib which is also
// the slowest). The archive will be open for reading and writing.
// If an archive with the same name already exists, it will be
// overwritten. The catalog will be stored in a separate file
// created when the archive is closed, catalog recovery from the
// data file(s) in case of a catastrophic program failure will not
// be possible.
//
BinaryArchiveBase(const char *name, const char *mode);
~BinaryArchiveBase() override;
inline bool isOpen() const override { return modeIsValid_ && catalog_; }
inline bool isReadable() const override { return modeIsValid_ && catalog_ && (mode_ & std::ios_base::in); }
inline bool isWritable() const override { return modeIsValid_ && catalog_ && (mode_ & std::ios_base::out); }
// Error message produced in case the archive could not be opened
inline std::string error() const override { return errorStream_ ? errorStream_->str() : std::string(""); }
// Check whether the constructor "mode" argument was valid.
// If it was not, derived classes should behave as if the
// archive could not be opened.
inline bool modeValid() const { return modeIsValid_; }
inline unsigned long long size() const override { return catalog_ ? catalog_->size() : 0ULL; }
inline unsigned long long smallestId() const override { return catalog_ ? catalog_->smallestId() : 0ULL; }
inline unsigned long long largestId() const override { return catalog_ ? catalog_->largestId() : 0ULL; }
inline bool idsAreContiguous() const override { return catalog_ ? catalog_->isContiguous() : false; }
inline bool itemExists(const unsigned long long id) const override {
return catalog_ ? catalog_->itemExists(id) : false;
}
void itemSearch(const SearchSpecifier &namePattern,
const SearchSpecifier &categoryPattern,
std::vector<unsigned long long> *idsFound) const override;
inline std::shared_ptr<const CatalogEntry> catalogEntry(const unsigned long long id) override {
return catalog_ ? catalog_->retrieveEntry(id)
: std::shared_ptr<const CatalogEntry>((const CatalogEntry *)nullptr);
}
// Inspection methods for compression options
inline CStringStream::CompressionMode compressionMode() const { return cStream_->compressionMode(); }
inline std::size_t compressionBufferSize() const { return cStream_->bufferSize(); }
inline int compressionLevel() const { return cStream_->compressionLevel(); }
inline unsigned minSizeToCompress() const { return cStream_->minSizeToCompress(); }
// Inject metadata into the data stream when writing? If this
// method returns "true", we either had "cat=i" in the opening
// mode for a new archive or a corresponding flag was set in
// the header of an existing archive data file.
inline bool injectMetadata() const { return addCatalogToData_; }
// The following method moves the "get pointer" of the stream
// to the end of file as a side effect
static bool isEmptyFile(std::fstream &s);
// The following method converts the first section of the "mode"
// argument into std::ios_base::openmode
static std::ios_base::openmode parseMode(const char *mode);
protected:
inline AbsCatalog *catalog() const { return catalog_; }
// Non-null catalog must be set exactly once. This object will
// assume the catalog ownership. Null catalog can be set after
// non-null in case some essential operation on the catalog has
// failed (such as writing it to file) in order to indicate
// failure to open the archive.
void setCatalog(AbsCatalog *c);
// Set compression mode (can be used when the catalog is read).
// The argment must be consistent with one of the modes defined
// in the CStringStream.hh header.
inline void setCompressionMode(const unsigned cMode) {
cStream_->setCompressionMode(static_cast<CStringStream::CompressionMode>(cMode));
}
// Stream for error messages. To be used from constructors
// of derived classes in case of problems, to indicate the
// reason why the archive could not be opened.
inline std::ostringstream &errorStream() {
if (!errorStream_)
errorStream_ = new std::ostringstream();
return *errorStream_;
}
// The following method opens a binary archive. It makes sure
// that a proper header is written out in case an empty file
// is open or in case the file is truncated, and that the header
// is there when a non-empty file is open without truncation.
// If the argument fstream is open when this method is invoked,
// it is closed first. After invocation of this method, the
// "injectMetadata()" flag will be properly set up for the
// data file open last. If the method is not successful, it
// closes the stream and throws an exception inherited from
// "IOException".
//
void openDataFile(std::fstream &stream, const char *filename);
// Stream mode used to open the archive data file(s)
inline std::ios_base::openmode openmode() const { return mode_; }
// Info needed for catalog recovery. These methods will return
// null pointers if item metadata is not in the data stream.
const ClassId *catalogEntryClassId() const { return storedEntryId_; }
const ClassId *itemLocationClassId() const { return storedLocationId_; }
public:
BinaryArchiveBase() = delete;
BinaryArchiveBase(const BinaryArchiveBase &) = delete;
BinaryArchiveBase &operator=(const BinaryArchiveBase &) = delete;
private:
static bool parseArchiveOptions(std::ostringstream &errmes,
const char *mode,
CStringStream::CompressionMode *m,
int *compressionLevel,
unsigned *minSizeToCompress,
unsigned *bufSize,
bool *multiplexCatalog);
void writeHeader(std::ostream &os);
// The following method returns "true" if a correctly
// formatted header was found
bool readHeader(std::istream &is);
void search(AbsReference &reference) override;
// The derived classes must override the following two methods
virtual std::ostream &plainOutputStream() = 0;
virtual std::istream &plainInputStream(unsigned long long id,
unsigned *compressionCode,
unsigned long long *length) = 0;
std::istream &inputStream(unsigned long long id, long long *sz) override;
std::ostream &outputStream() override;
std::ostream &compressedStream(std::ostream &uncompressed) override;
unsigned flushCompressedRecord(std::ostream &compressed) override;
void releaseClassIds();
const std::ios_base::openmode mode_;
std::ostringstream *errorStream_;
CStringStream *cStream_;
AbsCatalog *catalog_;
ClassId *storedEntryId_;
ClassId *storedLocationId_;
bool catalogIsSet_;
bool modeIsValid_;
bool addCatalogToData_;
};
} // namespace gs
#endif // GENERS_BINARYARCHIVEBASE_HH_
|