File indexing completed on 2024-04-06 11:56:20
0001 #include <cassert>
0002 #include <cctype>
0003 #include <cerrno>
0004 #include <climits>
0005 #include <cstdlib>
0006 #include <cstring>
0007
0008 #include "Alignment/Geners/interface/BinaryArchiveBase.hh"
0009 #include "Alignment/Geners/interface/CatalogIO.hh"
0010 #include "Alignment/Geners/interface/binaryIO.hh"
0011
0012 #ifdef GENERS_BINARY_ARCHIVE_FORMAT_ID
0013 #undef GENERS_BINARY_ARCHIVE_FORMAT_ID
0014 #endif
0015 #define GENERS_BINARY_ARCHIVE_FORMAT_ID (0x1f2e3d4c)
0016
// Parse the whole C string "c" as an unsigned integer. The base is
// auto-detected by strtoul (decimal, 0x... hexadecimal, 0... octal).
//
// On success, stores the value in *result and returns true. On failure,
// appends a description of the problem to "err", leaves *result untouched,
// and returns false. Parsing fails if the string is empty, has a leading
// minus sign, contains trailing characters which are not part of the number,
// or if the value does not fit into "unsigned".
static bool parse_unsigned(std::ostringstream &err, const char *c, unsigned *result) {
  // strtoul accepts a leading '-' and returns the negated value converted
  // to unsigned long, so negative input has to be detected by hand.
  const char *firstChar = c;
  while (std::isspace(static_cast<unsigned char>(*firstChar)))
    ++firstChar;
  const bool negative = (*firstChar == '-');

  char *endptr = nullptr;
  errno = 0;
  const unsigned long value = std::strtoul(c, &endptr, 0);
  // Remember errno right away: the stream insertions below may change it
  const int parseErrno = errno;

  // endptr == c means that no digits were consumed (e.g., empty string)
  if (parseErrno || negative || endptr == c || *endptr != '\0') {
    err << "expected an unsigned integer, got \"" << c << '"';
    if (parseErrno)
      err << ", " << std::strerror(parseErrno);
    return false;
  }
  if (value > UINT_MAX) {
    err << "unsigned value \"" << c << "\" is out of range";
    return false;
  }
  *result = static_cast<unsigned>(value);
  return true;
}
0034
// Parse the whole C string "c" as a signed integer. The base is
// auto-detected by strtol (decimal, 0x... hexadecimal, 0... octal).
//
// On success, stores the value in *result and returns true. On failure,
// appends a description of the problem to "err", leaves *result untouched,
// and returns false. Parsing fails if the string is empty, contains
// trailing characters which are not part of the number, or if the value
// does not fit into "int".
static bool parse_int(std::ostringstream &err, const char *c, int *result) {
  char *endptr = nullptr;
  errno = 0;
  const long value = std::strtol(c, &endptr, 0);
  // Remember errno right away: the stream insertions below may change it
  const int parseErrno = errno;

  // endptr == c means that no digits were consumed (e.g., empty string)
  if (parseErrno || endptr == c || *endptr != '\0') {
    err << "expected an integer, got \"" << c << '"';
    if (parseErrno)
      err << ", " << std::strerror(parseErrno);
    return false;
  }
  if (value < INT_MIN || value > INT_MAX) {
    err << "integer value \"" << c << "\" is out of range";
    return false;
  }
  *result = static_cast<int>(value);
  return true;
}
0052
0053 namespace gs {
0054 BinaryArchiveBase::BinaryArchiveBase(const char *name, const char *mode)
0055 : AbsArchive(name),
0056 mode_(parseMode(mode)),
0057 errorStream_(nullptr),
0058 cStream_(nullptr),
0059 catalog_(nullptr),
0060 storedEntryId_(nullptr),
0061 storedLocationId_(nullptr),
0062 catalogIsSet_(false),
0063 addCatalogToData_(false) {
0064 CStringStream::CompressionMode m = CStringStream::NOT_COMPRESSED;
0065 int compressionLevel = -1;
0066 unsigned minSizeToCompress = 1024U;
0067 unsigned bufSize = 1048576U;
0068
0069 std::ostringstream err;
0070 modeIsValid_ =
0071 parseArchiveOptions(err, mode, &m, &compressionLevel, &minSizeToCompress, &bufSize, &addCatalogToData_);
0072 if (modeIsValid_)
0073 cStream_ = new CStringStream(m, compressionLevel, minSizeToCompress, bufSize);
0074 else {
0075 errorStream() << "In BinaryArchiveBase constructor: "
0076 << "invalid archive opening mode \"" << mode << '"';
0077 const std::string &errInfo = err.str();
0078 if (!errInfo.empty())
0079 errorStream() << ": " << errInfo;
0080 }
0081 }
0082
0083 void BinaryArchiveBase::releaseClassIds() {
0084 delete storedEntryId_;
0085 storedEntryId_ = nullptr;
0086 delete storedLocationId_;
0087 storedLocationId_ = nullptr;
0088 }
0089
0090 BinaryArchiveBase::~BinaryArchiveBase() {
0091 releaseClassIds();
0092 delete errorStream_;
0093 delete catalog_;
0094 delete cStream_;
0095 }
0096
0097 void BinaryArchiveBase::writeHeader(std::ostream &os) {
0098 const unsigned format = GENERS_BINARY_ARCHIVE_FORMAT_ID;
0099 write_pod(os, format);
0100
0101
0102 const unsigned multiplex = addCatalogToData_ ? 1 : 0;
0103 const unsigned sizeoflong = sizeof(long);
0104 const unsigned infoword = (sizeoflong << 1) | multiplex;
0105 write_pod(os, infoword);
0106
0107 if (multiplex) {
0108
0109 releaseClassIds();
0110 storedEntryId_ = new ClassId(ClassId::makeId<CatalogEntry>());
0111 storedEntryId_->write(os);
0112 storedLocationId_ = new ClassId(ClassId::makeId<ItemLocation>());
0113 storedLocationId_->write(os);
0114 }
0115 }
0116
0117 bool BinaryArchiveBase::readHeader(std::istream &is) {
0118 const unsigned expectedFormat = GENERS_BINARY_ARCHIVE_FORMAT_ID;
0119 is.seekg(0, std::ios_base::beg);
0120 unsigned format = 0;
0121 read_pod(is, &format);
0122 if (format != expectedFormat)
0123 return false;
0124
0125 unsigned infoword = 0xffffffff;
0126 read_pod(is, &infoword);
0127 const unsigned multiplex = infoword & 0x1U;
0128 const unsigned sizeoflong = infoword >> 1;
0129
0130
0131
0132
0133 if (sizeoflong != sizeof(long))
0134 return false;
0135
0136 addCatalogToData_ = multiplex;
0137 if (addCatalogToData_) {
0138 releaseClassIds();
0139 storedEntryId_ = new ClassId(is, 1);
0140 storedLocationId_ = new ClassId(is, 1);
0141
0142
0143
0144
0145 if (mode_ & std::ios_base::out) {
0146 const ClassId &entryId = ClassId::makeId<CatalogEntry>();
0147 const ClassId &locId = ClassId::makeId<ItemLocation>();
0148 if (entryId != *storedEntryId_ || locId != *storedLocationId_)
0149 throw IOInvalidData(
0150 "In gs::BinaryArchiveBase::readHeader: this "
0151 "archive can no longer be open for update as it was "
0152 "created using an older version of I/O software");
0153 }
0154 }
0155 return !is.fail();
0156 }
0157
0158 void BinaryArchiveBase::openDataFile(std::fstream &stream, const char *filename) {
0159 assert(filename);
0160 if (stream.is_open())
0161 stream.close();
0162 stream.clear();
0163 stream.open(filename, mode_);
0164 if (!stream.is_open())
0165 throw IOOpeningFailure("gs::BinaryArchiveBase::openDataFile", filename);
0166
0167
0168 bool writeHead = false;
0169 if (mode_ & std::ios_base::out) {
0170 if (mode_ & std::ios_base::trunc)
0171 writeHead = true;
0172 else if (isEmptyFile(stream))
0173 writeHead = true;
0174 }
0175
0176 if (writeHead) {
0177 writeHeader(stream);
0178 if (stream.fail()) {
0179 stream.close();
0180 std::string e =
0181 "In gs::BinaryArchiveBase::openDataFile: "
0182 "failed to write archive header to file \"";
0183 e += filename;
0184 e += "\"";
0185 throw IOWriteFailure(e);
0186 }
0187 } else {
0188 if (!readHeader(stream)) {
0189 const bool failed = stream.fail();
0190 stream.close();
0191 std::string e = "In gs::BinaryArchiveBase::openDataFile: ";
0192 if (failed) {
0193 e += "could not read archive header from file \"";
0194 e += filename;
0195 e += "\"";
0196 throw IOReadFailure(e);
0197 } else {
0198 e += "no valid archive header in file \"";
0199 e += filename;
0200 e += "\"";
0201 throw IOInvalidData(e);
0202 }
0203 }
0204 }
0205 }
0206
0207 void BinaryArchiveBase::setCatalog(AbsCatalog *c) {
0208 if (c) {
0209 assert(!catalogIsSet_);
0210 catalogIsSet_ = true;
0211 }
0212 delete catalog_;
0213 catalog_ = c;
0214 }
0215
0216 void BinaryArchiveBase::itemSearch(const SearchSpecifier &namePattern,
0217 const SearchSpecifier &categoryPattern,
0218 std::vector<unsigned long long> *idsFound) const {
0219 if (catalog_)
0220 catalog_->search(namePattern, categoryPattern, idsFound);
0221 else {
0222 assert(idsFound);
0223 idsFound->clear();
0224 }
0225 }
0226
0227 bool BinaryArchiveBase::parseArchiveOptions(std::ostringstream &err,
0228 const char *modeIn,
0229 CStringStream::CompressionMode *m,
0230 int *compressionLevel,
0231 unsigned *minSizeToCompress,
0232 unsigned *bufSize,
0233 bool *multiplexCatalog) {
0234 if (!modeIn)
0235 return true;
0236 std::string cmode(modeIn ? modeIn : "");
0237 if (cmode.empty())
0238 return true;
0239 char *mode = const_cast<char *>(cmode.c_str());
0240
0241 unsigned cnt = 0;
0242 for (char *opt = strtok(mode, ":"); opt; opt = strtok(nullptr, ":"), ++cnt) {
0243
0244 if (!cnt)
0245 continue;
0246 char *eq = strchr(opt, '=');
0247 if (eq) {
0248
0249 char *optname = opt;
0250 while (isspace(*optname) && optname < eq)
0251 ++optname;
0252 if (optname == eq) {
0253 err << "invalid binary archive option \"\"";
0254 return false;
0255 }
0256 char *optend = eq - 1;
0257 while (isspace(*optend))
0258 --optend;
0259 ++optend;
0260 *optend = '\0';
0261
0262
0263 char *optval = eq + 1;
0264 while (*optval && isspace(*optval))
0265 ++optval;
0266 if (!*optval) {
0267 err << "invalid binary archive option value \"\"";
0268 return false;
0269 }
0270 char *valend = opt + strlen(opt) - 1;
0271 while (isspace(*valend))
0272 --valend;
0273 ++valend;
0274 *valend = '\0';
0275 if (strlen(optval) == 0) {
0276 err << "invalid binary archive option value \"\"";
0277 return false;
0278 }
0279
0280
0281 if (!strcasecmp(optname, "z")) {
0282
0283 if (!CStringStream::getCompressionModeByName(optval, m)) {
0284 err << "invalid compression type \"" << optval << '"';
0285 return false;
0286 }
0287 } else if (!strcasecmp(optname, "cl")) {
0288
0289 if (!parse_int(err, optval, compressionLevel))
0290 return false;
0291 if (*compressionLevel < -1 || *compressionLevel > 9) {
0292 err << "compression level is out of range";
0293 return false;
0294 }
0295 } else if (!strcasecmp(optname, "cb")) {
0296
0297 if (!parse_unsigned(err, optval, bufSize))
0298 return false;
0299 } else if (!strcasecmp(optname, "cm")) {
0300
0301 if (!parse_unsigned(err, optval, minSizeToCompress))
0302 return false;
0303 } else if (!strcasecmp(optname, "cat")) {
0304
0305 if (optval[0] == 'i' || optval[0] == 'I')
0306 *multiplexCatalog = true;
0307 else if (optval[0] == 's' || optval[0] == 'S')
0308 *multiplexCatalog = false;
0309 else {
0310 err << "invalid catalog mode \"" << optval << '"';
0311 return false;
0312 }
0313 } else {
0314
0315 err << "unrecognized binary archive option \"" << optname << '"';
0316 return false;
0317 }
0318 } else {
0319 err << "invalid binary archive option \"" << opt << '"';
0320 return false;
0321 }
0322 }
0323 return true;
0324 }
0325
0326 std::ios_base::openmode BinaryArchiveBase::parseMode(const char *mode) {
0327 std::ios_base::openmode m = std::ios_base::binary;
0328 if (mode) {
0329 const unsigned len = strlen(mode);
0330 for (unsigned i = 0; i < len; ++i) {
0331
0332
0333 if (mode[i] == 'r')
0334 m |= std::ios_base::in;
0335 else if (mode[i] == 'w')
0336 m |= (std::ios_base::out | std::ios_base::trunc);
0337 else if (mode[i] == 'a')
0338 m |= (std::ios_base::out | std::ios_base::app);
0339 else if (mode[i] == '+')
0340 m |= (std::ios_base::in | std::ios_base::out);
0341 else if (mode[i] == ':')
0342 break;
0343 }
0344 }
0345
0346
0347 if (!(m & (std::ios_base::in | std::ios_base::out)))
0348 m |= std::ios_base::in;
0349 return m;
0350 }
0351
0352 void BinaryArchiveBase::search(AbsReference &reference) {
0353 if (catalog_) {
0354 std::vector<unsigned long long> idlist;
0355 catalog_->search(reference.namePattern(), reference.categoryPattern(), &idlist);
0356 const unsigned long nfound = idlist.size();
0357 for (unsigned long i = 0; i < nfound; ++i) {
0358 std::shared_ptr<const CatalogEntry> pentry = catalog_->retrieveEntry(idlist[i]);
0359 if (reference.isIOCompatible(*pentry))
0360 addItemToReference(reference, idlist[i]);
0361 }
0362 }
0363 }
0364
0365 bool BinaryArchiveBase::isEmptyFile(std::fstream &s) {
0366 s.seekg(0, std::ios_base::end);
0367 return s.tellg() == std::streampos(0);
0368 }
0369
0370 std::istream &BinaryArchiveBase::inputStream(const unsigned long long id, long long *sz) {
0371 unsigned long long length = 0;
0372 unsigned compressionCode = 0;
0373 std::istream &is = plainInputStream(id, &compressionCode, &length);
0374 if (cStream_->compressionMode() == CStringStream::NOT_COMPRESSED) {
0375 if (sz)
0376 *sz = -1LL;
0377 return is;
0378 } else {
0379 cStream_->readCompressed(is, compressionCode, length);
0380 if (sz) {
0381 std::streamoff off = cStream_->tellp();
0382 *sz = off;
0383 }
0384 return *cStream_;
0385 }
0386 }
0387
0388 std::ostream &BinaryArchiveBase::outputStream() { return plainOutputStream(); }
0389
0390 std::ostream &BinaryArchiveBase::compressedStream(std::ostream &os) {
0391 if (cStream_->compressionMode() == CStringStream::NOT_COMPRESSED)
0392 return os;
0393 else {
0394 cStream_->reset();
0395 cStream_->setSink(os);
0396 return *cStream_;
0397 }
0398 }
0399
0400 unsigned BinaryArchiveBase::flushCompressedRecord(std::ostream &) {
0401 CStringStream::CompressionMode m = cStream_->compressionMode();
0402 if (m != CStringStream::NOT_COMPRESSED) {
0403 cStream_->flush();
0404 m = cStream_->writeCompressed();
0405 }
0406 return static_cast<unsigned>(m);
0407 }
0408 }