NanoAOD/interface/FlatTable.h

0001 #ifndef DataFormats_NanoAOD_FlatTable_h
0002 #define DataFormats_NanoAOD_FlatTable_h
0003
0004 #include "DataFormats/Math/interface/libminifloat.h"
0005 #include "FWCore/Utilities/interface/Exception.h"
0006
0007 #include <cstdint>
0008 #include <vector>
0009 #include <span>
0010 #include <string>
0011 #include <type_traits>
0012
0013 namespace nanoaod {
0014
0015   namespace flatTableHelper {
0016     template <typename T>
0017     struct MaybeMantissaReduce {
0018       MaybeMantissaReduce(int mantissaBits) {}
0019       inline T one(const T &val) const { return val; }
0020       template <typename Span>
0021       inline void bulk(Span const &data) const {}
0022     };
0023     template <>
0024     struct MaybeMantissaReduce<float> {
0025       int bits_;
0026       MaybeMantissaReduce(int mantissaBits) : bits_(mantissaBits) {}
0027       inline float one(const float &val) const {
0028         return (bits_ > 0 ? MiniFloatConverter::reduceMantissaToNbitsRounding(val, bits_) : val);
0029       }
0030       template <typename Span>
0031       inline void bulk(Span &&data) const {
0032         if (bits_ > 0)
0033           MiniFloatConverter::reduceMantissaToNbitsRounding(bits_, data.begin(), data.end(), data.begin());
0034       }
0035     };
0036   }  // namespace flatTableHelper
0037
0038   class FlatTable {
0039   public:
0040     //Int8, //removed due to mis-interpretation in ROOT/pyroot
0041     enum class ColumnType {
0042       UInt8,
0043       Int16,
0044       UInt16,
0045       Int32,
0046       UInt32,
0047       Int64,
0048       UInt64,
0049       Bool,
0050       Float,
0051       Double,
0052     };  // We could have other Float types with reduced mantissa, and similar
0053
0054     // special case: bool stored as vector of uint8
0055     template <typename T>
0056     using ColumnStorageType = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>;
0057
0058     FlatTable() : size_(0) {}
0059     FlatTable(unsigned int size, const std::string &name, bool singleton, bool extension = false)
0060         : size_(size), name_(name), singleton_(singleton), extension_(extension) {}
0061     ~FlatTable() {}
0062
0063     unsigned int nColumns() const { return columns_.size(); };
0064     unsigned int nRows() const { return size_; };
0065     unsigned int size() const { return size_; }
0066     bool singleton() const { return singleton_; }
0067     bool extension() const { return extension_; }
0068     const std::string &name() const { return name_; }
0069
0070     const std::string &columnName(unsigned int col) const { return columns_[col].name; }
0071     int columnIndex(const std::string &name) const;
0072
0073     ColumnType columnType(unsigned int col) const { return columns_[col].type; }
0074
0075     void setDoc(const std::string &doc) { doc_ = doc; }
0076     const std::string &doc() const { return doc_; }
0077     const std::string &columnDoc(unsigned int col) const { return columns_[col].doc; }
0078
0079     /// get a column by index (const)
0080     template <typename T>
0081     auto columnData(unsigned int column) const {
0082       auto begin = beginData<T>(column);
0083       return std::span<const ColumnStorageType<T>>(begin, size_t(size_));
0084     }
0085
0086     /// get a column by index (non-const)
0087     template <typename T>
0088     auto columnData(unsigned int column) {
0089       auto begin = beginData<T>(column);
0090       return std::span<ColumnStorageType<T>>(begin, size_);
0091     }
0092
0093     /// get a column value for singleton (const)
0094     template <typename T>
0095     const auto &columValue(unsigned int column) const {
0096       if (!singleton())
0097         throw cms::Exception("LogicError", "columnValue works only for singleton tables");
0098       return *beginData<T>(column);
0099     }
0100
0101     double getAnyValue(unsigned int row, unsigned int column) const;
0102
0103     class RowView {
0104     public:
0105       RowView() {}
0106       RowView(const FlatTable &table, unsigned int row) : table_(&table), row_(row) {}
0107       double getAnyValue(unsigned int column) const { return table_->getAnyValue(row_, column); }
0108       double getAnyValue(const std::string &column) const {
0109         auto index = table_->columnIndex(column);
0110         if (index == -1)
0111           throwUnknownColumn(column);
0112         return table_->getAnyValue(row_, index);
0113       }
0114       const FlatTable &table() const { return *table_; }
0115       unsigned int row() const { return row_; }
0116
0117     private:
0118       [[noreturn]] static void throwUnknownColumn(const std::string &column) noexcept(false);
0119       const FlatTable *table_;
0120       unsigned int row_;
0121     };
0122     RowView row(unsigned int row) const { return RowView(*this, row); }
0123
0124     template <typename T, typename C>
0125     void addColumn(const std::string &name, const C &values, const std::string &docString, int mantissaBits = -1) {
0126       if (columnIndex(name) != -1)
0127         throw cms::Exception("LogicError", "Duplicated column: " + name);
0128       if (values.size() != size())
0129         throw cms::Exception("LogicError", "Mismatched size for " + name);
0130       auto &vec = bigVector<T>();
0131       columns_.emplace_back(name, docString, defaultColumnType<T>(), vec.size());
0132       vec.insert(vec.end(), values.begin(), values.end());
0133       flatTableHelper::MaybeMantissaReduce<T>(mantissaBits).bulk(columnData<T>(columns_.size() - 1));
0134     }
0135
0136     template <typename T, typename C>
0137     void addColumnValue(const std::string &name, const C &value, const std::string &docString, int mantissaBits = -1) {
0138       if (!singleton())
0139         throw cms::Exception("LogicError", "addColumnValue works only for singleton tables");
0140       if (columnIndex(name) != -1)
0141         throw cms::Exception("LogicError", "Duplicated column: " + name);
0142       auto &vec = bigVector<T>();
0143       columns_.emplace_back(name, docString, defaultColumnType<T>(), vec.size());
0144       vec.push_back(flatTableHelper::MaybeMantissaReduce<T>(mantissaBits).one(value));
0145     }
0146
0147     void addExtension(const FlatTable &extension);
0148
0149     template <class T>
0150     struct dependent_false : std::false_type {};
0151     template <typename T>
0152     static ColumnType defaultColumnType() {
0153       if constexpr (std::is_same<T, uint8_t>())
0154         return ColumnType::UInt8;
0155       else if constexpr (std::is_same<T, int16_t>())
0156         return ColumnType::Int16;
0157       else if constexpr (std::is_same<T, uint16_t>())
0158         return ColumnType::UInt16;
0159       else if constexpr (std::is_same<T, int32_t>())
0160         return ColumnType::Int32;
0161       else if constexpr (std::is_same<T, uint32_t>())
0162         return ColumnType::UInt32;
0163       else if constexpr (std::is_same<T, int64_t>())
0164         return ColumnType::Int64;
0165       else if constexpr (std::is_same<T, uint64_t>())
0166         return ColumnType::UInt64;
0167       else if constexpr (std::is_same<T, bool>())
0168         return ColumnType::Bool;
0169       else if constexpr (std::is_same<T, float>())
0170         return ColumnType::Float;
0171       else if constexpr (std::is_same<T, double>())
0172         return ColumnType::Double;
0173       else
0174         static_assert(dependent_false<T>::value, "unsupported type");
0175     }
0176
0177     // this below needs to be public for ROOT, but it is to be considered private otherwise
0178     struct Column {
0179       std::string name, doc;
0180       ColumnType type;
0181       unsigned int firstIndex;
0182       Column() {}  // for ROOT
0183       Column(const std::string &aname, const std::string &docString, ColumnType atype, unsigned int anIndex)
0184           : name(aname), doc(docString), type(atype), firstIndex(anIndex) {}
0185     };
0186
0187   private:
0188     template <typename T>
0189     auto beginData(unsigned int column) const {
0190       return bigVector<T>().cbegin() + columns_[column].firstIndex;
0191     }
0192     template <typename T>
0193     auto beginData(unsigned int column) {
0194       return bigVector<T>().begin() + columns_[column].firstIndex;
0195     }
0196
0197     template <typename T>
0198     auto const &bigVector() const {
0199       return bigVectorImpl<T>(*this);
0200     }
0201     template <typename T>
0202     auto &bigVector() {
0203       return bigVectorImpl<T>(*this);
0204     }
0205
0206     template <typename T, class This>
0207     static auto &bigVectorImpl(This &table) {
0208       // helper function to avoid code duplication, for the two accessor functions that differ only in const-ness
0209       using StorageT = ColumnStorageType<T>;
0210       if constexpr (std::is_same<StorageT, uint8_t>())
0211         return table.uint8s_;
0212       else if constexpr (std::is_same<StorageT, int16_t>())
0213         return table.int16s_;
0214       else if constexpr (std::is_same<StorageT, uint16_t>())
0215         return table.uint16s_;
0216       else if constexpr (std::is_same<StorageT, int32_t>())
0217         return table.int32s_;
0218       else if constexpr (std::is_same<StorageT, uint32_t>())
0219         return table.uint32s_;
0220       else if constexpr (std::is_same<StorageT, int64_t>())
0221         return table.int64s_;
0222       else if constexpr (std::is_same<StorageT, uint64_t>())
0223         return table.uint64s_;
0224       else if constexpr (std::is_same<StorageT, float>())
0225         return table.floats_;
0226       else if constexpr (std::is_same<StorageT, double>())
0227         return table.doubles_;
0228       else
0229         static_assert(dependent_false<T>::value, "unsupported type");
0230     }
0231
0232     unsigned int size_;
0233     std::string name_, doc_;
0234     bool singleton_, extension_;
0235     std::vector<Column> columns_;
0236     std::vector<uint8_t> uint8s_;
0237     std::vector<int16_t> int16s_;
0238     std::vector<uint16_t> uint16s_;
0239     std::vector<int32_t> int32s_;
0240     std::vector<uint32_t> uint32s_;
0241     std::vector<int64_t> int64s_;
0242     std::vector<uint64_t> uint64s_;
0243     std::vector<float> floats_;
0244     std::vector<double> doubles_;
0245   };
0246
0247 }  // namespace nanoaod
0248
0249 #endif