MkFitCore/interface/binnor.h

0001 #ifndef RecoTracker_MkFitCore_interface_binnor_h
0002 #define RecoTracker_MkFitCore_interface_binnor_h
0003
0004 #include "radix_sort.h"
0005
0006 #include <algorithm>
0007 #include <numeric>
0008 #include <vector>
0009 #include <cassert>
0010 #include <cmath>
0011
0012 namespace mkfit {
0013
0014   // For all axis types:
0015   //--------------------
0016   // R - real type
0017   // I - bin index type
0018   // M and N - number of bits for fine and normal binning
0019
0020   // axis_base
0021   //----------
0022   template <typename R, typename I, unsigned M, unsigned N>
0023   struct axis_base {
0024     static_assert(M >= N);
0025
0026     typedef R real_t;
0027     typedef I index_t;
0028
0029     static constexpr unsigned int c_M = M;
0030     static constexpr unsigned int c_N = N;
0031     static constexpr unsigned int c_M2N_shift = M - N;
0032
0033     const R m_R_min, m_R_max;
0034     const R m_M_fac, m_N_fac;
0035     const R m_M_lbhp, m_N_lbhp;  // last bin half-posts
0036     const I m_last_M_bin, m_last_N_bin;
0037
0038     struct I_pair {
0039       I begin;
0040       I end;
0041
0042       I_pair() : begin(0), end(0) {}
0043       I_pair(I b, I e) : begin(b), end(e) {}
0044     };
0045
0046     axis_base(R min, R max, unsigned int M_size, unsigned int N_size)
0047         : m_R_min(min),
0048           m_R_max(max),
0049           m_M_fac(M_size / (max - min)),
0050           m_N_fac(N_size / (max - min)),
0051           m_M_lbhp(max - 0.5 / m_M_fac),
0052           m_N_lbhp(max - 0.5 / m_N_fac),
0053           m_last_M_bin(M_size - 1),
0054           m_last_N_bin(N_size - 1) {
0055       // Requested number of bins must fit within the intended bit-field (declared by binnor, later).
0056       assert(M_size <= (1 << M));
0057       assert(N_size <= (1 << N));
0058       assert(max > min);
0059     }
0060
0061     I from_R_to_M_bin(R r) const { return std::floor((r - m_R_min) * m_M_fac); }
0062     I from_R_to_N_bin(R r) const { return std::floor((r - m_R_min) * m_N_fac); }
0063
0064     I from_R_to_M_bin_safe(R r) const { return r <= m_R_min ? 0 : (r >= m_M_lbhp ? m_last_M_bin : from_R_to_M_bin(r)); }
0065     I from_R_to_N_bin_safe(R r) const { return r <= m_R_min ? 0 : (r >= m_N_lbhp ? m_last_N_bin : from_R_to_N_bin(r)); }
0066
0067     I from_M_bin_to_N_bin(I m) const { return m >> c_M2N_shift; }
0068
0069     I_pair from_R_minmax_to_N_bins(R rmin, R rmax) const {
0070       return I_pair(from_R_to_N_bin_safe(rmin), from_R_to_N_bin_safe(rmax) + I{1});
0071     }
0072
0073     I_pair from_R_rdr_to_N_bins(R r, R dr) const { return from_R_minmax_to_N_bins(r - dr, r + dr); }
0074     I next_N_bin(I bin) const { return bin + 1; }
0075   };
0076
0077   // axis_pow2_base
0078   //---------------
0079   // Assumes the numbers of fine/normal bins are powers of 2 that are inferred directly from the number of bits.
0080   template <typename R, typename I, unsigned M, unsigned N>
0081   struct axis_pow2_base : public axis_base<R, I, M, N> {
0082     static constexpr unsigned int c_M_end = 1 << M;
0083     static constexpr unsigned int c_N_end = 1 << N;
0084
0085     axis_pow2_base(R min, R max) : axis_base<R, I, M, N>(min, max, c_M_end, c_N_end) {}
0086
0087     unsigned int size_of_M() const { return c_M_end; }
0088     unsigned int size_of_N() const { return c_N_end; }
0089   };
0090
0091   // axis_pow2_u1
0092   //-------------
0093   // Specialization of axis_pow2 for the "U(1)" case where the coordinate is periodic with period (Rmax - Rmin).
0094   // In the "safe" methods below, bit masking serves as the modulo operator for out-of-range bin numbers.
0095   template <typename R, typename I, unsigned int M, unsigned int N>
0096   struct axis_pow2_u1 : public axis_pow2_base<R, I, M, N> {
0097     static constexpr I c_M_mask = (1 << M) - 1;
0098     static constexpr I c_N_mask = (1 << N) - 1;
0099
0100     axis_pow2_u1(R min, R max) : axis_pow2_base<R, I, M, N>(min, max) {}
0101
0102     I from_R_to_M_bin_safe(R r) const { return this->from_R_to_M_bin(r) & c_M_mask; }
0103     I from_R_to_N_bin_safe(R r) const { return this->from_R_to_N_bin(r) & c_N_mask; }
0104
0105     typename axis_base<R, I, M, N>::I_pair from_R_minmax_to_N_bins(R rmin, R rmax) const {
0106       return typename axis_base<R, I, M, N>::I_pair(from_R_to_N_bin_safe(rmin),
0107                                                     (this->from_R_to_N_bin(rmax) + I{1}) & c_N_mask);
0108     }
0109
0110     typename axis_base<R, I, M, N>::I_pair from_R_rdr_to_N_bins(R r, R dr) const {
0111       return from_R_minmax_to_N_bins(r - dr, r + dr);
0112     }
0113     I next_N_bin(I bin) const { return (bin + 1) & c_N_mask; }
0114   };
0115
0116   // axis_pow2
0117   //----------
0118   template <typename R, typename I, unsigned int M, unsigned int N>
0119   struct axis_pow2 : public axis_pow2_base<R, I, M, N> {
0120     axis_pow2(R min, R max) : axis_pow2_base<R, I, M, N>(min, max) {}
0121   };
0122
0123   // axis
0124   //-----
0125   template <typename R, typename I, unsigned M = 8 * sizeof(I), unsigned N = 8 * sizeof(I)>
0126   struct axis : public axis_base<R, I, M, N> {
0127     const unsigned int m_num_M_bins, m_num_N_bins;
0128
0129     axis(R min, R max, unsigned n_bins)
0130         : axis_base<R, I, M, N>(min, max, n_bins << this->c_M2N_shift, n_bins),
0131           m_num_M_bins(n_bins << this->c_M2N_shift),
0132           m_num_N_bins(n_bins) {}
0133
0134     axis(R min, R max, R bin_width) {
0135       R extent = max - min;
0136       unsigned int n_bins = std::ceil(extent / bin_width);
0137       R extra = (n_bins * bin_width - extent) / 2;
0138
0139       axis(min - extra, max + extra, n_bins);
0140     }
0141
0142     unsigned int size_of_M() const { return m_num_M_bins; }
0143     unsigned int size_of_N() const { return m_num_N_bins; }
0144   };
0145
0146   // binnor
0147   //---------------
0148   // To build and populate bins, do the following:
0149   // 1. Construct two axis objects, giving numbers of bits and bins, and extents.
0150   // 2. Construct a binnor from the axis objects, and begin_registration on it.
0151   // 3. Loop register_entry (optional: _safe) over pairs of coordinates, to fill
0152   //    m_cons with the corresponding pairs of bin indices (B_pairs).
0153   // 4. Call finalize_registration, which sorts m_ranks based on m_cons, making
0154   //    m_ranks into an in-order map into m_cons (as well as the inputs that were
0155   //    used to fill it). Final counts for all the bins, as well as starting
0156   //    indices for the bins (within m_ranks), are computed and stored in packed
0157   //    form (i.e., bit-fields) in m_bins.
0158   // 5. m_cons can be kept to do preselection when determining search ranges.
0159   //    Note that additional precision on Axis2 is screened out during sorting.
0160
0161   //
0162   // C - bin content type, to hold "bin population coordinates" in packed form (bit-fields)
0163   // A1, A2 - axis types
0164   // NB_first, NB_count - number of bits for storage of { first, count } pairs
0165
0166   template <typename C,
0167             typename A1,
0168             typename A2,
0169             unsigned int NB_first = 8 * sizeof(C),
0170             unsigned int NB_count = 8 * sizeof(C)>
0171   struct binnor {
0172     static_assert(std::is_same<C, unsigned short>() || std::is_same<C, unsigned int>());
0173     static_assert(std::is_same<typename A1::real_t, typename A2::real_t>());
0174     static_assert(A1::c_M + A2::c_M <= 32);
0175
0176     static constexpr unsigned int c_A1_mask = (1 << A1::c_M) - 1;
0177     static constexpr unsigned int c_A2_Mout_mask = ~(((1 << A2::c_M2N_shift) - 1) << A1::c_M);
0178
0179     // Pair of axis bin indices packed into unsigned.
0180     struct B_pair {
0181       unsigned int packed_value;  // bin1 in A1::c_M lower bits, bin2 above
0182
0183       B_pair() : packed_value(0) {}
0184       B_pair(unsigned int pv) : packed_value(pv) {}
0185       B_pair(typename A1::index_t i1, typename A2::index_t i2) : packed_value(i2 << A1::c_M | i1) {}
0186
0187       typename A1::index_t bin1() const { return packed_value & c_A1_mask; }
0188       typename A2::index_t bin2() const { return packed_value >> A1::c_M; }
0189
0190       unsigned int mask_A2_M_bins() const { return packed_value & c_A2_Mout_mask; }
0191     };
0192
0193     // Bin content pair (bit-fields).
0194     struct C_pair {
0195       C first : NB_first;
0196       C count : NB_count;
0197
0198       C_pair() : first(0), count(0) {}
0199       C_pair(C f, C c) : first(f), count(c) {}
0200
0201       bool empty() const { return count == 0; }
0202       C begin() const { return first; }
0203       C end() const { return first + count; }
0204     };
0205
0206     const A1 &m_a1;
0207     const A2 &m_a2;
0208     std::vector<B_pair> m_cons;
0209     std::vector<unsigned int> m_cons_masked;
0210     std::vector<C_pair> m_bins;
0211     std::vector<C> m_ranks;
0212     const bool m_radix_sort;
0213     const bool m_keep_cons;
0214     const bool m_do_masked;
0215
0216     binnor(const A1 &a1, const A2 &a2, bool radix = true, bool keep_cons = false)
0217         : m_a1(a1),
0218           m_a2(a2),
0219           m_bins(m_a1.size_of_N() * m_a2.size_of_N()),
0220           m_radix_sort(radix),
0221           m_keep_cons(keep_cons),
0222           m_do_masked(radix || !keep_cons) {}
0223
0224     // Access -- bin indices
0225
0226     B_pair m_bin_to_n_bin(B_pair m_bin) {
0227       return {m_a1.from_M_bin_to_N_bin(m_bin.bin1()), m_a2.from_M_bin_to_N_bin(m_bin.bin2())};
0228     }
0229
0230     B_pair get_n_bin(typename A1::index_t n1, typename A2::index_t n2) const { return {n1, n2}; }
0231
0232     B_pair get_n_bin(typename A1::real_t r1, typename A2::real_t r2) const {
0233       return {m_a1.from_R_to_N_bin(r1), m_a2.from_R_to_N_bin(r2)};
0234     }
0235
0236     // Access -- content of bins
0237
0238     C_pair &ref_content(B_pair n_bin) { return m_bins[n_bin.bin2() * m_a1.size_of_N() + n_bin.bin1()]; }
0239
0240     C_pair get_content(B_pair n_bin) const { return m_bins[n_bin.bin2() * m_a1.size_of_N() + n_bin.bin1()]; }
0241
0242     C_pair get_content(typename A1::index_t n1, typename A2::index_t n2) const {
0243       return m_bins[n2 * m_a1.size_of_N() + n1];
0244     }
0245
0246     C_pair get_content(typename A1::real_t r1, typename A2::real_t r2) const {
0247       return get_content(m_a1.from_R_to_N_bin(r1), m_a2.from_R_to_N_bin(r2));
0248     }
0249
0250     // Filling
0251
0252     void reset_contents(bool shrink_vectors = true) {
0253       if (m_keep_cons) {
0254         m_cons.clear();
0255         if (shrink_vectors)
0256           m_cons.shrink_to_fit();
0257       }
0258       m_bins.assign(m_bins.size(), C_pair());
0259       m_ranks.clear();
0260       if (shrink_vectors)
0261         m_ranks.shrink_to_fit();
0262     }
0263
0264     void begin_registration(C n_items) {
0265       if (m_keep_cons)
0266         m_cons.reserve(n_items);
0267       if (m_do_masked)
0268         m_cons_masked.reserve(n_items);
0269     }
0270
0271     void register_entry(B_pair bp) {
0272       if (m_keep_cons)
0273         m_cons.push_back(bp);
0274       if (m_do_masked)
0275         m_cons_masked.push_back(bp.mask_A2_M_bins());
0276     }
0277
0278     void register_entry(typename A1::real_t r1, typename A2::real_t r2) {
0279       register_entry({m_a1.from_R_to_M_bin(r1), m_a2.from_R_to_M_bin(r2)});
0280     }
0281
0282     void register_entry_safe(typename A1::real_t r1, typename A2::real_t r2) {
0283       register_entry({m_a1.from_R_to_M_bin_safe(r1), m_a2.from_R_to_M_bin_safe(r2)});
0284     }
0285
0286     // Do M-binning outside, potentially using R_to_M_bin_safe().
0287     void register_m_bins(typename A1::index_t m1, typename A2::index_t m2) { register_entry({m1, m2}); }
0288
0289     void finalize_registration() {
0290       unsigned int n_entries = m_do_masked ? m_cons_masked.size() : m_cons.size();
0291       if (m_radix_sort && n_entries >= (m_keep_cons ? 128 : 256)) {
0292         radix_sort<unsigned int, C> radix;
0293         radix.sort(m_cons_masked, m_ranks);
0294       } else {
0295         m_ranks.resize(n_entries);
0296         std::iota(m_ranks.begin(), m_ranks.end(), 0);
0297         if (m_do_masked)
0298           std::sort(
0299               m_ranks.begin(), m_ranks.end(), [&](auto &a, auto &b) { return m_cons_masked[a] < m_cons_masked[b]; });
0300         else
0301           std::sort(m_ranks.begin(), m_ranks.end(), [&](auto &a, auto &b) {
0302             return m_cons[a].mask_A2_M_bins() < m_cons[b].mask_A2_M_bins();
0303           });
0304       }
0305
0306       for (C i = 0; i < m_ranks.size(); ++i) {
0307         C j = m_ranks[i];
0308         C_pair &c_bin = ref_content(m_bin_to_n_bin(m_keep_cons ? m_cons[j] : B_pair(m_cons_masked[j])));
0309         if (c_bin.count == 0)
0310           c_bin.first = i;
0311         ++c_bin.count;
0312
0313 #ifdef DEBUG
0314         B_pair n_pair = m_bin_to_n_bin(m_cons[j]);
0315         printf("i=%4u j=%4u  %u %u %u %u\n", i, j, n_pair.bin1, n_pair.bin2, c_bin.first, c_bin.count);
0316 #endif
0317       }
0318
0319       if (m_keep_cons)
0320         m_cons.shrink_to_fit();
0321       if (m_do_masked) {
0322         m_cons_masked.clear();
0323         m_cons_masked.shrink_to_fit();
0324       }
0325     }
0326   };
0327
0328 }  // namespace mkfit
0329
0330 #endif