Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 10:45:25

0001 #ifndef COMMONTOOLS_RECOALGOS_FKDTREE_H
0002 #define COMMONTOOLS_RECOALGOS_FKDTREE_H
0003 
0004 #include <vector>
0005 #include <array>
0006 #include <algorithm>
0007 #include <cmath>
0008 #include <utility>
0009 #include "FKDPoint.h"
0010 #include "FQueue.h"
0011 
0012 // Author: Felice Pantaleo
0013 // email: felice.pantaleo@cern.ch
0014 // date: 08/05/2017
0015 // Description: This class provides a k-d tree implementation targeting modern architectures.
0016 // Building each level of the FKDTree can be done in parallel by different threads.
0017 // It produces a compact array of nodes in memory thanks to the different space partitioning method used.
0018 
0019 // Fast version of the integer logarithm
0020 namespace {
0021   const std::array<unsigned int, 32> MultiplyDeBruijnBitPosition{{0,  9,  1,  10, 13, 21, 2,  29, 11, 14, 16,
0022                                                                   18, 22, 25, 3,  30, 8,  12, 20, 28, 15, 17,
0023                                                                   24, 7,  19, 27, 23, 6,  26, 5,  4,  31}};
0024   unsigned int ilog2(unsigned int v) {
0025     v |= v >> 1;  // first round down to one less than a power of 2
0026     v |= v >> 2;
0027     v |= v >> 4;
0028     v |= v >> 8;
0029     v |= v >> 16;
0030     return MultiplyDeBruijnBitPosition[(unsigned int)(v * 0x07C4ACDDU) >> 27];
0031   }
0032 }  // namespace
0033 
0034 template <class TYPE, unsigned int numberOfDimensions>
0035 class FKDTree {
0036 public:
0037   FKDTree() {
0038     theNumberOfPoints = 0;
0039     theDepth = 0;
0040   }
0041 
0042   bool empty() { return theNumberOfPoints == 0; }
0043 
0044   // One can search for all the points which are contained in a k-dimensional box.
0045   // Searching is done by providing the two k-dimensional points in the minimum and maximum corners.
0046   // The vector that will contain the indices of the points that lay inside the box is also needed.
0047   // Indices are pushed into foundPoints, which is not checked for emptiness at the beginning,
0048   // nor memory is reserved for it.
0049   // Searching is done using a Breadth-first search, level after level.
0050   void search(const FKDPoint<TYPE, numberOfDimensions>& minPoint,
0051               const FKDPoint<TYPE, numberOfDimensions>& maxPoint,
0052               std::vector<unsigned int>& foundPoints) {
0053     //going down the FKDTree, one needs track which indices have to be visited in the following level.
0054     //a custom queue is created, since std::queue is based on lists which are sometimes not performing
0055     // well on computing accelerators
0056     // The initial size of the queue has to be a power of two for allowing fast modulo  % operation.
0057     FQueue<unsigned int> indicesToVisit(16);
0058 
0059     //The root element is pushed first
0060     indicesToVisit.push_back(0);
0061     unsigned int index;
0062     bool intersection;
0063     unsigned int dimension;
0064     unsigned int numberOfindicesToVisitThisDepth;
0065     unsigned int numberOfSonsToVisitNext;
0066     unsigned int firstSonToVisitNext;
0067 
0068     //The loop over levels of the FKDTree starts here
0069     for (unsigned int depth = 0; depth < theDepth + 1; ++depth) {
0070       // At each level, comparisons are performed for a different dimension in round robin.
0071       dimension = depth % numberOfDimensions;
0072       numberOfindicesToVisitThisDepth = indicesToVisit.size();
0073       // Loop over the nodes to be visit at this level
0074       for (unsigned int visitedindicesThisDepth = 0; visitedindicesThisDepth < numberOfindicesToVisitThisDepth;
0075            visitedindicesThisDepth++) {
0076         index = indicesToVisit[visitedindicesThisDepth];
0077         // check if the element's dimension lays between the two box borders
0078         intersection = intersects(index, minPoint, maxPoint, dimension);
0079         firstSonToVisitNext = leftSonIndex(index);
0080 
0081         if (intersection) {
0082           // Check if the element is contained in the box and push it to the result
0083           if (is_in_the_box(index, minPoint, maxPoint)) {
0084             foundPoints.emplace_back(theIds[index]);
0085           }
0086           //if the element is between the box borders, both the its sons have to be visited
0087           numberOfSonsToVisitNext =
0088               (firstSonToVisitNext < theNumberOfPoints) + ((firstSonToVisitNext + 1) < theNumberOfPoints);
0089         } else {
0090           // depending on the position of the element wrt the box, one son will be visited (if it exists)
0091           firstSonToVisitNext += (theDimensions[dimension][index] < minPoint[dimension]);
0092 
0093           numberOfSonsToVisitNext =
0094               std::min((firstSonToVisitNext < theNumberOfPoints) + ((firstSonToVisitNext + 1) < theNumberOfPoints), 1);
0095         }
0096 
0097         // the indices of the sons to be visited in the next iteration are pushed in the queue
0098         for (unsigned int whichSon = 0; whichSon < numberOfSonsToVisitNext; ++whichSon) {
0099           indicesToVisit.push_back(firstSonToVisitNext + whichSon);
0100         }
0101       }
0102       // a new element is popped from the queue
0103       indicesToVisit.pop_front(numberOfindicesToVisitThisDepth);
0104     }
0105   }
0106 
0107   // A vector of K-dimensional points needs to be passed in order to build the kdtree.
0108   // The order of the elements in the vector will be modified.
0109   void build(std::vector<FKDPoint<TYPE, numberOfDimensions> >& points) {
0110     // initialization of the data members
0111     theNumberOfPoints = points.size();
0112     theDepth = ilog2(theNumberOfPoints);
0113     theIntervalLength.resize(theNumberOfPoints, 0);
0114     theIntervalMin.resize(theNumberOfPoints, 0);
0115     for (unsigned int i = 0; i < numberOfDimensions; ++i)
0116       theDimensions[i].resize(theNumberOfPoints);
0117     theIds.resize(theNumberOfPoints);
0118 
0119     // building is done by reordering elements in a partition starting at theIntervalMin
0120     // for a number of elements theIntervalLength
0121     unsigned int dimension;
0122     theIntervalMin[0] = 0;
0123     theIntervalLength[0] = theNumberOfPoints;
0124 
0125     // building for each level starts here
0126     for (unsigned int depth = 0; depth < theDepth; ++depth) {
0127       // A heapified left-balanced tree can be represented in memory as an array.
0128       // Each level contains a power of two number of elements and starts from element 2^depth -1
0129       dimension = depth % numberOfDimensions;
0130       unsigned int firstIndexInDepth = (1 << depth) - 1;
0131       unsigned int maxDepth = (1 << depth);
0132       for (unsigned int indexInDepth = 0; indexInDepth < maxDepth; ++indexInDepth) {
0133         unsigned int indexInArray = firstIndexInDepth + indexInDepth;
0134         unsigned int leftSonIndexInArray = 2 * indexInArray + 1;
0135         unsigned int rightSonIndexInArray = leftSonIndexInArray + 1;
0136 
0137         // partitioning is done by choosing the pivotal element that keeps the tree heapified
0138         // and left-balanced
0139         unsigned int whichElementInInterval = partition_complete_kdtree(theIntervalLength[indexInArray]);
0140         // the elements have been partitioned in two unsorted subspaces (one containing the elements
0141         // smaller than the pivot, the other containing those greater than the pivot)
0142         std::nth_element(points.begin() + theIntervalMin[indexInArray],
0143                          points.begin() + theIntervalMin[indexInArray] + whichElementInInterval,
0144                          points.begin() + theIntervalMin[indexInArray] + theIntervalLength[indexInArray],
0145                          [dimension](const FKDPoint<TYPE, numberOfDimensions>& a,
0146                                      const FKDPoint<TYPE, numberOfDimensions>& b) -> bool {
0147                            if (a[dimension] == b[dimension])
0148                              return a.getId() < b.getId();
0149                            else
0150                              return a[dimension] < b[dimension];
0151                          });
0152         // the element is placed in its final position in the internal array representation
0153         // of the tree
0154         add_at_position(points[theIntervalMin[indexInArray] + whichElementInInterval], indexInArray);
0155         if (leftSonIndexInArray < theNumberOfPoints) {
0156           theIntervalMin[leftSonIndexInArray] = theIntervalMin[indexInArray];
0157           theIntervalLength[leftSonIndexInArray] = whichElementInInterval;
0158         }
0159 
0160         if (rightSonIndexInArray < theNumberOfPoints) {
0161           theIntervalMin[rightSonIndexInArray] = theIntervalMin[indexInArray] + whichElementInInterval + 1;
0162           theIntervalLength[rightSonIndexInArray] = (theIntervalLength[indexInArray] - 1 - whichElementInInterval);
0163         }
0164       }
0165     }
0166     // the last level of the tree may not be complete and needs special treatment
0167     dimension = theDepth % numberOfDimensions;
0168     unsigned int firstIndexInDepth = (1 << theDepth) - 1;
0169     for (unsigned int indexInArray = firstIndexInDepth; indexInArray < theNumberOfPoints; ++indexInArray) {
0170       add_at_position(points[theIntervalMin[indexInArray]], indexInArray);
0171     }
0172   }
0173   // returns the number of points in the FKDTree
0174   std::size_t size() const { return theNumberOfPoints; }
0175 
0176 private:
0177   // returns the index of the element which makes the FKDtree a left-complete heap
0178   // e.g.: if we have 6 elements, the tree will be shaped like
0179   //                 O
0180   //                / '\'
0181   //               O    O
0182   //              /'\' /
0183   //             O   OO
0184   //
0185   // This will return for a length of 6 the 4th element, which will partition the tree so that
0186   // 3 elements are on its left and 2 elements are on its right
0187   unsigned int partition_complete_kdtree(unsigned int length) {
0188     if (length == 1)
0189       return 0;
0190     unsigned int index = 1 << (ilog2(length));
0191 
0192     if ((index / 2) - 1 <= length - index)
0193       return index - 1;
0194     else
0195       return length - index / 2;
0196   }
0197 
0198   // returns the index of an element left son in the array representation
0199   unsigned int leftSonIndex(unsigned int index) const { return 2 * index + 1; }
0200   // returns the index of an element right son in the array representation
0201   unsigned int rightSonIndex(unsigned int index) const { return 2 * index + 2; }
0202 
0203   //check if one element's dimension is between minPoint's and maxPoint's dimension
0204   bool intersects(unsigned int index,
0205                   const FKDPoint<TYPE, numberOfDimensions>& minPoint,
0206                   const FKDPoint<TYPE, numberOfDimensions>& maxPoint,
0207                   int dimension) const {
0208     return (theDimensions[dimension][index] <= maxPoint[dimension] &&
0209             theDimensions[dimension][index] >= minPoint[dimension]);
0210   }
0211 
0212   // check if an element is completely in the box
0213   bool is_in_the_box(unsigned int index,
0214                      const FKDPoint<TYPE, numberOfDimensions>& minPoint,
0215                      const FKDPoint<TYPE, numberOfDimensions>& maxPoint) const {
0216     for (unsigned int i = 0; i < numberOfDimensions; ++i) {
0217       if ((theDimensions[i][index] <= maxPoint[i] && theDimensions[i][index] >= minPoint[i]) == false)
0218         return false;
0219     }
0220     return true;
0221   }
0222 
0223   // places an element at the specified position in the internal data structure
0224   void add_at_position(const FKDPoint<TYPE, numberOfDimensions>& point, const unsigned int position) {
0225     for (unsigned int i = 0; i < numberOfDimensions; ++i)
0226       theDimensions[i][position] = point[i];
0227     theIds[position] = point.getId();
0228   }
0229 
0230   void add_at_position(FKDPoint<TYPE, numberOfDimensions>&& point, const unsigned int position) {
0231     for (unsigned int i = 0; i < numberOfDimensions; ++i)
0232       theDimensions[i][position] = point[i];
0233     theIds[position] = point.getId();
0234   }
0235 
0236   unsigned int theNumberOfPoints;
0237   unsigned int theDepth;
0238 
0239   // a SoA containing all the dimensions for each point
0240   std::array<std::vector<TYPE>, numberOfDimensions> theDimensions;
0241   std::vector<unsigned int> theIntervalLength;
0242   std::vector<unsigned int> theIntervalMin;
0243   std::vector<unsigned int> theIds;
0244 };
0245 
0246 #endif