Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 14:31:50

0001 #ifndef TrackingTools_TrajectoryCleaning_src_OtherHashMaps
0002 #define TrackingTools_TrajectoryCleaning_src_OtherHashMaps
0003 
0004 #include <functional>
0005 #include <iostream>
0006 #include <iterator>
0007 #include <list>
0008 #include <map>
0009 #include <memory>
0010 #include <type_traits>
0011 #include <vector>
0012 
0013 namespace cmsutil {
0014 
0015   /*** The concept is the same of std::map<K, std::vector<V>>, but the implementation is much different (and the interface is not really compatible)
0016  *   It works only for K and V objects with trivial destructors (primitive types and bare pointers are fine)
0017  *   The main implementation difference w.r.t. unordered_map<K, std::vector<V>> is that this map is optimized to do very few memory allocations
0018  *   (in particular, clear does not redeme memory so if you just clear the map instead of deleting it you won't call malloc each time you fill it)
0019  *   The map doesn't rehash, so you should set a reasonable number of buckets (that you can change also when clearing the map)
0020  *   Note that too many buckets will make your map slow to clear (and possibly more prone to cache misses).
0021  *   Although it can take an allocator as template argument, it has been tested only with std::allocator.
0022  *   When used in the TrajectoryCleanerBySharedHits it works and it does improve the performance. Any other usecase was not checked at all.
0023  * */
0024   template <typename K,
0025             typename V,
0026             typename Hasher = std::hash<K>,
0027             typename Equals = std::equal_to<K>,
0028             typename Alloc = std::allocator<V> >
0029   class SimpleAllocHashMultiMap {
0030     // taking Alloc definition from http://www.codeproject.com/KB/cpp/allocator.aspx
0031     static_assert(std::conjunction<std::is_trivially_destructible<K>, std::is_trivially_destructible<V> >::value);
0032 
0033   public:
0034     typedef Hasher hasher;
0035     typedef SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc> map_type;
0036     typedef typename std::conditional<std::is_pointer<V>::value, V, V const &>::type
0037         value_ref;  // otherwise we get problems with 'const' if V is a pointer
0038 
0039     SimpleAllocHashMultiMap(size_t buckets, size_t keyRowSize, size_t valueRowSize, size_t maxRows = 50);
0040 
0041     ~SimpleAllocHashMultiMap();
0042 
0043     void clear(size_t newBucketSize = 0) {
0044       if (newBucketSize != 0) {
0045         if (newBucketSize > bucketCapacity_) {
0046           ptrAlloc_.deallocate(buckets_, bucketCapacity_);
0047           bucketCapacity_ = newBucketSize;
0048           buckets_ = ptrAlloc_.allocate(bucketCapacity_);
0049         }
0050         bucketSize_ = newBucketSize;
0051       }
0052       memset(buckets_, 0, bucketSize_ * sizeof(KeyItem *));
0053       currentKeyRow_ = keyRows_.begin();
0054       nextKeyItem_ = keyRows_.front();
0055       keyEndMarker_ = nextKeyItem_ + keyRowSize_;
0056       currentValueRow_ = valueRows_.begin();
0057       nextValueItem_ = valueRows_.front();
0058       valueEndMarker_ = nextValueItem_ + valueRowSize_;
0059       if ((keyRows_.size() > maxRows_) || (valueRows_.size() > maxRows_))
0060         freeRows();
0061     }
0062     void freeRows();
0063 
0064     bool empty() const { return nextKeyItem_ == keyRows_.front(); }
0065 
0066     void insert(K const &key, value_ref value);
0067 
0068     struct ValueItem {
0069       ValueItem(ValueItem *next1, value_ref val) : next(next1), value(val) {}
0070       ValueItem *next;
0071       V value;
0072       typedef V value_type;
0073       const value_type &operator()() const { return value; }
0074     };
0075 
0076     struct KeyItem {
0077       KeyItem(KeyItem *next1, K const &key1, ValueItem *val1) : key(key1), next(next1), value(val1) {}
0078       K key;
0079       KeyItem *next;
0080       ValueItem *value;
0081       typedef KeyItem value_type;
0082       const value_type &operator()() const { return *this; }
0083     };
0084 
0085     template <typename Item>
0086     class item_iterator {
0087     public:
0088       typedef ::std::forward_iterator_tag iterator_category;
0089       typedef const typename Item::value_type value_type;
0090       typedef const value_type &reference;
0091       typedef const value_type *pointer;
0092       typedef ptrdiff_t difference_type;
0093       typedef item_iterator<Item> self_type;
0094 
0095       item_iterator() : it_(nullptr) {}
0096       item_iterator(const Item *it) : it_(it) {}
0097       const value_type &operator*() const { return (*it_)(); }
0098       const value_type *operator->() const { return &(*it_)(); }
0099       self_type &operator++() {
0100         if (it_ != nullptr)
0101           it_ = it_->next;
0102         return *this;
0103       }
0104       bool operator==(const self_type &other) const { return it_ == other.it_; }
0105       bool operator!=(const self_type &other) const { return it_ != other.it_; }
0106       bool good() const { return (it_ != nullptr); }
0107 
0108     private:
0109       const Item *it_;
0110     };
0111     typedef item_iterator<ValueItem> value_iterator;
0112 
0113     value_iterator values(K const &key);
0114 
0115   private:
0116     typedef typename Alloc::template rebind<KeyItem>::other KeyItemAllocator;
0117     typedef typename Alloc::template rebind<KeyItem *>::other KeyItemPtrAllocator;
0118     typedef typename Alloc::template rebind<ValueItem>::other ValueItemAllocator;
0119 
0120     // --- buckets ---
0121     size_t bucketSize_, bucketCapacity_;
0122     KeyItem **buckets_;
0123     // --- keys ---
0124     size_t keyRowSize_;
0125     std::list<KeyItem *> keyRows_;
0126     typename std::list<KeyItem *>::iterator
0127         currentKeyRow_;  // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
0128     KeyItem *nextKeyItem_, *keyEndMarker_;
0129     // --- values ---
0130     size_t valueRowSize_;
0131     std::list<ValueItem *> valueRows_;
0132     typename std::list<ValueItem *>::iterator
0133         currentValueRow_;  // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
0134     ValueItem *nextValueItem_, *valueEndMarker_;
0135     // --- other ---
0136     size_t maxRows_;
0137     Hasher hasher_;
0138     Equals eq_;
0139     KeyItemAllocator keyAlloc_;
0140     ValueItemAllocator valueAlloc_;
0141     KeyItemPtrAllocator ptrAlloc_;
0142 
0143     KeyItem *push_back_(K const &key, KeyItem *next);
0144     ValueItem *push_back_(value_ref value, ValueItem *next);
0145     KeyItem &find_or_insert_(K const &key);
0146 
0147   public:
0148     void dump() {
0149       std::cout << "Dumping HASH MULTIMAP" << std::endl;
0150       std::cout << "  Key items: "
0151                 << (std::distance(keyRows_.begin(), currentKeyRow_) * keyRowSize_ + (nextKeyItem_ - *currentKeyRow_))
0152                 << std::endl;
0153       std::cout << "  Value items: "
0154                 << (std::distance(valueRows_.begin(), currentValueRow_) * valueRowSize_ +
0155                     (nextValueItem_ - *currentValueRow_))
0156                 << std::endl;
0157       size_t row = 0;
0158       std::cout << "  Buckets (size " << bucketSize_ << ", capacity " << bucketCapacity_ << ")";
0159       for (KeyItem **p = buckets_; p != buckets_ + bucketSize_; ++p, ++row) {
0160         std::cout << "      [" << row << "] " << *p << std::endl;
0161       }
0162       row = 0;
0163       std::cout << "  Key Items " << std::endl;
0164       for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last;
0165            ++it, ++row) {
0166         KeyItem *lastI = *it + keyRowSize_;
0167         std::cout << "   Key Row " << row << " (of size  " << keyRowSize_ << ")" << std::endl;
0168         for (KeyItem *p = *it; p != lastI; ++p) {
0169           std::cout << "      @ " << p << " [" << p->key << ", @" << p->value << "], next = " << p->next << std::endl;
0170           if ((it == currentKeyRow_) && (p == nextKeyItem_ - 1)) {
0171             std::cout << "      ^^^ this was the last valid item." << std::endl;
0172             last = 0;
0173             break;
0174           }
0175         }
0176         if (lastI == 0)
0177           break;
0178       }
0179       row = 0;
0180       std::cout << "  Value Items " << std::endl;
0181       for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last;
0182            ++it, ++row) {
0183         ValueItem *lastI = *it + valueRowSize_;
0184         std::cout << "   Value Row " << row << " (of size  " << valueRowSize_ << ")" << std::endl;
0185         for (ValueItem *p = *it; p != lastI; ++p) {
0186           std::cout << "      @ " << p << " [" << p->value << "], next = " << p->next << std::endl;
0187           if ((it == currentValueRow_) && (p == nextValueItem_ - 1)) {
0188             std::cout << "      ^^^ this was the last valid item." << std::endl;
0189             last = 0;
0190             break;
0191           }
0192         }
0193         if (lastI == 0)
0194           break;
0195       }
0196       std::cout << "  End of dump" << std::endl;
0197     }
0198   };
0199 
0200   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0201   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::SimpleAllocHashMultiMap(size_t buckets,
0202                                                                                 size_t keyRowSize,
0203                                                                                 size_t valueRowSize,
0204                                                                                 size_t maxRows)
0205       : bucketSize_(buckets),
0206         bucketCapacity_(bucketSize_),
0207         keyRowSize_(keyRowSize),
0208         valueRowSize_(valueRowSize),
0209         maxRows_(maxRows) {
0210     buckets_ = ptrAlloc_.allocate(bucketCapacity_);
0211     keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
0212     valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
0213     clear();
0214   }
0215   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0216   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::~SimpleAllocHashMultiMap() {
0217     for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last; ++it) {
0218       keyAlloc_.deallocate(*it, keyRowSize_);
0219     }
0220     for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last; ++it) {
0221       valueAlloc_.deallocate(*it, valueRowSize_);
0222     }
0223     ptrAlloc_.deallocate(buckets_, bucketCapacity_);
0224   }
0225 
0226   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0227   void SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::freeRows() {
0228     if (keyRows_.size() > maxRows_) {
0229       //std::cerr << "Freeing key rows, current size is " << keyRows_.size() << std::endl;
0230       typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end();
0231       for (std::advance(it, maxRows_); it != last; ++it) {
0232         keyAlloc_.deallocate(*it, keyRowSize_);
0233       }
0234       keyRows_.resize(maxRows_);
0235     }
0236     if (valueRows_.size() > maxRows_) {
0237       //std::cerr << "Freeing value rows, current size is " << valueRows_.size() << std::endl;
0238       typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end();
0239       for (std::advance(it, maxRows_); it != last; ++it) {
0240         valueAlloc_.deallocate(*it, valueRowSize_);
0241       }
0242       valueRows_.resize(maxRows_);
0243     }
0244   }
0245 
0246   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0247   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::KeyItem *
0248   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::push_back_(K const &key, KeyItem *next) {
0249     if (nextKeyItem_ == keyEndMarker_) {
0250       ++currentKeyRow_;
0251       if (currentKeyRow_ == keyRows_.end()) {
0252         keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
0253         currentKeyRow_ = keyRows_.end();
0254         --currentKeyRow_;  // end - 1 doesn't work!
0255       }
0256       nextKeyItem_ = *currentKeyRow_;
0257       keyEndMarker_ = nextKeyItem_ + keyRowSize_;
0258     }
0259     keyAlloc_.construct(nextKeyItem_, KeyItem(next, key, nullptr));
0260     nextKeyItem_++;
0261     return (nextKeyItem_ - 1);
0262   }
0263   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0264   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::ValueItem *
0265   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::push_back_(value_ref value, ValueItem *next) {
0266     if (nextValueItem_ == valueEndMarker_) {
0267       ++currentValueRow_;
0268       if (currentValueRow_ == valueRows_.end()) {
0269         valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
0270         currentValueRow_ = valueRows_.end();
0271         --currentValueRow_;  // end - 1 doesn't work!
0272       }
0273       nextValueItem_ = *currentValueRow_;
0274       valueEndMarker_ = nextValueItem_ + valueRowSize_;
0275     }
0276     valueAlloc_.construct(nextValueItem_, ValueItem(next, value));
0277     nextValueItem_++;
0278     return (nextValueItem_ - 1);
0279   }
0280 
0281   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0282   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::KeyItem &
0283   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::find_or_insert_(K const &key) {
0284     //std::cout << "Find or insert for key " << key << std::endl;
0285     size_t hash = hasher_(key);
0286     KeyItem *&buck = buckets_[hash % bucketSize_];
0287     KeyItem *curr = buck;
0288     while (curr) {
0289       if (eq_(curr->key, key)) {
0290         //std::cout << "  Key " << key << " was found." << std::endl;
0291         return *curr;
0292       }
0293       curr = curr->next;
0294     }
0295     buck = push_back_(key, buck);
0296     return *buck;
0297   }
0298 
0299   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0300   void SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::insert(K const &key, value_ref value) {
0301     //std::cout << "Pushing back value " << value << " for key " << key << std::endl;
0302     KeyItem &k = find_or_insert_(key);
0303     //std::cout << "Key " << (k.value ? "exists" : " is new") << std::endl;
0304     k.value = push_back_(value, k.value);
0305   }
0306 
0307   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0308   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::value_iterator
0309   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::values(K const &key) {
0310     //std::cout << "Gettinv values for key " << key << std::endl;
0311     size_t hash = hasher_(key);
0312     for (KeyItem *curr = buckets_[hash % bucketSize_]; curr; curr = curr->next) {
0313       if (eq_(curr->key, key))
0314         return value_iterator(curr->value);
0315     }
0316     return value_iterator();
0317   }
0318 
0319   /*** Very very simple map implementation
0320  *   It's just a std::vector<pair<key,value>>, and the operator[] does a linear search to find the key (it's O(N) time, both if the key exists and if it doesn't)
0321  *   Anyway, if your map is very small and if you clear it often, it performs better than more complex variants
0322  *   Semantics is as std::map, except that only very few methods are implemented.
0323  * */
0324   template <typename K, typename V>
0325   class UnsortedDumbVectorMap {
0326   public:
0327     typedef std::pair<K, V> value_type;
0328     typedef typename std::vector<value_type>::const_iterator const_iterator;
0329     typedef typename std::vector<value_type>::iterator iterator;  // but please don't mutate the keys
0330 
0331     void clear() { data_.clear(); }
0332     bool empty() const { return data_.empty(); }
0333     const_iterator begin() const { return data_.begin(); }
0334     const_iterator end() const { return data_.end(); }
0335     iterator begin() { return data_.begin(); }
0336     iterator end() { return data_.end(); }
0337 
0338     V &operator[](const K &k) {
0339       for (typename std::vector<value_type>::iterator it = data_.begin(), ed = data_.end(); it != ed; ++it) {
0340         if (it->first == k)
0341           return it->second;
0342       }
0343       data_.push_back(value_type(k, V()));
0344       return data_.back().second;
0345     }
0346 
0347     UnsortedDumbVectorMap() {}
0348 
0349   private:
0350     std::vector<value_type> data_;
0351   };
0352 
0353 }  // namespace cmsutil
0354 
0355 #endif