Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:31:39

0001 #ifndef TrackingTools_TrajectoryCleaning_src_OtherHashMaps
0002 #define TrackingTools_TrajectoryCleaning_src_OtherHashMaps
0003 
0004 #include <functional>
0005 #include <iostream>
0006 #include <iterator>
0007 #include <list>
0008 #include <map>
0009 #include <memory>
0010 #include <type_traits>
0011 #include <vector>
0012 
0013 namespace cmsutil {
0014 
0015   /*** The concept is the same of std::map<K, std::vector<V>>, but the implementation is much different (and the interface is not really compatible)
0016  *   It works only for K and V objects with trivial destructors (primitive types and bare pointers are fine)
0017  *   The main implementation difference w.r.t. unordered_map<K, std::vector<V>> is that this map is optimized to do very few memory allocations
0018  *   (in particular, clear does not redeme memory so if you just clear the map instead of deleting it you won't call malloc each time you fill it)
0019  *   The map doesn't rehash, so you should set a reasonable number of buckets (that you can change also when clearing the map)
0020  *   Note that too many buckets will make your map slow to clear (and possibly more prone to cache misses).
0021  *   Although it can take an allocator as template argument, it has been tested only with std::allocator.
0022  *   When used in the TrajectoryCleanerBySharedHits it works and it does improve the performance. Any other usecase was not checked at all.
0023  * */
0024   template <typename K,
0025             typename V,
0026             typename Hasher = std::hash<K>,
0027             typename Equals = std::equal_to<K>,
0028             typename Alloc = std::allocator<V> >
0029   class SimpleAllocHashMultiMap {
0030     // taking Alloc definition from http://www.codeproject.com/KB/cpp/allocator.aspx
0031     static_assert(std::conjunction<std::is_trivially_destructible<K>, std::is_trivially_destructible<V> >::value);
0032 
0033   public:
0034     typedef Hasher hasher;
0035     typedef SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc> map_type;
0036     typedef typename std::conditional<std::is_pointer<V>::value, V, V const &>::type
0037         value_ref;  // otherwise we get problems with 'const' if V is a pointer
0038 
0039     SimpleAllocHashMultiMap(size_t buckets, size_t keyRowSize, size_t valueRowSize, size_t maxRows = 50);
0040 
0041     ~SimpleAllocHashMultiMap();
0042 
0043     void clear(size_t newBucketSize = 0) {
0044       if (newBucketSize != 0) {
0045         if (newBucketSize > bucketCapacity_) {
0046           ptrAlloc_.deallocate(buckets_, bucketCapacity_);
0047           bucketCapacity_ = newBucketSize;
0048           buckets_ = ptrAlloc_.allocate(bucketCapacity_);
0049         }
0050         bucketSize_ = newBucketSize;
0051       }
0052       memset(buckets_, 0, bucketSize_ * sizeof(KeyItem *));
0053       currentKeyRow_ = keyRows_.begin();
0054       nextKeyItem_ = keyRows_.front();
0055       keyEndMarker_ = nextKeyItem_ + keyRowSize_;
0056       currentValueRow_ = valueRows_.begin();
0057       nextValueItem_ = valueRows_.front();
0058       valueEndMarker_ = nextValueItem_ + valueRowSize_;
0059       if ((keyRows_.size() > maxRows_) || (valueRows_.size() > maxRows_))
0060         freeRows();
0061     }
0062     void freeRows();
0063 
0064     bool empty() const { return nextKeyItem_ == keyRows_.front(); }
0065 
0066     void insert(K const &key, value_ref value);
0067 
0068     struct ValueItem {
0069       ValueItem(ValueItem *next1, value_ref val) : next(next1), value(val) {}
0070       ValueItem *next;
0071       V value;
0072       typedef V value_type;
0073       const value_type &operator()() const { return value; }
0074     };
0075 
0076     struct KeyItem {
0077       KeyItem(KeyItem *next1, K const &key1, ValueItem *val1) : key(key1), next(next1), value(val1) {}
0078       K key;
0079       KeyItem *next;
0080       ValueItem *value;
0081       typedef KeyItem value_type;
0082       const value_type &operator()() const { return *this; }
0083     };
0084 
0085     template <typename Item>
0086     class item_iterator {
0087     public:
0088       typedef ::std::forward_iterator_tag iterator_category;
0089       typedef const typename Item::value_type value_type;
0090       typedef const value_type &reference;
0091       typedef const value_type *pointer;
0092       typedef ptrdiff_t difference_type;
0093       typedef item_iterator<Item> self_type;
0094 
0095       item_iterator() : it_(nullptr) {}
0096       item_iterator(const Item *it) : it_(it) {}
0097       const value_type &operator*() const { return (*it_)(); }
0098       const value_type *operator->() const { return &(*it_)(); }
0099       self_type &operator++() {
0100         if (it_ != nullptr)
0101           it_ = it_->next;
0102         return *this;
0103       }
0104       bool operator==(const self_type &other) const { return it_ == other.it_; }
0105       bool operator!=(const self_type &other) const { return it_ != other.it_; }
0106       bool good() const { return (it_ != nullptr); }
0107 
0108     private:
0109       const Item *it_;
0110     };
0111     typedef item_iterator<ValueItem> value_iterator;
0112 
0113     value_iterator values(K const &key);
0114 
0115   private:
0116     using AllocTraits = std::allocator_traits<Alloc>;
0117     using KeyItemAllocator = typename AllocTraits::template rebind_alloc<KeyItem>;
0118     using KeyItemPtrAllocator = typename AllocTraits::template rebind_alloc<KeyItem *>;
0119     using ValueItemAllocator = typename AllocTraits::template rebind_alloc<ValueItem>;
0120 
0121     // --- buckets ---
0122     size_t bucketSize_, bucketCapacity_;
0123     KeyItem **buckets_;
0124     // --- keys ---
0125     size_t keyRowSize_;
0126     std::list<KeyItem *> keyRows_;
0127     typename std::list<KeyItem *>::iterator
0128         currentKeyRow_;  // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
0129     KeyItem *nextKeyItem_, *keyEndMarker_;
0130     // --- values ---
0131     size_t valueRowSize_;
0132     std::list<ValueItem *> valueRows_;
0133     typename std::list<ValueItem *>::iterator
0134         currentValueRow_;  // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
0135     ValueItem *nextValueItem_, *valueEndMarker_;
0136     // --- other ---
0137     size_t maxRows_;
0138     Hasher hasher_;
0139     Equals eq_;
0140     KeyItemAllocator keyAlloc_;
0141     ValueItemAllocator valueAlloc_;
0142     KeyItemPtrAllocator ptrAlloc_;
0143 
0144     KeyItem *push_back_(K const &key, KeyItem *next);
0145     ValueItem *push_back_(value_ref value, ValueItem *next);
0146     KeyItem &find_or_insert_(K const &key);
0147 
0148   public:
0149     void dump() {
0150       std::cout << "Dumping HASH MULTIMAP" << std::endl;
0151       std::cout << "  Key items: "
0152                 << (std::distance(keyRows_.begin(), currentKeyRow_) * keyRowSize_ + (nextKeyItem_ - *currentKeyRow_))
0153                 << std::endl;
0154       std::cout << "  Value items: "
0155                 << (std::distance(valueRows_.begin(), currentValueRow_) * valueRowSize_ +
0156                     (nextValueItem_ - *currentValueRow_))
0157                 << std::endl;
0158       size_t row = 0;
0159       std::cout << "  Buckets (size " << bucketSize_ << ", capacity " << bucketCapacity_ << ")";
0160       for (KeyItem **p = buckets_; p != buckets_ + bucketSize_; ++p, ++row) {
0161         std::cout << "      [" << row << "] " << *p << std::endl;
0162       }
0163       row = 0;
0164       std::cout << "  Key Items " << std::endl;
0165       for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last;
0166            ++it, ++row) {
0167         KeyItem *lastI = *it + keyRowSize_;
0168         std::cout << "   Key Row " << row << " (of size  " << keyRowSize_ << ")" << std::endl;
0169         for (KeyItem *p = *it; p != lastI; ++p) {
0170           std::cout << "      @ " << p << " [" << p->key << ", @" << p->value << "], next = " << p->next << std::endl;
0171           if ((it == currentKeyRow_) && (p == nextKeyItem_ - 1)) {
0172             std::cout << "      ^^^ this was the last valid item." << std::endl;
0173             last = 0;
0174             break;
0175           }
0176         }
0177         if (lastI == 0)
0178           break;
0179       }
0180       row = 0;
0181       std::cout << "  Value Items " << std::endl;
0182       for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last;
0183            ++it, ++row) {
0184         ValueItem *lastI = *it + valueRowSize_;
0185         std::cout << "   Value Row " << row << " (of size  " << valueRowSize_ << ")" << std::endl;
0186         for (ValueItem *p = *it; p != lastI; ++p) {
0187           std::cout << "      @ " << p << " [" << p->value << "], next = " << p->next << std::endl;
0188           if ((it == currentValueRow_) && (p == nextValueItem_ - 1)) {
0189             std::cout << "      ^^^ this was the last valid item." << std::endl;
0190             last = 0;
0191             break;
0192           }
0193         }
0194         if (lastI == 0)
0195           break;
0196       }
0197       std::cout << "  End of dump" << std::endl;
0198     }
0199   };
0200 
0201   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0202   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::SimpleAllocHashMultiMap(size_t buckets,
0203                                                                                 size_t keyRowSize,
0204                                                                                 size_t valueRowSize,
0205                                                                                 size_t maxRows)
0206       : bucketSize_(buckets),
0207         bucketCapacity_(bucketSize_),
0208         keyRowSize_(keyRowSize),
0209         valueRowSize_(valueRowSize),
0210         maxRows_(maxRows) {
0211     buckets_ = ptrAlloc_.allocate(bucketCapacity_);
0212     keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
0213     valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
0214     clear();
0215   }
0216   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0217   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::~SimpleAllocHashMultiMap() {
0218     for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last; ++it) {
0219       keyAlloc_.deallocate(*it, keyRowSize_);
0220     }
0221     for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last; ++it) {
0222       valueAlloc_.deallocate(*it, valueRowSize_);
0223     }
0224     ptrAlloc_.deallocate(buckets_, bucketCapacity_);
0225   }
0226 
0227   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0228   void SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::freeRows() {
0229     if (keyRows_.size() > maxRows_) {
0230       //std::cerr << "Freeing key rows, current size is " << keyRows_.size() << std::endl;
0231       typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end();
0232       for (std::advance(it, maxRows_); it != last; ++it) {
0233         keyAlloc_.deallocate(*it, keyRowSize_);
0234       }
0235       keyRows_.resize(maxRows_);
0236     }
0237     if (valueRows_.size() > maxRows_) {
0238       //std::cerr << "Freeing value rows, current size is " << valueRows_.size() << std::endl;
0239       typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end();
0240       for (std::advance(it, maxRows_); it != last; ++it) {
0241         valueAlloc_.deallocate(*it, valueRowSize_);
0242       }
0243       valueRows_.resize(maxRows_);
0244     }
0245   }
0246 
0247   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0248   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::KeyItem *
0249   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::push_back_(K const &key, KeyItem *next) {
0250     if (nextKeyItem_ == keyEndMarker_) {
0251       ++currentKeyRow_;
0252       if (currentKeyRow_ == keyRows_.end()) {
0253         keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
0254         currentKeyRow_ = keyRows_.end();
0255         --currentKeyRow_;  // end - 1 doesn't work!
0256       }
0257       nextKeyItem_ = *currentKeyRow_;
0258       keyEndMarker_ = nextKeyItem_ + keyRowSize_;
0259     }
0260     std::allocator_traits<KeyItemAllocator>::construct(keyAlloc_, nextKeyItem_, KeyItem(next, key, nullptr));
0261     nextKeyItem_++;
0262     return (nextKeyItem_ - 1);
0263   }
0264   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0265   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::ValueItem *
0266   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::push_back_(value_ref value, ValueItem *next) {
0267     if (nextValueItem_ == valueEndMarker_) {
0268       ++currentValueRow_;
0269       if (currentValueRow_ == valueRows_.end()) {
0270         valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
0271         currentValueRow_ = valueRows_.end();
0272         --currentValueRow_;  // end - 1 doesn't work!
0273       }
0274       nextValueItem_ = *currentValueRow_;
0275       valueEndMarker_ = nextValueItem_ + valueRowSize_;
0276     }
0277     std::allocator_traits<ValueItemAllocator>::construct(valueAlloc_, nextValueItem_, ValueItem(next, value));
0278     nextValueItem_++;
0279     return (nextValueItem_ - 1);
0280   }
0281 
0282   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0283   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::KeyItem &
0284   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::find_or_insert_(K const &key) {
0285     //std::cout << "Find or insert for key " << key << std::endl;
0286     size_t hash = hasher_(key);
0287     KeyItem *&buck = buckets_[hash % bucketSize_];
0288     KeyItem *curr = buck;
0289     while (curr) {
0290       if (eq_(curr->key, key)) {
0291         //std::cout << "  Key " << key << " was found." << std::endl;
0292         return *curr;
0293       }
0294       curr = curr->next;
0295     }
0296     buck = push_back_(key, buck);
0297     return *buck;
0298   }
0299 
0300   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0301   void SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::insert(K const &key, value_ref value) {
0302     //std::cout << "Pushing back value " << value << " for key " << key << std::endl;
0303     KeyItem &k = find_or_insert_(key);
0304     //std::cout << "Key " << (k.value ? "exists" : " is new") << std::endl;
0305     k.value = push_back_(value, k.value);
0306   }
0307 
0308   template <typename K, typename V, typename Hasher, typename Equals, typename Alloc>
0309   typename SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::value_iterator
0310   SimpleAllocHashMultiMap<K, V, Hasher, Equals, Alloc>::values(K const &key) {
0311     //std::cout << "Gettinv values for key " << key << std::endl;
0312     size_t hash = hasher_(key);
0313     for (KeyItem *curr = buckets_[hash % bucketSize_]; curr; curr = curr->next) {
0314       if (eq_(curr->key, key))
0315         return value_iterator(curr->value);
0316     }
0317     return value_iterator();
0318   }
0319 
0320   /*** Very very simple map implementation
0321  *   It's just a std::vector<pair<key,value>>, and the operator[] does a linear search to find the key (it's O(N) time, both if the key exists and if it doesn't)
0322  *   Anyway, if your map is very small and if you clear it often, it performs better than more complex variants
0323  *   Semantics is as std::map, except that only very few methods are implemented.
0324  * */
0325   template <typename K, typename V>
0326   class UnsortedDumbVectorMap {
0327   public:
0328     typedef std::pair<K, V> value_type;
0329     typedef typename std::vector<value_type>::const_iterator const_iterator;
0330     typedef typename std::vector<value_type>::iterator iterator;  // but please don't mutate the keys
0331 
0332     void clear() { data_.clear(); }
0333     bool empty() const { return data_.empty(); }
0334     const_iterator begin() const { return data_.begin(); }
0335     const_iterator end() const { return data_.end(); }
0336     iterator begin() { return data_.begin(); }
0337     iterator end() { return data_.end(); }
0338 
0339     V &operator[](const K &k) {
0340       for (typename std::vector<value_type>::iterator it = data_.begin(), ed = data_.end(); it != ed; ++it) {
0341         if (it->first == k)
0342           return it->second;
0343       }
0344       data_.push_back(value_type(k, V()));
0345       return data_.back().second;
0346     }
0347 
0348     UnsortedDumbVectorMap() {}
0349 
0350   private:
0351     std::vector<value_type> data_;
0352   };
0353 
0354 }  // namespace cmsutil
0355 
0356 #endif