File indexing completed on 2023-05-31 00:37:45
0001 #ifndef DataFormats_Common_Trie_h
0002 #define DataFormats_Common_Trie_h
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026 #include <list>
0027 #include <string>
0028
0029 namespace edm {
0030
0031 template <typename T>
0032 class TrieNode;
0033
0034
0035
0036
0037
0038 template <typename T>
0039 class TrieFactory {
0040 public:
0041 TrieFactory(unsigned paquetSize);
0042 ~TrieFactory();
0043
0044 private:
0045
0046 TrieFactory() = delete;
0047
0048 TrieFactory(const TrieFactory &e) = delete;
0049
0050 TrieFactory &operator=(const TrieFactory &e) = delete;
0051
0052 public:
0053 TrieNode<T> *newNode(const T &value);
0054 void clear();
0055
0056 private:
0057 unsigned _paquetSize;
0058 std::list<TrieNode<T> *> _allocatedNodes;
0059 TrieNode<T> *_lastNodes;
0060 unsigned _nbUsedInLastNodes;
0061 };
0062 }
0063
0064 namespace edm {
0065 template <typename T>
0066 class TrieNodeIter;
0067
0068
0069
0070
0071
0072
0073 template <typename T>
0074 class TrieNode {
0075 public:
0076 typedef TrieNodeIter<T> const_iterator;
0077
0078 TrieNode();
0079 ~TrieNode();
0080
0081 private:
0082
0083 TrieNode(const TrieNode &e) = delete;
0084
0085 TrieNode &operator=(const TrieNode &e) = delete;
0086
0087 public:
0088
0089 void setValue(const T &val);
0090
0091 const T &value() const;
0092
0093
0094 const TrieNode<T> *brother() const;
0095 TrieNode<T> *brother();
0096
0097 unsigned char brotherLabel() const;
0098
0099
0100 TrieNodeIter<T> begin() const;
0101
0102 TrieNodeIter<T> end() const;
0103
0104
0105 const TrieNode<T> *subNode() const;
0106 TrieNode<T> *subNode();
0107 unsigned char subNodeLabel() const;
0108
0109
0110 const TrieNode<T> *subNodeByLabel(unsigned char chr) const;
0111 TrieNode<T> *subNodeByLabel(unsigned char chr);
0112
0113
0114 void addSubNode(unsigned char chr, TrieNode<T> *node);
0115
0116
0117 void display(std::ostream &os, unsigned offset, unsigned char label) const;
0118
0119
0120 void clear();
0121
0122 private:
0123 template <typename Node>
0124 Node _sequentialSearch(Node first, unsigned char label, unsigned char val) const;
0125
0126 void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
0127
0128 void _addBrother(unsigned char chr, TrieNode<T> *brother);
0129
0130
0131
0132
0133 const TrieNode<T> *_getBrother(unsigned char chr) const;
0134
0135
0136
0137
0138 TrieNode<T> *_getBrother(unsigned char chr);
0139
0140 private:
0141
0142 TrieNode<T> *_brother;
0143
0144 unsigned char _brotherLabel;
0145
0146 TrieNode<T> *_firstSubNode;
0147
0148 unsigned char _firstSubNodeLabel;
0149
0150 T _value;
0151 };
0152 }
0153
0154 #include <ostream>
0155
0156 namespace edm {
0157
0158 template <typename T>
0159 class TrieFactory;
0160 template <typename T>
0161 class TrieNode;
0162
0163
0164
0165
0166
0167 template <typename T>
0168 class Trie {
0169 public:
0170
0171
0172 Trie(const T &empty);
0173 ~Trie();
0174
0175 private:
0176
0177 Trie() = delete;
0178
0179 Trie(const Trie &e) = delete;
0180
0181 Trie &operator=(const Trie &e) = delete;
0182
0183 public:
0184
0185
0186 void insert(std::string const &str, const T &value);
0187 void insert(const char *str, unsigned strLen, const T &value);
0188
0189
0190 void setEntry(std::string const &str, const T &value);
0191 void setEntry(const char *str, unsigned strLen, const T &value);
0192
0193 const T &find(std::string const &str) const;
0194 const T &find(const char *str, unsigned strLen) const;
0195
0196 const TrieNode<T> *node(std::string const &str) const;
0197 const TrieNode<T> *node(const char *str, unsigned strLen) const;
0198
0199 const TrieNode<T> *initialNode() const;
0200
0201 void display(std::ostream &os);
0202
0203 void clear();
0204
0205 private:
0206 TrieNode<T> *_addEntry(const char *str, unsigned strLen);
0207
0208 private:
0209
0210 T _empty;
0211
0212 TrieFactory<T> *_factory;
0213
0214 TrieNode<T> *_initialNode;
0215 };
0216 }
0217
0218 #include <boost/iterator/iterator_facade.hpp>
0219
0220 #include <string>
0221 #include <ostream>
0222
0223
0224
0225 namespace edm {
0226
0227 template <typename T>
0228 class TrieNodeIter : public boost::iterator_facade<TrieNodeIter<T>, TrieNode<T> const, boost::forward_traversal_tag> {
0229 public:
0230 typedef TrieNodeIter<T> self;
0231 typedef TrieNode<T> const node_base;
0232 TrieNodeIter() : m_node(nullptr), m_label(0) {}
0233
0234 explicit TrieNodeIter(node_base *p) : m_node(p ? p->subNode() : nullptr), m_label(p ? p->subNodeLabel() : 0) {}
0235
0236 unsigned char label() const { return m_label; }
0237
0238 private:
0239 friend class boost::iterator_core_access;
0240
0241 void increment() {
0242 m_label = m_node->brotherLabel();
0243 m_node = m_node->brother();
0244 }
0245
0246 bool equal(self const &other) const { return this->m_node == other.m_node; }
0247
0248 node_base &dereference() const { return *m_node; }
0249
0250 node_base *m_node;
0251 unsigned char m_label;
0252 };
0253
0254
0255 template <typename V, typename T>
0256 void walkTrie(V &v, TrieNode<T> const &n, std::string const &label = "") {
0257 typedef TrieNodeIter<T> node_iterator;
0258 node_iterator e;
0259 for (node_iterator p(&n); p != e; ++p) {
0260 v((*p).value(), label + (char)p.label());
0261 walkTrie(v, *p, label + (char)p.label());
0262 }
0263 }
0264
0265
0266 template <typename V, typename T>
0267 bool iterateTrieLeaves(V &v, TrieNode<T> const &n, std::string const &label = "") {
0268 typedef TrieNodeIter<T> node_iterator;
0269 node_iterator e;
0270 node_iterator p(&n);
0271 if (p == e)
0272 return true;
0273 for (; p != e; ++p) {
0274 if (iterateTrieLeaves(v, *p, label + (char)p.label()))
0275 v((*p).value(), label + (char)p.label());
0276 }
0277 return false;
0278 }
0279
0280 }
0281
0282
0283
0284
0285
0286
0287 template <typename T>
0288 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize)
0289 : _paquetSize(paquetSize), _lastNodes(nullptr), _nbUsedInLastNodes(0) {
0290 _lastNodes = new TrieNode<T>[paquetSize];
0291 }
0292
0293 template <typename T>
0294 edm::TrieFactory<T>::~TrieFactory() {
0295 typename std::list<TrieNode<T> *>::const_iterator it;
0296
0297 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
0298 delete[] *it;
0299 if (_lastNodes)
0300 delete[] _lastNodes;
0301 }
0302
0303 template <typename T>
0304 edm::TrieNode<T> *edm::TrieFactory<T>::newNode(const T &value) {
0305 if (_nbUsedInLastNodes == _paquetSize) {
0306 _allocatedNodes.push_back(_lastNodes);
0307 _nbUsedInLastNodes = 0;
0308 _lastNodes = new TrieNode<T>[_paquetSize];
0309 }
0310 TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
0311 ++_nbUsedInLastNodes;
0312 res->setValue(value);
0313 res->clear();
0314 return res;
0315 }
0316
0317 template <typename T>
0318 void edm::TrieFactory<T>::clear() {
0319 typename std::list<TrieNode<T> *>::const_iterator it;
0320 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
0321 delete[] *it;
0322 _allocatedNodes.clear();
0323 _nbUsedInLastNodes = 0;
0324 }
0325
0326 template <typename T>
0327 edm::TrieNode<T>::TrieNode()
0328 : _brother(nullptr),
0329 _brotherLabel(0),
0330 _firstSubNode(nullptr),
0331 _firstSubNodeLabel(0),
0332 _value()
0333
0334
0335 {}
0336
0337 template <typename T>
0338 edm::TrieNode<T>::~TrieNode() {
0339
0340
0341 }
0342
0343 template <typename T>
0344 edm::TrieNodeIter<T> edm::TrieNode<T>::begin() const {
0345 return const_iterator(this);
0346 }
0347 template <typename T>
0348 edm::TrieNodeIter<T> edm::TrieNode<T>::end() const {
0349 return const_iterator(nullptr);
0350 }
0351
0352 template <typename T>
0353 void edm::TrieNode<T>::setValue(const T &val) {
0354 _value = val;
0355 }
0356
0357 template <typename T>
0358 const T &edm::TrieNode<T>::value() const {
0359 return _value;
0360 }
0361
0362 template <typename T>
0363 const edm::TrieNode<T> *edm::TrieNode<T>::brother() const {
0364 return _brother;
0365 }
0366
0367 template <typename T>
0368 edm::TrieNode<T> *edm::TrieNode<T>::brother() {
0369 return _brother;
0370 }
0371
0372 template <typename T>
0373 const edm::TrieNode<T> *edm::TrieNode<T>::_getBrother(unsigned char chr) const {
0374 const TrieNode<T> *brother = _brother;
0375 return _sequentialSearch(brother, _brotherLabel, chr);
0376 }
0377
0378 template <typename T>
0379 edm::TrieNode<T> *edm::TrieNode<T>::_getBrother(unsigned char chr) {
0380 return _sequentialSearch(_brother, _brotherLabel, chr);
0381 }
0382
0383 template <typename T>
0384 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother) {
0385 if (!_brother || _brotherLabel > chr) {
0386 brother->_setBrother(_brother, _brotherLabel);
0387 _brother = brother;
0388 _brotherLabel = chr;
0389 } else
0390 _brother->_addBrother(chr, brother);
0391 }
0392
0393 template <typename T>
0394 unsigned char edm::TrieNode<T>::brotherLabel() const {
0395 return _brotherLabel;
0396 }
0397
0398 template <typename T>
0399 const edm::TrieNode<T> *edm::TrieNode<T>::subNode() const {
0400 return _firstSubNode;
0401 }
0402
0403 template <typename T>
0404 edm::TrieNode<T> *edm::TrieNode<T>::subNode() {
0405 return _firstSubNode;
0406 }
0407
0408 template <typename T>
0409 unsigned char edm::TrieNode<T>::subNodeLabel() const {
0410 return _firstSubNodeLabel;
0411 }
0412
0413 template <typename T>
0414 const edm::TrieNode<T> *edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const {
0415 const TrieNode<T> *first = _firstSubNode;
0416 return _sequentialSearch(first, _firstSubNodeLabel, chr);
0417 }
0418
0419 template <typename T>
0420 edm::TrieNode<T> *edm::TrieNode<T>::subNodeByLabel(unsigned char chr) {
0421 return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
0422 }
0423
0424 template <typename T>
0425 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node) {
0426 if (!_firstSubNode || _firstSubNodeLabel > chr) {
0427 node->_setBrother(_firstSubNode, _firstSubNodeLabel);
0428 _firstSubNode = node;
0429 _firstSubNodeLabel = chr;
0430 } else
0431 _firstSubNode->_addBrother(chr, node);
0432 }
0433
0434 template <typename T>
0435 template <typename Node>
0436 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const {
0437 if (first && label <= val) {
0438 if (label == val)
0439 return first;
0440 return first->_getBrother(val);
0441 }
0442 return 0x0;
0443 }
0444
0445 template <typename T>
0446 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel) {
0447 _brother = brother;
0448 _brotherLabel = brotherLabel;
0449 }
0450
0451 template <typename T>
0452 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const {
0453 unsigned int i;
0454 for (i = 0; i < offset; ++i)
0455 os << " ";
0456 if (label)
0457 os << "label[" << label << "] ";
0458 os << "value[" << _value << "]" << std::endl;
0459 if (_firstSubNode)
0460 _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
0461 if (_brother)
0462 _brother->display(os, offset, _brotherLabel);
0463 }
0464
0465 template <typename T>
0466 void edm::TrieNode<T>::clear() {
0467 _brother = nullptr;
0468 _brotherLabel = 0;
0469 _firstSubNode = nullptr;
0470 _firstSubNodeLabel = 0;
0471 }
0472
0473 #include <vector>
0474 #include <algorithm>
0475 #include <string>
0476 #include <cassert>
0477
0478 #include "FWCore/Utilities/interface/EDMException.h"
0479
0480 namespace edm {
0481 namespace detailsTrie {
0482 inline void errorInsert(std::string const &key) {
0483 Exception::throwThis(errors::InvalidReference,
0484 "Trie::insert called with a key already in collection;\n"
0485 "key value: ",
0486 key.c_str());
0487 }
0488 }
0489 }
0490
0491 template <typename T>
0492 edm::Trie<T>::Trie(const T &empty) : _empty(empty), _factory(nullptr), _initialNode(nullptr) {
0493
0494 _factory = new TrieFactory<T>(10000);
0495 _initialNode = _factory->newNode(_empty);
0496 }
0497
0498 template <typename T>
0499 edm::Trie<T>::~Trie() {
0500 delete _factory;
0501 }
0502
0503 template <typename T>
0504 void edm::Trie<T>::setEntry(std::string const &str, const T &value) {
0505 setEntry(str.c_str(), str.size(), value);
0506 }
0507 template <typename T>
0508 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value) {
0509 TrieNode<T> *node = _addEntry(str, strLen);
0510 node->setValue(value);
0511 }
0512
0513 template <typename T>
0514 edm::TrieNode<T> *edm::Trie<T>::_addEntry(const char *str, unsigned strLen) {
0515 unsigned pos = 0;
0516 bool found = true;
0517 TrieNode<T> *node = _initialNode, *previous = nullptr;
0518
0519
0520 while (found && pos < strLen) {
0521 found = false;
0522 previous = node;
0523 node = node->subNodeByLabel(str[pos]);
0524 if (node) {
0525 found = true;
0526 ++pos;
0527 }
0528 }
0529
0530
0531 if (!node || pos != strLen) {
0532 node = previous;
0533 for (unsigned i = pos; i < strLen; ++i) {
0534 TrieNode<T> *newNode = _factory->newNode(_empty);
0535 node->addSubNode(str[pos], newNode);
0536 node = newNode;
0537 ++pos;
0538 }
0539 }
0540 assert(node != nullptr);
0541 return node;
0542 }
0543
0544 template <typename T>
0545 void edm::Trie<T>::insert(std::string const &str, const T &value) {
0546 insert(str.c_str(), str.size(), value);
0547 }
0548
0549 template <typename T>
0550 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value) {
0551 TrieNode<T> *node = _addEntry(str, strLen);
0552
0553
0554 if (node && node->value() != _empty)
0555 detailsTrie::errorInsert(std::string(str, strLen));
0556 node->setValue(value);
0557 }
0558
0559 template <typename T>
0560 const T &edm::Trie<T>::find(std::string const &str) const {
0561 return find(str.c_str(), str.size());
0562 }
0563
0564 template <typename T>
0565 const T &edm::Trie<T>::find(const char *str, unsigned strLen) const {
0566 unsigned pos = 0;
0567 bool found = true;
0568 const TrieNode<T> *node = _initialNode;
0569
0570 while (found && pos < strLen) {
0571 found = false;
0572 node = node->subNodeByLabel(str[pos]);
0573 if (node) {
0574 found = true;
0575 ++pos;
0576 }
0577 }
0578 if (node && pos == strLen)
0579 return node->value();
0580 return _empty;
0581 }
0582
0583 template <typename T>
0584 edm::TrieNode<T> const *edm::Trie<T>::node(std::string const &str) const {
0585 return node(str.c_str(), str.size());
0586 }
0587
0588 template <typename T>
0589 edm::TrieNode<T> const *edm::Trie<T>::node(const char *str, unsigned strLen) const {
0590 unsigned pos = 0;
0591 bool found = true;
0592 const TrieNode<T> *node = _initialNode;
0593
0594 while (found && pos < strLen) {
0595 found = false;
0596 node = node->subNodeByLabel(str[pos]);
0597 if (node) {
0598 found = true;
0599 ++pos;
0600 }
0601 }
0602 return node;
0603 }
0604
0605 template <typename T>
0606 const edm::TrieNode<T> *edm::Trie<T>::initialNode() const {
0607 return _initialNode;
0608 }
0609
0610 template <typename T>
0611 void edm::Trie<T>::clear() {
0612 _factory->clear();
0613 _initialNode = _factory->newNode(_empty);
0614 }
0615
0616 template <typename T>
0617 void edm::Trie<T>::display(std::ostream &os) {
0618 if (_initialNode)
0619 _initialNode->display(os, 0, 0);
0620 }
0621
0622 #endif