Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table.hh
00001 #ifndef UTIL_PROBING_HASH_TABLE__
00002 #define UTIL_PROBING_HASH_TABLE__
00003 
00004 #include "util/exception.hh"
00005 
00006 #include <algorithm>
00007 #include <cstddef>
00008 #include <functional>
00009 
00010 #include <assert.h>
00011 
00012 namespace util {
00013 
00014 /* Thrown when table grows too large */
00015 class ProbingSizeException : public Exception {
00016   public:
00017     ProbingSizeException() throw() {}
00018     ~ProbingSizeException() throw() {}
00019 };
00020 
00021 /* Non-standard hash table
00022  * Buckets must be set at the beginning and must be greater than maximum number
00023  * of elements, else an infinite loop happens.
00024  * Memory management and initialization is externalized to make it easier to
00025  * serialize these to disk and load them quickly.
00026  * Uses linear probing to find value.
00027  * Only insert and lookup operations.  
00028  */
00029 
00030 template <class PackingT, class HashT, class EqualT = std::equal_to<typename PackingT::Key> > class ProbingHashTable {
00031   public:
00032     typedef PackingT Packing;
00033     typedef typename Packing::Key Key;
00034     typedef typename Packing::MutableIterator MutableIterator;
00035     typedef typename Packing::ConstIterator ConstIterator;
00036 
00037     typedef HashT Hash;
00038     typedef EqualT Equal;
00039 
00040     static std::size_t Size(std::size_t entries, float multiplier) {
00041       return std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries))) * Packing::kBytes;
00042     }
00043 
00044     // Must be assigned to later.  
00045     ProbingHashTable() : entries_(0)
00046 #ifdef DEBUG
00047       , initialized_(false)
00048 #endif
00049     {}
00050 
00051     ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
00052       : begin_(Packing::FromVoid(start)),
00053         buckets_(allocated / Packing::kBytes),
00054         end_(begin_ + (allocated / Packing::kBytes)),
00055         invalid_(invalid),
00056         hash_(hash_func),
00057         equal_(equal_func),
00058         entries_(0)
00059 #ifdef DEBUG
00060         , initialized_(true)
00061 #endif
00062     {}
00063 
00064     template <class T> void Insert(const T &t) {
00065       if (++entries_ >= buckets_)
00066         UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
00067 #ifdef DEBUG
00068       assert(initialized_);
00069 #endif
00070       for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
00071         if (equal_(i->GetKey(), invalid_)) { *i = t; return; }
00072         if (++i == end_) { i = begin_; }
00073       }
00074     }
00075 
00076     void FinishedInserting() {}
00077 
00078     void LoadedBinary() {}
00079 
00080     // Don't change anything related to GetKey,  
00081     template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
00082       for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
00083         Key got(i->GetKey());
00084         if (equal_(got, key)) { out = i; return true; }
00085         if (equal_(got, invalid_)) return false;
00086         if (++i == end_) i = begin_;
00087       }    
00088     }
00089 
00090     template <class Key> bool Find(const Key key, ConstIterator &out) const {
00091 #ifdef DEBUG
00092       assert(initialized_);
00093 #endif
00094       for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
00095         Key got(i->GetKey());
00096         if (equal_(got, key)) { out = i; return true; }
00097         if (equal_(got, invalid_)) return false;
00098         if (++i == end_) i = begin_;
00099       }    
00100     }
00101 
00102   private:
00103     MutableIterator begin_;
00104     std::size_t buckets_;
00105     MutableIterator end_;
00106     Key invalid_;
00107     Hash hash_;
00108     Equal equal_;
00109     std::size_t entries_;
00110 #ifdef DEBUG
00111     bool initialized_;
00112 #endif
00113 };
00114 
00115 } // namespace util
00116 
00117 #endif // UTIL_PROBING_HASH_TABLE__