|
Joshua
open source statistical hierarchical phrase-based machine translation system
|
00001 #ifndef UTIL_PROBING_HASH_TABLE__ 00002 #define UTIL_PROBING_HASH_TABLE__ 00003 00004 #include "util/exception.hh" 00005 00006 #include <algorithm> 00007 #include <cstddef> 00008 #include <functional> 00009 00010 #include <assert.h> 00011 00012 namespace util { 00013 00014 /* Thrown when table grows too large */ 00015 class ProbingSizeException : public Exception { 00016 public: 00017 ProbingSizeException() throw() {} 00018 ~ProbingSizeException() throw() {} 00019 }; 00020 00021 /* Non-standard hash table 00022 * Buckets must be set at the beginning and must be greater than maximum number 00023 * of elements, else an infinite loop happens. 00024 * Memory management and initialization is externalized to make it easier to 00025 * serialize these to disk and load them quickly. 00026 * Uses linear probing to find value. 00027 * Only insert and lookup operations. 00028 */ 00029 00030 template <class PackingT, class HashT, class EqualT = std::equal_to<typename PackingT::Key> > class ProbingHashTable { 00031 public: 00032 typedef PackingT Packing; 00033 typedef typename Packing::Key Key; 00034 typedef typename Packing::MutableIterator MutableIterator; 00035 typedef typename Packing::ConstIterator ConstIterator; 00036 00037 typedef HashT Hash; 00038 typedef EqualT Equal; 00039 00040 static std::size_t Size(std::size_t entries, float multiplier) { 00041 return std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries))) * Packing::kBytes; 00042 } 00043 00044 // Must be assigned to later. 00045 ProbingHashTable() : entries_(0) 00046 #ifdef DEBUG 00047 , initialized_(false) 00048 #endif 00049 {} 00050 00051 ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) 00052 : begin_(Packing::FromVoid(start)), 00053 buckets_(allocated / Packing::kBytes), 00054 end_(begin_ + (allocated / Packing::kBytes)), 00055 invalid_(invalid), 00056 hash_(hash_func), 00057 equal_(equal_func), 00058 entries_(0) 00059 #ifdef DEBUG 00060 , initialized_(true) 00061 #endif 00062 {} 00063 00064 template <class T> void Insert(const T &t) { 00065 if (++entries_ >= buckets_) 00066 UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); 00067 #ifdef DEBUG 00068 assert(initialized_); 00069 #endif 00070 for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { 00071 if (equal_(i->GetKey(), invalid_)) { *i = t; return; } 00072 if (++i == end_) { i = begin_; } 00073 } 00074 } 00075 00076 void FinishedInserting() {} 00077 00078 void LoadedBinary() {} 00079 00080 // Don't change anything related to GetKey, 00081 template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) { 00082 for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { 00083 Key got(i->GetKey()); 00084 if (equal_(got, key)) { out = i; return true; } 00085 if (equal_(got, invalid_)) return false; 00086 if (++i == end_) i = begin_; 00087 } 00088 } 00089 00090 template <class Key> bool Find(const Key key, ConstIterator &out) const { 00091 #ifdef DEBUG 00092 assert(initialized_); 00093 #endif 00094 for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) { 00095 Key got(i->GetKey()); 00096 if (equal_(got, key)) { out = i; return true; } 00097 if (equal_(got, invalid_)) return false; 00098 if (++i == end_) i = begin_; 00099 } 00100 } 00101 00102 private: 00103 MutableIterator begin_; 00104 std::size_t buckets_; 00105 MutableIterator end_; 00106 Key invalid_; 00107 Hash hash_; 00108 Equal equal_; 00109 std::size_t entries_; 00110 #ifdef DEBUG 00111 bool initialized_; 00112 #endif 00113 }; 00114 00115 } // namespace util 00116 00117 #endif // UTIL_PROBING_HASH_TABLE__