00001 //---------------------------------------------------------------------------- 00002 /** @file HashMap.hpp 00003 Contains a thread-safe lock-free constant-size hash map. 00004 */ 00005 //---------------------------------------------------------------------------- 00006 00007 #ifndef HASHMAP_H 00008 #define HASHMAP_H 00009 00010 #include <boost/concept_check.hpp> 00011 #include <boost/scoped_array.hpp> 00012 00013 #include "Benzene.hpp" 00014 #include "Hash.hpp" 00015 #include "Logger.hpp" 00016 #include <cassert> 00017 00018 _BEGIN_BENZENE_NAMESPACE_ 00019 00020 //---------------------------------------------------------------------------- 00021 00022 /** Concept of a state in a hash table. */ 00023 template<class T> 00024 struct HashMapStateConcept 00025 { 00026 void constraints() 00027 { 00028 boost::function_requires< boost::DefaultConstructibleConcept<T> >(); 00029 boost::function_requires< boost::AssignableConcept<T> >(); 00030 } 00031 }; 00032 00033 //---------------------------------------------------------------------------- 00034 00035 /** Lock-free HashMap with 2^n slots. 00036 00037 Deletes and dynamic resizing are not supported. Thread-safe, so 00038 multiple threads can read/write data at the same time. Performs 00039 simple linear probing on hash collisions. 00040 00041 Requires gcc builtins for atomic access. 00042 00043 References: 00044 - Gao, H, Groote J.F., Hesselink W.H: 00045 <a href="http://www.cs.rug.nl/~wim/mechver/hashtable/index.html" 00046 Lock-free Dynamic Hash table with Open Addressing</a> 00047 - Cliff Click Jr: 00048 <a href="http://blogs.azulsystems.com/cliff/2007/03/a_nonblocking_h.html" 00049 A Non-Blocking HashTable</a> 00050 */ 00051 template<typename T> 00052 class HashMap 00053 { 00054 BOOST_CLASS_REQUIRE(T, benzene, HashMapStateConcept); 00055 00056 public: 00057 00058 /** Constructs a hashmap with 2^bits slots. */ 00059 HashMap(unsigned bits); 00060 00061 /** Copy constructor. */ 00062 HashMap(const HashMap<T>& other); 00063 00064 /** Destructor. */ 00065 ~HashMap(); 00066 00067 /** Returns the lg2 of the number of slots. */ 00068 unsigned bits() const; 00069 00070 /** Returns the number of slots. */ 00071 unsigned size() const; 00072 00073 /** Returns number of objects stored. */ 00074 unsigned count() const; 00075 00076 /** Retrieves object with key. Returns true on success. */ 00077 bool get(hash_t key, T& out) const; 00078 00079 /** Stores a (key, object) pair. 00080 00081 WILL ABORT IF TABLE IS FULL! 00082 00083 Issues a warning if table becomes 1/4th full. 00084 00085 Make sure the table is much larger (at least 4x larger) than 00086 the maximum amount of data you ever expect to store in this 00087 table. This is to reduce the number of probes on subsequent 00088 get() calls, as well as to avoid the need for dynamic resizing 00089 of the table. 00090 00091 This function can possibly create duplicate entries. This is 00092 because put() does not check the key of the slots it runs over 00093 while searching for an empty slot. So if two threads both 00094 write try to put() k1 at the same time, each will write to its 00095 own slot. Only the first slot written will ever be used by 00096 get(). 00097 */ 00098 void put(hash_t key, const T& obj); 00099 00100 /** Clears the table. */ 00101 void clear(); 00102 00103 /** Copys other's data, overwriting everything in this table. */ 00104 void operator=(const HashMap<T>& other); 00105 00106 private: 00107 00108 static const int EMPTY_SLOT = -1; 00109 00110 struct Data 00111 { 00112 hash_t key; 00113 T value; 00114 }; 00115 00116 /** See bits() */ 00117 unsigned m_bits; 00118 00119 /** See size() */ 00120 unsigned m_size; 00121 00122 /** Equal to size() - 1. Used for bitmasking operations. */ 00123 unsigned m_mask; 00124 00125 /** See count() */ 00126 volatile unsigned m_count; 00127 00128 /** Index into m_allocated at which data for this slot is located; 00129 index is equal to EMPTY_SLOT if unused. 00130 */ 00131 boost::scoped_array<volatile int> m_used; 00132 00133 /** Allocated space for entries in the table. */ 00134 boost::scoped_array<Data> m_allocated; 00135 00136 void CopyFrom(const HashMap<T>& other); 00137 }; 00138 00139 template<typename T> 00140 HashMap<T>::HashMap(unsigned bits) 00141 : m_bits(bits), 00142 m_size(1 << bits), 00143 m_mask(m_size - 1), 00144 m_count(0), 00145 m_used(new volatile int[m_size]), 00146 m_allocated(new Data[m_size]) 00147 { 00148 clear(); 00149 } 00150 00151 template<typename T> 00152 HashMap<T>::HashMap(const HashMap<T>& other) 00153 : m_bits(other.m_bits), 00154 m_size(other.m_size), 00155 m_mask(other.m_mask), 00156 m_count(other.m_count), 00157 m_used(new volatile int[m_size]), 00158 m_allocated(new Data[m_size]) 00159 { 00160 CopyFrom(other); 00161 } 00162 00163 template<typename T> 00164 HashMap<T>::~HashMap() 00165 { 00166 } 00167 00168 template<typename T> 00169 inline unsigned HashMap<T>::bits() const 00170 { 00171 return m_bits; 00172 } 00173 00174 template<typename T> 00175 inline unsigned HashMap<T>::size() const 00176 { 00177 return m_size; 00178 } 00179 00180 template<typename T> 00181 inline unsigned HashMap<T>::count() const 00182 { 00183 return m_count; 00184 } 00185 00186 /** Performs linear probing to find key. */ 00187 template<typename T> 00188 bool HashMap<T>::get(hash_t key, T& out) const 00189 { 00190 // Search for slot containing this key. 00191 // Limit number of probes to a single pass: Table should never 00192 // fill up, but I'm being extra paranoid anyway. 00193 hash_t index = key; 00194 for (int guard = m_size; guard > 0; --guard) 00195 { 00196 index &= m_mask; 00197 if (m_used[index] == EMPTY_SLOT) 00198 return false; 00199 if (m_used[index] != EMPTY_SLOT 00200 && m_allocated[m_used[index]].key == key) 00201 { 00202 out = m_allocated[m_used[index]].value; 00203 return true; 00204 } 00205 index++; 00206 } 00207 return false; 00208 } 00209 00210 template<typename T> 00211 void HashMap<T>::put(hash_t key, const T& value) 00212 { 00213 if (m_count > m_size) 00214 { 00215 LogSevere() << "HashMap: Full! Uhh oh..." << '\n'; 00216 abort(); 00217 } 00218 else if (m_count > m_size / 4) 00219 LogWarning() << "HashMap: table becoming full..." << '\n'; 00220 00221 // Atomic: get offset into allocated memory and increment m_count. 00222 int offset = __sync_fetch_and_add(&m_count, 1); 00223 00224 // Copy data over 00225 m_allocated[offset].key = key; 00226 m_allocated[offset].value = value; 00227 00228 // Find an empty slot 00229 hash_t index = key; 00230 while (true) 00231 { 00232 index &= m_mask; 00233 // Atomic: grab slot if unused 00234 if (__sync_bool_compare_and_swap(&m_used[index], EMPTY_SLOT, offset)) 00235 break; 00236 index++; 00237 } 00238 } 00239 00240 template<typename T> 00241 void HashMap<T>::clear() 00242 { 00243 m_count = 0; 00244 for (unsigned i = 0; i < m_size; ++i) 00245 m_used[i] = EMPTY_SLOT; 00246 } 00247 00248 template<typename T> 00249 void HashMap<T>::CopyFrom(const HashMap<T>& other) 00250 { 00251 assert(m_size == other.m_size); 00252 m_count = other.m_count; 00253 for (unsigned i = 0; i < m_size; ++i) 00254 { 00255 m_used[i] = other.m_used[i]; 00256 m_allocated[i] = other.m_allocated[i]; 00257 } 00258 } 00259 00260 template<typename T> 00261 void HashMap<T>::operator=(const HashMap<T>& other) 00262 { 00263 CopyFrom(other); 00264 } 00265 00266 //---------------------------------------------------------------------------- 00267 00268 _END_BENZENE_NAMESPACE_ 00269 00270 #endif // HASHMAP_H