Main   Namespaces   Classes   Hierarchy   Annotated   Files   Compound   Global   Pages  

HashMap.hpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------------
00002 /** @file HashMap.hpp
00003     Contains a thread-safe lock-free constant-size hash map.
00004  */
00005 //----------------------------------------------------------------------------
00006 
00007 #ifndef HASHMAP_H
00008 #define HASHMAP_H
00009 
00010 #include <boost/concept_check.hpp>
00011 #include <boost/scoped_array.hpp>
00012 
00013 #include "Benzene.hpp"
00014 #include "Hash.hpp"
00015 #include "Logger.hpp"
00016 #include <cassert>
00017 
00018 _BEGIN_BENZENE_NAMESPACE_
00019 
00020 //----------------------------------------------------------------------------
00021 
00022 /** Concept of a state in a hash table. */
00023 template<class T>
00024 struct HashMapStateConcept
00025 {
00026     void constraints() 
00027     {
00028         boost::function_requires< boost::DefaultConstructibleConcept<T> >();
00029         boost::function_requires< boost::AssignableConcept<T> >();
00030     }
00031 };
00032 
00033 //----------------------------------------------------------------------------
00034 
00035 /** Lock-free HashMap with 2^n slots. 
00036 
00037     Deletes and dynamic resizing are not supported.  Thread-safe, so
00038     multiple threads can read/write data at the same time. Performs
00039     simple linear probing on hash collisions.
00040 
00041     Requires gcc builtins for atomic access.
00042 
00043     References:
00044     - Gao, H, Groote J.F., Hesselink W.H: 
00045     <a href="http://www.cs.rug.nl/~wim/mechver/hashtable/index.html"
00046     Lock-free Dynamic Hash table with Open Addressing</a>
00047     - Cliff Click Jr:
00048     <a href="http://blogs.azulsystems.com/cliff/2007/03/a_nonblocking_h.html"
00049     A Non-Blocking HashTable</a>
00050 */
00051 template<typename T>
00052 class HashMap
00053 {
00054     BOOST_CLASS_REQUIRE(T, benzene, HashMapStateConcept);
00055 
00056 public:
00057 
00058     /** Constructs a hashmap with 2^bits slots. */
00059     HashMap(unsigned bits);
00060 
00061     /** Copy constructor. */
00062     HashMap(const HashMap<T>& other);
00063 
00064     /** Destructor. */
00065     ~HashMap();
00066 
00067     /** Returns the lg2 of the number of slots. */
00068     unsigned bits() const;
00069 
00070     /** Returns the number of slots. */
00071     unsigned size() const;
00072 
00073     /** Returns number of objects stored. */
00074     unsigned count() const;
00075 
00076     /** Retrieves object with key. Returns true on success. */
00077     bool get(hash_t key, T& out) const;
00078 
00079     /** Stores a (key, object) pair. 
00080      
00081         WILL ABORT IF TABLE IS FULL!
00082 
00083         Issues a warning if table becomes 1/4th full. 
00084 
00085         Make sure the table is much larger (at least 4x larger) than
00086         the maximum amount of data you ever expect to store in this
00087         table. This is to reduce the number of probes on subsequent
00088         get() calls, as well as to avoid the need for dynamic resizing
00089         of the table.
00090 
00091         This function can possibly create duplicate entries. This is
00092         because put() does not check the key of the slots it runs over
00093         while searching for an empty slot. So if two threads both
00094         write try to put() k1 at the same time, each will write to its
00095         own slot. Only the first slot written will ever be used by
00096         get().
00097      */
00098     void put(hash_t key, const T& obj);
00099     
00100     /** Clears the table. */
00101     void clear();
00102 
00103     /** Copys other's data, overwriting everything in this table. */
00104     void operator=(const HashMap<T>& other);
00105 
00106 private:    
00107     
00108     static const int EMPTY_SLOT = -1;
00109 
00110     struct Data
00111     {
00112         hash_t key;
00113         T value;
00114     };
00115 
00116     /** See bits() */
00117     unsigned m_bits;
00118 
00119     /** See size() */
00120     unsigned m_size;
00121 
00122     /** Equal to size() - 1. Used for bitmasking operations. */
00123     unsigned m_mask;
00124 
00125     /** See count() */
00126     volatile unsigned m_count;
00127 
00128     /** Index into m_allocated at which data for this slot is located;
00129         index is equal to EMPTY_SLOT if unused.
00130      */
00131     boost::scoped_array<volatile int> m_used;
00132 
00133     /** Allocated space for entries in the table. */
00134     boost::scoped_array<Data> m_allocated;
00135 
00136     void CopyFrom(const HashMap<T>& other);
00137 };
00138 
00139 template<typename T>
00140 HashMap<T>::HashMap(unsigned bits)
00141     : m_bits(bits),
00142       m_size(1 << bits),
00143       m_mask(m_size - 1),
00144       m_count(0),
00145       m_used(new volatile int[m_size]),
00146       m_allocated(new Data[m_size])
00147 {
00148     clear();
00149 }
00150 
00151 template<typename T>
00152 HashMap<T>::HashMap(const HashMap<T>& other)
00153     : m_bits(other.m_bits),
00154       m_size(other.m_size),
00155       m_mask(other.m_mask),
00156       m_count(other.m_count),
00157       m_used(new volatile int[m_size]),
00158       m_allocated(new Data[m_size])
00159 {
00160     CopyFrom(other);
00161 }
00162 
00163 template<typename T>
00164 HashMap<T>::~HashMap()
00165 {
00166 }
00167 
00168 template<typename T>
00169 inline unsigned HashMap<T>::bits() const
00170 {
00171     return m_bits;
00172 }
00173 
00174 template<typename T>
00175 inline unsigned HashMap<T>::size() const
00176 {
00177     return m_size;
00178 }
00179 
00180 template<typename T>
00181 inline unsigned HashMap<T>::count() const
00182 {
00183     return m_count;
00184 }
00185 
00186 /** Performs linear probing to find key. */
00187 template<typename T>
00188 bool HashMap<T>::get(hash_t key, T& out) const
00189 {
00190     // Search for slot containing this key.
00191     // Limit number of probes to a single pass: Table should never
00192     // fill up, but I'm being extra paranoid anyway.
00193     hash_t index = key;
00194     for (int guard = m_size; guard > 0; --guard)
00195     {
00196         index &= m_mask;
00197         if (m_used[index] == EMPTY_SLOT)
00198             return false;
00199         if (m_used[index] != EMPTY_SLOT
00200             && m_allocated[m_used[index]].key == key)
00201         {
00202             out = m_allocated[m_used[index]].value;
00203             return true;
00204         }
00205         index++;
00206     }
00207     return false;
00208 }
00209 
00210 template<typename T>
00211 void HashMap<T>::put(hash_t key, const T& value)
00212 {
00213     if (m_count > m_size)
00214     {
00215         LogSevere() << "HashMap: Full! Uhh oh..." << '\n';
00216         abort();
00217     }
00218     else if (m_count > m_size / 4)
00219         LogWarning() << "HashMap: table becoming full..." << '\n';
00220 
00221     // Atomic: get offset into allocated memory and increment m_count.
00222     int offset = __sync_fetch_and_add(&m_count, 1);
00223 
00224     // Copy data over
00225     m_allocated[offset].key = key;
00226     m_allocated[offset].value = value;
00227 
00228     // Find an empty slot
00229     hash_t index = key;
00230     while (true)
00231     {
00232         index &= m_mask;
00233         // Atomic: grab slot if unused
00234         if (__sync_bool_compare_and_swap(&m_used[index], EMPTY_SLOT, offset))
00235             break;
00236         index++;
00237     }
00238 }
00239 
00240 template<typename T>
00241 void HashMap<T>::clear()
00242 {
00243     m_count = 0;
00244     for (unsigned i = 0; i < m_size; ++i)
00245         m_used[i] = EMPTY_SLOT;
00246 }
00247 
00248 template<typename T>
00249 void HashMap<T>::CopyFrom(const HashMap<T>& other)
00250 {
00251     assert(m_size == other.m_size);
00252     m_count = other.m_count;
00253     for (unsigned i = 0; i < m_size; ++i)
00254     {
00255         m_used[i] = other.m_used[i];
00256         m_allocated[i] = other.m_allocated[i];
00257     }
00258 }
00259 
00260 template<typename T>
00261 void HashMap<T>::operator=(const HashMap<T>& other)
00262 {
00263     CopyFrom(other);
00264 }
00265 
00266 //----------------------------------------------------------------------------
00267 
00268 _END_BENZENE_NAMESPACE_
00269 
00270 #endif // HASHMAP_H


6 Jan 2011 Doxygen 1.6.3