Main   Namespaces   Classes   Hierarchy   Annotated   Files   Compound   Global   Pages  

HashSet.hpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------------
00002 /** @file HashSet.hpp
00003  */
00004 //----------------------------------------------------------------------------
00005 
00006 #ifndef HASHSET_H
00007 #define HASHSET_H
00008 
00009 #include <boost/scoped_array.hpp>
00010 
00011 #include "Benzene.hpp"
00012 #include "Hash.hpp"
00013 #include <cassert>
00014 
00015 _BEGIN_BENZENE_NAMESPACE_
00016 
00017 //----------------------------------------------------------------------------
00018 
00019 /** HashSet with 2^n slots. 
00020 
00021     Deletes and dynamic resizing are not supported.  Not thread-safe. 
00022     Performs simple linear probing on hash collisions.
00023 
00024     Uses a changelog for quick clears O(n) in number of added entries.
00025 */
00026 class HashSet
00027 {
00028 public:
00029     /** Constructs a hashset with 2^bits slots. */
00030     HashSet(unsigned bits);
00031 
00032     /** Copy constructor. */
00033     HashSet(const HashSet& other);
00034 
00035     /** Destructor. */
00036     ~HashSet();
00037 
00038     /** Returns the lg2 of the number of slots. */
00039     unsigned bits() const;
00040 
00041     /** Returns the number of slots. */
00042     unsigned size() const;
00043 
00044     /** Returns number of objects stored. */
00045     unsigned count() const;
00046 
00047     /** Returns true if key is in set. */
00048     bool exists(hash_t key) const;
00049 
00050     /** Adds a key. 
00051      
00052         WILL ABORT IF TABLE IS FULL!
00053 
00054         Issues a warning if table becomes 1/4th full. 
00055 
00056         Make sure the table is much larger (at least 4x larger) than
00057         the maximum amount of data you ever expect to store in this
00058         table. This is to reduce the number of probes on subsequent
00059         get() calls, as well as to avoid the need for dynamic resizing
00060         of the table.
00061      */
00062     void add(hash_t key);
00063     
00064     /** Clears the table. */
00065     void clear();
00066 
00067     /** Copys other's data, overwriting everything in this table. */
00068     void operator=(const HashSet& other);
00069 
00070 private:    
00071     
00072     static const int EMPTY_SLOT = -1;
00073 
00074     struct Data
00075     {
00076         hash_t key;
00077     };
00078 
00079     /** See bits() */
00080     unsigned m_bits;
00081 
00082     /** See size() */
00083     unsigned m_size;
00084 
00085     /** Equal to size() - 1. Used for bitmasking operations. */
00086     unsigned m_mask;
00087 
00088     /** See count() */
00089     unsigned m_count;
00090 
00091     /** Index into m_allocated at which data for this slot is located;
00092         index is equal to EMPTY_SLOT if unused.
00093      */
00094     boost::scoped_array<int> m_used;
00095 
00096     /** Allocated space for entries in the table. */
00097     boost::scoped_array<Data> m_allocated;
00098 
00099     /** History of slots that have been filled. */
00100     std::vector<int> m_changelog;
00101 
00102     void CopyFrom(const HashSet& other);
00103 };
00104 
00105 inline HashSet::HashSet(unsigned bits)
00106     : m_bits(bits),
00107       m_size(1 << bits),
00108       m_mask(m_size - 1),
00109       m_count(0),
00110       m_used(new int[m_size]),
00111       m_allocated(new Data[m_size]),
00112       m_changelog()
00113 {
00114     m_changelog.reserve(m_size);
00115     for (unsigned i = 0; i < m_size; ++i)
00116         m_used[i] = EMPTY_SLOT;
00117 }
00118 
00119 inline HashSet::HashSet(const HashSet& other)
00120     : m_bits(other.m_bits),
00121       m_size(other.m_size),
00122       m_mask(other.m_mask),
00123       m_count(other.m_count),
00124       m_used(new int[m_size]),
00125       m_allocated(new Data[m_size]),
00126       m_changelog()
00127 {
00128     m_changelog.reserve(m_size);
00129     CopyFrom(other);
00130 }
00131 
00132 inline HashSet::~HashSet()
00133 {
00134 }
00135 
00136 inline unsigned HashSet::bits() const
00137 {
00138     return m_bits;
00139 }
00140 
00141 inline unsigned HashSet::size() const
00142 {
00143     return m_size;
00144 }
00145 
00146 inline unsigned HashSet::count() const
00147 {
00148     return m_count;
00149 }
00150 
00151 /** Performs linear probing to find key. */
00152 inline bool HashSet::exists(hash_t key) const
00153 {
00154     // Search for slot containing this key.
00155     // Limit number of probes to a single pass: Table should never
00156     // fill up, but I'm being extra paranoid anyway.
00157     hash_t index = key;
00158     for (int guard = m_size; guard > 0; --guard)
00159     {
00160         index &= m_mask;
00161         if (m_used[index] == EMPTY_SLOT)
00162             return false;
00163         if (m_used[index] != EMPTY_SLOT
00164             && m_allocated[m_used[index]].key == key)
00165         {
00166             return true;
00167         }
00168         index++;
00169     }
00170     return false;
00171 }
00172 
00173 inline void HashSet::add(hash_t key)
00174 {
00175     if (m_count > m_size)
00176     {
00177         std::cerr << "HashSet: Full! Uhh oh..." << '\n';
00178         abort();
00179     }
00180     else if (m_count > m_size / 4)
00181         std::cerr << "HashSet: table becoming full..." << '\n';
00182 
00183     int offset = m_count++;
00184     m_allocated[offset].key = key;
00185 
00186     // Find an empty slot
00187     hash_t index = key;
00188     while (true)
00189     {
00190         index &= m_mask;
00191         if (m_used[index] == EMPTY_SLOT)
00192         {
00193             m_used[index] = offset;
00194             m_changelog.push_back(index);
00195             break;
00196         }
00197         index++;
00198     }
00199 }
00200 
00201 inline void HashSet::clear()
00202 {
00203     for (unsigned i = 0; i < m_changelog.size(); ++i)
00204         m_used[m_changelog[i]] = EMPTY_SLOT;
00205     m_changelog.clear();
00206     m_count = 0;
00207 }
00208 
00209 inline void HashSet::CopyFrom(const HashSet& other)
00210 {
00211     assert(m_size == other.m_size);
00212     m_count = other.m_count;
00213     for (unsigned i = 0; i < m_size; ++i)
00214     {
00215         m_used[i] = other.m_used[i];
00216         m_allocated[i] = other.m_allocated[i];
00217     }
00218     m_changelog = other.m_changelog;
00219 }
00220 
00221 inline void HashSet::operator=(const HashSet& other)
00222 {
00223     CopyFrom(other);
00224 }
00225 
00226 //----------------------------------------------------------------------------
00227 
00228 _END_BENZENE_NAMESPACE_
00229 
00230 #endif // HASHMAP_H


6 Jan 2011 Doxygen 1.6.3