00001 //---------------------------------------------------------------------------- 00002 /** @file HashSet.hpp 00003 */ 00004 //---------------------------------------------------------------------------- 00005 00006 #ifndef HASHSET_H 00007 #define HASHSET_H 00008 00009 #include <boost/scoped_array.hpp> 00010 00011 #include "Benzene.hpp" 00012 #include "Hash.hpp" 00013 #include <cassert> 00014 00015 _BEGIN_BENZENE_NAMESPACE_ 00016 00017 //---------------------------------------------------------------------------- 00018 00019 /** HashSet with 2^n slots. 00020 00021 Deletes and dynamic resizing are not supported. Not thread-safe. 00022 Performs simple linear probing on hash collisions. 00023 00024 Uses a changelog for quick clears O(n) in number of added entries. 00025 */ 00026 class HashSet 00027 { 00028 public: 00029 /** Constructs a hashset with 2^bits slots. */ 00030 HashSet(unsigned bits); 00031 00032 /** Copy constructor. */ 00033 HashSet(const HashSet& other); 00034 00035 /** Destructor. */ 00036 ~HashSet(); 00037 00038 /** Returns the lg2 of the number of slots. */ 00039 unsigned bits() const; 00040 00041 /** Returns the number of slots. */ 00042 unsigned size() const; 00043 00044 /** Returns number of objects stored. */ 00045 unsigned count() const; 00046 00047 /** Returns true if key is in set. */ 00048 bool exists(hash_t key) const; 00049 00050 /** Adds a key. 00051 00052 WILL ABORT IF TABLE IS FULL! 00053 00054 Issues a warning if table becomes 1/4th full. 00055 00056 Make sure the table is much larger (at least 4x larger) than 00057 the maximum amount of data you ever expect to store in this 00058 table. This is to reduce the number of probes on subsequent 00059 get() calls, as well as to avoid the need for dynamic resizing 00060 of the table. 00061 */ 00062 void add(hash_t key); 00063 00064 /** Clears the table. */ 00065 void clear(); 00066 00067 /** Copys other's data, overwriting everything in this table. */ 00068 void operator=(const HashSet& other); 00069 00070 private: 00071 00072 static const int EMPTY_SLOT = -1; 00073 00074 struct Data 00075 { 00076 hash_t key; 00077 }; 00078 00079 /** See bits() */ 00080 unsigned m_bits; 00081 00082 /** See size() */ 00083 unsigned m_size; 00084 00085 /** Equal to size() - 1. Used for bitmasking operations. */ 00086 unsigned m_mask; 00087 00088 /** See count() */ 00089 unsigned m_count; 00090 00091 /** Index into m_allocated at which data for this slot is located; 00092 index is equal to EMPTY_SLOT if unused. 00093 */ 00094 boost::scoped_array<int> m_used; 00095 00096 /** Allocated space for entries in the table. */ 00097 boost::scoped_array<Data> m_allocated; 00098 00099 /** History of slots that have been filled. */ 00100 std::vector<int> m_changelog; 00101 00102 void CopyFrom(const HashSet& other); 00103 }; 00104 00105 inline HashSet::HashSet(unsigned bits) 00106 : m_bits(bits), 00107 m_size(1 << bits), 00108 m_mask(m_size - 1), 00109 m_count(0), 00110 m_used(new int[m_size]), 00111 m_allocated(new Data[m_size]), 00112 m_changelog() 00113 { 00114 m_changelog.reserve(m_size); 00115 for (unsigned i = 0; i < m_size; ++i) 00116 m_used[i] = EMPTY_SLOT; 00117 } 00118 00119 inline HashSet::HashSet(const HashSet& other) 00120 : m_bits(other.m_bits), 00121 m_size(other.m_size), 00122 m_mask(other.m_mask), 00123 m_count(other.m_count), 00124 m_used(new int[m_size]), 00125 m_allocated(new Data[m_size]), 00126 m_changelog() 00127 { 00128 m_changelog.reserve(m_size); 00129 CopyFrom(other); 00130 } 00131 00132 inline HashSet::~HashSet() 00133 { 00134 } 00135 00136 inline unsigned HashSet::bits() const 00137 { 00138 return m_bits; 00139 } 00140 00141 inline unsigned HashSet::size() const 00142 { 00143 return m_size; 00144 } 00145 00146 inline unsigned HashSet::count() const 00147 { 00148 return m_count; 00149 } 00150 00151 /** Performs linear probing to find key. */ 00152 inline bool HashSet::exists(hash_t key) const 00153 { 00154 // Search for slot containing this key. 00155 // Limit number of probes to a single pass: Table should never 00156 // fill up, but I'm being extra paranoid anyway. 00157 hash_t index = key; 00158 for (int guard = m_size; guard > 0; --guard) 00159 { 00160 index &= m_mask; 00161 if (m_used[index] == EMPTY_SLOT) 00162 return false; 00163 if (m_used[index] != EMPTY_SLOT 00164 && m_allocated[m_used[index]].key == key) 00165 { 00166 return true; 00167 } 00168 index++; 00169 } 00170 return false; 00171 } 00172 00173 inline void HashSet::add(hash_t key) 00174 { 00175 if (m_count > m_size) 00176 { 00177 std::cerr << "HashSet: Full! Uhh oh..." << '\n'; 00178 abort(); 00179 } 00180 else if (m_count > m_size / 4) 00181 std::cerr << "HashSet: table becoming full..." << '\n'; 00182 00183 int offset = m_count++; 00184 m_allocated[offset].key = key; 00185 00186 // Find an empty slot 00187 hash_t index = key; 00188 while (true) 00189 { 00190 index &= m_mask; 00191 if (m_used[index] == EMPTY_SLOT) 00192 { 00193 m_used[index] = offset; 00194 m_changelog.push_back(index); 00195 break; 00196 } 00197 index++; 00198 } 00199 } 00200 00201 inline void HashSet::clear() 00202 { 00203 for (unsigned i = 0; i < m_changelog.size(); ++i) 00204 m_used[m_changelog[i]] = EMPTY_SLOT; 00205 m_changelog.clear(); 00206 m_count = 0; 00207 } 00208 00209 inline void HashSet::CopyFrom(const HashSet& other) 00210 { 00211 assert(m_size == other.m_size); 00212 m_count = other.m_count; 00213 for (unsigned i = 0; i < m_size; ++i) 00214 { 00215 m_used[i] = other.m_used[i]; 00216 m_allocated[i] = other.m_allocated[i]; 00217 } 00218 m_changelog = other.m_changelog; 00219 } 00220 00221 inline void HashSet::operator=(const HashSet& other) 00222 { 00223 CopyFrom(other); 00224 } 00225 00226 //---------------------------------------------------------------------------- 00227 00228 _END_BENZENE_NAMESPACE_ 00229 00230 #endif // HASHMAP_H