00001 //---------------------------------------------------------------------------- 00002 /** @file HexUctPolicy.hpp 00003 */ 00004 //---------------------------------------------------------------------------- 00005 00006 #ifndef HEXUCTPOLICY_H 00007 #define HEXUCTPOLICY_H 00008 00009 #include "SgSystem.h" 00010 #include "SgRandom.h" 00011 00012 #include "HexUctSearch.hpp" 00013 00014 _BEGIN_BENZENE_NAMESPACE_ 00015 00016 //---------------------------------------------------------------------------- 00017 00018 /** Whether statistics on patterns should be collected or not. This 00019 information is pretty much useless and slows down the search. */ 00020 #define COLLECT_PATTERN_STATISTICS 0 00021 00022 //---------------------------------------------------------------------------- 00023 00024 /** Configuration options for policies. */ 00025 struct HexUctPolicyConfig 00026 { 00027 /** Generate pattern moves. */ 00028 bool patternHeuristic; 00029 00030 /** Play learned responses. */ 00031 bool responseHeuristic; 00032 00033 int pattern_update_radius; 00034 00035 /** Percent chance to check for pattern moves. */ 00036 int pattern_check_percent; 00037 00038 /** Threshold at which the reponse heuristic is used. */ 00039 std::size_t response_threshold; 00040 00041 HexUctPolicyConfig(); 00042 }; 00043 00044 /** Statistics over all threads. */ 00045 struct HexUctPolicyStatistics 00046 { 00047 std::size_t total_moves; 00048 00049 std::size_t random_moves; 00050 00051 std::size_t pattern_moves; 00052 00053 std::map<const Pattern*, size_t> pattern_counts[BLACK_AND_WHITE]; 00054 00055 std::map<const Pattern*, size_t> pattern_picked[BLACK_AND_WHITE]; 00056 00057 HexUctPolicyStatistics() 00058 : total_moves(0), 00059 random_moves(0), 00060 pattern_moves(0) 00061 { } 00062 }; 00063 00064 /** Policy information shared amoung all threads. */ 00065 class HexUctSharedPolicy 00066 { 00067 public: 00068 00069 /** Constructor. */ 00070 HexUctSharedPolicy(); 00071 00072 /** Destructor. */ 00073 ~HexUctSharedPolicy(); 00074 00075 //---------------------------------------------------------------------- 00076 00077 /** Loads patterns from shared directory. */ 00078 void LoadPatterns(); 00079 00080 /** Returns set of patterns used to guide playouts. */ 00081 const HashedPatternSet& PlayPatterns(HexColor color) const; 00082 00083 //---------------------------------------------------------------------- 00084 00085 /** Returns reference to configuration settings controlling all 00086 policies. */ 00087 HexUctPolicyConfig& Config(); 00088 00089 /** Returns constant reference to configuration settings 00090 controlling all policies. */ 00091 const HexUctPolicyConfig& Config() const; 00092 00093 private: 00094 00095 HexUctPolicyConfig m_config; 00096 00097 std::vector<Pattern> m_patterns[BLACK_AND_WHITE]; 00098 00099 HashedPatternSet m_hash_patterns[BLACK_AND_WHITE]; 00100 00101 //---------------------------------------------------------------------- 00102 00103 void LoadPlayPatterns(const std::string& filename); 00104 }; 00105 00106 inline HexUctPolicyConfig& HexUctSharedPolicy::Config() 00107 { 00108 return m_config; 00109 } 00110 00111 inline const HexUctPolicyConfig& HexUctSharedPolicy::Config() const 00112 { 00113 return m_config; 00114 } 00115 00116 inline const HashedPatternSet& 00117 HexUctSharedPolicy::PlayPatterns(HexColor color) const 00118 { 00119 return m_hash_patterns[color]; 00120 } 00121 00122 //---------------------------------------------------------------------------- 00123 00124 /** Generates moves during the random playout phase of UCT search. 00125 Uses local configuration and pattern data in HexUctSharedPolicy. 00126 Everything in this class must be thread-safe. 00127 */ 00128 class HexUctPolicy : public HexUctSearchPolicy 00129 { 00130 public: 00131 00132 /** Constructor. Creates a policy. */ 00133 HexUctPolicy(const HexUctSharedPolicy* shared); 00134 00135 /* Destructor. */ 00136 ~HexUctPolicy(); 00137 00138 /** Implementation of SgUctSearch::GenerateRandomMove(). 00139 - Pattern move (if enabled) 00140 - Purely random 00141 */ 00142 HexPoint GenerateMove(PatternState& pastate, HexColor color, 00143 HexPoint lastMove); 00144 00145 /** Initializes the moves to generate from the empty cells on the 00146 given board. Should be called with the boardstate before any 00147 calls to GenerateMove(). */ 00148 void InitializeForRollout(const StoneBoard& brd); 00149 00150 void InitializeForSearch(); 00151 00152 void AddResponse(HexColor toPlay, HexPoint lastMove, HexPoint response); 00153 00154 #if COLLECT_PATTERN_STATISTICS 00155 /** Returns a string containing formatted statistics 00156 information. */ 00157 std::string DumpStatistics(); 00158 00159 /** Returns the collected statistics. */ 00160 const HexUctPolicyStatistics& Statistics() const; 00161 #endif 00162 00163 private: 00164 00165 static const int MAX_VOTES = 1024; 00166 00167 const HexUctSharedPolicy* m_shared; 00168 00169 std::vector<HexPoint> m_moves; 00170 00171 std::vector<HexPoint> m_response[BLACK_AND_WHITE][BITSETSIZE]; 00172 00173 /** Generator for this policy. */ 00174 SgRandom m_random; 00175 00176 #if COLLECT_PATTERN_STATISTICS 00177 HexUctPolicyStatistics m_statistics; 00178 #endif 00179 00180 //---------------------------------------------------------------------- 00181 00182 HexPoint PickRandomPatternMove(const PatternState& pastate, 00183 const HashedPatternSet& patterns, 00184 HexColor toPlay, 00185 HexPoint lastMove); 00186 00187 HexPoint GeneratePatternMove(const PatternState& pastate, HexColor color, 00188 HexPoint lastMove); 00189 00190 HexPoint GenerateResponseMove(HexColor toPlay, HexPoint lastMove, 00191 const StoneBoard& brd); 00192 00193 HexPoint GenerateRandomMove(const StoneBoard& brd); 00194 }; 00195 00196 inline void HexUctPolicy::AddResponse(HexColor toPlay, HexPoint lastMove, 00197 HexPoint response) 00198 { 00199 if (m_shared->Config().responseHeuristic) 00200 m_response[toPlay][lastMove].push_back(response); 00201 } 00202 00203 #if COLLECT_PATTERN_STATISTICS 00204 inline const HexUctPolicyStatistics& HexUctPolicy::Statistics() const 00205 { 00206 return m_statistics; 00207 } 00208 #endif 00209 00210 //---------------------------------------------------------------------------- 00211 00212 _END_BENZENE_NAMESPACE_ 00213 00214 #endif // HEXUCTPOLICY_H