Rev 154 | Details | Compare with Previous | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 33 | pmbaty | 1 | #include <math.h> |
| 2 | #include <time.h> |
||
| 3 | #include "chess.h" |
||
| 4 | #include "data.h" |
||
| 5 | #if defined(UNIX) |
||
| 6 | # include <unistd.h> |
||
| 7 | #endif |
||
| 8 | |||
| 9 | /* last modified 02/24/14 */ |
||
| 10 | /* |
||
| 11 | ******************************************************************************* |
||
| 12 | * * |
||
| 13 | * LearnAdjust() is used to scale the learn value, which can be used to * |
||
| 14 | * limit the aggressiveness of the learning algorithm. All we do here is * |
||
| 15 | * divide the learn value passed in by "learning / 10". * |
||
| 16 | * * |
||
| 17 | ******************************************************************************* |
||
| 18 | */ |
||
| 19 | int LearnAdjust(int value) { |
||
| 20 | |||
| 21 | if (learning / 10 > 0) |
||
| 22 | return value / (learning / 10); |
||
| 23 | else |
||
| 24 | return 0; |
||
| 25 | } |
||
| 26 | |||
| 27 | /* last modified 02/24/14 */ |
||
| 28 | /* |
||
| 29 | ******************************************************************************* |
||
| 30 | * * |
||
| 31 | * LearnBook() is used to update the book database when a game ends for any * |
||
| 32 | * reason. It uses the global "learn_value" variable and updates the book * |
||
| 33 | * based on the moves played and the value that was "learned". * |
||
| 34 | * * |
||
| 35 | * The global learn_value has two possible sources. If a game ends with a * |
||
| 36 | * real result (win, lose or draw) then the learn_value will be set to a * |
||
| 37 | * number in the interval {-300, 300} depending on the result. If there is * |
||
| 38 | * no result (the operator exits the program prior to reaching a conclusion * |
||
| 39 | * (quit, end, ^C) then we will use the values from the first few searches * |
||
| 40 | * after leaving book to compute a learn_value (see LearnValue() comments * |
||
| 41 | * later in this file). * |
||
| 42 | * * |
||
| 43 | ******************************************************************************* |
||
| 44 | */ |
||
/*
 * LearnBook() updates the opening-book database when a game ends.  It uses
 * the global learn_value (set either from the actual game result or by
 * LearnValue() for unfinished games) and propagates a learned score back
 * through the book moves that were played this game.
 */
void LearnBook() {
  float book_learn[64], t_learn_value;
  int nplies = 0, thisply = 0, i, j, v, cluster;
  unsigned char buf32[4];

/*
 ************************************************************
 *                                                          *
 *   Bail out if there is no book, or if nothing was        *
 *   flagged to be learned.  Otherwise scale learn_value    *
 *   (unless it is exactly +/-learning, i.e. a real game    *
 *   result) and clear the learn flag so we only update     *
 *   the book once per game.                                *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn)
    return;
  if (Abs(learn_value) != learning)
    learn_value = LearnAdjust(learn_value);
  learn = 0;
  Print(32, "LearnBook() updating book database\n");
/*
 ************************************************************
 *                                                          *
 *   Now we build a vector of book learning results.  We    *
 *   give every book move below the last point where there  *
 *   were alternatives 100% of the learned score.  We give  *
 *   the book move played at that point 100% of the learned *
 *   score as well.  Then we divide the learned score by    *
 *   the number of alternatives, and propagate this score   *
 *   back until there was another alternative, where we do  *
 *   this again and again until we reach the top of the     *
 *   book tree.                                             *
 *                                                          *
 ************************************************************
 */
  t_learn_value = (float) (((float) learn_value) / 100.0); // Pierre-Marie Baty -- added type cast
  /* nplies = number of plies where the book offered >1 alternative;
     clamped to at least 1 to avoid dividing by zero below. */
  for (i = 0; i < 64; i++)
    if (learn_nmoves[i] > 1)
      nplies++;
  nplies = Max(nplies, 1);
  for (i = 0; i < 64; i++) {
    if (learn_nmoves[i] > 1)
      thisply++;
    /* Each ply gets the fraction of the learn value proportional to how
       many "choice points" have been passed so far. */
    book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  }
/*
 ************************************************************
 *                                                          *
 *   Now find the appropriate cluster, find the key we were *
 *   passed, and update the resulting learn value.          *
 *                                                          *
 ************************************************************
 */
  for (i = 0; i < 64 && learn_seekto[i]; i++) {
    if (learn_seekto[i] > 0) {
      /* Seek to this ply's cluster, read the 4-byte cluster size, then
         pull the whole cluster into book_buffer. */
      fseek(book_file, learn_seekto[i], SEEK_SET);
      v = fread(buf32, 4, 1, book_file);
      if (v <= 0)
        perror("Learn() fread error: ");
      cluster = BookIn32(buf32);
      if (cluster)
        BookClusterIn(book_file, cluster, book_buffer);
      /* Locate the position played at this ply inside the cluster. */
      for (j = 0; j < cluster; j++)
        if (!(learn_key[i] ^ book_buffer[j].position))
          break;
      if (j >= cluster)
        return;                 /* position not found -- give up entirely */
      /* First-ever learn value is stored as-is; later ones are averaged
         with the existing value. */
      if (fabs(book_buffer[j].learn) < 0.0001)
        book_buffer[j].learn = book_learn[i];
      else
        book_buffer[j].learn = (float) ((book_buffer[j].learn + book_learn[i]) / 2.0); // Pierre-Marie Baty -- added type cast
      /* Write the modified cluster back (skip the 4-byte count). */
      fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
      if (cluster)
        BookClusterOut(book_file, cluster, book_buffer);
      fflush(book_file);
    }
  }
}
||
| 126 | |||
| 127 | /* last modified 02/24/14 */ |
||
| 128 | /* |
||
| 129 | ******************************************************************************* |
||
| 130 | * * |
||
| 131 | * LearnFunction() is called to compute the adjustment value added to the * |
||
| 132 | * learn counter in the opening book. It takes three pieces of information * |
||
| 133 | * into consideration to do this: the search value, the search depth that * |
||
| 134 | * produced this value, and the rating difference (Crafty-opponent) so that * |
||
| 135 | * + numbers means Crafty is expected to win, - numbers mean Crafty is ex- * |
||
| 136 | * pected to lose. * |
||
| 137 | * * |
||
| 138 | ******************************************************************************* |
||
| 139 | */ |
||
| 140 | int LearnFunction(int sv, int search_depth, int rating_difference, |
||
| 141 | int trusted_value) { |
||
| 108 | pmbaty | 142 | float multiplier; |
| 156 | pmbaty | 143 | static const float rating_mult_t[11] = { .00625f, .0125f, .025f, .05f, .075f, .1f, // Pierre-Marie Baty -- fixed constant truncations |
| 144 | 0.15f, 0.2f, 0.25f, 0.3f, 0.35f |
||
| 33 | pmbaty | 145 | }; |
| 156 | pmbaty | 146 | static const float rating_mult_ut[11] = { .25f, .2f, .15f, .1f, .05f, .025f, .012f, // Pierre-Marie Baty -- added type cast |
| 147 | .006f, .003f, .001f |
||
| 33 | pmbaty | 148 | }; |
| 149 | int sd, rd, value; |
||
| 150 | |||
| 151 | sd = Max(Min(search_depth - 10, 19), 0); |
||
| 152 | rd = Max(Min(rating_difference / 200, 5), -5) + 5; |
||
| 153 | if (trusted_value) |
||
| 154 | multiplier = rating_mult_t[rd] * sd; |
||
| 155 | else |
||
| 156 | multiplier = rating_mult_ut[rd] * sd; |
||
| 157 | sv = Max(Min(sv, 5 * learning), -5 * learning); |
||
| 158 | value = (int) (sv * multiplier); |
||
| 159 | return value; |
||
| 160 | } |
||
| 161 | |||
| 162 | /* last modified 02/24/14 */ |
||
| 163 | /* |
||
| 164 | ******************************************************************************* |
||
| 165 | * * |
||
| 166 | * LearnValue() is used to monitor the scores over the first N moves out of * |
||
| 167 | * book. After these moves have been played, the evaluations are then used * |
||
| 168 | * to decide whether the last book move played was a reasonable choice or * |
||
| 169 | * not. (N is set by the #define LEARN_INTERVAL definition.) * |
||
| 170 | * * |
||
| 171 | * This procedure does not directly update the book. Rather, it sets the * |
||
| 172 | * global learn_value variable to represent the goodness or badness of the * |
||
| 173 | * position where we left the opening book. This will be used later to * |
||
| 174 | * update the book in the event the game ends without any sort of actual * |
||
| 175 | * result. In a normal situation, we will base our learning on the result * |
||
| 176 | * of the game, win-lose-draw. But it is possible that the game ends before * |
||
| 177 | * the final result is known. In this case, we will use the score from the * |
||
| 178 | * learn_value we compute here so that we learn _something_ from playing a * |
||
| 179 | * game fragment. * |
||
| 180 | * * |
||
| 181 | * There are three cases to be handled. (1) If the evaluation is bad right * |
||
| 182 | * out of book, or it drops enough to be considered a bad line, then the * |
||
| 183 | * book move will have its "learn" value reduced to discourage playing this * |
||
| 184 | * move again. (2) If the evaluation is even after N moves, then the learn * |
||
| 185 | * value will be increased, but by a relatively modest amount, so that a few * |
||
| 186 | * even results will offset one bad result. (3) If the evaluation is very * |
||
| 187 | * good after N moves, the learn value will be increased by a large amount * |
||
| 188 | * so that this move will be favored the next time the game is played. * |
||
| 189 | * * |
||
| 190 | ******************************************************************************* |
||
| 191 | */ |
||
/*
 * LearnValue() monitors the evaluations of the first LEARN_INTERVAL
 * searches after leaving book, then sets the global learn_value to
 * represent how good or bad the book line turned out to be.  It does not
 * update the book itself -- LearnBook() consumes learn_value later if the
 * game ends without a real result.
 */
void LearnValue(int search_value, int search_depth) {
  int i, interval;
  int best_eval = -999999, best_eval_p = 0;
  int worst_eval = 999999, worst_eval_p = 0;
  int best_after_worst_eval = -999999, worst_after_best_eval = 999999;

/*
 ************************************************************
 *                                                          *
 *   If we have not been "out of book" for N moves, all we  *
 *   need to do is take the search evaluation for the       *
 *   search just completed and tuck it away in the book     *
 *   learning array (book_learn_eval[]) for use later.      *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  /* Skip if learning is off, or a learn value has already been set. */
  if (!learn || learn_value != 0)
    return;
  if (moves_out_of_book <= LEARN_INTERVAL) {
    if (moves_out_of_book) {
      book_learn_eval[moves_out_of_book - 1] = search_value;
      book_learn_depth[moves_out_of_book - 1] = search_depth;
    }
  }
/*
 ************************************************************
 *                                                          *
 *   Check the evaluations we've seen so far.  If they are  *
 *   within reason (+/- 1/3 of a pawn or so) we simply keep *
 *   playing and leave the book alone.  If the eval is much *
 *   better or worse, we need to update the learning data.  *
 *                                                          *
 ************************************************************
 */
  else if (moves_out_of_book == LEARN_INTERVAL + 1) {
    if (moves_out_of_book < 1)
      return;
    interval = Min(LEARN_INTERVAL, moves_out_of_book);
    if (interval < 2)
      return;
    /* Find the best and worst evals recorded, and where they occurred. */
    for (i = 0; i < interval; i++) {
      if (book_learn_eval[i] > best_eval) {
        best_eval = book_learn_eval[i];
        best_eval_p = i;
      }
      if (book_learn_eval[i] < worst_eval) {
        worst_eval = book_learn_eval[i];
        worst_eval_p = i;
      }
    }
    /* Worst eval seen at or after the best eval's position (or the final
       eval if the best came last). */
    if (best_eval_p < interval - 1) {
      for (i = best_eval_p; i < interval; i++)
        if (book_learn_eval[i] < worst_after_best_eval)
          worst_after_best_eval = book_learn_eval[i];
    } else
      worst_after_best_eval = book_learn_eval[interval - 1];
    /* Best eval seen at or after the worst eval's position (or the final
       eval if the worst came last). */
    if (worst_eval_p < interval - 1) {
      for (i = worst_eval_p; i < interval; i++)
        if (book_learn_eval[i] > best_after_worst_eval)
          best_after_worst_eval = book_learn_eval[i];
    } else
      best_after_worst_eval = book_learn_eval[interval - 1];
/*
 ************************************************************
 *                                                          *
 *   We now have the best eval for the first N moves out of *
 *   book, the worst eval for the first N moves out of      *
 *   book, and the worst eval that follows the best eval.   *
 *   This is used to recognize the following cases of       *
 *   results that follow a book move:                       *
 *                                                          *
 *   (1) The best score is very good, and it doesn't drop   *
 *   after following the game further.  This case detects   *
 *   those moves in book that are "good" and should be      *
 *   played whenever possible, while avoiding the sound     *
 *   gambits that leave us ahead in material for a short    *
 *   while until the score starts to drop as the gambit     *
 *   begins to show its effect.                             *
 *                                                          *
 ************************************************************
 */
    if (best_eval == best_after_worst_eval) {
      learn_value = best_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *   (2) The worst score is bad, and doesn't improve any    *
 *   after the worst point, indicating that the book move   *
 *   chosen was "bad" and should be avoided in the future.  *
 *                                                          *
 ************************************************************
 */
    else if (worst_eval == worst_after_best_eval) {
      learn_value = worst_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *   (3) Things seem even out of book and remain that way   *
 *   for N moves.  We will just average the scores and use  *
 *   that as an approximation.                              *
 *                                                          *
 ************************************************************
 */
    else {
      learn_value = 0;
      search_depth = 0;
      for (i = 0; i < interval; i++) {
        learn_value += book_learn_eval[i];
        search_depth += book_learn_depth[i];
      }
      learn_value /= interval;
      search_depth /= interval;
    }
    /* Convert the raw eval into a learn adjustment; a negative eval is
       treated as a "trusted" value (we trust bad news about our line). */
    learn_value =
        LearnFunction(learn_value, search_depth,
        crafty_rating - opponent_rating, learn_value < 0);
  }
}