Subversion Repositories Games.Chess Giants (Rev 154)

#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
#  include <unistd.h>
#endif

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnAdjust() is used to scale the learn value, which can be used to      *
 *   limit the aggressiveness of the learning algorithm.  All we do here is    *
 *   divide the learn value passed in by "learning / 10".                      *
 *                                                                             *
 *******************************************************************************
 */
int LearnAdjust(int value) {

  if (learning / 10 > 0)
    return value / (learning / 10);
  else
    return 0;
}
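
/*
 For example, with the "learning" option set to 100 the divisor is 10, so
 LearnAdjust(300) returns 30; with "learning" below 10 the divisor becomes
 zero and the adjusted value collapses to 0.
 */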

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnBook() is used to update the book database when a game ends for any  *
 *   reason.  It uses the global "learn_value" variable and updates the book   *
 *   based on the moves played and the value that was "learned".               *
 *                                                                             *
 *   The global learn_value has two possible sources.  If a game ends with a   *
 *   real result (win, lose or draw) then the learn_value will be set to a     *
 *   number in the interval {-300, 300} depending on the result.  If there is  *
 *   no result (the operator exits the program prior to reaching a conclusion  *
 *   (quit, end, ^C)), then we will use the values from the first few searches *
 *   after leaving book to compute a learn_value (see LearnValue() comments    *
 *   later in this file).                                                      *
 *                                                                             *
 *******************************************************************************
 */
void LearnBook() {
  float book_learn[64], t_learn_value;
  int nplies = 0, thisply = 0, i, j, v, cluster;
  unsigned char buf32[4];

/*
 ************************************************************
 *                                                          *
 *  If we have not been "out of book" for N moves, all we   *
 *  need to do is take the search evaluation for the        *
 *  search just completed and tuck it away in the book      *
 *  learning array (book_learn_eval[]) for use later.       *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn)
    return;
  if (Abs(learn_value) != learning)
    learn_value = LearnAdjust(learn_value);
  learn = 0;
  Print(32, "LearnBook() updating book database\n");
/*
 ************************************************************
 *                                                          *
 *  Now we build a vector of book learning results.  We     *
 *  give every book move below the last point where there   *
 *  were alternatives 100% of the learned score.  We give   *
 *  the book move played at that point 100% of the learned  *
 *  score as well.  Then we divide the learned score by the *
 *  number of alternatives, and propagate this score back   *
 *  until there was another alternative, where we do this   *
 *  again and again until we reach the top of the book      *
 *  tree.                                                   *
 *                                                          *
 ************************************************************
 */
  t_learn_value = (float) (((float) learn_value) / 100.0); // Pierre-Marie Baty -- added type cast
  for (i = 0; i < 64; i++)
    if (learn_nmoves[i] > 1)
      nplies++;
  nplies = Max(nplies, 1);
  for (i = 0; i < 64; i++) {
    if (learn_nmoves[i] > 1)
      thisply++;
    book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  }
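/*
 For example, if the learned value is -30 (t_learn_value = -0.30) and the
 book line offered alternative moves at plies 3 and 7 (nplies = 2), then
 book_learn[0..2] = 0.0, book_learn[3..6] = -0.15 and book_learn[7..63] =
 -0.30, so moves nearest the end of the book line absorb the full learned
 score while earlier moves absorb progressively less.
 */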
/*
 ************************************************************
 *                                                          *
 *  Now find the appropriate cluster, find the key we were  *
 *  passed, and update the resulting learn value.           *
 *                                                          *
 ************************************************************
 */
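/*
 Each learn_seekto[i] is a byte offset into the book file.  The loop below
 reads the 4-byte cluster length stored there, pulls the whole cluster
 into book_buffer[], locates the entry whose position signature matches
 learn_key[i], folds the new learn value into that entry (averaging with
 any previous value), and writes the cluster back in place.
 */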
  for (i = 0; i < 64 && learn_seekto[i]; i++) {
    if (learn_seekto[i] > 0) {
      fseek(book_file, learn_seekto[i], SEEK_SET);
      v = fread(buf32, 4, 1, book_file);
      if (v <= 0)
        perror("Learn() fread error: ");
      cluster = BookIn32(buf32);
      if (cluster)
        BookClusterIn(book_file, cluster, book_buffer);
      for (j = 0; j < cluster; j++)
        if (!(learn_key[i] ^ book_buffer[j].position))
          break;
      if (j >= cluster)
        return;
      if (fabs(book_buffer[j].learn) < 0.0001)
        book_buffer[j].learn = book_learn[i];
      else
        book_buffer[j].learn = (float) ((book_buffer[j].learn + book_learn[i]) / 2.0); // Pierre-Marie Baty -- added type cast
      fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
      if (cluster)
        BookClusterOut(book_file, cluster, book_buffer);
      fflush(book_file);
    }
  }
}

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnFunction() is called to compute the adjustment value added to the    *
 *   learn counter in the opening book.  It takes three pieces of information  *
 *   into consideration to do this:  the search value, the search depth that   *
 *   produced this value, and the rating difference (Crafty-opponent) so that  *
 *   + numbers mean Crafty is expected to win, - numbers mean Crafty is ex-    *
 *   pected to lose.                                                           *
 *                                                                             *
 *******************************************************************************
 */
int LearnFunction(int sv, int search_depth, int rating_difference,
    int trusted_value) {
  float multiplier;
  static const float rating_mult_t[11] = { .00625f, .0125f, .025f, .05f, .075f, .1f, // Pierre-Marie Baty -- fixed constant truncations
    0.15f, 0.2f, 0.25f, 0.3f, 0.35f
  };
  static const float rating_mult_ut[11] = { .25f, .2f, .15f, .1f, .05f, .025f, .012f, // Pierre-Marie Baty -- fixed constant truncations
    .006f, .003f, .001f
  };
  int sd, rd, value;

  sd = Max(Min(search_depth - 10, 19), 0);
  rd = Max(Min(rating_difference / 200, 5), -5) + 5;
  if (trusted_value)
    multiplier = rating_mult_t[rd] * sd;
  else
    multiplier = rating_mult_ut[rd] * sd;
  sv = Max(Min(sv, 5 * learning), -5 * learning);
  value = (int) (sv * multiplier);
  return value;
}
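
/*
 For example, a trusted search value of sv = 100 (one pawn) found at depth
 20 against an equally rated opponent gives sd = 10, rd = 5 and multiplier
 = rating_mult_t[5] * 10 = 1.0, so the adjustment returned is 100
 (assuming 5 * learning is at least 100 so no clamping occurs).  The same
 value from an untrusted source uses rating_mult_ut[5] = 0.025 and returns
 only 25.
 */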

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnValue() is used to monitor the scores over the first N moves out of  *
 *   book.  After these moves have been played, the evaluations are then used  *
 *   to decide whether the last book move played was a reasonable choice or    *
 *   not.  (N is set by the #define LEARN_INTERVAL definition.)                *
 *                                                                             *
 *   This procedure does not directly update the book.  Rather, it sets the    *
 *   global learn_value variable to represent the goodness or badness of the   *
 *   position where we left the opening book.  This will be used later to      *
 *   update the book in the event the game ends without any sort of actual     *
 *   result.  In a normal situation, we will base our learning on the result   *
 *   of the game, win-lose-draw.  But it is possible that the game ends before *
 *   the final result is known.  In this case, we will use the score from the  *
 *   learn_value we compute here so that we learn _something_ from playing a   *
 *   game fragment.                                                            *
 *                                                                             *
 *   There are three cases to be handled.  (1) If the evaluation is bad right  *
 *   out of book, or it drops enough to be considered a bad line, then the     *
 *   book move will have its "learn" value reduced to discourage playing this  *
 *   move again.  (2) If the evaluation is even after N moves, then the learn  *
 *   value will be increased, but by a relatively modest amount, so that a few *
 *   even results will offset one bad result.  (3) If the evaluation is very   *
 *   good after N moves, the learn value will be increased by a large amount   *
 *   so that this move will be favored the next time the game is played.       *
 *                                                                             *
 *******************************************************************************
 */
void LearnValue(int search_value, int search_depth) {
  int i, interval;
  int best_eval = -999999, best_eval_p = 0;
  int worst_eval = 999999, worst_eval_p = 0;
  int best_after_worst_eval = -999999, worst_after_best_eval = 999999;

/*
 ************************************************************
 *                                                          *
 *  If we have not been "out of book" for N moves, all we   *
 *  need to do is take the search evaluation for the search *
 *  just completed and tuck it away in the book learning    *
 *  array (book_learn_eval[]) for use later.                *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn || learn_value != 0)
    return;
  if (moves_out_of_book <= LEARN_INTERVAL) {
    if (moves_out_of_book) {
      book_learn_eval[moves_out_of_book - 1] = search_value;
      book_learn_depth[moves_out_of_book - 1] = search_depth;
    }
  }
/*
 ************************************************************
 *                                                          *
 *  Check the evaluations we've seen so far.  If they are   *
 *  within reason (+/- 1/3 of a pawn or so) we simply keep  *
 *  playing and leave the book alone.  If the eval is much  *
 *  better or worse, we need to update the learning data.   *
 *                                                          *
 ************************************************************
 */
  else if (moves_out_of_book == LEARN_INTERVAL + 1) {
    if (moves_out_of_book < 1)
      return;
    interval = Min(LEARN_INTERVAL, moves_out_of_book);
    if (interval < 2)
      return;
    for (i = 0; i < interval; i++) {
      if (book_learn_eval[i] > best_eval) {
        best_eval = book_learn_eval[i];
        best_eval_p = i;
      }
      if (book_learn_eval[i] < worst_eval) {
        worst_eval = book_learn_eval[i];
        worst_eval_p = i;
      }
    }
    if (best_eval_p < interval - 1) {
      for (i = best_eval_p; i < interval; i++)
        if (book_learn_eval[i] < worst_after_best_eval)
          worst_after_best_eval = book_learn_eval[i];
    } else
      worst_after_best_eval = book_learn_eval[interval - 1];
    if (worst_eval_p < interval - 1) {
      for (i = worst_eval_p; i < interval; i++)
        if (book_learn_eval[i] > best_after_worst_eval)
          best_after_worst_eval = book_learn_eval[i];
    } else
      best_after_worst_eval = book_learn_eval[interval - 1];
/*
 ************************************************************
 *                                                          *
 *  We now have the best eval for the first N moves out of  *
 *  book, the worst eval for the first N moves out of book, *
 *  and the worst eval that follows the best eval.  This    *
 *  will be used to recognize the following cases of        *
 *  results that follow a book move:                        *
 *                                                          *
 ************************************************************
 */
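/*
 For example, the eval sequence {20, 35, 30, 32, 40} gives best_eval = 40
 and best_after_worst_eval = 40, so case (1) below applies and the line is
 credited.  The sequence {30, 10, -20, -40, -60} gives worst_eval = -60
 and worst_after_best_eval = -60, so case (2) applies and the line is
 penalized.
 */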
/*
 ************************************************************
 *                                                          *
 *  (1) The best score is very good, and it doesn't drop    *
 *  after following the game further.  This case detects    *
 *  those moves in book that are "good" and should be       *
 *  played whenever possible, while avoiding the sound      *
 *  gambits that leave us ahead in material for a short     *
 *  while until the score starts to drop as the gambit      *
 *  begins to show its effect.                              *
 *                                                          *
 ************************************************************
 */
    if (best_eval == best_after_worst_eval) {
      learn_value = best_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (2) The worst score is bad, and doesn't improve any     *
 *  after the worst point, indicating that the book move    *
 *  chosen was "bad" and should be avoided in the future.   *
 *                                                          *
 ************************************************************
 */
    else if (worst_eval == worst_after_best_eval) {
      learn_value = worst_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (3) Things seem even out of book and remain that way    *
 *  for N moves.  We will just average the 10 scores and    *
 *  use that as an approximation.                           *
 *                                                          *
 ************************************************************
 */
    else {
      learn_value = 0;
      search_depth = 0;
      for (i = 0; i < interval; i++) {
        learn_value += book_learn_eval[i];
        search_depth += book_learn_depth[i];
      }
      learn_value /= interval;
      search_depth /= interval;
    }
    learn_value =
        LearnFunction(learn_value, search_depth,
        crafty_rating - opponent_rating, learn_value < 0);
  }
}