Subversion Repositories Games.Chess Giants

Rev

Rev 33 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. #include <math.h>
  2. #include <time.h>
  3. #include "chess.h"
  4. #include "data.h"
  5. #if defined(UNIX)
  6. #  include <unistd.h>
  7. #endif
  8.  
  9. /* last modified 02/24/14 */
  10. /*
  11.  *******************************************************************************
  12.  *                                                                             *
  13.  *   LearnAdjust() us used to scale the learn value, which can be used to      *
  14.  *   limit the aggressiveness of the learning algorithm.  All we do here is    *
  15.  *   divide the learn value passed in by "learning / 10".                      *
  16.  *                                                                             *
  17.  *******************************************************************************
  18.  */
  19. int LearnAdjust(int value) {
  20.  
  21.   if (learning / 10 > 0)
  22.     return value / (learning / 10);
  23.   else
  24.     return 0;
  25. }
  26.  
  27. /* last modified 02/24/14 */
  28. /*
  29.  *******************************************************************************
  30.  *                                                                             *
  31.  *   LearnBook() is used to update the book database when a game ends for any  *
  32.  *   reason.  It uses the global "learn_value" variable and updates the book   *
  33.  *   based on the moves played and the value that was "learned".               *
  34.  *                                                                             *
  35.  *   The global learn_value has two possible sources.  If a game ends with a   *
  36.  *   real result (win, lose or draw) then the learrn_value will be set to a    *
  37.  *   number in the interval {-300, 300} depending on the result.  If there is  *
  38.  *   no result (the operator exits the program prior to reaching a conclusion  *
  39.  *   (quit, end, ^C) then we will use the values from the first few searches   *
  40.  *   after leaving book to compute a learrn_value (see LearnValue() comments   *
  41.  *   later in this file).                                                      *
  42.  *                                                                             *
  43.  *******************************************************************************
  44.  */
  45. void LearnBook() {
  46.   float book_learn[64], t_learn_value;
  47.   int nplies = 0, thisply = 0, i, j, v, cluster;
  48.   unsigned char buf32[4];
  49.  
  50. /*
  51.  ************************************************************
  52.  *                                                          *
  53.  *  If we have not been "out of book" for N moves, all we   *
  54.  *  we need to do is take the search evaluation for the     *
  55.  *  search just completed and tuck it away in the book      *
  56.  *  learning array (book_learn_eval[]) for use later.       *
  57.  *                                                          *
  58.  ************************************************************
  59.  */
  60.   if (!book_file)
  61.     return;
  62.   if (!learn)
  63.     return;
  64.   if (Abs(learn_value) != learning)
  65.     learn_value = LearnAdjust(learn_value);
  66.   learn = 0;
  67.   Print(32, "LearnBook() updating book database\n");
  68. /*
  69.  ************************************************************
  70.  *                                                          *
  71.  *  Now we build a vector of book learning results.  We     *
  72.  *  give every book move below the last point where there   *
  73.  *  were alternatives 100% of the learned score.  We give   *
  74.  *  the book move played at that point 100% of the learned  *
  75.  *  score as well.  Then we divide the learned score by the *
  76.  *  number of alternatives, and propagate this score back   *
  77.  *  until there was another alternative, where we do this   *
  78.  *  again and again until we reach the top of the book      *
  79.  *  tree.                                                   *
  80.  *                                                          *
  81.  ************************************************************
  82.  */
  83.   t_learn_value = ((float) learn_value) / 100.0f; // Pierre-Marie Baty -- added type cast
  84.   for (i = 0; i < 64; i++)
  85.     if (learn_nmoves[i] > 1)
  86.       nplies++;
  87.   nplies = Max(nplies, 1);
  88.   for (i = 0; i < 64; i++) {
  89.     if (learn_nmoves[i] > 1)
  90.       thisply++;
  91.     book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  92.   }
  93. /*
  94.  ************************************************************
  95.  *                                                          *
  96.  *  Now find the appropriate cluster, find the key we were  *
  97.  *  passed, and update the resulting learn value.           *
  98.  *                                                          *
  99.  ************************************************************
  100.  */
  101.   for (i = 0; i < 64 && learn_seekto[i]; i++) {
  102.     if (learn_seekto[i] > 0) {
  103.       fseek(book_file, learn_seekto[i], SEEK_SET);
  104.       v = fread(buf32, 4, 1, book_file);
  105.       if (v <= 0)
  106.         perror("Learn() fread error: ");
  107.       cluster = BookIn32(buf32);
  108.       if (cluster)
  109.         BookClusterIn(book_file, cluster, book_buffer);
  110.       for (j = 0; j < cluster; j++)
  111.         if (!(learn_key[i] ^ book_buffer[j].position))
  112.           break;
  113.       if (j >= cluster)
  114.         return;
  115.       if (fabs(book_buffer[j].learn) < 0.0001)
  116.         book_buffer[j].learn = book_learn[i];
  117.       else
  118.         book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0f; // Pierre-Marie Baty -- added type cast
  119.       fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
  120.       if (cluster)
  121.         BookClusterOut(book_file, cluster, book_buffer);
  122.       fflush(book_file);
  123.     }
  124.   }
  125. }
  126.  
  127. /* last modified 02/24/14 */
  128. /*
  129.  *******************************************************************************
  130.  *                                                                             *
  131.  *   LearnFunction() is called to compute the adjustment value added to the    *
  132.  *   learn counter in the opening book.  It takes three pieces of information  *
  133.  *   into consideration to do this:  the search value, the search depth that   *
  134.  *   produced this value, and the rating difference (Crafty-opponent) so that  *
  135.  *   + numbers means Crafty is expected to win, - numbers mean Crafty is ex-   *
  136.  *   pected to lose.                                                           *
  137.  *                                                                             *
  138.  *******************************************************************************
  139.  */
  140. int LearnFunction(int sv, int search_depth, int rating_difference,
  141.     int trusted_value) {
  142.   float multiplier;
  143.   static const float rating_mult_t[11] = { .00625f, .0125f, .025f, .05f, .075f, .1f,
  144.     0.15f, 0.2f, 0.25f, 0.3f, 0.35f // Pierre-Marie Baty -- added type casts
  145.   };
  146.   static const float rating_mult_ut[11] = { .25f, .2f, .15f, .1f, .05f, .025f, .012f,
  147.     .006f, .003f, .001f // Pierre-Marie Baty -- added type casts
  148.   };
  149.   int sd, rd, value;
  150.  
  151.   sd = Max(Min(search_depth - 10, 19), 0);
  152.   rd = Max(Min(rating_difference / 200, 5), -5) + 5;
  153.   if (trusted_value)
  154.     multiplier = rating_mult_t[rd] * sd;
  155.   else
  156.     multiplier = rating_mult_ut[rd] * sd;
  157.   sv = Max(Min(sv, 5 * learning), -5 * learning);
  158.   value = (int) (sv * multiplier);
  159.   return value;
  160. }
  161.  
  162. /* last modified 02/24/14 */
  163. /*
  164.  *******************************************************************************
  165.  *                                                                             *
  166.  *   LearnValue() is used to monitor the scores over the first N moves out of  *
  167.  *   book.  After these moves have been played, the evaluations are then used  *
  168.  *   to decide whether the last book move played was a reasonable choice or    *
  169.  *   not.  (N is set by the #define LEARN_INTERVAL definition.)                *
  170.  *                                                                             *
  171.  *   This procedure does not directly update the book.  Rather, it sets the    *
  172.  *   global learn_value variable to represent the goodness or badness of the   *
  173.  *   position where we left the opening book.  This will be used later to      *
  174.  *   update the book in the event the game ends without any sort of actual     *
  175.  *   result.  In a normal situation, we will base our learning on the result   *
  176.  *   of the game, win-lose-draw.  But it is possible that the game ends before *
  177.  *   the final result is known.  In this case, we will use the score from the  *
  178.  *   learn_value we compute here so that we learn _something_ from playing a   *
  179.  *   game fragment.                                                            *
  180.  *                                                                             *
  181.  *   There are three cases to be handled.  (1) If the evaluation is bad right  *
  182.  *   out of book, or it drops enough to be considered a bad line, then the     *
  183.  *   book move will have its "learn" value reduced to discourage playing this  *
  184.  *   move again.  (2) If the evaluation is even after N moves, then the learn  *
  185.  *   value will be increased, but by a relatively modest amount, so that a few *
  186.  *   even results will offset one bad result.  (3) If the evaluation is very   *
  187.  *   good after N moves, the learn value will be increased by a large amount   *
  188.  *   so that this move will be favored the next time the game is played.       *
  189.  *                                                                             *
  190.  *******************************************************************************
  191.  */
  192. void LearnValue(int search_value, int search_depth) {
  193.   int i, interval;
  194.   int best_eval = -999999, best_eval_p = 0;
  195.   int worst_eval = 999999, worst_eval_p = 0;
  196.   int best_after_worst_eval = -999999, worst_after_best_eval = 999999;
  197.  
  198. /*
  199.  ************************************************************
  200.  *                                                          *
  201.  *  If we have not been "out of book" for N moves, all we   *
  202.  *  need to do is take the search evaluation for the search *
  203.  *  just completed and tuck it away in the book learning    *
  204.  *  array (book_learn_eval[]) for use later.                *
  205.  *                                                          *
  206.  ************************************************************
  207.  */
  208.   if (!book_file)
  209.     return;
  210.   if (!learn || learn_value != 0)
  211.     return;
  212.   if (moves_out_of_book <= LEARN_INTERVAL) {
  213.     if (moves_out_of_book) {
  214.       book_learn_eval[moves_out_of_book - 1] = search_value;
  215.       book_learn_depth[moves_out_of_book - 1] = search_depth;
  216.     }
  217.   }
  218. /*
  219.  ************************************************************
  220.  *                                                          *
  221.  *  Check the evaluations we've seen so far.  If they are   *
  222.  *  within reason (+/- 1/3 of a pawn or so) we simply keep  *
  223.  *  playing and leave the book alone.  If the eval is much  *
  224.  *  better or worse, we need to update the learning data.   *
  225.  *                                                          *
  226.  ************************************************************
  227.  */
  228.   else if (moves_out_of_book == LEARN_INTERVAL + 1) {
  229.     if (moves_out_of_book < 1)
  230.       return;
  231.     interval = Min(LEARN_INTERVAL, moves_out_of_book);
  232.     if (interval < 2)
  233.       return;
  234.     for (i = 0; i < interval; i++) {
  235.       if (book_learn_eval[i] > best_eval) {
  236.         best_eval = book_learn_eval[i];
  237.         best_eval_p = i;
  238.       }
  239.       if (book_learn_eval[i] < worst_eval) {
  240.         worst_eval = book_learn_eval[i];
  241.         worst_eval_p = i;
  242.       }
  243.     }
  244.     if (best_eval_p < interval - 1) {
  245.       for (i = best_eval_p; i < interval; i++)
  246.         if (book_learn_eval[i] < worst_after_best_eval)
  247.           worst_after_best_eval = book_learn_eval[i];
  248.     } else
  249.       worst_after_best_eval = book_learn_eval[interval - 1];
  250.     if (worst_eval_p < interval - 1) {
  251.       for (i = worst_eval_p; i < interval; i++)
  252.         if (book_learn_eval[i] > best_after_worst_eval)
  253.           best_after_worst_eval = book_learn_eval[i];
  254.     } else
  255.       best_after_worst_eval = book_learn_eval[interval - 1];
  256. /*
  257.  ************************************************************
  258.  *                                                          *
  259.  *  We now have the best eval for the first N moves out of  *
  260.  *  book, the worst eval for the first N moves out of book, *
  261.  *  and the worst eval that follows the best eval.  This    *
  262.  *  will be used to recognize the following cases of        *
  263.  *  results that follow a book move:                        *
  264.  *                                                          *
  265.  ************************************************************
  266.  */
  267. /*
  268.  ************************************************************
  269.  *                                                          *
  270.  *  (1) The best score is very good, and it doesn't drop    *
  271.  *  after following the game further.  This case detects    *
  272.  *  those moves in book that are "good" and should be       *
  273.  *  played whenever possible, while avoiding the sound      *
  274.  *  gambits that leave us ahead in material for a short     *
  275.  *  while until the score starts to drop as the gambit      *
  276.  *  begins to show its effect.                              *
  277.  *                                                          *
  278.  ************************************************************
  279.  */
  280.     if (best_eval == best_after_worst_eval) {
  281.       learn_value = best_eval;
  282.       for (i = 0; i < interval; i++)
  283.         if (learn_value == book_learn_eval[i])
  284.           search_depth = Max(search_depth, book_learn_depth[i]);
  285.     }
  286. /*
  287.  ************************************************************
  288.  *                                                          *
  289.  *  (2) The worst score is bad, and doesn't improve any     *
  290.  *  after the worst point, indicating that the book move    *
  291.  *  chosen was "bad" and should be avoided in the future.   *
  292.  *                                                          *
  293.  ************************************************************
  294.  */
  295.     else if (worst_eval == worst_after_best_eval) {
  296.       learn_value = worst_eval;
  297.       for (i = 0; i < interval; i++)
  298.         if (learn_value == book_learn_eval[i])
  299.           search_depth = Max(search_depth, book_learn_depth[i]);
  300.     }
  301. /*
  302.  ************************************************************
  303.  *                                                          *
  304.  *  (3) Things seem even out of book and remain that way    *
  305.  *  for N moves.  We will just average the 10 scores and    *
  306.  *  use that as an approximation.                           *
  307.  *                                                          *
  308.  ************************************************************
  309.  */
  310.     else {
  311.       learn_value = 0;
  312.       search_depth = 0;
  313.       for (i = 0; i < interval; i++) {
  314.         learn_value += book_learn_eval[i];
  315.         search_depth += book_learn_depth[i];
  316.       }
  317.       learn_value /= interval;
  318.       search_depth /= interval;
  319.     }
  320.     learn_value =
  321.         LearnFunction(learn_value, search_depth,
  322.         crafty_rating - opponent_rating, learn_value < 0);
  323.   }
  324. }
  325.