#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
# include <unistd.h>
#endif

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnAdjust() is used to scale the learn value, which can be used to     *
 *   limit the aggressiveness of the learning algorithm.  All we do here is   *
 *   divide the learn value passed in by "learning / 10".                     *
 *                                                                             *
 *******************************************************************************
 */
int LearnAdjust(int value) {

  if (learning / 10 > 0)
    return value / (learning / 10);
  else
    return 0;
}
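
/*
   Hypothetical worked example (the numbers are assumed, not from the
   original comments): if learning is, say, 100, the divisor is
   100 / 10 = 10, so a learn_value of -240 is scaled down to -24.  Any
   learning value of 9 or less makes the divisor zero, in which case
   LearnAdjust() returns 0 and the adjustment is suppressed entirely.
*/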

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnBook() is used to update the book database when a game ends for any *
 *   reason.  It uses the global "learn_value" variable and updates the book  *
 *   based on the moves played and the value that was "learned".              *
 *                                                                             *
 *   The global learn_value has two possible sources.  If a game ends with a  *
 *   real result (win, lose or draw) then the learn_value will be set to a    *
 *   number in the interval [-300, 300] depending on the result.  If there is *
 *   no result (the operator exits the program prior to reaching a conclusion *
 *   via quit, end or ^C) then we will use the values from the first few      *
 *   searches after leaving book to compute a learn_value (see LearnValue()   *
 *   comments later in this file).                                            *
 *                                                                             *
 *******************************************************************************
 */
void LearnBook() {
  float book_learn[64], t_learn_value;
  int nplies = 0, thisply = 0, i, j, v, cluster;
  unsigned char buf32[4];

/*
 ************************************************************
 *                                                          *
 *  If there is no book file, or no learning update is      *
 *  pending, there is nothing to do.  Otherwise we scale    *
 *  the pending learn value with LearnAdjust() and clear    *
 *  the "learn" flag so that this update is applied only    *
 *  once per game.                                          *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn)
    return;
  if (Abs(learn_value) != learning)
    learn_value = LearnAdjust(learn_value);
  learn = 0;
  Print(32, "LearnBook() updating book database\n");
/*
 ************************************************************
 *                                                          *
 *  Now we build a vector of book learning results.  We     *
 *  give every book move below the last point where there   *
 *  were alternatives 100% of the learned score.  We give   *
 *  the book move played at that point 100% of the learned  *
 *  score as well.  Then we divide the learned score by the *
 *  number of alternatives, and propagate this score back   *
 *  until there was another alternative, where we do this   *
 *  again and again until we reach the top of the book      *
 *  tree.                                                   *
 *                                                          *
 ************************************************************
 */
  t_learn_value = ((float) learn_value) / 100.0;
  for (i = 0; i < 64; i++)
    if (learn_nmoves[i] > 1)
      nplies++;
  nplies = Max(nplies, 1);
  for (i = 0; i < 64; i++) {
    if (learn_nmoves[i] > 1)
      thisply++;
    book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  }
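/*
   Hypothetical worked example (values assumed): suppose learn_value is
   300, so t_learn_value = 3.0, and only plies 4 and 10 offered book
   alternatives (learn_nmoves[] > 1), giving nplies = 2.  Plies 0-3 then
   receive 0% (book_learn[i] = 0.0), plies 4-9 receive 50%
   (book_learn[i] = 1.5), and plies 10 onward receive the full 100%
   (book_learn[i] = 3.0), so the deepest branching point absorbs the
   whole learned score.
*/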
/*
 ************************************************************
 *                                                          *
 *  Now find the appropriate cluster, find the key we were  *
 *  passed, and update the resulting learn value.           *
 *                                                          *
 ************************************************************
 */
  for (i = 0; i < 64 && learn_seekto[i]; i++) {
    if (learn_seekto[i] > 0) {
      fseek(book_file, learn_seekto[i], SEEK_SET);
      v = fread(buf32, 4, 1, book_file);
      if (v <= 0)
        perror("Learn() fread error: ");
      cluster = BookIn32(buf32);
      if (cluster)
        BookClusterIn(book_file, cluster, book_buffer);
      for (j = 0; j < cluster; j++)
        if (!(learn_key[i] ^ book_buffer[j].position))
          break;
      if (j >= cluster)
        return;
      if (fabs(book_buffer[j].learn) < 0.0001)
        book_buffer[j].learn = book_learn[i];
      else
        book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
      fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
      if (cluster)
        BookClusterOut(book_file, cluster, book_buffer);
      fflush(book_file);
    }
  }
}
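
/*
   A note on the I/O sequence above (inferred from the calls, not stated
   in the original comments): each learn_seekto[i] appears to be the file
   offset of a cluster whose first four bytes hold the entry count
   (decoded by BookIn32()), followed by the entries themselves, read and
   written in bulk by BookClusterIn() and BookClusterOut().  The XOR test
   (learn_key[i] ^ book_buffer[j].position) is just an equality check on
   the position hash.  When an entry already carries a learn value, the
   new value is averaged with it rather than replacing it, e.g. an
   existing 0.6 combined with a new 3.0 becomes (0.6 + 3.0) / 2 = 1.8.
*/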

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnFunction() is called to compute the adjustment value added to the   *
 *   learn counter in the opening book.  It takes three pieces of information *
 *   into consideration to do this:  the search value, the search depth that  *
 *   produced this value, and the rating difference (Crafty-opponent) so that *
 *   + numbers mean Crafty is expected to win, - numbers mean Crafty is       *
 *   expected to lose.                                                        *
 *                                                                             *
 *******************************************************************************
 */
int LearnFunction(int sv, int search_depth, int rating_difference,
    int trusted_value) {
  float multiplier;
  static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
    0.15, 0.2, 0.25, 0.3, 0.35
  };
  static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
    .006, .003, .001    /* only 10 initializers; element [10] defaults to 0 */
  };
  int sd, rd, value;

  sd = Max(Min(search_depth - 10, 19), 0);
  rd = Max(Min(rating_difference / 200, 5), -5) + 5;
  if (trusted_value)
    multiplier = rating_mult_t[rd] * sd;
  else
    multiplier = rating_mult_ut[rd] * sd;
  sv = Max(Min(sv, 5 * learning), -5 * learning);
  value = (int) (sv * multiplier);
  return value;
}
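
/*
   Hypothetical worked example (inputs assumed): with search_depth = 17,
   sd = Max(Min(17 - 10, 19), 0) = 7.  With rating_difference = +400,
   rd = Max(Min(400 / 200, 5), -5) + 5 = 7.  For a trusted value the
   multiplier is rating_mult_t[7] * 7 = 0.2 * 7 = 1.4, so a (clamped)
   search value of 100 yields an adjustment of (int) (100 * 1.4) = 140.
   An untrusted value at the same depth and rating edge would instead use
   rating_mult_ut[7] * 7 = 0.006 * 7 = 0.042, yielding only 4.
*/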

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnValue() is used to monitor the scores over the first N moves out of *
 *   book.  After these moves have been played, the evaluations are then used *
 *   to decide whether the last book move played was a reasonable choice or   *
 *   not.  (N is set by the #define LEARN_INTERVAL definition.)               *
 *                                                                             *
 *   This procedure does not directly update the book.  Rather, it sets the   *
 *   global learn_value variable to represent the goodness or badness of the  *
 *   position where we left the opening book.  This will be used later to     *
 *   update the book in the event the game ends without any sort of actual    *
 *   result.  In a normal situation, we will base our learning on the result  *
 *   of the game, win-lose-draw.  But it is possible that the game ends       *
 *   before the final result is known.  In this case, we will use the score   *
 *   from the learn_value we compute here so that we learn _something_ from   *
 *   playing a game fragment.                                                 *
 *                                                                             *
 *   There are three cases to be handled.  (1) If the evaluation is bad right *
 *   out of book, or it drops enough to be considered a bad line, then the    *
 *   book move will have its "learn" value reduced to discourage playing this *
 *   move again.  (2) If the evaluation is even after N moves, then the learn *
 *   value will be increased, but by a relatively modest amount, so that a    *
 *   few even results will offset one bad result.  (3) If the evaluation is   *
 *   very good after N moves, the learn value will be increased by a large    *
 *   amount so that this move will be favored the next time the game is       *
 *   played.                                                                  *
 *                                                                             *
 *******************************************************************************
 */
void LearnValue(int search_value, int search_depth) {
  int i, interval;
  int best_eval = -999999, best_eval_p = 0;
  int worst_eval = 999999, worst_eval_p = 0;
  int best_after_worst_eval = -999999, worst_after_best_eval = 999999;

/*
 ************************************************************
 *                                                          *
 *  If we have not been "out of book" for N moves, all we   *
 *  need to do is take the search evaluation for the search *
 *  just completed and tuck it away in the book learning    *
 *  array (book_learn_eval[]) for use later.                *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn || learn_value != 0)
    return;
  if (moves_out_of_book <= LEARN_INTERVAL) {
    if (moves_out_of_book) {
      book_learn_eval[moves_out_of_book - 1] = search_value;
      book_learn_depth[moves_out_of_book - 1] = search_depth;
    }
  }
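/*
   Assumed for illustration (the actual constant lives in a header): with
   LEARN_INTERVAL defined as 10, the branch above simply records the root
   score and depth of each of the first 10 searches after leaving book in
   book_learn_eval[0..9] / book_learn_depth[0..9]; the analysis below only
   fires on the 11th move out of book.
*/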
/*
 ************************************************************
 *                                                          *
 *  Check the evaluations we've seen so far.  If they are   *
 *  within reason (+/- 1/3 of a pawn or so) we simply keep  *
 *  playing and leave the book alone.  If the eval is much  *
 *  better or worse, we need to update the learning data.   *
 *                                                          *
 ************************************************************
 */
  else if (moves_out_of_book == LEARN_INTERVAL + 1) {
    if (moves_out_of_book < 1)
      return;
    interval = Min(LEARN_INTERVAL, moves_out_of_book);
    if (interval < 2)
      return;
    for (i = 0; i < interval; i++) {
      if (book_learn_eval[i] > best_eval) {
        best_eval = book_learn_eval[i];
        best_eval_p = i;
      }
      if (book_learn_eval[i] < worst_eval) {
        worst_eval = book_learn_eval[i];
        worst_eval_p = i;
      }
    }
    if (best_eval_p < interval - 1) {
      for (i = best_eval_p; i < interval; i++)
        if (book_learn_eval[i] < worst_after_best_eval)
          worst_after_best_eval = book_learn_eval[i];
    } else
      worst_after_best_eval = book_learn_eval[interval - 1];
    if (worst_eval_p < interval - 1) {
      for (i = worst_eval_p; i < interval; i++)
        if (book_learn_eval[i] > best_after_worst_eval)
          best_after_worst_eval = book_learn_eval[i];
    } else
      best_after_worst_eval = book_learn_eval[interval - 1];
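/*
   Hypothetical worked example (scores assumed): with interval = 4 and
   book_learn_eval[] = { 50, 120, 80, 30 }, the first scan finds
   best_eval = 120 at ply 1 and worst_eval = 30 at ply 3.  Scanning
   forward from the best point gives worst_after_best_eval =
   min(120, 80, 30) = 30; since the worst point is the final entry,
   best_after_worst_eval is simply book_learn_eval[3] = 30.  Here
   worst_eval == worst_after_best_eval, so case (2) below fires and the
   line is scored by its worst evaluation, 30.
*/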
/*
 ************************************************************
 *                                                          *
 *  We now have the best eval for the first N moves out of  *
 *  book, the worst eval for the first N moves out of book, *
 *  and the worst eval that follows the best eval.  This    *
 *  will be used to recognize the following cases of        *
 *  results that follow a book move:                        *
 *                                                          *
 ************************************************************
 */
/*
 ************************************************************
 *                                                          *
 *  (1) The best score is very good, and it doesn't drop    *
 *  after following the game further.  This case detects    *
 *  those moves in book that are "good" and should be       *
 *  played whenever possible, while avoiding the sound      *
 *  gambits that leave us ahead in material for a short     *
 *  while until the score starts to drop as the gambit      *
 *  begins to show its effect.                              *
 *                                                          *
 ************************************************************
 */
    if (best_eval == best_after_worst_eval) {
      learn_value = best_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (2) The worst score is bad, and doesn't improve any     *
 *  after the worst point, indicating that the book move    *
 *  chosen was "bad" and should be avoided in the future.   *
 *                                                          *
 ************************************************************
 */
    else if (worst_eval == worst_after_best_eval) {
      learn_value = worst_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (3) Things seem even out of book and remain that way    *
 *  for N moves.  We will just average the N scores and     *
 *  use that as an approximation.                           *
 *                                                          *
 ************************************************************
 */
    else {
      learn_value = 0;
      search_depth = 0;
      for (i = 0; i < interval; i++) {
        learn_value += book_learn_eval[i];
        search_depth += book_learn_depth[i];
      }
      learn_value /= interval;
      search_depth /= interval;
    }
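/*
   A note on the call below (an interpretation, not from the original
   comments): trusted_value is nonzero exactly when learn_value is
   negative, selecting the rating_mult_t table rather than rating_mult_ut
   in LearnFunction().  The apparent rationale is that the program's
   claim that it stands worse is more trustworthy than a claim that it
   stands better, so bad lines are generally learned more aggressively
   than good ones.
*/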
    learn_value =
        LearnFunction(learn_value, search_depth,
        crafty_rating - opponent_rating, learn_value < 0);
  }
}