Subversion Repositories Games.Chess Giants

Rev

Rev 154 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 154 Rev 169
Line 4... Line 4...
4
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
4
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
5
  Copyright (C) 2015-2016 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
5
  Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
6
 
6
 
7
  Stockfish is free software: you can redistribute it and/or modify
7
  Stockfish is free software: you can redistribute it and/or modify
8
  it under the terms of the GNU General Public License as published by
8
  it under the terms of the GNU General Public License as published by
9
  the Free Software Foundation, either version 3 of the License, or
9
  the Free Software Foundation, either version 3 of the License, or
10
  (at your option) any later version.
10
  (at your option) any later version.
Line 15... Line 15...
15
  GNU General Public License for more details.
15
  GNU General Public License for more details.
16
 
16
 
17
  You should have received a copy of the GNU General Public License
17
  You should have received a copy of the GNU General Public License
18
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
*/
19
*/
-
 
20
 
-
 
21
#ifdef _WIN32
-
 
22
#if _WIN32_WINNT < 0x0601
-
 
23
#undef  _WIN32_WINNT
-
 
24
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
-
 
25
#endif
-
 
26
#include <windows.h>
-
 
27
// The needed Windows API for processor groups could be missing from old Windows
-
 
28
// versions, so instead of calling them directly (forcing the linker to resolve
-
 
29
// the calls at link time), try to load them at runtime. To do this we need
-
 
30
// first to define the corresponding function pointers.
-
 
31
extern "C" {
-
 
32
typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
-
 
33
                      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
-
 
34
typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
-
 
35
typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
-
 
36
}
-
 
37
#endif
20
 
38
 
21
#include <fstream>
39
#include <fstream>
22
#include <iomanip>
40
#include <iomanip>
23
#include <iostream>
41
#include <iostream>
24
#include <sstream>
42
#include <sstream>
-
 
43
#include <vector>
25
 
44
 
26
#include "misc.h"
45
#include "misc.h"
27
#include "thread.h"
46
#include "thread.h"
28
 
47
 
29
using namespace std;
48
using namespace std;
30
 
49
 
31
namespace {
50
namespace {
32
 
51
 
33
/// Version number. If Version is left empty, then compile date in the format
52
/// Version number. If Version is left empty, then compile date in the format
34
/// DD-MM-YY is shown in engine_info.
53
/// DD-MM-YY is shown in engine_info.
35
const string Version = "8";
54
const string Version = "9";
36
 
55
 
37
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
56
/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
38
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
57
/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
39
/// can toggle the logging of std::cout and std::cin at runtime whilst preserving
58
/// can toggle the logging of std::cout and std::cin at runtime whilst preserving
40
/// usual I/O functionality, all without changing a single line of code!
59
/// usual I/O functionality, all without changing a single line of code!
41
/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81
60
/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81
42
 
61
 
43
struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout
62
struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout
44
 
63
 
45
  Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {}
64
  Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {}
46
 
65
 
47
  int sync() { return logBuf->pubsync(), buf->pubsync(); }
66
  int sync() override { return logBuf->pubsync(), buf->pubsync(); }
48
  int overflow(int c) { return log(buf->sputc((char)c), "<< "); }
67
  int overflow(int c) override { return log(buf->sputc((char)c), "<< "); }
49
  int underflow() { return buf->sgetc(); }
68
  int underflow() override { return buf->sgetc(); }
50
  int uflow() { return log(buf->sbumpc(), ">> "); }
69
  int uflow() override { return log(buf->sbumpc(), ">> "); }
51
 
70
 
52
  streambuf *buf, *logBuf;
71
  streambuf *buf, *logBuf;
53
 
72
 
54
  int log(int c, const char* prefix) {
73
  int log(int c, const char* prefix) {
55
 
74
 
56
    static int last = '\n'; // Single log file
75
    static int last = '\n'; // Single log file
57
 
76
 
58
    if (last == '\n')
77
    if (last == '\n')
59
        logBuf->sputn(prefix, 3);
78
        logBuf->sputn(prefix, 3);
60
 
79
 
61
    return last = logBuf->sputc((char)c);
80
    return last = logBuf->sputc((char)c);
62
  }
81
  }
63
};
82
};
64
 
83
 
65
class Logger {
84
class Logger {
66
 
85
 
67
  Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {}
86
  Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {}
68
 ~Logger() { start(""); }
87
 ~Logger() { start(""); }
69
 
88
 
70
  ofstream file;
89
  ofstream file;
71
  Tie in, out;
90
  Tie in, out;
Line 162... Line 181...
162
 
181
 
163
/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking
182
/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking
164
/// function that doesn't stall the CPU waiting for data to be loaded from memory,
183
/// function that doesn't stall the CPU waiting for data to be loaded from memory,
165
/// which can be quite slow.
184
/// which can be quite slow.
166
#ifdef NO_PREFETCH
185
#ifdef NO_PREFETCH
167
 
186
 
168
void prefetch(void*) {}
187
void prefetch(void*) {}
169
 
188
 
170
#else
189
#else
171
 
190
 
172
void prefetch(void* addr) {
191
void prefetch(void* addr) {
173
 
192
 
174
#  if defined(__INTEL_COMPILER)
193
#  if defined(__INTEL_COMPILER)
175
   // This hack prevents prefetches from being optimized away by
194
   // This hack prevents prefetches from being optimized away by
176
   // Intel compiler. Both MSVC and gcc seem not to be affected by this.
195
   // Intel compiler. Both MSVC and gcc seem not to be affected by this.
177
   __asm__ ("");
196
   __asm__ ("");
178
#  endif
197
#  endif
Line 183... Line 202...
183
  __builtin_prefetch(addr);
202
  __builtin_prefetch(addr);
184
#  endif
203
#  endif
185
}
204
}
186
 
205
 
187
#endif
206
#endif
-
 
207
 
-
 
208
void prefetch2(void* addr) {
-
 
209
 
-
 
210
  prefetch(addr);
-
 
211
  prefetch((uint8_t*)addr + 64);
-
 
212
}
-
 
213
 
-
 
214
namespace WinProcGroup {
-
 
215
 
-
 
216
#ifndef _WIN32
-
 
217
 
-
 
218
void bindThisThread(size_t) {}
-
 
219
 
-
 
220
#else
-
 
221
 
-
 
222
/// get_group() retrieves logical processor information using Windows specific
-
 
223
/// API and returns the best group id for the thread with index idx. Original
-
 
224
/// code from Texel by Peter Österlund.
-
 
225
 
-
 
226
int get_group(size_t idx) {
-
 
227
 
-
 
228
  int threads = 0;
-
 
229
  int nodes = 0;
-
 
230
  int cores = 0;
-
 
231
  DWORD returnLength = 0;
-
 
232
  DWORD byteOffset = 0;
-
 
233
 
-
 
234
  // Early exit if the needed API is not available at runtime
-
 
235
  HMODULE k32 = GetModuleHandle("Kernel32.dll");
-
 
236
  auto fun1 = (fun1_t)GetProcAddress(k32, "GetLogicalProcessorInformationEx");
-
 
237
  if (!fun1)
-
 
238
      return -1;
-
 
239
 
-
 
240
  // First call to get returnLength. We expect it to fail due to null buffer
-
 
241
  if (fun1(RelationAll, nullptr, &returnLength))
-
 
242
      return -1;
-
 
243
 
-
 
244
  // Once we know returnLength, allocate the buffer
-
 
245
  SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
-
 
246
  ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
-
 
247
 
-
 
248
  // Second call, now we expect to succeed
-
 
249
  if (!fun1(RelationAll, buffer, &returnLength))
-
 
250
  {
-
 
251
      free(buffer);
-
 
252
      return -1;
-
 
253
  }
-
 
254
 
-
 
255
  while (ptr->Size > 0 && byteOffset + ptr->Size <= returnLength)
-
 
256
  {
-
 
257
      if (ptr->Relationship == RelationNumaNode)
-
 
258
          nodes++;
-
 
259
 
-
 
260
      else if (ptr->Relationship == RelationProcessorCore)
-
 
261
      {
-
 
262
          cores++;
-
 
263
          threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
-
 
264
      }
-
 
265
 
-
 
266
      byteOffset += ptr->Size;
-
 
267
      ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
-
 
268
  }
-
 
269
 
-
 
270
  free(buffer);
-
 
271
 
-
 
272
  std::vector<int> groups;
-
 
273
 
-
 
274
  // Run as many threads as possible on the same node until core limit is
-
 
275
  // reached, then move on filling the next node.
-
 
276
  for (int n = 0; n < nodes; n++)
-
 
277
      for (int i = 0; i < cores / nodes; i++)
-
 
278
          groups.push_back(n);
-
 
279
 
-
 
280
  // In case a core has more than one logical processor (we assume 2) and we
-
 
281
  // still have threads to allocate, then spread them evenly across available
-
 
282
  // nodes.
-
 
283
  for (int t = 0; t < threads - cores; t++)
-
 
284
      groups.push_back(t % nodes);
-
 
285
 
-
 
286
  // If we still have more threads than the total number of logical processors
-
 
287
  // then return -1 and let the OS decide what to do.
-
 
288
  return idx < groups.size() ? groups[idx] : -1;
-
 
289
}
-
 
290
 
-
 
291
 
-
 
292
/// bindThisThread() sets the group affinity of the current thread
-
 
293
 
-
 
294
void bindThisThread(size_t idx) {
-
 
295
 
-
 
296
  // Use only local variables to be thread-safe
-
 
297
  int group = get_group(idx);
-
 
298
 
-
 
299
  if (group == -1)
-
 
300
      return;
-
 
301
 
-
 
302
  // Early exit if the needed APIs are not available at runtime
-
 
303
  HMODULE k32 = GetModuleHandle("Kernel32.dll");
-
 
304
  auto fun2 = (fun2_t)GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
-
 
305
  auto fun3 = (fun3_t)GetProcAddress(k32, "SetThreadGroupAffinity");
-
 
306
 
-
 
307
  if (!fun2 || !fun3)
-
 
308
      return;
-
 
309
 
-
 
310
  GROUP_AFFINITY affinity;
-
 
311
  if (fun2(group, &affinity))
-
 
312
      fun3(GetCurrentThread(), &affinity, nullptr);
-
 
313
}
-
 
314
 
-
 
315
#endif
-
 
316
 
-
 
317
} // namespace WinProcGroup