Rev 154 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 154 | Rev 169 | ||
|---|---|---|---|
| Line 4... | Line 4... | ||
| 4 | Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad |
4 | Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad |
| 5 | Copyright (C) 2015- |
5 | Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad |
| 6 | 6 | ||
| 7 | Stockfish is free software: you can redistribute it and/or modify |
7 | Stockfish is free software: you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation, either version 3 of the License, or |
9 | the Free Software Foundation, either version 3 of the License, or |
| 10 | (at your option) any later version. |
10 | (at your option) any later version. |
| Line 15... | Line 15... | ||
| 15 | GNU General Public License for more details. |
15 | GNU General Public License for more details. |
| 16 | 16 | ||
| 17 | You should have received a copy of the GNU General Public License |
17 | You should have received a copy of the GNU General Public License |
| 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | */ |
19 | */ |
| - | 20 | ||
| - | 21 | #ifdef _WIN32 |
|
| - | 22 | #if _WIN32_WINNT < 0x0601 |
|
| - | 23 | #undef _WIN32_WINNT |
|
| - | 24 | #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes |
|
| - | 25 | #endif |
|
| - | 26 | #include <windows.h> |
|
| - | 27 | // The needed Windows API for processor groups could be missing from old Windows |
|
| - | 28 | // versions, so instead of calling them directly (forcing the linker to resolve |
|
| - | 29 | // the calls at compile time), try to load them at runtime. To do this we need |
|
| - | 30 | // first to define the corresponding function pointers. |
|
| - | 31 | extern "C" { |
|
| - | 32 | typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, |
|
| - | 33 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); |
|
| - | 34 | typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); |
|
| - | 35 | typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); |
|
| - | 36 | } |
|
| - | 37 | #endif |
|
| 20 | 38 | ||
| 21 | #include <fstream> |
39 | #include <fstream> |
| 22 | #include <iomanip> |
40 | #include <iomanip> |
| 23 | #include <iostream> |
41 | #include <iostream> |
| 24 | #include <sstream> |
42 | #include <sstream> |
| - | 43 | #include <vector> |
|
| 25 | 44 | ||
| 26 | #include "misc.h" |
45 | #include "misc.h" |
| 27 | #include "thread.h" |
46 | #include "thread.h" |
| 28 | 47 | ||
| 29 | using namespace std; |
48 | using namespace std; |
| 30 | 49 | ||
| 31 | namespace { |
50 | namespace { |
| 32 | 51 | ||
| 33 | /// Version number. If Version is left empty, then compile date in the format |
52 | /// Version number. If Version is left empty, then compile date in the format |
| 34 | /// DD-MM-YY and show in engine_info. |
53 | /// DD-MM-YY and show in engine_info. |
| 35 | const string Version = " |
54 | const string Version = "9"; |
| 36 | 55 | ||
| 37 | /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and |
56 | /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and |
| 38 | /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We |
57 | /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We |
| 39 | can toggle the logging of std::cout and std::cin at runtime whilst preserving |
58 | /// can toggle the logging of std::cout and std::cin at runtime whilst preserving |
| 40 | /// usual I/O functionality, all without changing a single line of code! |
59 | /// usual I/O functionality, all without changing a single line of code! |
| Line 42... | Line 61... | ||
| 42 | 61 | ||
| 43 | struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout |
62 | struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout |
| 44 | 63 | ||
| 45 | Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {} |
64 | Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {} |
| 46 | 65 | ||
| 47 | int sync() { return logBuf->pubsync(), buf->pubsync(); } |
66 | int sync() override { return logBuf->pubsync(), buf->pubsync(); } |
| 48 | int overflow(int c) { return log(buf->sputc((char)c), "<< "); } |
67 | int overflow(int c) override { return log(buf->sputc((char)c), "<< "); } |
| 49 | int underflow() { return buf->sgetc(); } |
68 | int underflow() override { return buf->sgetc(); } |
| 50 | int uflow() { return log(buf->sbumpc(), ">> "); } |
69 | int uflow() override { return log(buf->sbumpc(), ">> "); } |
| 51 | 70 | ||
| 52 | streambuf *buf, *logBuf; |
71 | streambuf *buf, *logBuf; |
| 53 | 72 | ||
| 54 | int log(int c, const char* prefix) { |
73 | int log(int c, const char* prefix) { |
| 55 | 74 | ||
| Line 183... | Line 202... | ||
| 183 | __builtin_prefetch(addr); |
202 | __builtin_prefetch(addr); |
| 184 | # endif |
203 | # endif |
| 185 | } |
204 | } |
| 186 | 205 | ||
| 187 | #endif |
206 | #endif |
| - | 207 | ||
| - | 208 | void prefetch2(void* addr) { |
|
| - | 209 | ||
| - | 210 | prefetch(addr); |
|
| - | 211 | prefetch((uint8_t*)addr + 64); |
|
| - | 212 | } |
|
| - | 213 | ||
| - | 214 | namespace WinProcGroup { |
|
| - | 215 | ||
| - | 216 | #ifndef _WIN32 |
|
| - | 217 | ||
| - | 218 | void bindThisThread(size_t) {} |
|
| - | 219 | ||
| - | 220 | #else |
|
| - | 221 | ||
| - | 222 | /// get_group() retrieves logical processor information using Windows specific |
|
| - | 223 | /// API and returns the best group id for the thread with index idx. Original |
|
| - | 224 | /// code from Texel by Peter Österlund. |
|
| - | 225 | ||
| - | 226 | int get_group(size_t idx) { |
|
| - | 227 | ||
| - | 228 | int threads = 0; |
|
| - | 229 | int nodes = 0; |
|
| - | 230 | int cores = 0; |
|
| - | 231 | DWORD returnLength = 0; |
|
| - | 232 | DWORD byteOffset = 0; |
|
| - | 233 | ||
| - | 234 | // Early exit if the needed API is not available at runtime |
|
| - | 235 | HMODULE k32 = GetModuleHandle("Kernel32.dll"); |
|
| - | 236 | auto fun1 = (fun1_t)GetProcAddress(k32, "GetLogicalProcessorInformationEx"); |
|
| - | 237 | if (!fun1) |
|
| - | 238 | return -1; |
|
| - | 239 | ||
| - | 240 | // First call to get returnLength. We expect it to fail due to null buffer |
|
| - | 241 | if (fun1(RelationAll, nullptr, &returnLength)) |
|
| - | 242 | return -1; |
|
| - | 243 | ||
| - | 244 | // Once we know returnLength, allocate the buffer |
|
| - | 245 | SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; |
|
| - | 246 | ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); |
|
| - | 247 | ||
| - | 248 | // Second call, now we expect to succeed |
|
| - | 249 | if (!fun1(RelationAll, buffer, &returnLength)) |
|
| - | 250 | { |
|
| - | 251 | free(buffer); |
|
| - | 252 | return -1; |
|
| - | 253 | } |
|
| - | 254 | ||
| - | 255 | while (ptr->Size > 0 && byteOffset + ptr->Size <= returnLength) |
|
| - | 256 | { |
|
| - | 257 | if (ptr->Relationship == RelationNumaNode) |
|
| - | 258 | nodes++; |
|
| - | 259 | ||
| - | 260 | else if (ptr->Relationship == RelationProcessorCore) |
|
| - | 261 | { |
|
| - | 262 | cores++; |
|
| - | 263 | threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; |
|
| - | 264 | } |
|
| - | 265 | ||
| - | 266 | byteOffset += ptr->Size; |
|
| - | 267 | ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); |
|
| - | 268 | } |
|
| - | 269 | ||
| - | 270 | free(buffer); |
|
| - | 271 | ||
| - | 272 | std::vector<int> groups; |
|
| - | 273 | ||
| - | 274 | // Run as many threads as possible on the same node until core limit is |
|
| - | 275 | // reached, then move on filling the next node. |
|
| - | 276 | for (int n = 0; n < nodes; n++) |
|
| - | 277 | for (int i = 0; i < cores / nodes; i++) |
|
| - | 278 | groups.push_back(n); |
|
| - | 279 | ||
| - | 280 | // In case a core has more than one logical processor (we assume 2) and we |
|
| - | 281 | // have still threads to allocate, then spread them evenly across available |
|
| - | 282 | // nodes. |
|
| - | 283 | for (int t = 0; t < threads - cores; t++) |
|
| - | 284 | groups.push_back(t % nodes); |
|
| - | 285 | ||
| - | 286 | // If we still have more threads than the total number of logical processors |
|
| - | 287 | // then return -1 and let the OS decide what to do. |
|
| - | 288 | return idx < groups.size() ? groups[idx] : -1; |
|
| - | 289 | } |
|
| - | 290 | ||
| - | 291 | ||
| - | 292 | /// bindThisThread() sets the group affinity of the current thread |
|
| - | 293 | ||
| - | 294 | void bindThisThread(size_t idx) { |
|
| - | 295 | ||
| - | 296 | // Use only local variables to be thread-safe |
|
| - | 297 | int group = get_group(idx); |
|
| - | 298 | ||
| - | 299 | if (group == -1) |
|
| - | 300 | return; |
|
| - | 301 | ||
| - | 302 | // Early exit if the needed APIs are not available at runtime |
|
| - | 303 | HMODULE k32 = GetModuleHandle("Kernel32.dll"); |
|
| - | 304 | auto fun2 = (fun2_t)GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); |
|
| - | 305 | auto fun3 = (fun3_t)GetProcAddress(k32, "SetThreadGroupAffinity"); |
|
| - | 306 | ||
| - | 307 | if (!fun2 || !fun3) |
|
| - | 308 | return; |
|
| - | 309 | ||
| - | 310 | GROUP_AFFINITY affinity; |
|
| - | 311 | if (fun2(group, &affinity)) |
|
| - | 312 | fun3(GetCurrentThread(), &affinity, nullptr); |
|
| - | 313 | } |
|
| - | 314 | ||
| - | 315 | #endif |
|
| - | 316 | ||
| - | 317 | } // namespace WinProcGroup |
|