/*
Texel - A UCI chess engine.
Copyright (C) 2014 Peter Österlund, peterosterlund2@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* numa.cpp
*
* Created on: Jul 24, 2014
* Author: petero
*/
#include "numa.hpp"
#include "util/util.hpp"
#include "util/logger.hpp"
#include "bitBoard.hpp"
#include <map>
#include <set>
#include <algorithm>
#include <fstream>
#include <iostream>
#ifdef NUMA
#ifdef _WIN32
#include <windows.h>
#else
#include <numa.h>
#endif
#endif
/**
 * Access the shared Numa object.
 * The object is a function-local static, so it is constructed the first
 * time this function is called and the same object is returned afterwards.
 */
Numa&
Numa::instance() {
    static Numa singleton;
    return singleton;
}
/**
 * Build the table (threadToNode) that maps a search thread number to the
 * NUMA node it should run on.
 *
 * When NUMA is not defined at compile time the constructor does nothing and
 * the table stays empty. Otherwise the table is laid out so that the first
 * entries cover one thread per physical core and the remaining entries
 * cover the additional hardware threads (hyperthreads).
 *
 * On any failure to obtain topology information the constructor returns
 * early, leaving the table empty or partially unfilled as it was at that
 * point (it is only filled after all information has been gathered).
 */
Numa::Numa() {
#ifdef NUMA
#ifdef _WIN32
    // Call GetLogicalProcessorInformation repeatedly, re-allocating the
    // buffer to the size reported in returnLength each time the call fails
    // with ERROR_INSUFFICIENT_BUFFER. Any other failure aborts the setup.
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION* buffer = nullptr;
    DWORD returnLength = 0;
    while (true) {
        if (GetLogicalProcessorInformation(buffer, &returnLength))
            break;
        if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
            free(buffer);
            buffer = static_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION>(malloc(returnLength));
            if (!buffer)
                return;
        } else {
            free(buffer);
            return;
        }
    }

    // Walk the returned records and count NUMA nodes, physical cores and
    // hardware threads (one thread per bit set in a core's processor mask).
    int threads = 0;
    int nodes = 0;
    int cores = 0;
    DWORD byteOffset = 0;
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION* ptr = buffer;
    while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {
        switch (ptr->Relationship) {
        case RelationNumaNode:
            nodes++;
            break;
        case RelationProcessorCore:
            cores++;
            threads += BitBoard::bitCount(ptr->ProcessorMask);
            break;
        default:
            break;
        }
        byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
        ptr++;
    }
    free(buffer);

    // Without at least one node record there is nothing to map, and the
    // modulo below would divide by zero. Leave the table empty instead.
    if (nodes <= 0)
        return;

    // First one entry per core, assuming the cores are spread evenly over
    // the nodes (cores / nodes per node, any remainder is not assigned).
    for (int n = 0; n < nodes; n++)
        for (int i = 0; i < cores / nodes; i++)
            threadToNode.push_back(n);
    // Then the extra hardware threads, distributed round-robin over nodes.
    for (int t = 0; t < threads - cores; t++)
        threadToNode.push_back(t % nodes);
#else
    if (numa_available() == -1)
        return;
    const int maxNode = numa_max_node();
    if (maxNode == 0)
        return; // Only one node, leave the table empty

    // Collect the nodes reported by numa_get_run_node_mask().
    std::set<int> nodesToUse;
    bitmask* runNodes = numa_get_run_node_mask();
    int nBits = numa_bitmask_nbytes(runNodes) * 8;
    for (int i = 0; i < nBits; i++)
        if (numa_bitmask_isbitset(runNodes, i))
            nodesToUse.insert(i);
    // The mask is allocated by libnuma for the caller; release it now that
    // its contents have been copied into nodesToUse.
    numa_bitmask_free(runNodes);

    // Scan /sys/devices/system/cpu/cpuN for N = 0, 1, ... and accumulate the
    // number of hardware threads and physical cores per node.
    // NOTE(review): relies on NodeInfo default-initializing its counters;
    // NodeInfo is declared outside this file -- confirm.
    std::map<int, NodeInfo> nodeInfo;
    std::string baseDir("/sys/devices/system/cpu");
    for (int i = 0; ; i++) {
        std::string cpuDir(baseDir + "/cpu" + num2Str(i));
        if (i > 0) {
            // For every cpu except cpu0 the "online" file is consulted. A
            // missing file ends the scan; any content other than a complete
            // line "1" makes this cpu be skipped.
            std::ifstream is(cpuDir + "/online");
            if (!is)
                break;
            std::string line;
            std::getline(is, line);
            if (!is || is.eof() || (line != "1"))
                continue;
        }

        // The cpu belongs to the first node n for which cpuN/nodeN can be
        // opened.
        int node = -1;
        for (int n = 0; n <= maxNode; n++) {
            std::ifstream is(cpuDir + "/node" + num2Str(n));
            if (is) {
                node = n;
                break;
            }
        }
        if (node < 0)
            continue;
        nodeInfo[node].node = node;
        nodeInfo[node].numThreads++;

        // Count a physical core once: only when this cpu is the first
        // number listed in its thread_siblings_list (the text before the
        // first ',' or '-').
        std::ifstream is(cpuDir + "/topology/thread_siblings_list");
        if (is) {
            std::string line;
            std::getline(is, line);
            if (is && !is.eof()) {
                auto pos = line.find_first_of(",-");
                if (pos != std::string::npos)
                    line = line.substr(0, pos);
                int num;
                if (str2Num(line, num)) {
                    if (i == num)
                        nodeInfo[node].numCores++;
                }
            }
        }
    }

    // Keep only the nodes found in the run mask, ordered so that nodes with
    // more cores come first and ties are broken by more threads.
    std::vector<NodeInfo> nodes;
    for (int node : nodesToUse) {
        auto it = nodeInfo.find(node);
        if (it != nodeInfo.end())
            nodes.push_back(it->second);
    }
    std::sort(nodes.begin(), nodes.end(), [](const NodeInfo& a, const NodeInfo& b) {
        if (a.numCores != b.numCores)
            return a.numCores > b.numCores;
        return a.numThreads > b.numThreads;
    });

    // First one table entry per physical core, node by node.
    for (const NodeInfo& ni : nodes)
        for (int i = 0; i < ni.numCores; i++)
            threadToNode.push_back(ni.node);

    // Then hand out the remaining hardware threads one at a time, cycling
    // over the nodes until every node has numThreads <= numCores.
    bool done = false;
    while (!done) {
        done = true;
        for (NodeInfo& ni : nodes) {
            if (ni.numThreads > ni.numCores) {
                threadToNode.push_back(ni.node);
                ni.numThreads--;
                done = false;
            }
        }
    }
#endif
#endif
}
void
Numa::disable() {
threadToNode.clear();
}
/**
 * Look up the NUMA node assigned to a thread.
 * @param threadNo  Thread number used as index into the thread-to-node table.
 * @return The node for the thread, or -1 if NUMA support is not compiled in
 *         or threadNo is outside the table (negative or too large).
 */
int
Numa::nodeForThread(int threadNo) const {
#ifdef NUMA
    // Check both bounds: a negative thread number would otherwise be used
    // to index the table out of range.
    if (threadNo >= 0 && threadNo < static_cast<int>(threadToNode.size()))
        return threadToNode[threadNo];
#endif
    return -1;
}
/**
 * Bind the calling thread to the NUMA node assigned to threadNo.
 * Does nothing when NUMA support is not compiled in or when
 * nodeForThread() has no node (negative result) for the thread.
 */
void
Numa::bindThread(int threadNo) const {
#ifdef NUMA
    const int node = nodeForThread(threadNo);
    if (node >= 0) {
//      Logger::log([&](std::ostream& os){os << "threadNo:" << threadNo << " node:" << node;});
#ifdef _WIN32
        // Restrict the current thread to the processors of the node, if the
        // processor mask of the node can be obtained.
        ULONGLONG mask;
        if (GetNumaNodeProcessorMask(node, &mask))
            SetThreadAffinityMask(GetCurrentThread(), mask);
#else
        // Ask libnuma to run the thread on the node and to prefer that node
        // for memory allocation.
        numa_run_on_node(node);
        numa_set_preferred(node);
#endif
    }
#endif
}
/**
 * Tell whether a thread runs on the same node as thread 0.
 * @return true if the thread-to-node table is empty (not NUMA hardware),
 *         or if the node for threadNo equals the first table entry.
 */
bool
Numa::isMainNode(int threadNo) const {
    // Short-circuit keeps threadToNode[0] from being read on an empty table.
    const bool numaInUse = !threadToNode.empty();
    return !numaInUse || (threadToNode[0] == nodeForThread(threadNo));
}