Support the NUMA memory policy set by the user

Add hostNumaAlloc() so that host allocations can follow the NUMA memory policy set by the user

Change-Id: Ib6c3e838aa53e3d9b3db9735c585df46a1c98944
Этот коммит содержится в:
Tao Sang
2020-06-15 19:13:58 -04:00
коммит произвёл Tao Sang
родитель 20c24cae93
Коммит da94cd0de1
3 изменённых файлов: 71 добавлений и 6 удалений
+61
Просмотреть файл
@@ -49,6 +49,7 @@
#include <iostream>
#include <vector>
#include <algorithm>
#include <numaif.h>
#endif // WITHOUT_HSA_BACKEND
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
@@ -1782,12 +1783,72 @@ void* Device::hostAlloc(size_t size, size_t alignment, bool atomics) const {
stat = hsa_amd_agents_allow_access(gpu_agents_.size(), &gpu_agents_[0], nullptr, ptr);
if (stat != HSA_STATUS_SUCCESS) {
LogError("Fail hsa_amd_agents_allow_access");
hostFree(ptr, size);
return nullptr;
}
return ptr;
}
void* Device::hostAgentAlloc(size_t size, const AgentInfo& agentInfo, bool atomics) const {
void* ptr = nullptr;
const hsa_amd_memory_pool_t segment =
(!atomics) ?
(agentInfo.coarse_grain_pool.handle != 0) ?
agentInfo.coarse_grain_pool : agentInfo.fine_grain_pool
: agentInfo.fine_grain_pool;
assert(segment.handle != 0);
hsa_status_t stat = hsa_amd_memory_pool_allocate(segment, size, 0, &ptr);
if (stat != HSA_STATUS_SUCCESS) {
LogPrintfError("Fail allocation host memory with err %d", stat);
return nullptr;
}
stat = hsa_amd_agents_allow_access(gpu_agents_.size(), &gpu_agents_[0], nullptr, ptr);
if (stat != HSA_STATUS_SUCCESS) {
LogPrintfError("Fail hsa_amd_agents_allow_access with err %d", stat);
hostFree(ptr, size);
return nullptr;
}
return ptr;
}
//! Allocates host memory according to the NUMA memory policy reported by
//! get_mempolicy() (queried with flags == 0).
//!
//! For MPOL_BIND and MPOL_PREFERRED the memory is taken from the CPU agent
//! that corresponds to the lowest node bit set in the policy node mask.
//! Every other mode, as well as a BIND/PREFERRED mask that selects no known
//! CPU agent (e.g. an empty mask), uses the regular hostAlloc() path.
//!
//! @param size       Number of bytes to allocate.
//! @param alignment  Alignment forwarded to hostAlloc() on the default path.
//! @param atomics    Forwarded to the underlying allocator.
//! @return Pointer to the allocation, or nullptr if the policy query or the
//!         allocation failed.
void* Device::hostNumaAlloc(size_t size, size_t alignment, bool atomics) const {
  int mode = MPOL_DEFAULT;
  unsigned long nodeMask = 0;
  auto cpuCount = cpu_agents_.size();

  // Only a single-word node mask is queried, i.e. node ids [0, maxNode).
  constexpr unsigned long maxNode = sizeof(nodeMask) * 8;
  long res = get_mempolicy(&mode, &nodeMask, maxNode, nullptr, 0);
  if (res) {
    LogPrintfError("get_mempolicy failed with error %ld", res);
    return nullptr;
  }
  ClPrint(amd::LOG_INFO, amd::LOG_RESOURCE,
          "get_mempolicy() succeed with mode %d, nodeMask 0x%lx, cpuCount %zu",
          mode, nodeMask, cpuCount);

  // For details, see "man get_mempolicy".
  if (mode == MPOL_BIND || mode == MPOL_PREFERRED) {
    // Never test bits beyond the width of nodeMask: shifting by the full
    // width (or more) of the type is undefined behaviour.
    const size_t nodeLimit = (cpuCount < maxNode) ? cpuCount : maxNode;
    // We only care about the first CPU node.
    // NOTE(review): assumes cpu_agents_[i] describes NUMA node i - confirm.
    for (size_t i = 0; i < nodeLimit; ++i) {
      // Test the bit in the mask's own (unsigned long) width; a 32-bit
      // "1u << i" would be undefined for node ids >= 32.
      if ((nodeMask >> i) & 1ul) {
        return hostAgentAlloc(size, cpu_agents_[i], atomics);
      }
    }
    // No selected node maps to a known CPU agent (for instance an empty
    // mask, which for MPOL_PREFERRED means "local allocation"): use the
    // default path below instead of failing the allocation.
  }

  // All other modes fall back to default mode
  return hostAlloc(size, alignment, atomics);
}
//! Releases host memory by forwarding the pointer and its size to memFree().
void Device::hostFree(void* ptr, size_t size) const {
  memFree(ptr, size);
}
void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
+6 -1
Просмотреть файл
@@ -308,7 +308,6 @@ class Device : public NullDevice {
//! Returns the backend HSA agent handle stored in _bkendDevice
virtual hsa_agent_t getBackendDevice() const {
  return _bkendDevice;
}
//! Get the CPU agent with the least NUMA distance to this GPU
const hsa_agent_t& getCpuAgent() const {
  return cpu_agent_;
}
//! Returns the shared list of GPU agents (gpu_agents_)
static const std::vector<hsa_agent_t>& getGpuAgents() {
  return gpu_agents_;
}
//! Returns the shared list of CPU agent descriptions (cpu_agents_)
static const std::vector<AgentInfo>& getCpuAgents() {
  return cpu_agents_;
}
@@ -389,6 +388,12 @@ class Device : public NullDevice {
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
//! Allocate host memory according to the NUMA memory policy set by the user.
//! The policy is queried with get_mempolicy(); for MPOL_BIND and
//! MPOL_PREFERRED the memory comes from the first CPU agent selected by the
//! policy node mask, all other modes use hostAlloc().
//! Returns nullptr on failure.
void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const;
//! Allocate host memory from a memory pool of the given CPU agent and allow
//! all GPU agents to access it. The fine grain pool is used when atomics is
//! true or when the agent has no coarse grain pool; otherwise the coarse
//! grain pool is used. Returns nullptr on failure.
void* hostAgentAlloc(size_t size, const AgentInfo& agentInfo, bool atomics = false) const;
//! Returns the transfer engine object, i.e. the blit manager of the transfer queue
const device::BlitManager& xferMgr() const {
  return xferQueue()->blitMgr();
}
+4 -5
Просмотреть файл
@@ -753,11 +753,10 @@ bool Buffer::create() {
#else
deviceMemory_ = dev().hostAlloc(size(), 1, false);
#endif // AMD_HMM_SUPPORT
} else if (memFlags & CL_MEM_SVM_ATOMICS) {
deviceMemory_ = dev().hostAlloc(size(), 1, true);
}
else {
deviceMemory_ = dev().hostAlloc(size(), 1, false);
} else if (memFlags & CL_MEM_FOLLOW_USER_NUMA_POLICY) {
deviceMemory_ = dev().hostNumaAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
} else {
deviceMemory_ = dev().hostAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
}
} else {
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");