Support numa policy set by user
Add hostNumaAlloc() to support numa policy set by user Change-Id: Ib6c3e838aa53e3d9b3db9735c585df46a1c98944
Этот коммит содержится в:
@@ -49,6 +49,7 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <numaif.h>
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
|
||||
@@ -1782,12 +1783,72 @@ void* Device::hostAlloc(size_t size, size_t alignment, bool atomics) const {
|
||||
stat = hsa_amd_agents_allow_access(gpu_agents_.size(), &gpu_agents_[0], nullptr, ptr);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogError("Fail hsa_amd_agents_allow_access");
|
||||
hostFree(ptr, size);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* Device::hostAgentAlloc(size_t size, const AgentInfo& agentInfo, bool atomics) const {
|
||||
void* ptr = nullptr;
|
||||
const hsa_amd_memory_pool_t segment =
|
||||
(!atomics) ?
|
||||
(agentInfo.coarse_grain_pool.handle != 0) ?
|
||||
agentInfo.coarse_grain_pool : agentInfo.fine_grain_pool
|
||||
: agentInfo.fine_grain_pool;
|
||||
assert(segment.handle != 0);
|
||||
hsa_status_t stat = hsa_amd_memory_pool_allocate(segment, size, 0, &ptr);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Fail allocation host memory with err %d", stat);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
stat = hsa_amd_agents_allow_access(gpu_agents_.size(), &gpu_agents_[0], nullptr, ptr);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Fail hsa_amd_agents_allow_access with err %d", stat);
|
||||
hostFree(ptr, size);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* Device::hostNumaAlloc(size_t size, size_t alignment, bool atomics) const {
|
||||
void* ptr = nullptr;
|
||||
int mode = MPOL_DEFAULT;
|
||||
unsigned long nodeMask = 0;
|
||||
auto cpuCount = cpu_agents_.size();
|
||||
|
||||
constexpr unsigned long maxNode = sizeof(nodeMask) * 8;
|
||||
long res = get_mempolicy(&mode, &nodeMask, maxNode, NULL, 0);
|
||||
if (res) {
|
||||
LogPrintfError("get_mempolicy failed with error %ld", res);
|
||||
return ptr;
|
||||
}
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_RESOURCE,
|
||||
"get_mempolicy() succeed with mode %d, nodeMask 0x%lx, cpuCount %zu",
|
||||
mode, nodeMask, cpuCount);
|
||||
|
||||
switch (mode) {
|
||||
// For details, see "man get_mempolicy".
|
||||
case MPOL_BIND:
|
||||
case MPOL_PREFERRED:
|
||||
// We only care about the first CPU node
|
||||
for (unsigned int i = 0; i < cpuCount; i++) {
|
||||
if ((1u << i) & nodeMask) {
|
||||
ptr = hostAgentAlloc(size, cpu_agents_[i], atomics);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// All other modes fall back to default mode
|
||||
ptr = hostAlloc(size, alignment, atomics);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void Device::hostFree(void* ptr, size_t size) const { memFree(ptr, size); }
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
|
||||
|
||||
@@ -308,7 +308,6 @@ class Device : public NullDevice {
|
||||
virtual hsa_agent_t getBackendDevice() const { return _bkendDevice; }
|
||||
const hsa_agent_t &getCpuAgent() const { return cpu_agent_; } // Get the CPU agent with the least NUMA distance to this GPU
|
||||
|
||||
|
||||
static const std::vector<hsa_agent_t>& getGpuAgents() { return gpu_agents_; }
|
||||
static const std::vector<AgentInfo>& getCpuAgents() { return cpu_agents_; }
|
||||
|
||||
@@ -389,6 +388,12 @@ class Device : public NullDevice {
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
|
||||
|
||||
//! Allocate host memory in terms of numa policy set by user
|
||||
void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const;
|
||||
|
||||
//! Allocate host memory from agent info
|
||||
void* hostAgentAlloc(size_t size, const AgentInfo& agentInfo, bool atomics = false) const;
|
||||
|
||||
//! Returns transfer engine object
|
||||
const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr(); }
|
||||
|
||||
|
||||
@@ -753,11 +753,10 @@ bool Buffer::create() {
|
||||
#else
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, false);
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
} else if (memFlags & CL_MEM_SVM_ATOMICS) {
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, true);
|
||||
}
|
||||
else {
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, false);
|
||||
} else if (memFlags & CL_MEM_FOLLOW_USER_NUMA_POLICY) {
|
||||
deviceMemory_ = dev().hostNumaAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
|
||||
} else {
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
|
||||
}
|
||||
} else {
|
||||
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
|
||||
|
||||
Ссылка в новой задаче
Block a user