Correct rocrtst NUMA awareness.

The pool size was being used where the pool's alloc_max_size should have been.
The change is needed on NUMA systems where not all nodes have
installed memory.

Change-Id: If8f507cae50a8dfeae8572d4e39df757abe28599


[ROCm/ROCR-Runtime commit: a9470e3563]
This commit is contained in:
Sean Keely
2020-04-17 20:16:17 -05:00
والد 9989d79543
کامیت c354858217
6فایلهای تغییر یافته به همراه141 افزوده شده و 125 حذف شده
@@ -31,8 +31,8 @@ typedef struct pool_info {
pool_info(hsa_agent_t agent, uint32_t agent_index,
hsa_amd_memory_pool_t pool, hsa_amd_segment_t segment,
size_t size, uint32_t index, bool is_fine_grained,
bool is_kernarg, bool access_to_all,
size_t size, size_t alloc_max_size, uint32_t index,
bool is_fine_grained, bool is_kernarg, bool access_to_all,
hsa_amd_memory_pool_access_t owner_access) {
pool_ = pool;
@@ -40,7 +40,8 @@ typedef struct pool_info {
segment_ = segment;
owner_agent_ = agent;
agent_index_ = agent_index;
allocable_size_ = size;
size_ = size;
allocable_size_ = alloc_max_size;
is_kernarg_ = is_kernarg;
owner_access_ = owner_access;
access_to_all_ = access_to_all;
@@ -53,6 +54,7 @@ typedef struct pool_info {
bool is_kernarg_;
bool access_to_all_;
bool is_fine_grained_;
size_t size_;
size_t allocable_size_;
uint32_t agent_index_;
hsa_agent_t owner_agent_;
@@ -1,114 +1,120 @@
#include "common.hpp"
#include "rocm_async.hpp"
// @brief: Helper method to iterate through the memory pools of
// an agent and discover their properties. It is passed as the callback
// to hsa_amd_agent_iterate_memory_pools by AgentInfo.
//
// @param pool Memory pool handle to inspect
// @param data Opaque pointer that is cast back to the RocmAsync instance
//
// @return HSA_STATUS_SUCCESS on every return path of this function; what
// ErrorCheck does when a query fails is not visible here
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
hsa_status_t status;
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
// Query the pool's segment, report only pools from the global segment
hsa_amd_segment_t segment;
status = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
ErrorCheck(status);
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
// Determine if allocation is allowed in this pool
// Report only pools that allow an allocation by the user
bool alloc = false;
status = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
ErrorCheck(status);
if (alloc != true) {
return HSA_STATUS_SUCCESS;
}
// Query the value that is recorded as the max allocatable size
// NOTE(review): the attribute queried is HSA_AMD_MEMORY_POOL_INFO_SIZE
// (the pool size), although the variable is named max_size; the later
// revision of this function queries
// HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE for this purpose instead
size_t max_size = 0;
status = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_SIZE, &max_size);
ErrorCheck(status);
// Determine if the pool is accessible to all agents
bool access_to_all = false;
status = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_to_all);
ErrorCheck(status);
// Determine type of access to owner agent
// The owner is taken from the last entry of agent_list_, which AgentInfo
// appends before it starts iterating over the pools
hsa_amd_memory_pool_access_t owner_access;
hsa_agent_t agent = asyncDrvr->agent_list_.back().agent_;
status = hsa_amd_agent_memory_pool_get_info(agent, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
ErrorCheck(status);
// Determine if the pool is fine-grained or coarse-grained
// and whether it carries the kernarg flag
uint32_t flag = 0;
status = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
ErrorCheck(status);
bool is_kernarg = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & flag);
bool is_fine_grained = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & flag);
// Update the pool handle for system memory if kernarg is true
if (is_kernarg) {
asyncDrvr->sys_pool_ = pool;
}
// Create a pool_info_t instance and add it to the global pool list
pool_info_t pool_info(agent, asyncDrvr->agent_index_, pool,
segment, max_size, asyncDrvr->pool_index_,
is_fine_grained, is_kernarg,
access_to_all, owner_access);
asyncDrvr->pool_list_.push_back(pool_info);
// Also add a copy to the pool list of the agent_pool_list_ entry
// selected by the current agent index
asyncDrvr->agent_pool_list_[asyncDrvr->agent_index_].pool_list.push_back(pool_info);
asyncDrvr->pool_index_++;
return HSA_STATUS_SUCCESS;
}
// @brief: Helper method to iterate through the agents of
// a system and discover their properties. It is passed as the callback
// to hsa_iterate_agents by RocmAsync::DiscoverTopology.
//
// @param agent Agent handle to inspect
// @param data Opaque pointer that is cast back to the RocmAsync instance
//
// @return HSA_STATUS_SUCCESS on the only return path of this function
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
// Get the name of the agent
// (the name is queried but not used anywhere else in this function)
char agent_name[64];
hsa_status_t status;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
ErrorCheck(status);
// Get device type
hsa_device_type_t device_type;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
ErrorCheck(status);
// Capture the handle of Cpu agent
if (device_type == HSA_DEVICE_TYPE_CPU) {
asyncDrvr->cpu_agent_ = agent;
}
asyncDrvr->agent_list_.push_back(agent_info(agent, asyncDrvr->agent_index_, device_type));
// Construct a new agent_pool_info structure and add it to the list
agent_pool_info node;
node.agent = asyncDrvr->agent_list_.back();
asyncDrvr->agent_pool_list_.push_back(node);
// Visit every memory pool of this agent through MemPoolInfo
// NOTE(review): the returned status is stored but never passed to
// ErrorCheck, unlike the other HSA calls in this function
status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, asyncDrvr);
asyncDrvr->agent_index_++;
return HSA_STATUS_SUCCESS;
}
// @brief: Runs AgentInfo over the agents reported by hsa_iterate_agents,
// passing this object as the callback data, and stores the returned
// status in err_
void RocmAsync::DiscoverTopology() {
err_ = hsa_iterate_agents(AgentInfo, this);
}
#include "common.hpp"
#include "rocm_async.hpp"
// @brief: Memory pool callback used while walking the pools of an agent.
// Collects the properties of one pool and records them on the driver.
//
// @param pool Memory pool handle to inspect
// @param data Opaque pointer that is cast back to the RocmAsync instance
//
// @return HSA_STATUS_SUCCESS on every return path of this function
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
  RocmAsync* driver = reinterpret_cast<RocmAsync*>(data);
  hsa_status_t status;

  // Pools outside the global segment are not reported
  hsa_amd_segment_t pool_segment;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  ErrorCheck(status);
  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Pools the user cannot allocate from are not reported either
  bool alloc_allowed = false;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  ErrorCheck(status);
  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  // Size of the pool
  size_t pool_size = 0;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_size);
  ErrorCheck(status);

  // Largest allocation the pool reports as possible
  size_t alloc_max_size = 0;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, &alloc_max_size);
  ErrorCheck(status);

  // Whether every agent can access the pool
  bool all_agents_access = false;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &all_agents_access);
  ErrorCheck(status);

  // Kind of access the owning agent has; the owner is the agent most
  // recently appended to agent_list_
  hsa_agent_t owner_agent = driver->agent_list_.back().agent_;
  hsa_amd_memory_pool_access_t owner_access;
  status = hsa_amd_agent_memory_pool_get_info(
      owner_agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
  ErrorCheck(status);

  // Global flags carry both the kernarg and the fine-grained bits
  uint32_t global_flags = 0;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  ErrorCheck(status);
  const bool kernarg =
      (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
  const bool fine_grained =
      (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;

  // A kernarg pool becomes the driver's system memory pool
  if (kernarg) {
    driver->sys_pool_ = pool;
  }

  // Describe the pool once, then store a copy in the flat pool list and
  // another in the per-agent pool list
  pool_info_t entry(owner_agent, driver->agent_index_, pool, pool_segment,
                    pool_size, alloc_max_size, driver->pool_index_,
                    fine_grained, kernarg, all_agents_access, owner_access);
  driver->pool_list_.push_back(entry);
  driver->agent_pool_list_[driver->agent_index_].pool_list.push_back(entry);

  driver->pool_index_++;
  return HSA_STATUS_SUCCESS;
}
// @brief: Agent callback used while walking the agents of a system.
// Records the agent on the driver and then discovers its memory pools.
//
// @param agent Agent handle to inspect
// @param data Opaque pointer that is cast back to the RocmAsync instance
//
// @return HSA_STATUS_SUCCESS on the only return path of this function
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {
  RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
  hsa_status_t status;

  // Get the name of the agent (queried, but not used further here)
  char agent_name[64];
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
  ErrorCheck(status);

  // Get device type
  hsa_device_type_t device_type;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  ErrorCheck(status);

  // Capture the handle of Cpu agent
  if (device_type == HSA_DEVICE_TYPE_CPU) {
    asyncDrvr->cpu_agent_ = agent;
  }

  asyncDrvr->agent_list_.push_back(
      agent_info(agent, asyncDrvr->agent_index_, device_type));

  // Construct a new agent_pool_info structure and add it to the list.
  // This must happen before the pool iteration below, because MemPoolInfo
  // reads agent_list_.back() and indexes agent_pool_list_ by agent_index_.
  agent_pool_info node;
  node.agent = asyncDrvr->agent_list_.back();
  asyncDrvr->agent_pool_list_.push_back(node);

  // Discover the pools of this agent. The status is checked like every
  // other HSA call in this file instead of being silently dropped.
  status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, asyncDrvr);
  ErrorCheck(status);

  asyncDrvr->agent_index_++;
  return HSA_STATUS_SUCCESS;
}
void RocmAsync::DiscoverTopology() {
err_ = hsa_iterate_agents(AgentInfo, this);
}
@@ -443,7 +443,7 @@ void MemoryAllocationTest::MemoryBasicAllocationAndFree(hsa_agent_t agent,
PrintSegmentNameAndType(pool_i.segment);
}
size_t max_size;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
&max_size);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
max_size = (max_size > kMemoryAllocSize) ? kMemoryAllocSize : max_size;
@@ -535,9 +535,15 @@ static hsa_status_t GetPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
return HSA_STATUS_SUCCESS;
}
// Query the pool size
size_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
&size);
RET_IF_HSA_ERR(err);
// Query the max allocable size
size_t alloc_max_size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
&alloc_max_size);
RET_IF_HSA_ERR(err);
@@ -553,7 +559,7 @@ static hsa_status_t GetPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
int pool_i = ptr->pool_index();
int ag_ind = ptr->agent_index();
ptr->pool_info()->push_back(
new PoolInfo(pool, pool_i, region_segment, is_fine_grained,
new PoolInfo(pool, pool_i, region_segment, is_fine_grained, size,
alloc_max_size, ptr->agent_info()->back()));
// Construct node_info and push back to agent_info_
@@ -105,12 +105,13 @@ class AgentInfo {
class PoolInfo {
public:
// Builds the description of one memory pool.
//
// pool            - handle of the pool being described
// index           - index assigned to the pool by the caller
// segment         - segment the pool belongs to
// is_fine_grained - whether the pool is fine-grained
// size            - size of the pool
// max_alloc_size  - largest single allocation the pool reports as possible
// agent_info      - descriptor of the agent that owns the pool
PoolInfo(hsa_amd_memory_pool_t pool, int index,
         hsa_amd_segment_t segment, bool is_fine_grained, size_t size,
         size_t max_alloc_size, AgentInfo *agent_info) {
  pool_ = pool;
  index_ = index;
  segment_ = segment;
  is_fine_grained_ = is_fine_grained;
  size_ = size;
  // The allocable size comes from max_alloc_size, not from the pool size:
  // the two are kept apart so that callers can tell them apart.
  allocable_size_ = max_alloc_size;
  owner_agent_info_ = agent_info;
}
@@ -121,6 +122,7 @@ class PoolInfo {
int index_;
hsa_amd_segment_t segment_;
bool is_fine_grained_;
size_t size_;
size_t allocable_size_;
private:
AgentInfo *owner_agent_info_;
@@ -269,7 +269,7 @@ void MemoryConcurrentTest::MemoryConcurrentAllocate(hsa_agent_t agent,
if (alloc) {
size_t alloc_size;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
&alloc_size);
// Adjust the size to the minimum of 1024 or max alloc size
alloc_size = (alloc_size < kMaxAllocSize) ? alloc_size: kMaxAllocSize;
@@ -351,7 +351,7 @@ void MemoryConcurrentTest::MemoryConcurrentFree(hsa_agent_t agent,
if (alloc) {
// Get the maximum allocation size
size_t alloc_size;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
&alloc_size);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);