Correct rocrtst NUMA awareness.
The pool size was being used where the maximum allocation size
(alloc_max_size) should be used.
These changes are necessary on NUMA systems where not all nodes have
installed memory.
Change-Id: If8f507cae50a8dfeae8572d4e39df757abe28599
[ROCm/ROCR-Runtime commit: a9470e3563]
This commit is contained in:
@@ -31,8 +31,8 @@ typedef struct pool_info {
|
||||
|
||||
pool_info(hsa_agent_t agent, uint32_t agent_index,
|
||||
hsa_amd_memory_pool_t pool, hsa_amd_segment_t segment,
|
||||
size_t size, uint32_t index, bool is_fine_grained,
|
||||
bool is_kernarg, bool access_to_all,
|
||||
size_t size, size_t alloc_max_size, uint32_t index,
|
||||
bool is_fine_grained, bool is_kernarg, bool access_to_all,
|
||||
hsa_amd_memory_pool_access_t owner_access) {
|
||||
|
||||
pool_ = pool;
|
||||
@@ -40,7 +40,8 @@ typedef struct pool_info {
|
||||
segment_ = segment;
|
||||
owner_agent_ = agent;
|
||||
agent_index_ = agent_index;
|
||||
allocable_size_ = size;
|
||||
size_ = size;
|
||||
allocable_size_ = alloc_max_size;
|
||||
is_kernarg_ = is_kernarg;
|
||||
owner_access_ = owner_access;
|
||||
access_to_all_ = access_to_all;
|
||||
@@ -53,6 +54,7 @@ typedef struct pool_info {
|
||||
bool is_kernarg_;
|
||||
bool access_to_all_;
|
||||
bool is_fine_grained_;
|
||||
size_t size_;
|
||||
size_t allocable_size_;
|
||||
uint32_t agent_index_;
|
||||
hsa_agent_t owner_agent_;
|
||||
|
||||
@@ -1,114 +1,120 @@
|
||||
#include "common.hpp"
|
||||
#include "rocm_async.hpp"
|
||||
|
||||
// @brief: Helper method to iterate throught the memory pools of
|
||||
// an agent and discover its properties
|
||||
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
|
||||
|
||||
hsa_status_t status;
|
||||
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
|
||||
|
||||
// Query pools' segment, report only pools from global segment
|
||||
hsa_amd_segment_t segment;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
ErrorCheck(status);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Determine if allocation is allowed in this pool
|
||||
// Report only pools that allow an alloction by user
|
||||
bool alloc = false;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
|
||||
ErrorCheck(status);
|
||||
if (alloc != true) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Query the max allocatable size
|
||||
size_t max_size = 0;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SIZE, &max_size);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine if the pools is accessible to all agents
|
||||
bool access_to_all = false;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_to_all);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine type of access to owner agent
|
||||
hsa_amd_memory_pool_access_t owner_access;
|
||||
hsa_agent_t agent = asyncDrvr->agent_list_.back().agent_;
|
||||
status = hsa_amd_agent_memory_pool_get_info(agent, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine if the pool is fine-grained or coarse-grained
|
||||
uint32_t flag = 0;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
ErrorCheck(status);
|
||||
bool is_kernarg = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & flag);
|
||||
bool is_fine_grained = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & flag);
|
||||
|
||||
// Update the pool handle for system memory if kernarg is true
|
||||
if (is_kernarg) {
|
||||
asyncDrvr->sys_pool_ = pool;
|
||||
}
|
||||
|
||||
// Create an instance of agent_pool_info and add it to the list
|
||||
pool_info_t pool_info(agent, asyncDrvr->agent_index_, pool,
|
||||
segment, max_size, asyncDrvr->pool_index_,
|
||||
is_fine_grained, is_kernarg,
|
||||
access_to_all, owner_access);
|
||||
asyncDrvr->pool_list_.push_back(pool_info);
|
||||
|
||||
// Create an agent_pool_infot and add it to its list
|
||||
asyncDrvr->agent_pool_list_[asyncDrvr->agent_index_].pool_list.push_back(pool_info);
|
||||
asyncDrvr->pool_index_++;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// @brief: Helper method to iterate throught the agents of
|
||||
// a system and discover its properties
|
||||
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {
|
||||
|
||||
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
|
||||
|
||||
// Get the name of the agent
|
||||
char agent_name[64];
|
||||
hsa_status_t status;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Get device type
|
||||
hsa_device_type_t device_type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Capture the handle of Cpu agent
|
||||
if (device_type == HSA_DEVICE_TYPE_CPU) {
|
||||
asyncDrvr->cpu_agent_ = agent;
|
||||
}
|
||||
|
||||
asyncDrvr->agent_list_.push_back(agent_info(agent, asyncDrvr->agent_index_, device_type));
|
||||
|
||||
// Contruct an new agent_pool_info structure and add it to the list
|
||||
agent_pool_info node;
|
||||
node.agent = asyncDrvr->agent_list_.back();
|
||||
asyncDrvr->agent_pool_list_.push_back(node);
|
||||
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, asyncDrvr);
|
||||
asyncDrvr->agent_index_++;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Visits every agent through hsa_iterate_agents, using AgentInfo as the
// per-agent callback and this object as its user data. The status returned
// by the iteration is stored in err_.
void RocmAsync::DiscoverTopology() {
  void* const user_data = this;
  err_ = hsa_iterate_agents(&AgentInfo, user_data);
}
|
||||
|
||||
#include "common.hpp"
|
||||
#include "rocm_async.hpp"
|
||||
|
||||
// @brief: Helper method to iterate throught the memory pools of
|
||||
// an agent and discover its properties
|
||||
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
|
||||
|
||||
hsa_status_t status;
|
||||
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
|
||||
|
||||
// Query pools' segment, report only pools from global segment
|
||||
hsa_amd_segment_t segment;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
ErrorCheck(status);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Determine if allocation is allowed in this pool
|
||||
// Report only pools that allow an alloction by user
|
||||
bool alloc = false;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
|
||||
ErrorCheck(status);
|
||||
if (alloc != true) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Query the pool size
|
||||
size_t size = 0;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Query the max allocatable size
|
||||
size_t max_size = 0;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, &max_size);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine if the pools is accessible to all agents
|
||||
bool access_to_all = false;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_to_all);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine type of access to owner agent
|
||||
hsa_amd_memory_pool_access_t owner_access;
|
||||
hsa_agent_t agent = asyncDrvr->agent_list_.back().agent_;
|
||||
status = hsa_amd_agent_memory_pool_get_info(agent, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Determine if the pool is fine-grained or coarse-grained
|
||||
uint32_t flag = 0;
|
||||
status = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
ErrorCheck(status);
|
||||
bool is_kernarg = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & flag);
|
||||
bool is_fine_grained = (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & flag);
|
||||
|
||||
// Update the pool handle for system memory if kernarg is true
|
||||
if (is_kernarg) {
|
||||
asyncDrvr->sys_pool_ = pool;
|
||||
}
|
||||
|
||||
// Create an instance of agent_pool_info and add it to the list
|
||||
pool_info_t pool_info(agent, asyncDrvr->agent_index_, pool,
|
||||
segment, size, max_size, asyncDrvr->pool_index_,
|
||||
is_fine_grained, is_kernarg,
|
||||
access_to_all, owner_access);
|
||||
asyncDrvr->pool_list_.push_back(pool_info);
|
||||
|
||||
// Create an agent_pool_infot and add it to its list
|
||||
asyncDrvr->agent_pool_list_[asyncDrvr->agent_index_].pool_list.push_back(pool_info);
|
||||
asyncDrvr->pool_index_++;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// @brief: Helper method to iterate throught the agents of
|
||||
// a system and discover its properties
|
||||
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {
|
||||
|
||||
RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);
|
||||
|
||||
// Get the name of the agent
|
||||
char agent_name[64];
|
||||
hsa_status_t status;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Get device type
|
||||
hsa_device_type_t device_type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Capture the handle of Cpu agent
|
||||
if (device_type == HSA_DEVICE_TYPE_CPU) {
|
||||
asyncDrvr->cpu_agent_ = agent;
|
||||
}
|
||||
|
||||
asyncDrvr->agent_list_.push_back(agent_info(agent, asyncDrvr->agent_index_, device_type));
|
||||
|
||||
// Contruct an new agent_pool_info structure and add it to the list
|
||||
agent_pool_info node;
|
||||
node.agent = asyncDrvr->agent_list_.back();
|
||||
asyncDrvr->agent_pool_list_.push_back(node);
|
||||
|
||||
status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, asyncDrvr);
|
||||
asyncDrvr->agent_index_++;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Visits every agent through hsa_iterate_agents, using AgentInfo as the
// per-agent callback and this object as its user data. The status returned
// by the iteration is stored in err_.
void RocmAsync::DiscoverTopology() {
  void* const user_data = this;
  err_ = hsa_iterate_agents(&AgentInfo, user_data);
}
|
||||
|
||||
|
||||
@@ -443,7 +443,7 @@ void MemoryAllocationTest::MemoryBasicAllocationAndFree(hsa_agent_t agent,
|
||||
PrintSegmentNameAndType(pool_i.segment);
|
||||
}
|
||||
size_t max_size;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&max_size);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
max_size = (max_size > kMemoryAllocSize) ? kMemoryAllocSize : max_size;
|
||||
|
||||
@@ -535,9 +535,15 @@ static hsa_status_t GetPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Query the pool size
|
||||
size_t size = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
&size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Query the max allocable size
|
||||
size_t alloc_max_size = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&alloc_max_size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
@@ -553,7 +559,7 @@ static hsa_status_t GetPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
|
||||
int pool_i = ptr->pool_index();
|
||||
int ag_ind = ptr->agent_index();
|
||||
ptr->pool_info()->push_back(
|
||||
new PoolInfo(pool, pool_i, region_segment, is_fine_grained,
|
||||
new PoolInfo(pool, pool_i, region_segment, is_fine_grained, size,
|
||||
alloc_max_size, ptr->agent_info()->back()));
|
||||
|
||||
// Construct node_info and push back to agent_info_
|
||||
|
||||
@@ -105,12 +105,13 @@ class AgentInfo {
|
||||
class PoolInfo {
|
||||
public:
|
||||
// Builds the description of one memory pool.
//   pool            - handle of the memory pool
//   index           - index the caller assigned to this pool
//   segment         - segment the pool belongs to
//   is_fine_grained - true when the pool is fine-grained
//   size            - pool size (HSA_AMD_MEMORY_POOL_INFO_SIZE)
//   max_alloc_size  - maximum allocable size
//                     (HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE)
//   agent_info      - AgentInfo of the owning agent; the pointer is stored
PoolInfo(hsa_amd_memory_pool_t pool, int index,
         hsa_amd_segment_t segment, bool is_fine_grained, size_t size,
         size_t max_alloc_size, AgentInfo *agent_info) {
  pool_ = pool;
  index_ = index;
  segment_ = segment;
  is_fine_grained_ = is_fine_grained;
  size_ = size;
  // Record the maximum allocable size rather than the pool size; the two
  // are passed in separately and max_alloc_size was otherwise unused.
  allocable_size_ = max_alloc_size;
  owner_agent_info_ = agent_info;
}
|
||||
@@ -121,6 +122,7 @@ class PoolInfo {
|
||||
int index_;
|
||||
hsa_amd_segment_t segment_;
|
||||
bool is_fine_grained_;
|
||||
size_t size_;
|
||||
size_t allocable_size_;
|
||||
private:
|
||||
AgentInfo *owner_agent_info_;
|
||||
|
||||
@@ -269,7 +269,7 @@ void MemoryConcurrentTest::MemoryConcurrentAllocate(hsa_agent_t agent,
|
||||
|
||||
if (alloc) {
|
||||
size_t alloc_size;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&alloc_size);
|
||||
// Adjust the size to the minimum of 1024 or max alloc size
|
||||
alloc_size = (alloc_size < kMaxAllocSize) ? alloc_size: kMaxAllocSize;
|
||||
@@ -351,7 +351,7 @@ void MemoryConcurrentTest::MemoryConcurrentFree(hsa_agent_t agent,
|
||||
if (alloc) {
|
||||
// Get the maximum allocation size
|
||||
size_t alloc_size;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&alloc_size);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
|
||||
مرجع در شماره جدید
Block a user