Update memory allocation guidance for using the pool APIs
This allows allocations in system memory that exceed the size
reported by a single CPU device
Change-Id: I3d10d192aafcefbe4107f69b7c5e30bf7f836619
[ROCm/ROCR-Runtime commit: 3201f68f72]
Этот коммит содержится в:
@@ -341,7 +341,7 @@ static hsa_status_t DumpSegment(const pool_info_t *pool_i,
|
||||
std::string const *ind_lvl) {
|
||||
hsa_status_t err;
|
||||
|
||||
fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");
|
||||
fprintf(stdout, "%s%-28s", ind_lvl->c_str(), "Pool Segment:");
|
||||
std::string seg_str = "";
|
||||
std::string tmp_str;
|
||||
|
||||
@@ -412,6 +412,11 @@ hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
|
||||
&pool_i->accessible_by_all);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&pool_i->aggregate_alloc_max);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -422,25 +427,30 @@ hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
|
||||
DumpSegment(pool_i, &ind_lvl);
|
||||
|
||||
std::string sz_str = std::to_string(pool_i->size / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Size:",
|
||||
sz_str.c_str());
|
||||
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Allocatable:",
|
||||
(pool_i->alloc_allowed ? "TRUE" : "FALSE"));
|
||||
|
||||
std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
|
||||
gr_str.c_str());
|
||||
|
||||
|
||||
std::string al_str =
|
||||
std::to_string(pool_i->alloc_alignment / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
|
||||
al_str.c_str());
|
||||
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Acessible by all:",
|
||||
(pool_i->accessible_by_all ? "TRUE" : "FALSE"));
|
||||
|
||||
std::string agg_str =
|
||||
std::to_string(pool_i->aggregate_alloc_max / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Aggregate Alloc Size:",
|
||||
agg_str.c_str());
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -88,12 +88,14 @@ typedef struct pool_info_t_ {
|
||||
size_t alloc_alignment;
|
||||
bool accessible_by_all;
|
||||
uint32_t global_flag;
|
||||
uint64_t aggregate_alloc_max;
|
||||
inline bool operator==(const pool_info_t_ &a) {
|
||||
if (a.segment == segment && a.size == size
|
||||
&& a.alloc_allowed == alloc_allowed
|
||||
&& a.alloc_granule == alloc_granule
|
||||
&& a.alloc_alignment == alloc_alignment
|
||||
&& a.accessible_by_all == accessible_by_all
|
||||
&& a.aggregate_alloc_max == aggregate_alloc_max
|
||||
&& a.global_flag == global_flag )
|
||||
return true;
|
||||
else
|
||||
|
||||
@@ -197,7 +197,7 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
|
||||
}
|
||||
// Do everything in "granule" units
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
auto pool_sz = pool_i.size / gran_sz;
|
||||
auto pool_sz = pool_i.aggregate_alloc_max / gran_sz;
|
||||
|
||||
// Neg. test: Try to allocate more than the pool size
|
||||
err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
|
||||
|
||||
+11
-10
@@ -179,7 +179,7 @@ static void PrintAgentNameAndType(hsa_agent_t agent) {
|
||||
static const int kMemoryAllocSize = 1024;
|
||||
|
||||
// This test verifies that hsa_memory_allocate can't allocate
|
||||
// memory more than POOL_INFO_SIZE
|
||||
// memory more than HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE
|
||||
void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent,
|
||||
hsa_amd_memory_pool_t pool) {
|
||||
hsa_status_t err;
|
||||
@@ -193,19 +193,20 @@ void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent,
|
||||
}
|
||||
|
||||
// Determine if allocation is allowed in this pool
|
||||
bool alloc = false;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
|
||||
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0) {
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Test not applicable. Skipping." << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (alloc) {
|
||||
size_t max_size;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
&max_size);
|
||||
char *memoryPtr;
|
||||
err = hsa_amd_memory_pool_allocate(pool, (max_size + 16), 0,
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
size_t max_size = pool_i.aggregate_alloc_max;
|
||||
err = hsa_amd_memory_pool_allocate(pool, (max_size + gran_sz), 0,
|
||||
reinterpret_cast<void**>(&memoryPtr));
|
||||
ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -175,6 +175,9 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
|
||||
size_t max_single_alloc_size_;
|
||||
|
||||
// Used to collect total system memory
|
||||
static size_t max_sysmem_alloc_size_;
|
||||
|
||||
HSAuint64 virtual_size_;
|
||||
|
||||
mutable KernelMutex access_lock_;
|
||||
|
||||
+38
-5
@@ -52,6 +52,10 @@
|
||||
#include "core/inc/exceptions.h"
|
||||
|
||||
namespace amd {
|
||||
|
||||
// Tracks aggregate size of system memory available on platform
|
||||
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
|
||||
|
||||
void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag,
|
||||
HSAuint32 node_id, size_t size) {
|
||||
void* ret = NULL;
|
||||
@@ -119,7 +123,7 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
|
||||
virtual_size_ = kGpuVmSize;
|
||||
} else if (IsSystem()) {
|
||||
mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
mem_flag_.ui32.NoSubstitute = 1;
|
||||
mem_flag_.ui32.NoSubstitute = 0;
|
||||
mem_flag_.ui32.HostAccess = 1;
|
||||
mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED;
|
||||
|
||||
@@ -127,9 +131,20 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
|
||||
(full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize;
|
||||
}
|
||||
|
||||
// Bind if memory region is coarse or fine grain
|
||||
mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1;
|
||||
|
||||
// Adjust allocatable size per page align
|
||||
max_single_alloc_size_ = AlignDown(static_cast<size_t>(GetPhysicalSize()), kPageSize_);
|
||||
|
||||
mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1;
|
||||
// Keep track of total system memory available
|
||||
// @note: System memory is surfaced as both coarse
|
||||
// and fine grain memory regions. To track total system
|
||||
// memory only fine grain is considered as it avoids
|
||||
// double counting
|
||||
if (IsSystem() && (fine_grain)) {
|
||||
max_sysmem_alloc_size_ += max_single_alloc_size_;
|
||||
}
|
||||
|
||||
assert(GetVirtualSize() != 0);
|
||||
assert(GetPhysicalSize() <= GetVirtualSize());
|
||||
@@ -147,7 +162,10 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi
|
||||
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
||||
}
|
||||
|
||||
if (size > max_single_alloc_size_) {
|
||||
// Allocation requests for system memory consider the aggregate
|
||||
// memory available on all CPU devices
|
||||
if (size > ((IsSystem() ?
|
||||
max_sysmem_alloc_size_ : max_single_alloc_size_))) {
|
||||
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
||||
}
|
||||
|
||||
@@ -285,9 +303,11 @@ hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute,
|
||||
break;
|
||||
case HSA_REGION_INFO_ALLOC_MAX_SIZE:
|
||||
switch (mem_props_.HeapType) {
|
||||
case HSA_HEAPTYPE_SYSTEM:
|
||||
*((size_t*)value) = max_sysmem_alloc_size_;
|
||||
break;
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
|
||||
case HSA_HEAPTYPE_SYSTEM:
|
||||
case HSA_HEAPTYPE_GPU_SCRATCH:
|
||||
*((size_t*)value) = max_single_alloc_size_;
|
||||
break;
|
||||
@@ -365,10 +385,23 @@ hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
|
||||
case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE:
|
||||
case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT:
|
||||
return GetInfo(static_cast<hsa_region_info_t>(attribute), value);
|
||||
break;
|
||||
case HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL:
|
||||
*((bool*)value) = IsSystem() ? true : false;
|
||||
break;
|
||||
case HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE:
|
||||
switch (mem_props_.HeapType) {
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
|
||||
case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
|
||||
case HSA_HEAPTYPE_GPU_SCRATCH:
|
||||
return GetInfo(HSA_REGION_INFO_ALLOC_MAX_SIZE, value);
|
||||
case HSA_HEAPTYPE_SYSTEM:
|
||||
// Aggregate size available for allocation
|
||||
*((size_t*)value) = max_sysmem_alloc_size_;
|
||||
break;
|
||||
default:
|
||||
*((size_t*)value) = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
@@ -648,8 +648,26 @@ typedef enum {
|
||||
} hsa_amd_segment_t;
|
||||
|
||||
/**
|
||||
* @brief A memory pool represents physical storage on an agent.
|
||||
*/
|
||||
* @brief A memory pool encapsulates physical storage on an agent
|
||||
* along with a memory access model.
|
||||
*
|
||||
* @details A memory pool encapsulates a physical partition of an agent's
|
||||
* memory system along with a memory access model. Division of a single
|
||||
* memory system into separate pools allows querying each partition's access
|
||||
* path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations
|
||||
* from a pool are preferentially bound to that pool's physical partition.
|
||||
* Binding to the pool's preferential physical partition may not be
|
||||
* possible or persistent depending on the system's memory policy
|
||||
* and/or state which is beyond the scope of HSA APIs.
|
||||
*
|
||||
* For example, a multi-node NUMA memory system may be represented by multiple
|
||||
* pools, with each pool providing size and access path information for the
|
||||
* partition it represents. Allocations from a pool are preferentially bound
|
||||
* to the pool's partition (which in this example is a NUMA node) while
|
||||
* following its memory access model. The actual placement may vary or migrate
|
||||
* due to the system's NUMA policy and state, which is beyond the scope of
|
||||
* HSA APIs.
|
||||
*/
|
||||
typedef struct hsa_amd_memory_pool_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
@@ -729,6 +747,11 @@ typedef enum {
|
||||
* attribute is bool.
|
||||
*/
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
|
||||
/**
|
||||
* Maximum aggregate allocation size in bytes. The type of this attribute
|
||||
* is size_t.
|
||||
*/
|
||||
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
|
||||
} hsa_amd_memory_pool_info_t;
|
||||
|
||||
/**
|
||||
@@ -817,8 +840,8 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
|
||||
* allocate memory in @p memory_pool, or @p size is greater than the value of
|
||||
* HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
|
||||
* allocate memory in @p memory_pool, or @p size is greater than
|
||||
* the value of HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
|
||||
* or flags is not 0.
|
||||
|
||||
Ссылка в новой задаче
Block a user