Update memory allocation guide for using pool APIs

This is to allow allocations in system memory that exceed sizes
reported by a CPU device

Change-Id: I3d10d192aafcefbe4107f69b7c5e30bf7f836619


[ROCm/ROCR-Runtime commit: 3201f68f72]
Этот коммит содержится в:
Ramesh Errabolu
2019-06-05 11:49:44 -05:00
родитель 2b9e13a56c
Коммит 61b9d4e8b2
7 изменённых файлов: 98 добавлений и 26 удалений
+16 -6
Просмотреть файл
@@ -341,7 +341,7 @@ static hsa_status_t DumpSegment(const pool_info_t *pool_i,
std::string const *ind_lvl) {
hsa_status_t err;
fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");
fprintf(stdout, "%s%-28s", ind_lvl->c_str(), "Pool Segment:");
std::string seg_str = "";
std::string tmp_str;
@@ -412,6 +412,11 @@ hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
&pool_i->accessible_by_all);
RET_IF_HSA_COMMON_ERR(err);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
&pool_i->aggregate_alloc_max);
RET_IF_HSA_COMMON_ERR(err);
return HSA_STATUS_SUCCESS;
}
@@ -422,25 +427,30 @@ hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
DumpSegment(pool_i, &ind_lvl);
std::string sz_str = std::to_string(pool_i->size / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Size:",
sz_str.c_str());
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Allocatable:",
(pool_i->alloc_allowed ? "TRUE" : "FALSE"));
std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
gr_str.c_str());
std::string al_str =
std::to_string(pool_i->alloc_alignment / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
al_str.c_str());
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Acessible by all:",
(pool_i->accessible_by_all ? "TRUE" : "FALSE"));
std::string agg_str =
std::to_string(pool_i->aggregate_alloc_max / 1024) + "KB";
fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Aggregate Alloc Size:",
agg_str.c_str());
return HSA_STATUS_SUCCESS;
}
+2
Просмотреть файл
@@ -88,12 +88,14 @@ typedef struct pool_info_t_ {
size_t alloc_alignment;
bool accessible_by_all;
uint32_t global_flag;
uint64_t aggregate_alloc_max;
inline bool operator==(const pool_info_t_ &a) {
if (a.segment == segment && a.size == size
&& a.alloc_allowed == alloc_allowed
&& a.alloc_granule == alloc_granule
&& a.alloc_alignment == alloc_alignment
&& a.accessible_by_all == accessible_by_all
&& a.aggregate_alloc_max == aggregate_alloc_max
&& a.global_flag == global_flag )
return true;
else
+1 -1
Просмотреть файл
@@ -197,7 +197,7 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
}
// Do everything in "granule" units
auto gran_sz = pool_i.alloc_granule;
auto pool_sz = pool_i.size / gran_sz;
auto pool_sz = pool_i.aggregate_alloc_max / gran_sz;
// Neg. test: Try to allocate more than the pool size
err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
+11 -10
Просмотреть файл
@@ -179,7 +179,7 @@ static void PrintAgentNameAndType(hsa_agent_t agent) {
static const int kMemoryAllocSize = 1024;
// This test verifies that hsa_memory_allocate can't allocate
// memory more than POOL_INFO_SIZE
// memory more than HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE
void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent,
hsa_amd_memory_pool_t pool) {
hsa_status_t err;
@@ -193,19 +193,20 @@ void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent,
}
// Determine if allocation is allowed in this pool
bool alloc = false;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0) {
if (verbosity() > 0) {
std::cout << " Test not applicable. Skipping." << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
return;
}
if (alloc) {
size_t max_size;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
&max_size);
char *memoryPtr;
err = hsa_amd_memory_pool_allocate(pool, (max_size + 16), 0,
auto gran_sz = pool_i.alloc_granule;
size_t max_size = pool_i.aggregate_alloc_max;
err = hsa_amd_memory_pool_allocate(pool, (max_size + gran_sz), 0,
reinterpret_cast<void**>(&memoryPtr));
ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION);
}
return;
}
+3
Просмотреть файл
@@ -175,6 +175,9 @@ class MemoryRegion : public core::MemoryRegion {
size_t max_single_alloc_size_;
// Used to collect total system memory
static size_t max_sysmem_alloc_size_;
HSAuint64 virtual_size_;
mutable KernelMutex access_lock_;
+38 -5
Просмотреть файл
@@ -52,6 +52,10 @@
#include "core/inc/exceptions.h"
namespace amd {
// Tracks aggregate size of system memory available on platform
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag,
HSAuint32 node_id, size_t size) {
void* ret = NULL;
@@ -119,7 +123,7 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
virtual_size_ = kGpuVmSize;
} else if (IsSystem()) {
mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
mem_flag_.ui32.NoSubstitute = 1;
mem_flag_.ui32.NoSubstitute = 0;
mem_flag_.ui32.HostAccess = 1;
mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED;
@@ -127,9 +131,20 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
(full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize;
}
// Bind if memory region is coarse or fine grain
mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1;
// Adjust allocatable size per page align
max_single_alloc_size_ = AlignDown(static_cast<size_t>(GetPhysicalSize()), kPageSize_);
mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1;
// Keep track of total system memory available
// @note: System memory is surfaced as both coarse
// and fine grain memory regions. To track total system
// memory only fine grain is considered as it avoids
// double counting
if (IsSystem() && (fine_grain)) {
max_sysmem_alloc_size_ += max_single_alloc_size_;
}
assert(GetVirtualSize() != 0);
assert(GetPhysicalSize() <= GetVirtualSize());
@@ -147,7 +162,10 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
}
if (size > max_single_alloc_size_) {
// Allocation requests for system memory consider the aggregate
// memory available on all CPU devices
if (size > ((IsSystem() ?
max_sysmem_alloc_size_ : max_single_alloc_size_))) {
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
}
@@ -285,9 +303,11 @@ hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute,
break;
case HSA_REGION_INFO_ALLOC_MAX_SIZE:
switch (mem_props_.HeapType) {
case HSA_HEAPTYPE_SYSTEM:
*((size_t*)value) = max_sysmem_alloc_size_;
break;
case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
case HSA_HEAPTYPE_SYSTEM:
case HSA_HEAPTYPE_GPU_SCRATCH:
*((size_t*)value) = max_single_alloc_size_;
break;
@@ -365,10 +385,23 @@ hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE:
case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT:
return GetInfo(static_cast<hsa_region_info_t>(attribute), value);
break;
case HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL:
*((bool*)value) = IsSystem() ? true : false;
break;
case HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE:
switch (mem_props_.HeapType) {
case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
case HSA_HEAPTYPE_GPU_SCRATCH:
return GetInfo(HSA_REGION_INFO_ALLOC_MAX_SIZE, value);
case HSA_HEAPTYPE_SYSTEM:
// Aggregate size available for allocation
*((size_t*)value) = max_sysmem_alloc_size_;
break;
default:
*((size_t*)value) = 0;
}
break;
default:
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
+27 -4
Просмотреть файл
@@ -648,8 +648,26 @@ typedef enum {
} hsa_amd_segment_t;
/**
* @brief A memory pool represents physical storage on an agent.
*/
* @brief A memory pool encapsulates physical storage on an agent
* along with a memory access model.
*
* @details A memory pool encapsulates a physical partition of an agent's
* memory system along with a memory access model. Division of a single
* memory system into separate pools allows querying each partition's access
* path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations
* from a pool are preferentially bound to that pool's physical partition.
* Binding to the pool's preferential physical partition may not be
* possible or persistent depending on the system's memory policy
* and/or state which is beyond the scope of HSA APIs.
*
* For example, a multi-node NUMA memory system may be represented by multiple
* pools, with each pool providing size and access path information for the
* partition it represents. Allocations from a pool are preferentially bound
* to the pool's partition (which in this example is a NUMA node) while
* following its memory access model. The actual placement may vary or migrate
* due to the system's NUMA policy and state, which is beyond the scope of
* HSA APIs.
*/
typedef struct hsa_amd_memory_pool_s {
/**
* Opaque handle.
@@ -729,6 +747,11 @@ typedef enum {
* attribute is bool.
*/
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
/**
* Maximum aggregate allocation size in bytes. The type of this attribute
* is size_t.
*/
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
} hsa_amd_memory_pool_info_t;
/**
@@ -817,8 +840,8 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
* @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
* allocate memory in @p memory_pool, or @p size is greater than the value of
* HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
* allocate memory in @p memory_pool, or @p size is greater than
* the value of HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
* or flags is not 0.