diff --git a/projects/rocr-runtime/rocrtst/common/common.cc b/projects/rocr-runtime/rocrtst/common/common.cc index ea95f02f75..2dc7f636f6 100755 --- a/projects/rocr-runtime/rocrtst/common/common.cc +++ b/projects/rocr-runtime/rocrtst/common/common.cc @@ -341,7 +341,7 @@ static hsa_status_t DumpSegment(const pool_info_t *pool_i, std::string const *ind_lvl) { hsa_status_t err; - fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:"); + fprintf(stdout, "%s%-28s", ind_lvl->c_str(), "Pool Segment:"); std::string seg_str = ""; std::string tmp_str; @@ -412,6 +412,11 @@ hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, &pool_i->accessible_by_all); RET_IF_HSA_COMMON_ERR(err); + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, + &pool_i->aggregate_alloc_max); + RET_IF_HSA_COMMON_ERR(err); + return HSA_STATUS_SUCCESS; } @@ -422,25 +427,30 @@ hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i, DumpSegment(pool_i, &ind_lvl); std::string sz_str = std::to_string(pool_i->size / 1024) + "KB"; - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:", + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Size:", sz_str.c_str()); - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:", + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Allocatable:", (pool_i->alloc_allowed ? 
"TRUE" : "FALSE")); std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB"; - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:", + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Granule:", gr_str.c_str()); std::string al_str = std::to_string(pool_i->alloc_alignment / 1024) + "KB"; - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:", + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Alloc Alignment:", al_str.c_str()); - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:", + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Acessible by all:", (pool_i->accessible_by_all ? "TRUE" : "FALSE")); + std::string agg_str = + std::to_string(pool_i->aggregate_alloc_max / 1024) + "KB"; + fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), "Pool Aggregate Alloc Size:", + agg_str.c_str()); + return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/rocrtst/common/common.h b/projects/rocr-runtime/rocrtst/common/common.h index 378156a989..b2f8f8d563 100755 --- a/projects/rocr-runtime/rocrtst/common/common.h +++ b/projects/rocr-runtime/rocrtst/common/common.h @@ -88,12 +88,14 @@ typedef struct pool_info_t_ { size_t alloc_alignment; bool accessible_by_all; uint32_t global_flag; + uint64_t aggregate_alloc_max; inline bool operator==(const pool_info_t_ &a) { if (a.segment == segment && a.size == size && a.alloc_allowed == alloc_allowed && a.alloc_granule == alloc_granule && a.alloc_alignment == alloc_alignment && a.accessible_by_all == accessible_by_all + && a.aggregate_alloc_max == aggregate_alloc_max && a.global_flag == global_flag ) return true; else diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc index 52a4e8cd0a..e862040576 100755 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc +++ 
b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc @@ -197,7 +197,7 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag, } // Do everything in "granule" units auto gran_sz = pool_i.alloc_granule; - auto pool_sz = pool_i.size / gran_sz; + auto pool_sz = pool_i.aggregate_alloc_max / gran_sz; // Neg. test: Try to allocate more than the pool size err = TestAllocate(pool, pool_sz*gran_sz + gran_sz); diff --git a/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc b/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc index 91c4f07119..8378ede9c9 100755 --- a/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc +++ b/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc @@ -179,7 +179,7 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { static const int kMemoryAllocSize = 1024; // This test verify that hsa_memory_allocate can't allocate -// memory more than POOL_INFO_SIZE +// memory more than HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent, hsa_amd_memory_pool_t pool) { hsa_status_t err; @@ -193,19 +193,20 @@ void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent, } // Determine if allocation is allowed in this pool - bool alloc = false; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc); + if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0) { + if (verbosity() > 0) { + std::cout << " Test not applicable. Skipping." 
<< std::endl; + std::cout << kSubTestSeparator << std::endl; + } + return; + } - if (alloc) { - size_t max_size; - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &max_size); char *memoryPtr; - err = hsa_amd_memory_pool_allocate(pool, (max_size + 16), 0, + auto gran_sz = pool_i.alloc_granule; + size_t max_size = pool_i.aggregate_alloc_max; + err = hsa_amd_memory_pool_allocate(pool, (max_size + gran_sz), 0, reinterpret_cast(&memoryPtr)); ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION); - } return; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index cd31518a5f..8be4644ae7 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -175,6 +175,9 @@ class MemoryRegion : public core::MemoryRegion { size_t max_single_alloc_size_; + // Used to collect total system memory + static size_t max_sysmem_alloc_size_; + HSAuint64 virtual_size_; mutable KernelMutex access_lock_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 384de61963..b8f4076f8f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -52,6 +52,10 @@ #include "core/inc/exceptions.h" namespace amd { + +// Tracks aggregate size of system memory available on platform +size_t MemoryRegion::max_sysmem_alloc_size_ = 0; + void* MemoryRegion::AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id, size_t size) { void* ret = NULL; @@ -119,7 +123,7 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne virtual_size_ = kGpuVmSize; } else if (IsSystem()) { mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - 
mem_flag_.ui32.NoSubstitute = 1; + mem_flag_.ui32.NoSubstitute = 0; mem_flag_.ui32.HostAccess = 1; mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED; @@ -127,9 +131,20 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne (full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize; } + // Bind if memory region is coarse or fine grain + mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1; + + // Adjust allocatable size per page align max_single_alloc_size_ = AlignDown(static_cast(GetPhysicalSize()), kPageSize_); - mem_flag_.ui32.CoarseGrain = (fine_grain) ? 0 : 1; + // Keep track of total system memory available + // @note: System memory is surfaced as both coarse + // and fine grain memory regions. To track total system + // memory only fine grain is considered as it avoids + // double counting + if (IsSystem() && (fine_grain)) { + max_sysmem_alloc_size_ += max_single_alloc_size_; + } assert(GetVirtualSize() != 0); assert(GetPhysicalSize() <= GetVirtualSize()); @@ -147,7 +162,10 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi return HSA_STATUS_ERROR_INVALID_ALLOCATION; } - if (size > max_single_alloc_size_) { + // Allocation requests for system memory consider aggregate + // memory available on all CPU devices + if (size > ((IsSystem() ? 
+ max_sysmem_alloc_size_ : max_single_alloc_size_))) { return HSA_STATUS_ERROR_INVALID_ALLOCATION; } @@ -285,9 +303,11 @@ hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute, break; case HSA_REGION_INFO_ALLOC_MAX_SIZE: switch (mem_props_.HeapType) { + case HSA_HEAPTYPE_SYSTEM: + *((size_t*)value) = max_sysmem_alloc_size_; + break; case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: - case HSA_HEAPTYPE_SYSTEM: case HSA_HEAPTYPE_GPU_SCRATCH: *((size_t*)value) = max_single_alloc_size_; break; @@ -365,10 +385,23 @@ hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute, case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE: case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT: return GetInfo(static_cast(attribute), value); - break; case HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: *((bool*)value) = IsSystem() ? true : false; break; + case HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE: + switch (mem_props_.HeapType) { + case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE: + case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC: + case HSA_HEAPTYPE_GPU_SCRATCH: + return GetInfo(HSA_REGION_INFO_ALLOC_MAX_SIZE, value); + case HSA_HEAPTYPE_SYSTEM: + // Aggregate size available for allocation + *((size_t*)value) = max_sysmem_alloc_size_; + break; + default: + *((size_t*)value) = 0; + } + break; default: return HSA_STATUS_ERROR_INVALID_ARGUMENT; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index de4d0890b3..40505c3420 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -648,8 +648,26 @@ typedef enum { } hsa_amd_segment_t; /** - * @brief A memory pool represents physical storage on an agent. - */ + * @brief A memory pool encapsulates physical storage on an agent + * along with a memory access model. 
+ * + * @details A memory pool encapsulates a physical partition of an agent's + * memory system along with a memory access model. Division of a single + * memory system into separate pools allows querying each partition's access + * path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations + * from a pool are preferentially bound to that pool's physical partition. + * Binding to the pool's preferential physical partition may not be + * possible or persistent depending on the system's memory policy + * and/or state which is beyond the scope of HSA APIs. + * + * For example, a multi-node NUMA memory system may be represented by multiple + * pools with each pool providing size and access path information for the + * partition it represents. Allocations from a pool are preferentially bound + * to the pool's partition (which in this example is a NUMA node) while + * following its memory access model. The actual placement may vary or migrate + * due to the system's NUMA policy and state, which is beyond the scope of + * HSA APIs. + */ typedef struct hsa_amd_memory_pool_s { /** * Opaque handle. @@ -729,6 +747,11 @@ typedef enum { * attribute is bool. */ HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15, + /** + * Maximum aggregate allocation size in bytes. The type of this attribute + * is size_t. + */ + HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16, } hsa_amd_memory_pool_info_t; /** @@ -817,8 +840,8 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid. * * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to - * allocate memory in @p memory_pool, or @p size is greater than the value of - * HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool. + * allocate memory in @p memory_pool, or @p size is greater than + * the value of HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool. 
* * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0, * or flags is not 0.