diff --git a/runtime/hsa-runtime/core/inc/agent.h b/runtime/hsa-runtime/core/inc/agent.h index 8a1b405097..bb3c3808d7 100644 --- a/runtime/hsa-runtime/core/inc/agent.h +++ b/runtime/hsa-runtime/core/inc/agent.h @@ -46,7 +46,6 @@ #define HSA_RUNTME_CORE_INC_AGENT_H_ #include - #include #include "core/inc/checked.h" @@ -234,6 +233,8 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { // @details Returns the agent's instruction set architecture. virtual const Isa* isa() const = 0; + virtual uint64_t HiveId() const { return 0; } + // @brief Returns the device type (CPU/GPU/Others). __forceinline uint32_t device_type() const { return device_type_; } diff --git a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h index af5de53d24..08c098456c 100644 --- a/runtime/hsa-runtime/core/inc/amd_cpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_cpu_agent.h @@ -106,6 +106,9 @@ class CpuAgent : public core::Agent { // @brief Returns number of data caches. __forceinline size_t num_cache() const { return cache_props_.size(); } + // @brief Returns Hive ID + __forceinline uint64_t HiveId() const { return properties_.HiveID; } + // @brief Returns data cache property. // // @param [in] idx Cache level. diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 4f81c1e337..cc00277a74 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -299,6 +299,9 @@ class GpuAgent : public GpuAgentInt { // Getter & setters. + // @brief Returns Hive ID + __forceinline uint64_t HiveId() const { return properties_.HiveID; } + // @brief Returns node property. 
__forceinline const HsaNodeProperties& properties() const { return properties_; diff --git a/runtime/hsa-runtime/core/inc/amd_memory_region.h b/runtime/hsa-runtime/core/inc/amd_memory_region.h index 08bb78d93c..cd31518a5f 100644 --- a/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -48,6 +48,7 @@ #include "hsakmt.h" #include "core/inc/agent.h" +#include "core/inc/runtime.h" #include "core/inc/memory_region.h" #include "core/util/simple_heap.h" #include "core/util/locks.h" @@ -180,6 +181,10 @@ class MemoryRegion : public core::MemoryRegion { static const size_t kPageSize_ = 4096; + // Determine access type allowed to requesting device + hsa_amd_memory_pool_access_t GetAccessInfo(const core::Agent& agent, + const core::Runtime::LinkInfo& link_info) const; + class BlockAllocator { private: MemoryRegion& region_; diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index f2fc3c626f..9b79a6bbb9 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -309,8 +309,10 @@ void GpuAgent::InitRegionList() { if (region->IsLocalMemory()) { local_region_ = region; // Expose VRAM as uncached/fine grain over PCIe (if enabled) or XGMI. 
- if (core::Runtime::runtime_singleton_->flag().fine_grain_pcie()) + if ((properties_.HiveID != 0) || + (core::Runtime::runtime_singleton_->flag().fine_grain_pcie())) { regions_.push_back(new MemoryRegion(true, false, this, mem_props[mem_idx])); + } } break; } diff --git a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 97daa85045..384de61963 100644 --- a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -376,6 +376,67 @@ hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute, return HSA_STATUS_SUCCESS; } +hsa_amd_memory_pool_access_t MemoryRegion::GetAccessInfo( + const core::Agent& agent, const core::Runtime::LinkInfo& link_info) const { + + // Return allowed by default if memory pool is owned by requesting device + if (agent.public_handle().handle == owner()->public_handle().handle) { + return HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT; + } + + // Requesting device does not have a link + if (link_info.num_hop < 1) { + return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; + } + + // Determine access to fine and coarse grained system memory + // Return allowed by default if requesting device is a CPU + // Return disallowed by default if requesting device is not a CPU + if (IsSystem()) { + return (agent.device_type() == core::Agent::kAmdCpuDevice) ? 
+ (HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT) : + (HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT); + } + + // Determine access type for device local memory which is + // guaranteed to be HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC + // Return disallowed by default if framebuffer is coarse grained + // without regard to type of requesting device (CPU / GPU) + // Return disallowed by default if framebuffer is fine grained + // and requesting device is connected via xGMI link + // Return never allowed if framebuffer is fine grained and + // requesting device is connected via PCIe link + if (IsLocalMemory()) { + + // Return disallowed by default if memory is coarse + // grained without regard to link type + if (fine_grain() == false) { + return HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT; + } + + // Determine if pool is pseudo fine-grained due to env flag + // Return disallowed by default + if (core::Runtime::runtime_singleton_->flag().fine_grain_pcie()) { + return HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT; + } + + // Return disallowed by default if memory is fine + // grained and link type is xGMI. + if (agent.HiveId() == owner()->HiveId()) { + return HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT; + } + + // Return never allowed if memory is fine grained + // link type is not xGMI i.e. 
link is PCIe + return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; + } + + // Return never allowed if above conditions are not satisfied + // This can happen when memory pool references neither system + // nor device local memory + return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; +} + hsa_status_t MemoryRegion::GetAgentPoolInfo( const core::Agent& agent, hsa_amd_agent_memory_pool_info_t attribute, void* value) const { @@ -385,26 +446,7 @@ hsa_status_t MemoryRegion::GetAgentPoolInfo( const core::Runtime::LinkInfo link_info = core::Runtime::runtime_singleton_->GetLinkInfo(node_id_from, node_id_to); - /** - * --------------------------------------------------- - * | |CPU |GPU (owner)|GPU (peer) | - * --------------------------------------------------- - * |system memory |allowed |disallowed |disallowed | - * --------------------------------------------------- - * |fb private |never |allowed |never | - * --------------------------------------------------- - * |fb public |disallowed |allowed |disallowed | - * --------------------------------------------------- - * |others |never |allowed |never | - * --------------------------------------------------- - */ - const hsa_amd_memory_pool_access_t access_type = - ((IsSystem() && (agent.device_type() == core::Agent::kAmdCpuDevice)) || - (agent.node_id() == owner()->node_id())) - ? HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT - : (IsSystem() || (IsLocalMemory() && link_info.num_hop > 0)) - ? HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT - : HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; + const hsa_amd_memory_pool_access_t access_type = GetAccessInfo(agent, link_info); switch (attribute) { case HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS: