From 03504e2f6cfbc689aa9eba6b770c389e887972cb Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 12 Jun 2023 12:52:29 -0500 Subject: [PATCH] FEAT-47686 - Changes to support new uncached memory segment in ROCr. Change-Id: I0ba8769d6737cdf1cc8a8644b2e82109f584a430 [ROCm/clr commit: aca7d9e14a692012ba9d95a004145843b773ee0a] --- projects/clr/rocclr/device/rocm/rocdevice.cpp | 19 +++++++++++++++---- projects/clr/rocclr/device/rocm/rocdevice.hpp | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index e7c2b346e2..69bec0a764 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -901,6 +901,12 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo return stat; } + // If the flag set is ext scoped fine grain, break the loop + if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) { + dev->gpu_ext_fine_grained_segment_ = pool; + break; + } + if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) { dev->gpu_fine_grained_segment_ = pool; } else if ((global_flag & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) != 0) { @@ -959,6 +965,12 @@ hsa_status_t Device::iterateCpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo break; } + // If the flag set is ext scoped fine grain, break the loop + if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) { + agentInfo->ext_fine_grain_pool_ = pool; + break; + } + if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) { if (agentInfo->fine_grain_pool.handle == 0) { agentInfo->fine_grain_pool = pool; @@ -2158,9 +2170,8 @@ bool Device::allowPeerAccess(device::Memory* memory) const { } void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const { - const hsa_amd_memory_pool_t& pool = (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_; - uint32_t hsa_mem_flags = (atomics && pseudo_fine_grain) ? HSA_AMD_MEMORY_POOL_PCIE_FLAG - : HSA_AMD_MEMORY_POOL_STANDARD_FLAG; + const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_ + : (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_; if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) { DevLogPrintfError("Invalid argument, pool_handle: 0x%x , max_alloc: %u \n", @@ -2169,7 +2180,7 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain } void* ptr = nullptr; - hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr); + hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr); ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size); if (stat != HSA_STATUS_SUCCESS) { LogError("Fail allocation local memory"); diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 6843924826..edce714a78 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -288,6 +288,7 @@ struct AgentInfo { hsa_amd_memory_pool_t fine_grain_pool; hsa_amd_memory_pool_t coarse_grain_pool; hsa_amd_memory_pool_t kern_arg_pool; + hsa_amd_memory_pool_t ext_fine_grain_pool_; }; //! A HSA device ordinal (physical HSA device) @@ -590,6 +591,7 @@ class Device : public NullDevice { hsa_amd_memory_pool_t system_kernarg_segment_; hsa_amd_memory_pool_t gpuvm_segment_; hsa_amd_memory_pool_t gpu_fine_grained_segment_; + hsa_amd_memory_pool_t gpu_ext_fine_grained_segment_; hsa_signal_t prefetch_signal_; //!< Prefetch signal, used to explicitly prefetch SVM on device std::atomic cache_state_; //!< State of cache, kUnknown/kFlushedToDevice/kFlushedToSystem