From 8a2d0028bade176b4c8ae21f8727a7bc25ed7fa5 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 6 Nov 2019 18:00:44 -0500 Subject: [PATCH] P4 to Git Change 2026152 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/06 17:50:08 SWDEV-206239 - [HIP] RCCL: finegrain VRAM does not work Implemented fine grained VRAM allocation via ATOMICS. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#138 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#45 edit [ROCm/clr commit: 25067523487c43c57eb782697959a74fe9dbe89f] --- .../rocclr/runtime/device/rocm/rocdevice.cpp | 24 +++++++++++++++---- .../rocclr/runtime/device/rocm/rocdevice.hpp | 3 ++- .../rocclr/runtime/device/rocm/rocmemory.cpp | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index edd487097d..aa68692b82 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -155,6 +155,7 @@ Device::Device(hsa_agent_t bkendDevice) system_segment_.handle = 0; system_coarse_segment_.handle = 0; gpuvm_segment_.handle = 0; + gpu_fine_grained_segment_.handle = 0; } Device::~Device() { @@ -861,7 +862,18 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo switch (segment_type) { case HSA_REGION_SEGMENT_GLOBAL: { if (dev->settings().enableLocalMemory_) { - dev->gpuvm_segment_ = pool; + uint32_t global_flag = 0; + hsa_status_t stat = + hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag); + if (stat != HSA_STATUS_SUCCESS) { + return stat; + } + + if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) { + dev->gpu_fine_grained_segment_ = pool; + } else { + dev->gpuvm_segment_ = pool; + } } break; } @@ -1677,13 +1689,15 @@ void* Device::hostAlloc(size_t size, size_t alignment, bool atomics) const { void Device::hostFree(void* ptr, size_t size) const { memFree(ptr, size); } -void* Device::deviceLocalAlloc(size_t size) const { - if (gpuvm_segment_.handle == 0 || gpuvm_segment_max_alloc_ == 0) { +void* Device::deviceLocalAlloc(size_t size, bool atomics) const { + const hsa_amd_memory_pool_t& pool = (atomics)? gpu_fine_grained_segment_ : gpuvm_segment_; + + if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) { return nullptr; } void* ptr = nullptr; - hsa_status_t stat = hsa_amd_memory_pool_allocate(gpuvm_segment_, size, 0, &ptr); + hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr); if (stat != HSA_STATUS_SUCCESS) { LogError("Fail allocation local memory"); return nullptr; @@ -1692,7 +1706,7 @@ void* Device::deviceLocalAlloc(size_t size) const { if (p2pAgents().size() > 0) { stat = hsa_amd_agents_allow_access(p2pAgents().size(), p2pAgents().data(), nullptr, ptr); if (stat != HSA_STATUS_SUCCESS) { - LogError("Allow p2p acces for memory allocation"); + LogError("Allow p2p access for memory allocation"); memFree(ptr, size); return nullptr; } diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp index 09ffb8e825..b059931ccb 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp @@ -344,7 +344,7 @@ class Device : public NullDevice { virtual void hostFree(void* ptr, size_t size = 0) const; - void* deviceLocalAlloc(size_t size) const; + void* deviceLocalAlloc(size_t size, bool atomics = false) const; void memFree(void* ptr, size_t size) const; @@ -437,6 +437,7 @@ class Device : public NullDevice { hsa_amd_memory_pool_t system_segment_; hsa_amd_memory_pool_t system_coarse_segment_; hsa_amd_memory_pool_t gpuvm_segment_; + hsa_amd_memory_pool_t gpu_fine_grained_segment_; size_t gpuvm_segment_max_alloc_; size_t alloc_granularity_; static const bool offlineDevice_; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp b/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp index 020ec81e77..9a4004e10c 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp @@ -705,7 +705,7 @@ bool Buffer::create() { } flags_ |= HostMemoryDirectAccess; } else { - deviceMemory_ = dev().deviceLocalAlloc(size()); + deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0); } owner()->setSvmPtr(deviceMemory_); } else {