From 8a2d0028bade176b4c8ae21f8727a7bc25ed7fa5 Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 6 Nov 2019 18:00:44 -0500
Subject: [PATCH] P4 to Git Change 2026152 by cpaquot@cpaquot-ocl-lc-lnx on
2019/11/06 17:50:08
SWDEV-206239 - [HIP] RCCL: finegrain VRAM does not work
Implemented fine-grained VRAM allocation, selected when a buffer is created with CL_MEM_SVM_ATOMICS.
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#138 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#45 edit
[ROCm/clr commit: 25067523487c43c57eb782697959a74fe9dbe89f]
---
.../rocclr/runtime/device/rocm/rocdevice.cpp | 24 +++++++++++++++----
.../rocclr/runtime/device/rocm/rocdevice.hpp | 3 ++-
.../rocclr/runtime/device/rocm/rocmemory.cpp | 2 +-
3 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
index edd487097d..aa68692b82 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -155,6 +155,7 @@ Device::Device(hsa_agent_t bkendDevice)
system_segment_.handle = 0;
system_coarse_segment_.handle = 0;
gpuvm_segment_.handle = 0;
+ gpu_fine_grained_segment_.handle = 0;
}
Device::~Device() {
@@ -861,7 +862,18 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
switch (segment_type) {
case HSA_REGION_SEGMENT_GLOBAL: {
if (dev->settings().enableLocalMemory_) {
- dev->gpuvm_segment_ = pool;
+ uint32_t global_flag = 0;
+ hsa_status_t stat =
+ hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag);
+ if (stat != HSA_STATUS_SUCCESS) {
+ return stat;
+ }
+
+ if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) {
+ dev->gpu_fine_grained_segment_ = pool;
+ } else {
+ dev->gpuvm_segment_ = pool;
+ }
}
break;
}
@@ -1677,13 +1689,15 @@ void* Device::hostAlloc(size_t size, size_t alignment, bool atomics) const {
void Device::hostFree(void* ptr, size_t size) const { memFree(ptr, size); }
-void* Device::deviceLocalAlloc(size_t size) const {
- if (gpuvm_segment_.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
+void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
+ const hsa_amd_memory_pool_t& pool = (atomics)? gpu_fine_grained_segment_ : gpuvm_segment_;
+
+ if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
return nullptr;
}
void* ptr = nullptr;
- hsa_status_t stat = hsa_amd_memory_pool_allocate(gpuvm_segment_, size, 0, &ptr);
+ hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
if (stat != HSA_STATUS_SUCCESS) {
LogError("Fail allocation local memory");
return nullptr;
@@ -1692,7 +1706,7 @@ void* Device::deviceLocalAlloc(size_t size) const {
if (p2pAgents().size() > 0) {
stat = hsa_amd_agents_allow_access(p2pAgents().size(), p2pAgents().data(), nullptr, ptr);
if (stat != HSA_STATUS_SUCCESS) {
- LogError("Allow p2p acces for memory allocation");
+ LogError("Allow p2p access for memory allocation");
memFree(ptr, size);
return nullptr;
}
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp
index 09ffb8e825..b059931ccb 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp
@@ -344,7 +344,7 @@ class Device : public NullDevice {
virtual void hostFree(void* ptr, size_t size = 0) const;
- void* deviceLocalAlloc(size_t size) const;
+ void* deviceLocalAlloc(size_t size, bool atomics = false) const;
void memFree(void* ptr, size_t size) const;
@@ -437,6 +437,7 @@ class Device : public NullDevice {
hsa_amd_memory_pool_t system_segment_;
hsa_amd_memory_pool_t system_coarse_segment_;
hsa_amd_memory_pool_t gpuvm_segment_;
+ hsa_amd_memory_pool_t gpu_fine_grained_segment_;
size_t gpuvm_segment_max_alloc_;
size_t alloc_granularity_;
static const bool offlineDevice_;
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp b/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp
index 020ec81e77..9a4004e10c 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocmemory.cpp
@@ -705,7 +705,7 @@ bool Buffer::create() {
}
flags_ |= HostMemoryDirectAccess;
} else {
- deviceMemory_ = dev().deviceLocalAlloc(size());
+ deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0);
}
owner()->setSvmPtr(deviceMemory_);
} else {