From ab70925687c9dff2912b030e76fd6268b11f9948 Mon Sep 17 00:00:00 2001
From: Jaydeep Patel
Date: Fri, 12 Apr 2024 08:58:46 +0000
Subject: [PATCH] SWDEV-456279 - Adding new hip flag to access contiguous
 memory and pass the flag to HSA API.

hipExtMallocWithFlags() now maps hipDeviceMallocContiguous to
ROCCLR_MEM_HSA_CONTIGUOUS | ROCCLR_MEM_HSA_UNCACHED. Buffer::create()
turns the new ROCCLR_MEM_HSA_CONTIGUOUS bit into a fourth
deviceLocalAlloc() argument, `contiguous`, which defaults to false so
existing callers keep their current behaviour.

Device::deviceLocalAlloc() in rocdevice.cpp translates `contiguous` into
HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG and hands it to
hsa_amd_memory_pool_allocate(). The patch as first posted computed
hsa_mem_flags but still passed a literal 0 to the allocator, so the
request never reached HSA; this revision passes hsa_mem_flags instead.

Change-Id: I1bafeaa3096395c729723af958d609bc41e7845c

[ROCm/clr commit: 1d48f2a1ab38b632919c4b7274899b3faf4279ff]
---
Notes (ignored by git am):
 - Line breaks and the indentation of context lines were reconstructed
   from a whitespace-collapsed copy of this patch. Check them against the
   tree, or apply with --ignore-whitespace.
 - The rocdevice.cpp index line still carries the post-image blob id of
   the originally posted patch, not of the amended hunk below.

 projects/clr/hipamd/src/hip_memory.cpp        |  2 ++
 projects/clr/rocclr/device/device.hpp         |  2 +-
 projects/clr/rocclr/device/rocm/rocdevice.cpp | 11 +++++++++--
 projects/clr/rocclr/device/rocm/rocdevice.hpp |  3 ++-
 projects/clr/rocclr/device/rocm/rocmemory.cpp |  3 ++-
 projects/clr/rocclr/platform/memory.hpp       |  1 +
 6 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp
index 8ff94a6df7..bfe00f6eca 100644
--- a/projects/clr/hipamd/src/hip_memory.cpp
+++ b/projects/clr/hipamd/src/hip_memory.cpp
@@ -589,6 +589,8 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
     ihipFlags = CL_MEM_SVM_ATOMICS;
   } else if (flags == hipDeviceMallocUncached) {
     ihipFlags = CL_MEM_SVM_ATOMICS | ROCCLR_MEM_HSA_UNCACHED;
+  } else if (flags == hipDeviceMallocContiguous) {
+    ihipFlags = ROCCLR_MEM_HSA_CONTIGUOUS | ROCCLR_MEM_HSA_UNCACHED;
   } else if (flags == hipMallocSignalMemory) {
     ihipFlags = CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER | ROCCLR_MEM_HSA_SIGNAL_MEMORY;
     if (sizeBytes != 8) {
diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp
index edea84f6f8..1a2bb18169 100644
--- a/projects/clr/rocclr/device/device.hpp
+++ b/projects/clr/rocclr/device/device.hpp
@@ -1772,7 +1772,7 @@ class Device : public RuntimeObject {
   }
 
   virtual void* deviceLocalAlloc(size_t size, bool atomics = false,
-                                 bool pseudo_fine_grain = false) const {
+                                 bool pseudo_fine_grain = false, bool contiguous = false) const {
     ShouldNotCallThis();
     return NULL;
   }
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp
index e046700e92..757a113fe5 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp
@@ -2331,7 +2331,8 @@ void Device::deviceVmemRelease(uint64_t mem_handle) const {
   }
 }
 
-void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
+void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain,
+                               bool contiguous) const {
   const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
                                       : (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
 
@@ -2341,6 +2342,12 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain
     return nullptr;
   }
 
+  // Forward the caller's contiguous request to HSA through the allocation flags.
+  uint32_t hsa_mem_flags = 0;
+  if (contiguous) {
+    hsa_mem_flags = HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG;
+  }
+
   void* ptr = nullptr;
-  hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
+  hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
   ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp
index d4a80ed308..6a45974628 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.hpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp
@@ -453,7 +453,8 @@ class Device : public NullDevice {
   bool allowPeerAccess(device::Memory* memory) const;
   void deviceVmemRelease(uint64_t mem_handle) const;
   uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
-  void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
+  void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false,
+                         bool contiguous = false) const;
 
   void memFree(void* ptr, size_t size) const;
 
diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp
index 65e22c7fe3..8fba64ebc1 100644
--- a/projects/clr/rocclr/device/rocm/rocmemory.cpp
+++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp
@@ -856,7 +856,8 @@ bool Buffer::create(bool alloc_local) {
         } else {
           assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
           deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0,
-                                                 (memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0);
+                                                 (memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0,
+                                                 (memFlags & ROCCLR_MEM_HSA_CONTIGUOUS) != 0);
         }
         owner()->setSvmPtr(deviceMemory_);
       } else {
diff --git a/projects/clr/rocclr/platform/memory.hpp b/projects/clr/rocclr/platform/memory.hpp
index 87f67cd927..daa06308ea 100644
--- a/projects/clr/rocclr/platform/memory.hpp
+++ b/projects/clr/rocclr/platform/memory.hpp
@@ -44,6 +44,7 @@
 #define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
 #define ROCCLR_MEM_INTERPROCESS (1u << 26)
 #define ROCCLR_MEM_PHYMEM (1u << 25)
+#define ROCCLR_MEM_HSA_CONTIGUOUS (1u << 24)
 
 namespace device {
 class Memory;