SWDEV-456279 - Add a new HIP flag (hipDeviceMallocContiguous) for allocating contiguous memory, and pass the flag to the HSA API.

Change-Id: I1bafeaa3096395c729723af958d609bc41e7845c


[ROCm/clr commit: 1d48f2a1ab]
This commit is contained in:
Jaydeep Patel
2024-04-12 08:58:46 +00:00
کامیت شده توسط Jaydeepkumar Patel
والد af5aaa806f
کامیت ab70925687
6 فایلهای تغییر یافته به همراه 15 افزوده شده و 4 حذف شده
@@ -589,6 +589,8 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
ihipFlags = CL_MEM_SVM_ATOMICS;
} else if (flags == hipDeviceMallocUncached) {
ihipFlags = CL_MEM_SVM_ATOMICS | ROCCLR_MEM_HSA_UNCACHED;
} else if (flags == hipDeviceMallocContiguous) {
ihipFlags = ROCCLR_MEM_HSA_CONTIGUOUS | ROCCLR_MEM_HSA_UNCACHED;
} else if (flags == hipMallocSignalMemory) {
ihipFlags = CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER | ROCCLR_MEM_HSA_SIGNAL_MEMORY;
if (sizeBytes != 8) {
@@ -1772,7 +1772,7 @@ class Device : public RuntimeObject {
}
virtual void* deviceLocalAlloc(size_t size, bool atomics = false,
bool pseudo_fine_grain = false) const {
bool pseudo_fine_grain = false, bool contiguous = false) const {
ShouldNotCallThis();
return NULL;
}
@@ -2331,7 +2331,8 @@ void Device::deviceVmemRelease(uint64_t mem_handle) const {
}
}
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain,
bool contiguous) const {
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
@@ -2341,6 +2342,11 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain
return nullptr;
}
uint32_t hsa_mem_flags = 0;
if (contiguous) {
hsa_mem_flags = HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG;
}
void* ptr = nullptr;
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
@@ -453,7 +453,8 @@ class Device : public NullDevice {
bool allowPeerAccess(device::Memory* memory) const;
void deviceVmemRelease(uint64_t mem_handle) const;
uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false,
bool contiguous = false) const;
void memFree(void* ptr, size_t size) const;
@@ -856,7 +856,8 @@ bool Buffer::create(bool alloc_local) {
} else {
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0,
(memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0);
(memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0,
(memFlags & ROCCLR_MEM_HSA_CONTIGUOUS) != 0);
}
owner()->setSvmPtr(deviceMemory_);
} else {
@@ -44,6 +44,7 @@
// ROCCLR_MEM_* flag bits; each macro occupies its own single bit so the
// values can be OR-ed together and tested with a bitwise AND.
// ROCCLR_MEM_HSA_UNCACHED: tested by Buffer::create() to supply the
// `pseudo_fine_grain` argument of deviceLocalAlloc().
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
#define ROCCLR_MEM_INTERPROCESS (1u << 26)
#define ROCCLR_MEM_PHYMEM (1u << 25)
// ROCCLR_MEM_HSA_CONTIGUOUS: set by hipExtMallocWithFlags() for
// hipDeviceMallocContiguous and tested by Buffer::create() to supply the
// `contiguous` argument of deviceLocalAlloc().
#define ROCCLR_MEM_HSA_CONTIGUOUS (1u << 24)
namespace device {
class Memory;