SWDEV-456279 - Add a new HIP flag for allocating contiguous memory and pass that flag through to the HSA API.
Change-Id: I1bafeaa3096395c729723af958d609bc41e7845c
[ROCm/clr commit: 1d48f2a1ab]
This commit is contained in:
کامیت شده توسط
Jaydeepkumar Patel
والد
af5aaa806f
کامیت
ab70925687
@@ -589,6 +589,8 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
|
||||
ihipFlags = CL_MEM_SVM_ATOMICS;
|
||||
} else if (flags == hipDeviceMallocUncached) {
|
||||
ihipFlags = CL_MEM_SVM_ATOMICS | ROCCLR_MEM_HSA_UNCACHED;
|
||||
} else if (flags == hipDeviceMallocContiguous) {
|
||||
ihipFlags = ROCCLR_MEM_HSA_CONTIGUOUS | ROCCLR_MEM_HSA_UNCACHED;
|
||||
} else if (flags == hipMallocSignalMemory) {
|
||||
ihipFlags = CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER | ROCCLR_MEM_HSA_SIGNAL_MEMORY;
|
||||
if (sizeBytes != 8) {
|
||||
|
||||
@@ -1772,7 +1772,7 @@ class Device : public RuntimeObject {
|
||||
}
|
||||
|
||||
virtual void* deviceLocalAlloc(size_t size, bool atomics = false,
|
||||
bool pseudo_fine_grain = false) const {
|
||||
bool pseudo_fine_grain = false, bool contiguous = false) const {
|
||||
ShouldNotCallThis();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2331,7 +2331,8 @@ void Device::deviceVmemRelease(uint64_t mem_handle) const {
|
||||
}
|
||||
}
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain,
|
||||
bool contiguous) const {
|
||||
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
|
||||
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
|
||||
@@ -2341,6 +2342,11 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint32_t hsa_mem_flags = 0;
|
||||
if (contiguous) {
|
||||
hsa_mem_flags = HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG;
|
||||
}
|
||||
|
||||
void* ptr = nullptr;
|
||||
// Forward hsa_mem_flags (set to HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG when `contiguous` is true)
// to the pool allocator. Passing a literal 0 here left the flag computed above unused, so a
// contiguous request was silently dropped.
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
|
||||
|
||||
@@ -453,7 +453,8 @@ class Device : public NullDevice {
|
||||
bool allowPeerAccess(device::Memory* memory) const;
|
||||
void deviceVmemRelease(uint64_t mem_handle) const;
|
||||
uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false,
|
||||
bool contiguous = false) const;
|
||||
|
||||
void memFree(void* ptr, size_t size) const;
|
||||
|
||||
|
||||
@@ -856,7 +856,8 @@ bool Buffer::create(bool alloc_local) {
|
||||
} else {
|
||||
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
|
||||
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0,
|
||||
(memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0);
|
||||
(memFlags & ROCCLR_MEM_HSA_UNCACHED) != 0,
|
||||
(memFlags & ROCCLR_MEM_HSA_CONTIGUOUS) != 0);
|
||||
}
|
||||
owner()->setSvmPtr(deviceMemory_);
|
||||
} else {
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
|
||||
#define ROCCLR_MEM_INTERPROCESS (1u << 26)
|
||||
#define ROCCLR_MEM_PHYMEM (1u << 25)
|
||||
#define ROCCLR_MEM_HSA_CONTIGUOUS (1u << 24)
|
||||
|
||||
namespace device {
|
||||
class Memory;
|
||||
|
||||
مرجع در شماره جدید
Block a user