SWDEV-371904 - Add a pseudo-fine-grain flag to HSA memory allocation for device fine-grained memory.
Change-Id: I8cada90f0e3880dfbc5bf5a3fac4554e7a0cb08e
[ROCm/clr commit: e56a611b92]
This commit is contained in:
committad av
Karthik Jayaprakash
förälder
9cd8e38967
incheckning
e295beb8ba
@@ -2081,8 +2081,10 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
|
||||
const hsa_amd_memory_pool_t& pool = (atomics)? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
|
||||
const hsa_amd_memory_pool_t& pool = (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
uint32_t hsa_mem_flags = (atomics && pseudo_fine_grain) ? HSA_AMD_MEMORY_POOL_PCIE_FLAG
|
||||
: HSA_AMD_MEMORY_POOL_STANDARD_FLAG;
|
||||
|
||||
if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
|
||||
DevLogPrintfError("Invalid argument, pool_handle: 0x%x , max_alloc: %u \n",
|
||||
@@ -2091,7 +2093,7 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
|
||||
}
|
||||
|
||||
void* ptr = nullptr;
|
||||
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
|
||||
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogError("Fail allocation local memory");
|
||||
|
||||
@@ -422,7 +422,7 @@ class Device : public NullDevice {
|
||||
|
||||
bool allowPeerAccess(device::Memory* memory) const;
|
||||
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false) const;
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
|
||||
|
||||
void memFree(void* ptr, size_t size) const;
|
||||
|
||||
|
||||
@@ -796,7 +796,8 @@ bool Buffer::create(bool alloc_local) {
|
||||
}
|
||||
} else {
|
||||
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
|
||||
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0);
|
||||
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0,
|
||||
(memFlags & ROCCLR_MEM_HSA_PSEUDO_FINE_GRAIN) != 0);
|
||||
}
|
||||
owner()->setSvmPtr(deviceMemory_);
|
||||
} else {
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30)
|
||||
#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29)
|
||||
#define CL_MEM_VA_RANGE_AMD (1u << 28)
|
||||
#define ROCCLR_MEM_HSA_PSEUDO_FINE_GRAIN (1u << 27)
|
||||
|
||||
namespace device {
|
||||
class Memory;
|
||||
|
||||
Referens i nytt ärende
Block a user