SWDEV-371904 - Add a pseudo fine-grain flag to HSA memory allocation for device fine-grained memory.

Change-Id: I8cada90f0e3880dfbc5bf5a3fac4554e7a0cb08e


[ROCm/clr commit: e56a611b92]
This commit is contained in:
kjayapra-amd
2022-12-05 13:49:41 -08:00
committad av Karthik Jayaprakash
förälder 9cd8e38967
incheckning e295beb8ba
4 ändrade filer med 9 tillägg och 5 borttagningar
+5 -3
Visa fil
@@ -2081,8 +2081,10 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
return true;
}
void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
const hsa_amd_memory_pool_t& pool = (atomics)? gpu_fine_grained_segment_ : gpuvm_segment_;
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
const hsa_amd_memory_pool_t& pool = (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
uint32_t hsa_mem_flags = (atomics && pseudo_fine_grain) ? HSA_AMD_MEMORY_POOL_PCIE_FLAG
: HSA_AMD_MEMORY_POOL_STANDARD_FLAG;
if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
DevLogPrintfError("Invalid argument, pool_handle: 0x%x , max_alloc: %u \n",
@@ -2091,7 +2093,7 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics) const {
}
void* ptr = nullptr;
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
if (stat != HSA_STATUS_SUCCESS) {
LogError("Fail allocation local memory");
+1 -1
Visa fil
@@ -422,7 +422,7 @@ class Device : public NullDevice {
bool allowPeerAccess(device::Memory* memory) const;
void* deviceLocalAlloc(size_t size, bool atomics = false) const;
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
void memFree(void* ptr, size_t size) const;
+2 -1
Visa fil
@@ -796,7 +796,8 @@ bool Buffer::create(bool alloc_local) {
}
} else {
assert(!isHostMemDirectAccess() && "Runtime doesn't support direct access to GPU memory!");
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0);
deviceMemory_ = dev().deviceLocalAlloc(size(), (memFlags & CL_MEM_SVM_ATOMICS) != 0,
(memFlags & ROCCLR_MEM_HSA_PSEUDO_FINE_GRAIN) != 0);
}
owner()->setSvmPtr(deviceMemory_);
} else {
+1
Visa fil
@@ -41,6 +41,7 @@
#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30)
#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29)
#define CL_MEM_VA_RANGE_AMD (1u << 28)
// Requests the pseudo fine-grain allocation mode. Buffer::create tests this bit and
// forwards it to Device::deviceLocalAlloc(), which passes HSA_AMD_MEMORY_POOL_PCIE_FLAG
// to hsa_amd_memory_pool_allocate() only when CL_MEM_SVM_ATOMICS is set as well;
// otherwise HSA_AMD_MEMORY_POOL_STANDARD_FLAG is used.
#define ROCCLR_MEM_HSA_PSEUDO_FINE_GRAIN (1u << 27)
namespace device {
class Memory;