SWDEV-423835 - Fixing kernel launch issues on Virtual Memory Management path.
Change-Id: I9f5e8a3d83af3809b2c50b21a10697e26113dd23
This commit is contained in:
کامیت شده توسط
Karthik Jayaprakash
والد
dd43dc930d
کامیت
f5ca620baa
@@ -2104,13 +2104,8 @@ class GraphMemAllocNode final : public GraphNode {
|
||||
// Retain memory object because command release will release it
|
||||
memory_->retain();
|
||||
size_ = aligned_size;
|
||||
// Save generic allocation info to match VM interfaces
|
||||
memory_->getUserData().data = new hip::MemMapAllocUserData(dptr, aligned_size, va_);
|
||||
// Execute the original mapping command
|
||||
VirtualMapCommand::submit(device);
|
||||
// Update the internal svm address to ptr
|
||||
memory()->setSvmPtr(va_->getSvmPtr());
|
||||
// Can't destroy VA, because it's used in mapping even if the node will be destroyed
|
||||
va_->retain();
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc execute: %p, %p",
|
||||
va_->getSvmPtr(), memory());
|
||||
@@ -2234,24 +2229,21 @@ class GraphMemFreeNode : public GraphNode {
|
||||
|
||||
virtual void submit(device::VirtualDevice& device) final {
|
||||
// Find memory object before unmap logic
|
||||
auto alloc = amd::MemObjMap::FindMemObj(ptr());
|
||||
auto vaddr_mem_obj = amd::MemObjMap::FindMemObj(ptr());
|
||||
amd::Memory* phys_mem_obj = vaddr_mem_obj->getUserData().phys_mem_obj;
|
||||
assert(phys_mem_obj != nullptr);
|
||||
VirtualMapCommand::submit(device);
|
||||
// Restore the original address of the generic allocation
|
||||
auto ga = reinterpret_cast<hip::MemMapAllocUserData*>(alloc->getUserData().data);
|
||||
alloc->setSvmPtr(ga->ptr_);
|
||||
if (!AMD_DIRECT_DISPATCH) {
|
||||
// Update the current device, since hip event, used in mem pools, requires device
|
||||
hip::setCurrentDevice(device_id_);
|
||||
}
|
||||
// Free virtual address
|
||||
ga->va_->release();
|
||||
alloc->getUserData().data = nullptr;
|
||||
vaddr_mem_obj->release();
|
||||
// Release the allocation back to graph's pool
|
||||
graph_->FreeMemory(ga->ptr_, static_cast<hip::Stream*>(queue()));
|
||||
amd::MemObjMap::AddMemObj(ptr(), ga->va_);
|
||||
delete ga;
|
||||
graph_->FreeMemory(phys_mem_obj->getSvmPtr(), static_cast<hip::Stream*>(queue()));
|
||||
amd::MemObjMap::AddMemObj(ptr(), vaddr_mem_obj);
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p",
|
||||
ptr(), alloc);
|
||||
ptr(), vaddr_mem_obj);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -225,6 +225,10 @@ bool MemoryPool::FreeMemory(amd::Memory* memory, Stream* stream, Event* event) {
|
||||
{
|
||||
amd::ScopedLock lock(lock_pool_ops_);
|
||||
|
||||
if (memory->getUserData().phys_mem_obj != nullptr) {
|
||||
memory = memory->getUserData().phys_mem_obj;
|
||||
}
|
||||
|
||||
// If the free heap grows over the busy heap, then force release
|
||||
if (AMD_DIRECT_DISPATCH && (free_heap_.GetTotalSize() > busy_heap_.GetTotalSize())) {
|
||||
// Use event base release to reduce memory pressure
|
||||
@@ -249,22 +253,14 @@ bool MemoryPool::FreeMemory(amd::Memory* memory, Stream* stream, Event* event) {
|
||||
}
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Pool FreeMem: %p, %p", memory->getSvmPtr(), memory);
|
||||
|
||||
auto ga = reinterpret_cast<hip::MemMapAllocUserData*>(memory->getUserData().data);
|
||||
if (ga != nullptr) {
|
||||
if (stream == nullptr) {
|
||||
if (stream == nullptr) {
|
||||
stream = g_devices[memory->getUserData().deviceId]->NullStream();
|
||||
}
|
||||
// Unmap virtual address from memory
|
||||
auto cmd = new amd::VirtualMapCommand(*stream, amd::Command::EventWaitList{},
|
||||
memory->getSvmPtr(), ga->size_, nullptr);
|
||||
cmd->enqueue();
|
||||
cmd->release();
|
||||
memory->setSvmPtr(ga->ptr_);
|
||||
// Free virtual address and destroy generic allocation object
|
||||
ga->va_->release();
|
||||
delete ga;
|
||||
memory->getUserData().data = nullptr;
|
||||
}
|
||||
// Unmap virtual address from memory
|
||||
auto cmd = new amd::VirtualMapCommand(*stream, amd::Command::EventWaitList{},
|
||||
memory->getSvmPtr(), memory->getSize(), nullptr);
|
||||
cmd->enqueue();
|
||||
cmd->release();
|
||||
|
||||
if (stream != nullptr) {
|
||||
// The stream of destruction is a safe stream, because the app must handle sync
|
||||
|
||||
+17
-16
@@ -120,11 +120,15 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
|
||||
|
||||
// Add this to the amd::Memory object, so this ptr is accessible to other HIP memory operations.
|
||||
size_t offset = 0; //this is ignored
|
||||
amd::Memory* memObj = getMemoryObject(ptr, offset);
|
||||
amd::Memory* phys_mem_obj = getMemoryObject(ptr, offset);
|
||||
//saves the current device id so that it can be accessed later
|
||||
memObj->getUserData().deviceId = prop->location.id;
|
||||
memObj->getUserData().data = new hip::GenericAllocation(ptr, size, *prop);
|
||||
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(memObj->getUserData().data);
|
||||
phys_mem_obj->getUserData().deviceId = prop->location.id;
|
||||
phys_mem_obj->getUserData().data = new hip::GenericAllocation(*phys_mem_obj, size, *prop);
|
||||
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(phys_mem_obj->getUserData().data);
|
||||
|
||||
// Remove because the entry of 0x1 is not needed in MemObjMap.
|
||||
// We save the copy of Phy mem obj in virtual mem obj during mapping.
|
||||
amd::MemObjMap::RemoveMemObj(ptr);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
@@ -225,9 +229,6 @@ hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocat
|
||||
cmd->awaitCompletion();
|
||||
cmd->release();
|
||||
|
||||
// update the internal svm address to ptr
|
||||
ga->asAmdMemory().setSvmPtr(ptr);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -268,7 +269,8 @@ hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle,
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(mem->getUserData().data);
|
||||
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(
|
||||
mem->getUserData().phys_mem_obj->getUserData().data);
|
||||
|
||||
if (*handle == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -312,17 +314,17 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Memory* pa = amd::MemObjMap::FindMemObj(ptr);
|
||||
if (pa == nullptr) {
|
||||
amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(ptr);
|
||||
// Reject the request when no virtual-address object is registered for ptr or when the requested
// size differs from that object's size. The former `&&` dereferenced a null pointer in the
// not-found case and could never reject a size mismatch.
// NOTE(review): this now enforces the size equality the condition was written to check; confirm
// that callers never unmap only part of a range.
if (vaddr_mem_obj == nullptr || vaddr_mem_obj->getSize() != size) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(ptr);
|
||||
if (va == nullptr && va->getSize() != size) {
|
||||
amd::Memory* phys_mem_obj = vaddr_mem_obj->getUserData().phys_mem_obj;
|
||||
if (phys_mem_obj == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
auto& queue = *g_devices[pa->getUserData().deviceId]->NullStream();
|
||||
auto& queue = *g_devices[phys_mem_obj->getUserData().deviceId]->NullStream();
|
||||
|
||||
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
|
||||
nullptr);
|
||||
@@ -331,9 +333,8 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
|
||||
cmd->release();
|
||||
|
||||
// restore the original pa of the generic allocation
|
||||
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(pa->getUserData().data);
|
||||
pa->setSvmPtr(ga->genericAddress());
|
||||
|
||||
hip::GenericAllocation* ga
|
||||
= reinterpret_cast<hip::GenericAllocation*>(phys_mem_obj->getUserData().data);
|
||||
ga->release();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
|
||||
@@ -30,35 +30,23 @@ namespace hip {
|
||||
|
||||
hipError_t ihipFree(void* ptr);
|
||||
|
||||
struct MemMapAllocUserData {
|
||||
void* ptr_; // Original pointer of the allocation
|
||||
size_t size_; // Aligned size of the allocation
|
||||
amd::Memory* va_; // Memory object for the virtual address
|
||||
|
||||
MemMapAllocUserData(void* ptr, size_t size, amd::Memory* va) : ptr_(ptr), size_(size), va_(va) {}
|
||||
};
|
||||
|
||||
class GenericAllocation : public amd::RuntimeObject {
|
||||
void* ptr_; //<! Device ptr
|
||||
amd::Memory& phys_mem_ref_; //<! Physical memory object
|
||||
size_t size_; //<! Allocated size
|
||||
hipMemAllocationProp properties_; //<! Allocation Properties
|
||||
|
||||
public:
|
||||
GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop)
|
||||
: ptr_(ptr), size_(size), properties_(prop) {}
|
||||
~GenericAllocation() {
|
||||
hipError_t err = ihipFree(ptr_);
|
||||
}
|
||||
GenericAllocation(amd::Memory& phys_mem_ref, size_t size, const hipMemAllocationProp& prop)
|
||||
: phys_mem_ref_(phys_mem_ref), size_(size), properties_(prop) {}
|
||||
~GenericAllocation() {}
|
||||
|
||||
const hipMemAllocationProp& GetProperties() const { return properties_; }
|
||||
hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() {
|
||||
return reinterpret_cast<hipMemGenericAllocationHandle_t>(this);
|
||||
}
|
||||
amd::Memory& asAmdMemory() {
|
||||
size_t discardOffset;
|
||||
return *getMemoryObject(genericAddress(), discardOffset);
|
||||
return phys_mem_ref_;
|
||||
}
|
||||
void* genericAddress() const { return ptr_; }
|
||||
|
||||
virtual ObjectType objectType() const { return ObjectTypeVMMAlloc; }
|
||||
};
|
||||
|
||||
@@ -2192,18 +2192,18 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
amd::ScopedLock lock(execution());
|
||||
|
||||
profilingBegin(vcmd);
|
||||
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
|
||||
if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
|
||||
if (vaddr_mem_obj == nullptr || !(vaddr_mem_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
profilingEnd(vcmd);
|
||||
return;
|
||||
}
|
||||
pal::Memory* vaRange = dev().getGpuMemory(va);
|
||||
Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
|
||||
pal::Memory* vaddr_pal_mem = dev().getGpuMemory(vaddr_mem_obj);
|
||||
Pal::IGpuMemory* phymem_igpu_mem = (vcmd.memory() == nullptr) ?
|
||||
nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
|
||||
Pal::VirtualMemoryRemapRange range{
|
||||
vaRange->iMem(),
|
||||
vaddr_pal_mem->iMem(),
|
||||
0,
|
||||
memory,
|
||||
phymem_igpu_mem,
|
||||
0,
|
||||
vcmd.size(),
|
||||
Pal::VirtualGpuMemAccessMode::NoAccess
|
||||
@@ -2224,13 +2224,15 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
setGpuEvent(event);
|
||||
if (result == Pal::Result::Success) {
|
||||
if (vcmd.memory() != nullptr) {
|
||||
// assert the va wasn't mapped already
|
||||
// assert the vaddr_mem_obj wasn't mapped already
|
||||
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
|
||||
amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
|
||||
amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_mem_obj);
|
||||
vaddr_mem_obj->getUserData().phys_mem_obj = vcmd.memory();
|
||||
} else {
|
||||
// assert the va is mapped and needs to be removed
|
||||
// assert the vaddr_mem_obj is mapped and needs to be removed
|
||||
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
|
||||
amd::MemObjMap::RemoveMemObj(vcmd.ptr());
|
||||
vaddr_mem_obj->getUserData().phys_mem_obj = nullptr;
|
||||
}
|
||||
}
|
||||
profilingEnd(vcmd);
|
||||
|
||||
@@ -2301,6 +2301,16 @@ uint64_t Device::deviceVmemAlloc(size_t size, uint64_t flags) const {
|
||||
return hsa_vmem_handle.handle;
|
||||
}
|
||||
|
||||
void Device::deviceVmemRelease(uint64_t mem_handle) const {
|
||||
hsa_amd_vmem_alloc_handle_t hsa_vmem_handle {};
|
||||
hsa_vmem_handle.handle = mem_handle;
|
||||
|
||||
hsa_status_t hsa_status = hsa_amd_vmem_handle_release(hsa_vmem_handle);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_handle_release! Failed with hsa status: %d \n", hsa_status);
|
||||
}
|
||||
}
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
|
||||
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
|
||||
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
@@ -2381,7 +2391,7 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (mem->getSvmPtr() != nullptr) {
|
||||
if (mem->getSvmPtr() != nullptr || mem->getMemFlags() & ROCCLR_MEM_PHYMEM) {
|
||||
// add the information to context so that we can use it later.
|
||||
amd::MemObjMap::AddMemObj(mem->getSvmPtr(), mem);
|
||||
}
|
||||
|
||||
@@ -450,6 +450,7 @@ class Device : public NullDevice {
|
||||
bool deviceAllowAccess(void* dst) const;
|
||||
|
||||
bool allowPeerAccess(device::Memory* memory) const;
|
||||
void deviceVmemRelease(uint64_t mem_handle) const;
|
||||
uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
|
||||
|
||||
|
||||
@@ -648,6 +648,12 @@ void Buffer::destroy() {
|
||||
}
|
||||
const bool isFineGrain = memFlags & CL_MEM_SVM_FINE_GRAIN_BUFFER;
|
||||
|
||||
if (memFlags & ROCCLR_MEM_PHYMEM) {
|
||||
// If this is physical memory, don't call the hsa free function, since device mem was never created
|
||||
dev().deviceVmemRelease(owner()->getUserData().hsa_handle);
|
||||
return;
|
||||
}
|
||||
|
||||
if (kind_ != MEMORY_KIND_PTRGIVEN) {
|
||||
if (isFineGrain) {
|
||||
if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
|
||||
@@ -767,7 +773,10 @@ bool Buffer::create(bool alloc_local) {
|
||||
owner()->getUserData().hsa_handle = dev().deviceVmemAlloc(owner()->getSize(), 0);
|
||||
if (owner()->getUserData().hsa_handle == 0) {
|
||||
LogError("HSA Opaque Handle returned was null");
|
||||
return false;
|
||||
}
|
||||
deviceMemory_ = reinterpret_cast<void*>(amd::Memory::MemoryType::kPhyMemHandlePtr);
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((owner()->parent() == nullptr) &&
|
||||
|
||||
@@ -2589,36 +2589,39 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
|
||||
profilingBegin(vcmd);
|
||||
|
||||
// Find the amd::Memory object for virtual ptr.
|
||||
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
|
||||
if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
// Find the amd::Memory object for virtual ptr. vcmd.ptr() is vaddr.
|
||||
amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
|
||||
if (vaddr_mem_obj == nullptr || !(vaddr_mem_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
profilingEnd(vcmd);
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the amd::Memory object for the physical address
|
||||
amd::Memory* pa = vcmd.memory();
|
||||
amd::Memory* phys_mem_obj = vcmd.memory();
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
|
||||
// If the physical memory object is set, this is a map command. If it is null, it is an unmap command.
|
||||
if (pa != nullptr) {
|
||||
if (phys_mem_obj != nullptr) {
|
||||
// Map the physical to virtual address the hsa api
|
||||
hsa_amd_vmem_alloc_handle_t opaque_hsa_handle;
|
||||
opaque_hsa_handle.handle = pa->getUserData().hsa_handle;
|
||||
if ((hsa_status = hsa_amd_vmem_map(va->getSvmPtr(), va->getSize(), va->getOffset(),
|
||||
opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
|
||||
opaque_hsa_handle.handle = phys_mem_obj->getUserData().hsa_handle;
|
||||
if ((hsa_status = hsa_amd_vmem_map(vaddr_mem_obj->getSvmPtr(), vcmd.size(),
|
||||
vaddr_mem_obj->getOffset(), opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
|
||||
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
|
||||
// Now that we have mapped physical addr to virtual addr, make an entry in the MemObjMap.
|
||||
amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
|
||||
amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_mem_obj);
|
||||
vaddr_mem_obj->getUserData().phys_mem_obj = phys_mem_obj;
|
||||
} else {
|
||||
LogError("HSA Command: hsa_amd_vmem_map failed!");
|
||||
}
|
||||
} else {
|
||||
// Unmap the object, since no physical memory object was supplied.
|
||||
if ((hsa_status = hsa_amd_vmem_unmap(va->getSvmPtr(), va->getSize())) == HSA_STATUS_SUCCESS) {
|
||||
if ((hsa_status = hsa_amd_vmem_unmap(vaddr_mem_obj->getSvmPtr(), vcmd.size()))
|
||||
== HSA_STATUS_SUCCESS) {
|
||||
// assert the va is mapped and needs to be removed
|
||||
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
|
||||
amd::MemObjMap::RemoveMemObj(vcmd.ptr());
|
||||
vaddr_mem_obj->getUserData().phys_mem_obj = nullptr;
|
||||
} else {
|
||||
LogError("HSA Command: hsa_amd_vmem_unmap failed");
|
||||
}
|
||||
|
||||
@@ -142,13 +142,15 @@ class Memory : public amd::RuntimeObject {
|
||||
public:
|
||||
enum MemoryType {
|
||||
kSvmMemoryPtr = 0x1,
|
||||
kArenaMemoryPtr = 0x100
|
||||
kArenaMemoryPtr = 0x100,
|
||||
kPhyMemHandlePtr = 0x101
|
||||
};
|
||||
|
||||
struct UserData
|
||||
{
|
||||
int deviceId = 0; //!< Device ID memory is allocated on
|
||||
void* data = nullptr; //!< Opaque user data from CL or HIP or etc.
|
||||
amd::Memory* phys_mem_obj = nullptr; //<! Physical mem obj, only set on virtual mem
|
||||
uint64_t hsa_handle = 0; //!<Opaque hsa handle saved for Virtual memories
|
||||
unsigned int flags = 0; //!< HIP memory flags
|
||||
//! hipMallocPitch allocates buffer using width & height and returns pitch & device pointer.
|
||||
|
||||
مرجع در شماره جدید
Block a user