diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index c6146a3940..65a104025a 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2300,12 +2300,10 @@ void Device::svmFree(void* ptr) const { } } -void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) -{ - amd::Memory* mem = nullptr; - +// ================================================================================================ +void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) { // create a hidden buffer, which will allocated on the device later - mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr); + auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr); if (mem == nullptr) { LogError("failed to new a va range mem object!"); return nullptr; @@ -2316,24 +2314,19 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) mem->release(); return nullptr; } - // if the device supports SVM FGS, return the committed CPU address directly. - pal::Memory* gpuMem = getGpuMemory(mem); - amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem); - void* svmPtr = mem->getSvmPtr(); - - return svmPtr; + return mem->getSvmPtr(); } -void Device::virtualFree(void* addr) -{ - amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr); - if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) { +// ================================================================================================ +void Device::virtualFree(void* addr) { + auto va = amd::MemObjMap::FindVirtualMemObj(addr); + if (nullptr != va) { va->release(); - amd::MemObjMap::RemoveVirtualMemObj(addr); } } +// ================================================================================================ bool Device::AcquireExclusiveGpuAccess() { // Lock the virtual GPU list vgpusAccess().lock(); diff --git a/rocclr/device/pal/palvirtual.cpp b/rocclr/device/pal/palvirtual.cpp index 4b899748a5..e80e22298f 100644 --- a/rocclr/device/pal/palvirtual.cpp +++ b/rocclr/device/pal/palvirtual.cpp @@ -2190,8 +2190,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { profilingEnd(cmd); } - - +// ================================================================================================ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); @@ -2203,7 +2202,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { return; } pal::Memory* vaRange = dev().getGpuMemory(va); - Pal::IGpuMemory* memory = (vcmd.memory() == nullptr)? nullptr : dev().getGpuMemory(vcmd.memory())->iMem(); + Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ? + nullptr : dev().getGpuMemory(vcmd.memory())->iMem(); Pal::VirtualMemoryRemapRange range{ vaRange->iMem(), 0, @@ -2212,7 +2212,12 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { vcmd.size(), Pal::VirtualGpuMemAccessMode::NoAccess }; - Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr); + eventBegin(MainEngine); + auto result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr); + // Capture GPU event for the paging operation + GpuEvent event; + eventEnd(MainEngine, event); + setGpuEvent(event); if (result == Pal::Result::Success) { if (vcmd.memory() != nullptr) { // assert the va wasn't mapped already diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp index cea1098b92..b95d731484 100644 --- a/rocclr/platform/memory.cpp +++ b/rocclr/platform/memory.cpp @@ -260,6 +260,7 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy) return true; } +// ================================================================================================ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool forceAlloc) { static const bool forceAllocHostMem = false; @@ -310,12 +311,16 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force } } } - + // Add a VA range into VA range map + if (getMemFlags() & CL_MEM_VA_RANGE_AMD) { + amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this); + } // Store the unique id for each memory allocation uniqueId_ = ++numAllocs; return true; } +// ================================================================================================ bool Memory::addDeviceMemory(const Device* dev) { bool result = false; AllocState create = AllocCreate; @@ -401,6 +406,7 @@ device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) { return dm; } +// ================================================================================================ Memory::~Memory() { // For_each destructor callback: DestructorCallBackEntry* entry; @@ -443,8 +449,13 @@ Memory::~Memory() { parent_->release(); } hostMemRef_.deallocateMemory(context_()); + + if (getMemFlags() & CL_MEM_VA_RANGE_AMD) { + amd::MemObjMap::RemoveVirtualMemObj(getSvmPtr()); + } } +// ================================================================================================ bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) { DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data); if (entry == NULL) {