diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp
index c5791b49e7..bdc7e1e73a 100644
--- a/projects/clr/rocclr/device/device.cpp
+++ b/projects/clr/rocclr/device/device.cpp
@@ -264,6 +264,8 @@ Memory* Device::p2p_stage_ = nullptr;
 
 Monitor MemObjMap::AllocatedLock_ ROCCLR_INIT_PRIORITY(101) ("Guards MemObjMap allocation list");
 std::map<uintptr_t, amd::Memory*> MemObjMap::MemObjMap_ ROCCLR_INIT_PRIORITY(101);
+//! Virtual address ranges keyed by base address; guarded by AllocatedLock_ like MemObjMap_
+std::map<uintptr_t, amd::Memory*> MemObjMap::VirtualMemObjMap_ ROCCLR_INIT_PRIORITY(101);
 
 size_t MemObjMap::size() {
   amd::ScopedLock lock(AllocatedLock_);
@@ -306,6 +308,50 @@ amd::Memory* MemObjMap::FindMemObj(const void* k) {
     return nullptr;
   }
 }
+
+//! Registers virtual address range object v under its base address k.
+//! An existing entry for k is left untouched and the collision is logged.
+void MemObjMap::AddVirtualMemObj(const void* k, amd::Memory* v) {
+  amd::ScopedLock lock(AllocatedLock_);
+  auto rval = VirtualMemObjMap_.insert({ reinterpret_cast<uintptr_t>(k), v });
+  if (!rval.second) {
+    // %p matches the pointer width; %x would truncate a 64-bit address
+    DevLogPrintfError("Virtual Memobj map already has an entry for ptr: %p", k);
+  }
+}
+
+//! Removes the entry whose base address is exactly k.
+//! A missing entry is logged and then trips guarantee().
+void MemObjMap::RemoveVirtualMemObj(const void* k) {
+  amd::ScopedLock lock(AllocatedLock_);
+  auto rval = VirtualMemObjMap_.erase(reinterpret_cast<uintptr_t>(k));
+  if (rval != 1) {
+    DevLogPrintfError("Virtual Memobj map does not have ptr: %p", k);
+    guarantee(false, "VirtualMemobj map does not have ptr");
+  }
+}
+
+//! Returns the object whose range [base, base + getSize()) contains k,
+//! or nullptr when no registered range covers it.
+amd::Memory* MemObjMap::FindVirtualMemObj(const void* k) {
+  amd::ScopedLock lock(AllocatedLock_);
+  uintptr_t key = reinterpret_cast<uintptr_t>(k);
+  // upper_bound gives the first base address above key, so the only
+  // candidate range is the entry just before it
+  auto it = VirtualMemObjMap_.upper_bound(key);
+  if (it == VirtualMemObjMap_.begin()) {
+    return nullptr;
+  }
+
+  --it;
+  amd::Memory* mem = it->second;
+  // key >= it->first already holds after the step back; compare offsets so
+  // the upper bound cannot wrap around
+  if ((key - it->first) < mem->getSize()) {
+    return mem;
+  }
+  return nullptr;
+}
 
 void MemObjMap::UpdateAccess(amd::Device *peerDev) {
   if (peerDev == nullptr) {
diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp
index 9d2c0ea28c..4e2f034a3c 100644
--- a/projects/clr/rocclr/device/device.hpp
+++ b/projects/clr/rocclr/device/device.hpp
@@ -1289,9 +1289,18 @@ class MemObjMap : public AllStatic {
       const void* k);  //!< find the mem object based on the input pointer
   static void UpdateAccess(amd::Device *peerDev);
   static void Purge(amd::Device* dev); //!< Purge all user allocated memories on the given device
+
+  //! Virtual address range bookkeeping, stored separately from MemObjMap_
+  static void AddVirtualMemObj(const void* k,
+                               amd::Memory* v);  //!< add range object v under base address k
+  static void RemoveVirtualMemObj(const void* k);  //!< remove the range whose base address is k
+  static amd::Memory* FindVirtualMemObj(
+      const void* k);  //!< find the range object that contains address k
  private:
   static std::map<uintptr_t, amd::Memory*>
       MemObjMap_;  //!< the mem object<->hostptr information container
+  static std::map<uintptr_t, amd::Memory*>
+      VirtualMemObjMap_;  //!< virtual address range base -> mem object container
   static amd::Monitor AllocatedLock_;  //!< amd monitor locker
 };
 
diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp
index 6b41e554f6..c0fe12595e 100644
--- a/projects/clr/rocclr/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/device/pal/paldevice.cpp
@@ -2261,7 +2261,7 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
   }
   // if the device supports SVM FGS, return the committed CPU address directly.
   pal::Memory* gpuMem = getGpuMemory(mem);
-  amd::MemObjMap::AddMemObj(mem->getSvmPtr(), mem);
+  amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem);
 
   void* svmPtr = mem->getSvmPtr();
 
@@ -2270,10 +2270,13 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
 
 void Device::virtualFree(void* addr)
 {
-  amd::Memory* va = amd::MemObjMap::FindMemObj(addr);
+  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr);
   if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
-    va->release();
-    amd::MemObjMap::RemoveMemObj(addr);
+    // FindVirtualMemObj matches any address inside the range, while the map
+    // is keyed by the base address; drop the entry by that key before the
+    // object is released
+    amd::MemObjMap::RemoveVirtualMemObj(va->getSvmPtr());
+    va->release();
   }
 }
 
diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp
index 300fff261a..0a5e1484c5 100644
--- a/projects/clr/rocclr/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/device/pal/palvirtual.cpp
@@ -2129,7 +2129,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
   amd::ScopedLock lock(execution());
 
   profilingBegin(vcmd);
-  amd::Memory* va = amd::MemObjMap::FindMemObj(vcmd.ptr());
+  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
   if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
     profilingEnd(vcmd);
     return;
@@ -2145,6 +2145,19 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
     Pal::VirtualGpuMemAccessMode::NoAccess
   };
   Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
+  // Mirror a successful remap in MemObjMap: register vcmd.memory() under
+  // vcmd.ptr() on map, drop that entry on unmap (memory() == nullptr)
+  if (result == Pal::Result::Success) {
+    if (vcmd.memory() != nullptr) {
+      // assert the va wasn't mapped already
+      assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
+      amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
+    } else {
+      // assert the va is mapped and needs to be removed
+      assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
+      amd::MemObjMap::RemoveMemObj(vcmd.ptr());
+    }
+  }
   profilingEnd(vcmd);
 }
 