SWDEV-353281 - Move VirtualMem map update to memobj

- The implementation in mempool graphs requires refcounting the VA object,
which in turn requires release() to update the map only on actual destruction.
- Add GPU event tracking for the paging operation. Otherwise, the runtime
may not always flush the IB.

Change-Id: Idf99ffb894321a38e04b490116a7ca435635918d
This commit is contained in:
German
2023-04-25 17:28:28 -04:00
committed by German Andryeyev
parent 22eae6ac9e
commit 7ef2da5aba
3 changed files with 30 additions and 21 deletions
+9 -16
View File
@@ -2300,12 +2300,10 @@ void Device::svmFree(void* ptr) const {
}
}
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
{
amd::Memory* mem = nullptr;
// ================================================================================================
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
// create a hidden buffer, which will be allocated on the device later
mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
if (mem == nullptr) {
LogError("failed to new a va range mem object!");
return nullptr;
@@ -2316,24 +2314,19 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
mem->release();
return nullptr;
}
// if the device supports SVM FGS, return the committed CPU address directly.
pal::Memory* gpuMem = getGpuMemory(mem);
amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem);
void* svmPtr = mem->getSvmPtr();
return svmPtr;
return mem->getSvmPtr();
}
void Device::virtualFree(void* addr)
{
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr);
if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
// ================================================================================================
void Device::virtualFree(void* addr) {
auto va = amd::MemObjMap::FindVirtualMemObj(addr);
if (nullptr != va) {
va->release();
amd::MemObjMap::RemoveVirtualMemObj(addr);
}
}
// ================================================================================================
bool Device::AcquireExclusiveGpuAccess() {
// Lock the virtual GPU list
vgpusAccess().lock();
+9 -4
View File
@@ -2190,8 +2190,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
profilingEnd(cmd);
}
// ================================================================================================
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
@@ -2203,7 +2202,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
return;
}
pal::Memory* vaRange = dev().getGpuMemory(va);
Pal::IGpuMemory* memory = (vcmd.memory() == nullptr)? nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
Pal::VirtualMemoryRemapRange range{
vaRange->iMem(),
0,
@@ -2212,7 +2212,12 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
vcmd.size(),
Pal::VirtualGpuMemAccessMode::NoAccess
};
Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
eventBegin(MainEngine);
auto result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
// Capture GPU event for the paging operation
GpuEvent event;
eventEnd(MainEngine, event);
setGpuEvent(event);
if (result == Pal::Result::Success) {
if (vcmd.memory() != nullptr) {
// assert the va wasn't mapped already
+12 -1
View File
@@ -260,6 +260,7 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
return true;
}
// ================================================================================================
bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool forceAlloc) {
static const bool forceAllocHostMem = false;
@@ -310,12 +311,16 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force
}
}
}
// Add a VA range into VA range map
if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this);
}
// Store the unique id for each memory allocation
uniqueId_ = ++numAllocs;
return true;
}
// ================================================================================================
bool Memory::addDeviceMemory(const Device* dev) {
bool result = false;
AllocState create = AllocCreate;
@@ -401,6 +406,7 @@ device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) {
return dm;
}
// ================================================================================================
Memory::~Memory() {
// For_each destructor callback:
DestructorCallBackEntry* entry;
@@ -443,8 +449,13 @@ Memory::~Memory() {
parent_->release();
}
hostMemRef_.deallocateMemory(context_());
if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
amd::MemObjMap::RemoveVirtualMemObj(getSvmPtr());
}
}
// ================================================================================================
bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) {
DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data);
if (entry == NULL) {