SWDEV-353281 - Move VirtualMem map update to memobj
- The implementation in mempool graphs requires refcounting the VA object, which in turn requires release() to update the map only on the actual destruction of the object. - Add GPU event tracking for the paging operation; otherwise, the runtime may not always flush the IB. Change-Id: Idf99ffb894321a38e04b490116a7ca435635918d
This commit is contained in:
@@ -2300,12 +2300,10 @@ void Device::svmFree(void* ptr) const {
|
||||
}
|
||||
}
|
||||
|
||||
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
|
||||
{
|
||||
amd::Memory* mem = nullptr;
|
||||
|
||||
// ================================================================================================
|
||||
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
|
||||
// create a hidden buffer, which will be allocated on the device later
|
||||
mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
|
||||
auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
|
||||
if (mem == nullptr) {
|
||||
LogError("failed to new a va range mem object!");
|
||||
return nullptr;
|
||||
@@ -2316,24 +2314,19 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
|
||||
mem->release();
|
||||
return nullptr;
|
||||
}
|
||||
// if the device supports SVM FGS, return the committed CPU address directly.
|
||||
pal::Memory* gpuMem = getGpuMemory(mem);
|
||||
amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem);
|
||||
|
||||
void* svmPtr = mem->getSvmPtr();
|
||||
|
||||
return svmPtr;
|
||||
return mem->getSvmPtr();
|
||||
}
|
||||
|
||||
void Device::virtualFree(void* addr)
|
||||
{
|
||||
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr);
|
||||
if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
|
||||
// ================================================================================================
|
||||
void Device::virtualFree(void* addr) {
|
||||
auto va = amd::MemObjMap::FindVirtualMemObj(addr);
|
||||
if (nullptr != va) {
|
||||
va->release();
|
||||
amd::MemObjMap::RemoveVirtualMemObj(addr);
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::AcquireExclusiveGpuAccess() {
|
||||
// Lock the virtual GPU list
|
||||
vgpusAccess().lock();
|
||||
|
||||
@@ -2190,8 +2190,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
|
||||
profilingEnd(cmd);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ================================================================================================
|
||||
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
amd::ScopedLock lock(execution());
|
||||
@@ -2203,7 +2202,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
return;
|
||||
}
|
||||
pal::Memory* vaRange = dev().getGpuMemory(va);
|
||||
Pal::IGpuMemory* memory = (vcmd.memory() == nullptr)? nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
|
||||
Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
|
||||
nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
|
||||
Pal::VirtualMemoryRemapRange range{
|
||||
vaRange->iMem(),
|
||||
0,
|
||||
@@ -2212,7 +2212,12 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
vcmd.size(),
|
||||
Pal::VirtualGpuMemAccessMode::NoAccess
|
||||
};
|
||||
Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
|
||||
eventBegin(MainEngine);
|
||||
auto result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
|
||||
// Capture GPU event for the paging operation
|
||||
GpuEvent event;
|
||||
eventEnd(MainEngine, event);
|
||||
setGpuEvent(event);
|
||||
if (result == Pal::Result::Success) {
|
||||
if (vcmd.memory() != nullptr) {
|
||||
// assert the va wasn't mapped already
|
||||
|
||||
@@ -260,6 +260,7 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool forceAlloc) {
|
||||
static const bool forceAllocHostMem = false;
|
||||
|
||||
@@ -310,12 +311,16 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add a VA range into VA range map
|
||||
if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
|
||||
amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this);
|
||||
}
|
||||
// Store the unique id for each memory allocation
|
||||
uniqueId_ = ++numAllocs;
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Memory::addDeviceMemory(const Device* dev) {
|
||||
bool result = false;
|
||||
AllocState create = AllocCreate;
|
||||
@@ -401,6 +406,7 @@ device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) {
|
||||
return dm;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
Memory::~Memory() {
|
||||
// For_each destructor callback:
|
||||
DestructorCallBackEntry* entry;
|
||||
@@ -443,8 +449,13 @@ Memory::~Memory() {
|
||||
parent_->release();
|
||||
}
|
||||
hostMemRef_.deallocateMemory(context_());
|
||||
|
||||
if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
|
||||
amd::MemObjMap::RemoveVirtualMemObj(getSvmPtr());
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) {
|
||||
DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data);
|
||||
if (entry == NULL) {
|
||||
|
||||
Reference in New Issue
Block a user