SWDEV-353281 - Move VirtualMem map update to memobj

- The implementation in mempool graphs requires refcounting the VA object. That requires release() to update the map only on actual destruction of the object.
- Add GPU event tracking for the paging operation. Otherwise, the runtime may not always flush the IB.
Change-Id: Idf99ffb894321a38e04b490116a7ca435635918d
2023-04-25 17:28:28 -04:00
parent 22eae6ac9e
commit 7ef2da5aba
3 changed files with 30 additions and 21 deletions
@@ -2300,12 +2300,10 @@ void Device::svmFree(void* ptr) const {
  }
 }

-void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
-{
-  amd::Memory* mem = nullptr;
-
+// ================================================================================================
+void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
  // create a hidden buffer, which will allocated on the device later
-  mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
+  auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
  if (mem == nullptr) {
    LogError("failed to new a va range mem object!");
    return nullptr;
@@ -2316,24 +2314,19 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
    mem->release();
    return nullptr;
  }
-  // if the device supports SVM FGS, return the committed CPU address directly.
-  pal::Memory* gpuMem = getGpuMemory(mem);
-  amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem);

-  void* svmPtr = mem->getSvmPtr();
-
-  return svmPtr;
+  return mem->getSvmPtr();
 }

-void Device::virtualFree(void* addr)
-{
-  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr);
-  if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
+// ================================================================================================
+void Device::virtualFree(void* addr) {
+  auto va = amd::MemObjMap::FindVirtualMemObj(addr);
+  if (nullptr != va) {
    va->release();
-    amd::MemObjMap::RemoveVirtualMemObj(addr);
  }
 }

+// ================================================================================================
 bool Device::AcquireExclusiveGpuAccess() {
  // Lock the virtual GPU list
  vgpusAccess().lock();
@@ -2190,8 +2190,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
  profilingEnd(cmd);
 }

-
-
+// ================================================================================================
 void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
  // Make sure VirtualGPU has an exclusive access to the resources
  amd::ScopedLock lock(execution());
@@ -2203,7 +2202,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
    return;
  }
  pal::Memory* vaRange = dev().getGpuMemory(va);
-  Pal::IGpuMemory* memory = (vcmd.memory() == nullptr)? nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
+  Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
+      nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
  Pal::VirtualMemoryRemapRange range{
    vaRange->iMem(),
    0,
@@ -2212,7 +2212,12 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
    vcmd.size(),
    Pal::VirtualGpuMemAccessMode::NoAccess
  };
-  Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
+  eventBegin(MainEngine);
+  auto result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
+  // Capture GPU event for the paging operation
+  GpuEvent event;
+  eventEnd(MainEngine, event);
+  setGpuEvent(event);
  if (result == Pal::Result::Success) {
    if (vcmd.memory() != nullptr) {
      // assert the va wasn't mapped already
@@ -260,6 +260,7 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
  return true;
 }

+// ================================================================================================
 bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool forceAlloc) {
  static const bool forceAllocHostMem = false;

@@ -310,12 +311,16 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force
      }
    }
  }
-
+  // Add a VA range into VA range map
+  if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
+    amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this);
+  }
  // Store the unique id for each memory allocation
  uniqueId_ = ++numAllocs;
  return true;
 }

+// ================================================================================================
 bool Memory::addDeviceMemory(const Device* dev) {
  bool result = false;
  AllocState create = AllocCreate;
@@ -401,6 +406,7 @@ device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) {
  return dm;
 }

+// ================================================================================================
 Memory::~Memory() {
  // For_each destructor callback:
  DestructorCallBackEntry* entry;
@@ -443,8 +449,13 @@ Memory::~Memory() {
    parent_->release();
  }
  hostMemRef_.deallocateMemory(context_());
+
+  if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
+    amd::MemObjMap::RemoveVirtualMemObj(getSvmPtr());
+  }
 }

+// ================================================================================================
 bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) {
  DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data);
  if (entry == NULL) {