diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp
index c6146a3940..65a104025a 100644
--- a/rocclr/device/pal/paldevice.cpp
+++ b/rocclr/device/pal/paldevice.cpp
@@ -2300,12 +2300,10 @@ void Device::svmFree(void* ptr) const {
   }
 }
 
-void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
-{
-  amd::Memory* mem = nullptr;
-
+// ================================================================================================
+void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
   // create a hidden buffer, which will allocated on the device later
-  mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
+  auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
   if (mem == nullptr) {
     LogError("failed to new a va range mem object!");
     return nullptr;
@@ -2316,24 +2314,19 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
     mem->release();
     return nullptr;
   }
-  // if the device supports SVM FGS, return the committed CPU address directly.
-  pal::Memory* gpuMem = getGpuMemory(mem);
-  amd::MemObjMap::AddVirtualMemObj(mem->getSvmPtr(), mem);
 
-  void* svmPtr = mem->getSvmPtr();
-
-  return svmPtr;
+  return mem->getSvmPtr();
 }
 
-void Device::virtualFree(void* addr)
-{
-  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(addr);
-  if (nullptr != va && (va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
+// ================================================================================================
+void Device::virtualFree(void* addr) {
+  auto va = amd::MemObjMap::FindVirtualMemObj(addr);
+  if (nullptr != va) {
     va->release();
-    amd::MemObjMap::RemoveVirtualMemObj(addr);
   }
 }
 
+// ================================================================================================
 bool Device::AcquireExclusiveGpuAccess() {
   // Lock the virtual GPU list
   vgpusAccess().lock();
diff --git a/rocclr/device/pal/palvirtual.cpp b/rocclr/device/pal/palvirtual.cpp
index 4b899748a5..e80e22298f 100644
--- a/rocclr/device/pal/palvirtual.cpp
+++ b/rocclr/device/pal/palvirtual.cpp
@@ -2190,8 +2190,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
   profilingEnd(cmd);
 }
 
-
-
+// ================================================================================================
 void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
   // Make sure VirtualGPU has an exclusive access to the resources
   amd::ScopedLock lock(execution());
@@ -2203,7 +2202,8 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
     return;
   }
   pal::Memory* vaRange = dev().getGpuMemory(va);
-  Pal::IGpuMemory* memory = (vcmd.memory() == nullptr)? nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
+  Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
+      nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
   Pal::VirtualMemoryRemapRange range{
     vaRange->iMem(),
     0,
@@ -2212,7 +2212,12 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
     vcmd.size(),
     Pal::VirtualGpuMemAccessMode::NoAccess
   };
-  Pal::Result result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
+  eventBegin(MainEngine);
+  auto result = queue(MainEngine).iQueue_->RemapVirtualMemoryPages(1, &range, false, nullptr);
+  // Capture GPU event for the paging operation
+  GpuEvent event;
+  eventEnd(MainEngine, event);
+  setGpuEvent(event);
   if (result == Pal::Result::Success) {
     if (vcmd.memory() != nullptr) {
       // assert the va wasn't mapped already
diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp
index cea1098b92..b95d731484 100644
--- a/rocclr/platform/memory.cpp
+++ b/rocclr/platform/memory.cpp
@@ -260,6 +260,7 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
   return true;
 }
 
+// ================================================================================================
 bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool forceAlloc) {
   static const bool forceAllocHostMem = false;
 
@@ -310,12 +311,16 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force
       }
     }
   }
-
+  // Register this VA range object in the VA range map, keyed by its SVM pointer
+  if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
+    amd::MemObjMap::AddVirtualMemObj(getSvmPtr(), this);
+  }
   // Store the unique id for each memory allocation
   uniqueId_ = ++numAllocs;
   return true;
 }
 
+// ================================================================================================
 bool Memory::addDeviceMemory(const Device* dev) {
   bool result = false;
   AllocState create = AllocCreate;
@@ -401,6 +406,7 @@ device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) {
   return dm;
 }
 
+// ================================================================================================
 Memory::~Memory() {
   // For_each destructor callback:
   DestructorCallBackEntry* entry;
@@ -443,8 +449,13 @@ Memory::~Memory() {
     parent_->release();
   }
   hostMemRef_.deallocateMemory(context_());
+  // NOTE(review): removal also runs if create() never added this range - confirm that is safe
+  if (getMemFlags() & CL_MEM_VA_RANGE_AMD) {
+    amd::MemObjMap::RemoveVirtualMemObj(getSvmPtr());
+  }
 }
 
+// ================================================================================================
 bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) {
   DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data);
   if (entry == NULL) {