diff --git a/hipamd/src/hip_graph_internal.hpp b/hipamd/src/hip_graph_internal.hpp
index fb62acc06f..a80bb71c7c 100644
--- a/hipamd/src/hip_graph_internal.hpp
+++ b/hipamd/src/hip_graph_internal.hpp
@@ -2104,13 +2104,8 @@ class GraphMemAllocNode final : public GraphNode {
       // Retain memory object because command release will release it
       memory_->retain();
       size_ = aligned_size;
-      // Save geenric allocation info to match VM interfaces
-      memory_->getUserData().data = new hip::MemMapAllocUserData(dptr, aligned_size, va_);
       // Execute the original mapping command
       VirtualMapCommand::submit(device);
-      // Update the internal svm address to ptr
-      memory()->setSvmPtr(va_->getSvmPtr());
-      // Can't destroy VA, because it's used in mapping even if the node will be destroyed
       va_->retain();
       ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc execute: %p, %p",
           va_->getSvmPtr(), memory());
@@ -2234,24 +2229,21 @@ class GraphMemFreeNode : public GraphNode {
 
     virtual void submit(device::VirtualDevice& device) final {
       // Find memory object before unmap logic
-      auto alloc = amd::MemObjMap::FindMemObj(ptr());
+      auto vaddr_mem_obj = amd::MemObjMap::FindMemObj(ptr());
+      assert(vaddr_mem_obj != nullptr && vaddr_mem_obj->getUserData().phys_mem_obj != nullptr);
+      amd::Memory* phys_mem_obj = vaddr_mem_obj->getUserData().phys_mem_obj;  // cleared by unmap
       VirtualMapCommand::submit(device);
-      // Restore the original address of the generic allocation
-      auto ga = reinterpret_cast<hip::MemMapAllocUserData*>(alloc->getUserData().data);
-      alloc->setSvmPtr(ga->ptr_);
       if (!AMD_DIRECT_DISPATCH) {
         // Update the current device, since hip event, used in mem pools, requires device
         hip::setCurrentDevice(device_id_);
       }
       // Free virtual address
-      ga->va_->release();
-      alloc->getUserData().data = nullptr;
+      vaddr_mem_obj->release();
       // Release the allocation back to graph's pool
-      graph_->FreeMemory(ga->ptr_, static_cast<hip::Stream*>(queue()));
-      amd::MemObjMap::AddMemObj(ptr(), ga->va_);
-      delete ga;
+      graph_->FreeMemory(phys_mem_obj->getSvmPtr(), static_cast<hip::Stream*>(queue()));
+      amd::MemObjMap::AddMemObj(ptr(), vaddr_mem_obj);
       ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p",
-          ptr(), alloc);
+          ptr(), vaddr_mem_obj);
     }
 
    private:
diff --git a/hipamd/src/hip_mempool_impl.cpp b/hipamd/src/hip_mempool_impl.cpp
index dc1ca68bbc..44e23a91d9 100644
--- a/hipamd/src/hip_mempool_impl.cpp
+++ b/hipamd/src/hip_mempool_impl.cpp
@@ -225,6 +225,10 @@ bool MemoryPool::FreeMemory(amd::Memory* memory, Stream* stream, Event* event) {
   {
     amd::ScopedLock lock(lock_pool_ops_);
 
+    if (memory->getUserData().phys_mem_obj != nullptr) {
+      memory = memory->getUserData().phys_mem_obj;
+    }
+
     // If the free heap grows over the busy heap, then force release
     if (AMD_DIRECT_DISPATCH && (free_heap_.GetTotalSize() > busy_heap_.GetTotalSize())) {
       // Use event base release to reduce memory pressure
@@ -249,22 +253,14 @@ bool MemoryPool::FreeMemory(amd::Memory* memory, Stream* stream, Event* event) {
     }
     ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Pool FreeMem: %p, %p", memory->getSvmPtr(), memory);
 
-    auto ga = reinterpret_cast<hip::MemMapAllocUserData*>(memory->getUserData().data);
-    if (ga != nullptr) {
-      if (stream == nullptr) {
+    if (stream == nullptr) {
         stream = g_devices[memory->getUserData().deviceId]->NullStream();
-      }
-      // Unmap virtual address from memory
-      auto cmd = new amd::VirtualMapCommand(*stream, amd::Command::EventWaitList{},
-                                            memory->getSvmPtr(), ga->size_, nullptr);
-      cmd->enqueue();
-      cmd->release();
-      memory->setSvmPtr(ga->ptr_);
-      // Free virtual address and destroy generic allocation object
-      ga->va_->release();
-      delete ga;
-      memory->getUserData().data = nullptr;
     }
+    // Unmap virtual address from memory
+    auto cmd = new amd::VirtualMapCommand(*stream, amd::Command::EventWaitList{},
+                                          memory->getSvmPtr(), memory->getSize(), nullptr);
+    cmd->enqueue();
+    cmd->release();
 
     if (stream != nullptr) {
       // The stream of destruction is a safe stream, because the app must handle sync
diff --git a/hipamd/src/hip_vm.cpp b/hipamd/src/hip_vm.cpp
index 73ba16776b..f8dd197c13 100644
--- a/hipamd/src/hip_vm.cpp
+++ b/hipamd/src/hip_vm.cpp
@@ -120,11 +120,15 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
 
   // Add this to amd::Memory object, so this ptr is accesible for other hipmemory operations.
   size_t offset = 0; //this is ignored
-  amd::Memory* memObj = getMemoryObject(ptr, offset);
+  amd::Memory* phys_mem_obj = getMemoryObject(ptr, offset);
   //saves the current device id so that it can be accessed later
-  memObj->getUserData().deviceId = prop->location.id;
-  memObj->getUserData().data = new hip::GenericAllocation(ptr, size, *prop);
-  *handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(memObj->getUserData().data);
+  phys_mem_obj->getUserData().deviceId = prop->location.id;
+  phys_mem_obj->getUserData().data = new hip::GenericAllocation(*phys_mem_obj, size, *prop);
+  *handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(phys_mem_obj->getUserData().data);
+
+  // Remove ptr from MemObjMap: the 0x1 entry is not needed there.
+  // The physical mem obj is saved in the virtual mem obj during mapping instead.
+  amd::MemObjMap::RemoveMemObj(ptr);
 
   HIP_RETURN(hipSuccess);
 }
@@ -225,9 +229,6 @@ hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocat
   cmd->awaitCompletion();
   cmd->release();
 
-  // update the internal svm address to ptr
-  ga->asAmdMemory().setSvmPtr(ptr);
-
   HIP_RETURN(hipSuccess);
 }
 
@@ -268,7 +269,8 @@ hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle,
     HIP_RETURN(hipErrorInvalidValue);
   }
 
-  *handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(mem->getUserData().data);
+  amd::Memory* pa = mem->getUserData().phys_mem_obj;  // null unless mem is a mapped VA range
+  *handle = static_cast<hipMemGenericAllocationHandle_t>(pa ? pa->getUserData().data : nullptr);
 
   if (*handle == nullptr) {
     HIP_RETURN(hipErrorInvalidValue);
@@ -312,17 +314,17 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
     HIP_RETURN(hipErrorInvalidValue);
   }
 
-  amd::Memory* pa = amd::MemObjMap::FindMemObj(ptr);
-  if (pa == nullptr) {
+  amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(ptr);
+  if (vaddr_mem_obj == nullptr) {  // old '&& getSize() != size' only ran on nullptr (crash)
     HIP_RETURN(hipErrorInvalidValue);
   }
 
-  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(ptr);
-  if (va == nullptr && va->getSize() != size) {
+  amd::Memory* phys_mem_obj = vaddr_mem_obj->getUserData().phys_mem_obj;
+  if (phys_mem_obj == nullptr) {
     HIP_RETURN(hipErrorInvalidValue);
   }
 
-  auto& queue = *g_devices[pa->getUserData().deviceId]->NullStream();
+  auto& queue = *g_devices[phys_mem_obj->getUserData().deviceId]->NullStream();
 
   amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
                                                  nullptr);
@@ -331,9 +333,8 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
   cmd->release();
 
   // restore the original pa of the generic allocation
-  hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(pa->getUserData().data);
-  pa->setSvmPtr(ga->genericAddress());
-
+  hip::GenericAllocation* ga
+    = reinterpret_cast<hip::GenericAllocation*>(phys_mem_obj->getUserData().data);
   ga->release();
 
   HIP_RETURN(hipSuccess);
diff --git a/hipamd/src/hip_vm.hpp b/hipamd/src/hip_vm.hpp
index 3302ace54f..de7bf881e4 100644
--- a/hipamd/src/hip_vm.hpp
+++ b/hipamd/src/hip_vm.hpp
@@ -30,35 +30,23 @@ namespace hip {
 
 hipError_t ihipFree(void* ptr);
 
-struct MemMapAllocUserData {
-  void* ptr_;       // Original pointer of the allocation
-  size_t size_;     // Aligned size of the allocation
-  amd::Memory* va_; // Memory object for the virtual address
-
-  MemMapAllocUserData(void* ptr, size_t size, amd::Memory* va) : ptr_(ptr), size_(size), va_(va) {}
-};
-
 class GenericAllocation : public amd::RuntimeObject {
-  void* ptr_;                          //<! Device ptr
+  amd::Memory& phys_mem_ref_;        //<! Physical memory object
   size_t size_;                        //<! Allocated size
   hipMemAllocationProp properties_;    //<! Allocation Properties
 
 public:
-  GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop)
-                     : ptr_(ptr), size_(size), properties_(prop) {}
-  ~GenericAllocation() {
-    hipError_t err = ihipFree(ptr_);
-  }
+  GenericAllocation(amd::Memory& phys_mem_ref, size_t size, const hipMemAllocationProp& prop)
+                    : phys_mem_ref_(phys_mem_ref), size_(size), properties_(prop) {}
+  ~GenericAllocation() {}  // Does not free phys_mem_ref_; TODO confirm it is released elsewhere
 
   const hipMemAllocationProp& GetProperties() const { return properties_; }
   hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() {
     return reinterpret_cast<hipMemGenericAllocationHandle_t>(this);
   }
   amd::Memory& asAmdMemory() {
-    size_t discardOffset;
-    return *getMemoryObject(genericAddress(), discardOffset);
+    return phys_mem_ref_;
   }
-  void* genericAddress() const { return ptr_; }
 
   virtual ObjectType objectType() const { return ObjectTypeVMMAlloc; }
 };
diff --git a/rocclr/device/pal/palvirtual.cpp b/rocclr/device/pal/palvirtual.cpp
index 1378f2696c..a19d3aa1c6 100644
--- a/rocclr/device/pal/palvirtual.cpp
+++ b/rocclr/device/pal/palvirtual.cpp
@@ -2192,18 +2192,18 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
   amd::ScopedLock lock(execution());
 
   profilingBegin(vcmd);
-  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
-  if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
+  amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
+  if (vaddr_mem_obj == nullptr || !(vaddr_mem_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
     profilingEnd(vcmd);
     return;
   }
-  pal::Memory* vaRange = dev().getGpuMemory(va);
-  Pal::IGpuMemory* memory = (vcmd.memory() == nullptr) ?
+  pal::Memory* vaddr_pal_mem = dev().getGpuMemory(vaddr_mem_obj);
+  Pal::IGpuMemory* phymem_igpu_mem = (vcmd.memory() == nullptr) ?
       nullptr : dev().getGpuMemory(vcmd.memory())->iMem();
   Pal::VirtualMemoryRemapRange range{
-    vaRange->iMem(),
+    vaddr_pal_mem->iMem(),
     0,
-    memory,
+    phymem_igpu_mem,
     0,
     vcmd.size(),
     Pal::VirtualGpuMemAccessMode::NoAccess
@@ -2224,13 +2224,15 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
   setGpuEvent(event);
   if (result == Pal::Result::Success) {
     if (vcmd.memory() != nullptr) {
-      // assert the va wasn't mapped already
+      // assert the vaddr_mem_obj wasn't mapped already
       assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
-      amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
+      amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_mem_obj);
+      vaddr_mem_obj->getUserData().phys_mem_obj = vcmd.memory();
     } else {
-      // assert the va is mapped and needs to be removed
+      // assert the vaddr_mem_obj is mapped and needs to be removed
       assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
       amd::MemObjMap::RemoveMemObj(vcmd.ptr());
+      vaddr_mem_obj->getUserData().phys_mem_obj = nullptr;
     }
   }
   profilingEnd(vcmd);
diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp
index 8a3fc9c54b..c4afdf9194 100644
--- a/rocclr/device/rocm/rocdevice.cpp
+++ b/rocclr/device/rocm/rocdevice.cpp
@@ -2301,6 +2301,16 @@ uint64_t Device::deviceVmemAlloc(size_t size, uint64_t flags) const {
   return hsa_vmem_handle.handle;
 }
 
+void Device::deviceVmemRelease(uint64_t mem_handle) const {
+  // Rewrap the raw 64-bit value in the HSA vmem handle struct and release it
+  hsa_amd_vmem_alloc_handle_t vmem_handle{};
+  vmem_handle.handle = mem_handle;
+  const hsa_status_t status = hsa_amd_vmem_handle_release(vmem_handle);
+  if (HSA_STATUS_SUCCESS != status) {
+    LogPrintfError("Failed hsa_amd_vmem_handle_release! Failed with hsa status: %d \n", status);
+  }
+}
+
 void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
   const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
                                       : (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
@@ -2381,7 +2391,7 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
       return nullptr;
     }
 
-    if (mem->getSvmPtr() != nullptr) {
+    if (mem->getSvmPtr() != nullptr || mem->getMemFlags() & ROCCLR_MEM_PHYMEM) {
       // add the information to context so that we can use it later.
       amd::MemObjMap::AddMemObj(mem->getSvmPtr(), mem);
     }
diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp
index 0606c593b6..eef891f2e5 100644
--- a/rocclr/device/rocm/rocdevice.hpp
+++ b/rocclr/device/rocm/rocdevice.hpp
@@ -450,6 +450,7 @@ class Device : public NullDevice {
   bool deviceAllowAccess(void* dst) const;
 
   bool allowPeerAccess(device::Memory* memory) const;
+  void deviceVmemRelease(uint64_t mem_handle) const;
   uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
   void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
 
diff --git a/rocclr/device/rocm/rocmemory.cpp b/rocclr/device/rocm/rocmemory.cpp
index 763317df22..6113262340 100644
--- a/rocclr/device/rocm/rocmemory.cpp
+++ b/rocclr/device/rocm/rocmemory.cpp
@@ -648,6 +648,12 @@ void Buffer::destroy() {
     }
     const bool isFineGrain = memFlags & CL_MEM_SVM_FINE_GRAIN_BUFFER;
 
+    if (memFlags & ROCCLR_MEM_PHYMEM) {
+      // Physical memory: skip the hsa free path, since device memory was never created
+      dev().deviceVmemRelease(owner()->getUserData().hsa_handle);
+      return;
+    }
+
     if (kind_ != MEMORY_KIND_PTRGIVEN) {
       if (isFineGrain) {
         if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
@@ -767,7 +773,10 @@ bool Buffer::create(bool alloc_local) {
     owner()->getUserData().hsa_handle = dev().deviceVmemAlloc(owner()->getSize(), 0);
     if (owner()->getUserData().hsa_handle == 0) {
       LogError("HSA Opaque Handle returned was null");
+      return false;
     }
+    deviceMemory_ = reinterpret_cast<void*>(amd::Memory::MemoryType::kPhyMemHandlePtr);
+    return true;
   }
 
   if ((owner()->parent() == nullptr) &&
diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp
index 6f2f879e9a..7083dfbee1 100644
--- a/rocclr/device/rocm/rocvirtual.cpp
+++ b/rocclr/device/rocm/rocvirtual.cpp
@@ -2589,36 +2589,39 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
 
   profilingBegin(vcmd);
 
-  // Find the amd::Memory object for virtual ptr.
-  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
-  if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
+  // Find the amd::Memory object for virtual ptr. vcmd.ptr() is vaddr.
+  amd::Memory* vaddr_mem_obj = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
+  if (vaddr_mem_obj == nullptr || !(vaddr_mem_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
     profilingEnd(vcmd);
     return;
   }
 
   // Get the amd::Memory object for the physical address
-  amd::Memory* pa = vcmd.memory();
+  amd::Memory* phys_mem_obj = vcmd.memory();
   hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
 
   // If Physical address is not set, then it is map command. If set, it is unmap command.
-  if (pa != nullptr) {
+  if (phys_mem_obj != nullptr) {
     // Map the physical to virtual address the hsa api
     hsa_amd_vmem_alloc_handle_t opaque_hsa_handle;
-    opaque_hsa_handle.handle = pa->getUserData().hsa_handle;
-    if ((hsa_status = hsa_amd_vmem_map(va->getSvmPtr(), va->getSize(), va->getOffset(),
-                                       opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
+    opaque_hsa_handle.handle = phys_mem_obj->getUserData().hsa_handle;
+    if ((hsa_status = hsa_amd_vmem_map(vaddr_mem_obj->getSvmPtr(), vcmd.size(),
+                        vaddr_mem_obj->getOffset(), opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
       assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
       // Now that we have mapped physical addr to virtual addr, make an entry in the MemObjMap.
-      amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
+      amd::MemObjMap::AddMemObj(vcmd.ptr(), vaddr_mem_obj);
+      vaddr_mem_obj->getUserData().phys_mem_obj = phys_mem_obj;
     } else {
       LogError("HSA Command: hsa_amd_vmem_map failed!");
     }
   } else {
     // Unmap the object, since the physical addr is set.
-    if ((hsa_status = hsa_amd_vmem_unmap(va->getSvmPtr(), va->getSize())) == HSA_STATUS_SUCCESS) {
+    if ((hsa_status = hsa_amd_vmem_unmap(vaddr_mem_obj->getSvmPtr(), vcmd.size()))
+                        == HSA_STATUS_SUCCESS) {
       // assert the va is mapped and needs to be removed
       assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
       amd::MemObjMap::RemoveMemObj(vcmd.ptr());
+      vaddr_mem_obj->getUserData().phys_mem_obj = nullptr;
     } else {
       LogError("HSA Command: hsa_amd_vmem_unmap failed");
     }
diff --git a/rocclr/platform/memory.hpp b/rocclr/platform/memory.hpp
index e89ea28791..afda26623c 100644
--- a/rocclr/platform/memory.hpp
+++ b/rocclr/platform/memory.hpp
@@ -142,13 +142,15 @@ class Memory : public amd::RuntimeObject {
  public:
   enum MemoryType {
     kSvmMemoryPtr = 0x1,
-    kArenaMemoryPtr = 0x100
+    kArenaMemoryPtr = 0x100,
+    kPhyMemHandlePtr = 0x101
   };
 
   struct UserData
   {
      int deviceId = 0;     //!< Device ID memory is allocated on
      void* data = nullptr; //!< Opaque user data from CL or HIP or etc.
+     amd::Memory* phys_mem_obj = nullptr; //!< Physical mem obj, only set on virtual mem
      uint64_t hsa_handle = 0; //!<Opaque hsa handle saved for Virtual memories
      unsigned int flags = 0; //!< HIP memory flags
      //! hipMallocPitch allocates buffer using width & height and returns pitch & device pointer.