SWDEV-240804 - Remove AMD_HMM_SUPPORT define

Use dynamic logic for HMM based on its availability. Change-Id: I63751d94571d5af6eb57bef2cb0e071120bfa103 [ROCm/clr commit: 3f7a6b01e3]
2021-05-13 16:03:36 -04:00
@@ -52,11 +52,6 @@ target_include_directories(oclrocm
    ${ROCM_OCL_INCLUDES}
    $<TARGET_PROPERTY:hsa-runtime64::hsa-runtime64,INTERFACE_INCLUDE_DIRECTORIES>)

-option(BUILD_HMM "Build HMM support" ON)
-if (BUILD_HMM)
-  target_compile_definitions(oclrocm
-    PRIVATE AMD_HMM_SUPPORT)
-endif()

 if(USE_COMGR_LIBRARY)
  if(${BUILD_SHARED_LIBS})
@@ -1547,7 +1547,6 @@ bool Device::populateOCLDeviceConstants() {
        : 0;
  }

-#if AMD_HMM_SUPPORT
  // Generic support for HMM interfaces
  if (HSA_STATUS_SUCCESS != hsa_system_get_info(HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED,
      &info_.hmmSupported_)) {
@@ -1559,7 +1558,8 @@ bool Device::populateOCLDeviceConstants() {
      &info_.hmmCpuMemoryAccessible_)) {
    LogError("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT query failed.");
  }
-#endif  // AMD_HMM_SUPPORT
+  LogPrintfInfo("HMM support: %d, xnack: %d\n",
+    info_.hmmSupported_, info_.hmmCpuMemoryAccessible_);

  info_.globalCUMask_ = {};

@@ -2216,63 +2216,66 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count,
      return false;
    }
  }
-#if AMD_HMM_SUPPORT
-  std::vector<hsa_amd_svm_attribute_pair_t> attr;
-  if (first_alloc) {
-    attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
-  }
+  if (info().hmmSupported_) {
+    std::vector<hsa_amd_svm_attribute_pair_t> attr;
+    if (first_alloc) {
+      attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
+    }

-  switch (advice) {
-    case amd::MemoryAdvice::SetReadMostly:
-      attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
-      break;
-    case amd::MemoryAdvice::UnsetReadMostly:
-      attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
-      break;
-    case amd::MemoryAdvice::SetPreferredLocation:
-      if (use_cpu) {
-        attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
-      } else {
-        attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
-      }
-      break;
-    case amd::MemoryAdvice::UnsetPreferredLocation:
-      // @note: 0 may cause a failure on old runtimes
-      attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
-      break;
-    case amd::MemoryAdvice::SetAccessedBy:
-      if (use_cpu) {
-        attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
-      } else {
-        if (first_alloc) {
-          // Provide access to all possible devices.
-          //! @note: HMM should support automatic page table update with xnack enabled,
-          //! but currently it doesn't and runtime explicitly enables access from all devices
-          for (const auto dev : devices()) {
-            attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
-                static_cast<Device*>(dev)->getBackendDevice().handle});
-          }
+    switch (advice) {
+      case amd::MemoryAdvice::SetReadMostly:
+        attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
+        break;
+      case amd::MemoryAdvice::UnsetReadMostly:
+        attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
+        break;
+      case amd::MemoryAdvice::SetPreferredLocation:
+        if (use_cpu) {
+          attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
        } else {
-          attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getBackendDevice().handle});
+          attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
        }
-      }
+        break;
+      case amd::MemoryAdvice::UnsetPreferredLocation:
+        // @note: 0 may cause a failure on old runtimes
+        attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
+        break;
+      case amd::MemoryAdvice::SetAccessedBy:
+        if (use_cpu) {
+          attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
+        } else {
+          if (first_alloc) {
+            // Provide access to all possible devices.
+            //! @note: HMM should support automatic page table update with xnack enabled,
+            //! but currently it doesn't and runtime explicitly enables access from all devices
+            for (const auto dev : devices()) {
+              attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
+                  static_cast<Device*>(dev)->getBackendDevice().handle});
+            }
+          } else {
+            attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
+                getBackendDevice().handle});
+          }
+        }
+        break;
+      case amd::MemoryAdvice::UnsetAccessedBy:
+        // @note: 0 may cause a failure on old runtimes
+        attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
+        break;
+      default:
+        return false;
      break;
-    case amd::MemoryAdvice::UnsetAccessedBy:
-      // @note: 0 may cause a failure on old runtimes
-      attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
-      break;
-    default:
-      return false;
-    break;
-  }
+    }

-  hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
-                                                   attr.data(), attr.size());
-  if (status != HSA_STATUS_SUCCESS) {
-    LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
-    return false;
+    hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
+                                                    attr.data(), attr.size());
+    if (status != HSA_STATUS_SUCCESS) {
+      LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
+      return false;
+    }
+  } else {
+    LogWarning("hsa_amd_svm_attributes_set() is ignored, because no HMM support");
  }
-#endif // AMD_HMM_SUPPORT
  return true;
 }

@@ -2296,131 +2299,139 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
      return false;
    }
  }
-#if AMD_HMM_SUPPORT
-  uint32_t accessed_by = 0;
-  std::vector<hsa_amd_svm_attribute_pair_t> attr;
+  if (info().hmmSupported_) {
+    uint32_t accessed_by = 0;
+    std::vector<hsa_amd_svm_attribute_pair_t> attr;

-  for (size_t i = 0; i < num_attributes; ++i) {
-    switch (attributes[i]) {
-      case amd::MemRangeAttribute::ReadMostly:
-        attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
+    for (size_t i = 0; i < num_attributes; ++i) {
+      switch (attributes[i]) {
+        case amd::MemRangeAttribute::ReadMostly:
+          attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
+          break;
+        case amd::MemRangeAttribute::PreferredLocation:
+          attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
+          break;
+        case amd::MemRangeAttribute::AccessedBy:
+          accessed_by = attr.size();
+          // Add all GPU devices into the query
+          for (const auto agent : getGpuAgents()) {
+            attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
+          }
+          // Add CPU devices
+          for (const auto agent_info : getCpuAgents()) {
+            attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
+          }
+          accessed_by = attr.size() - accessed_by;
+          break;
+        case amd::MemRangeAttribute::LastPrefetchLocation:
+          attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
+          break;
+        default:
+          return false;
        break;
-      case amd::MemRangeAttribute::PreferredLocation:
-        attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
-        break;
-      case amd::MemRangeAttribute::AccessedBy:
-        accessed_by = attr.size();
-        // Add all GPU devices into the query
-        for (const auto agent : getGpuAgents()) {
-          attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
-        }
-        // Add CPU devices
-        for (const auto agent_info : getCpuAgents()) {
-          attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
-        }
-        accessed_by = attr.size() - accessed_by;
-        break;
-      case amd::MemRangeAttribute::LastPrefetchLocation:
-        attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
-        break;
-      default:
-        return false;
-      break;
+      }
    }
-  }

-  hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
-                                                   attr.data(), attr.size());
-  if (status != HSA_STATUS_SUCCESS) {
-    LogError("hsa_amd_svm_attributes_get() failed");
+    hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
+                                                    attr.data(), attr.size());
+    if (status != HSA_STATUS_SUCCESS) {
+      LogError("hsa_amd_svm_attributes_get() failed");
+      return false;
+    }
+
+    uint32_t idx = 0;
+    uint32_t rocr_attr = 0;
+    for (size_t i = 0; i < num_attributes; ++i) {
+      const auto& it = attr[rocr_attr];
+      switch (attributes[i]) {
+        case amd::MemRangeAttribute::ReadMostly:
+          if (data_sizes[idx] != sizeof(uint32_t)) {
+            return false;
+          }
+          // Cast ROCr value into the hip format
+          *reinterpret_cast<uint32_t*>(data[idx]) =
+              (static_cast<uint32_t>(it.value) > 0) ? true : false;
+          break;
+        // The logic should be identical for the both queries
+        case amd::MemRangeAttribute::PreferredLocation:
+        case amd::MemRangeAttribute::LastPrefetchLocation:
+          if (data_sizes[idx] != sizeof(uint32_t)) {
+            return false;
+          }
+          *reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
+          // Find device agent returned by ROCr
+          for (auto& device : devices()) {
+            if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
+              *reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
+            }
+          }
+          // Find CPU agent returned by ROCr
+          for (auto& agent_info : getCpuAgents()) {
+            if (agent_info.agent.handle == it.value) {
+              *reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
+            }
+          }
+          break;
+        case amd::MemRangeAttribute::AccessedBy: {
+          uint32_t entry = 0;
+          uint32_t device_count = data_sizes[idx] / 4;
+          // Make sure it's multiple of 4
+          if (data_sizes[idx] % 4 != 0) {
+            return false;
+          }
+          for (uint32_t att = 0; att < accessed_by; ++att) {
+            const auto& it = attr[rocr_attr + att];
+            if (entry >= device_count) {
+              // The size of the array is less than the amount of available devices
+              break;
+            }
+            switch (it.attribute) {
+              case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
+              case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
+                break;
+              case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
+                reinterpret_cast<int32_t*>(data[idx])[entry] =
+                  static_cast<int32_t>(amd::InvalidDeviceId);
+                // Find device agent returned by ROCr
+                for (auto& device : devices()) {
+                  if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
+                    reinterpret_cast<uint32_t*>(data[idx])[entry] =
+                      static_cast<uint32_t>(device->index());
+                  }
+                }
+                // Find CPU agent returned by ROCr
+                for (auto& agent_info : getCpuAgents()) {
+                  if (agent_info.agent.handle == it.value) {
+                    reinterpret_cast<int32_t*>(data[idx])[entry] =
+                      static_cast<int32_t>(amd::CpuDeviceId);
+                  }
+                }
+                ++entry;
+                break;
+              default:
+                LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
+                break;
+            }
+          }
+          rocr_attr += accessed_by;
+          for (uint32_t idx = entry; idx < device_count; ++idx) {
+            reinterpret_cast<int32_t*>(data[idx])[idx] =
+              static_cast<int32_t>(amd::InvalidDeviceId);
+          }
+          break;
+        }
+        default:
+          return false;
+        break;
+      }
+      // Find the next location in the query
+      ++idx;
+    }
+  } else {
+    LogError("GetSvmAttributes() failed, because no HMM support");
    return false;
  }

-  uint32_t idx = 0;
-  uint32_t rocr_attr = 0;
-  for (size_t i = 0; i < num_attributes; ++i) {
-    const auto& it = attr[rocr_attr];
-    switch (attributes[i]) {
-      case amd::MemRangeAttribute::ReadMostly:
-        if (data_sizes[idx] != sizeof(uint32_t)) {
-          return false;
-        }
-        // Cast ROCr value into the hip format
-        *reinterpret_cast<uint32_t*>(data[idx]) =
-            (static_cast<uint32_t>(it.value) > 0) ? true : false;
-        break;
-      // The logic should be identical for the both queries
-      case amd::MemRangeAttribute::PreferredLocation:
-      case amd::MemRangeAttribute::LastPrefetchLocation:
-        if (data_sizes[idx] != sizeof(uint32_t)) {
-          return false;
-        }
-        *reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
-        // Find device agent returned by ROCr
-        for (auto& device : devices()) {
-          if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
-            *reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
-          }
-        }
-        // Find CPU agent returned by ROCr
-        for (auto& agent_info : getCpuAgents()) {
-          if (agent_info.agent.handle == it.value) {
-            *reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
-          }
-        }
-        break;
-      case amd::MemRangeAttribute::AccessedBy: {
-        uint32_t entry = 0;
-        uint32_t device_count = data_sizes[idx] / 4;
-        // Make sure it's multiple of 4
-        if (data_sizes[idx] % 4 != 0) {
-          return false;
-        }
-        for (uint32_t att = 0; att < accessed_by; ++att) {
-          const auto& it = attr[rocr_attr + att];
-          if (entry >= device_count) {
-            // The size of the array is less than the amount of available devices
-            break;
-          }
-          switch (it.attribute) {
-            case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
-            case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
-              break;
-            case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
-              reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::InvalidDeviceId);
-              // Find device agent returned by ROCr
-              for (auto& device : devices()) {
-                if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
-                  reinterpret_cast<uint32_t*>(data[idx])[entry] = static_cast<uint32_t>(device->index());
-                }
-              }
-              // Find CPU agent returned by ROCr
-              for (auto& agent_info : getCpuAgents()) {
-                if (agent_info.agent.handle == it.value) {
-                  reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::CpuDeviceId);
-                }
-              }
-              ++entry;
-              break;
-            default:
-              LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
-              break;
-          }
-        }
-        rocr_attr += accessed_by;
-        for (uint32_t idx = entry; idx < device_count; ++idx) {
-          reinterpret_cast<int32_t*>(data[idx])[idx] = static_cast<int32_t>(amd::InvalidDeviceId);
-        }
-        break;
-      }
-      default:
-        return false;
-      break;
-    }
-    // Find the next location in the query
-    ++idx;
-  }
-#endif // AMD_HMM_SUPPORT
  return true;
 }

@@ -2428,35 +2439,42 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
 bool Device::SvmAllocInit(void* memory, size_t size) const {
  amd::MemoryAdvice advice = amd::MemoryAdvice::SetAccessedBy;
  constexpr bool kFirstAlloc = true;
-  SetSvmAttributesInt(memory, size, advice, kFirstAlloc);
+  if (!SetSvmAttributesInt(memory, size, advice, kFirstAlloc)) {
+    return false;
+  }

  if (settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory) {
    advice = amd::MemoryAdvice::UnsetPreferredLocation;
-    SetSvmAttributesInt(memory, size, advice);
+    if (!SetSvmAttributesInt(memory, size, advice)) {
+      return false;
+    }
  }

  if ((settings().hmmFlags_ & Settings::Hmm::EnableMallocPrefetch) == 0) {
    return true;
  }

-#if AMD_HMM_SUPPORT
-  // Initialize signal for the barrier
-  hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);
+  if (info().hmmSupported_) {
+    // Initialize signal for the barrier
+    hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);

-  // Initiate a prefetch command which should force memory update in HMM
-  hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
-                                                   0, nullptr, prefetch_signal_);
-  if (status != HSA_STATUS_SUCCESS) {
-    LogError("hsa_amd_svm_attributes_get() failed");
-    return false;
+    // Initiate a prefetch command which should force memory update in HMM
+    hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
+                                                     0, nullptr, prefetch_signal_);
+    if (status != HSA_STATUS_SUCCESS) {
+      LogError("hsa_amd_svm_prefetch_async() failed");
+      return false;
+    }
+
+    // Wait for the prefetch
+    if (!WaitForSignal(prefetch_signal_)) {
+      LogError("Barrier packet submission failed");
+      return false;
+    }
+  } else {
+    LogWarning("Early prefetch failed, because no HMM support");
  }

-  // Wait for the prefetch
-  if (!WaitForSignal(prefetch_signal_)) {
-    LogError("Barrier packet submission failed");
-    return false;
-  }
-#endif // AMD_HMM_SUPPORT
  return true;
 }

@@ -632,13 +632,13 @@ void Buffer::destroy() {
    if (kind_ != MEMORY_KIND_PTRGIVEN) {
      if (isFineGrain) {
        if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
-  #if AMD_HMM_SUPPORT
-          // AMD HMM path. Destroy system memory
-          amd::Os::uncommitMemory(deviceMemory_, size());
-          amd::Os::releaseMemory(deviceMemory_, size());
-  #else
-          dev().hostFree(deviceMemory_, size());;
-  #endif // AMD_HMM_SUPPORT
+          if (dev().info().hmmSupported_) {
+            // AMD HMM path. Destroy system memory
+            amd::Os::uncommitMemory(deviceMemory_, size());
+            amd::Os::releaseMemory(deviceMemory_, size());
+          } else {
+            dev().hostFree(deviceMemory_, size());
+          }
        } else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
          if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) {
            ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
@@ -727,17 +727,20 @@ bool Buffer::create() {
    if (owner()->getSvmPtr() == reinterpret_cast<void*>(1)) {
      if (isFineGrain) {
        if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
-#if AMD_HMM_SUPPORT
-          // AMD HMM path. Just allocate system memory and KFD will manage it
-          deviceMemory_ =  amd::Os::reserveMemory(
-              0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
-          amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
-          // Currently HMM requires cirtain initial calls to mark sysmem allocation as
-          // GPU accessible or prefetch memory into GPU
-          dev().SvmAllocInit(deviceMemory_, size());
-#else
-          deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
-#endif // AMD_HMM_SUPPORT
+          if (dev().info().hmmSupported_) {
+            // AMD HMM path. Just allocate system memory and KFD will manage it
+            deviceMemory_ =  amd::Os::reserveMemory(
+                0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
+            amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
+            // Currently HMM requires cirtain initial calls to mark sysmem allocation as
+            // GPU accessible or prefetch memory into GPU
+            if (!dev().SvmAllocInit(deviceMemory_, size())) {
+              ClPrint(amd::LOG_ERROR, amd::LOG_MEM, "SVM init in ROCr failed!");
+              return false;
+            }
+          } else {
+            deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
+          }
        } else if (memFlags & CL_MEM_FOLLOW_USER_NUMA_POLICY) {
          deviceMemory_ = dev().hostNumaAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
        } else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
@@ -1463,34 +1463,36 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd) {
 void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) {
  // Make sure VirtualGPU has an exclusive access to the resources
  amd::ScopedLock lock(execution());
-#if AMD_HMM_SUPPORT
  profilingBegin(cmd);
-  // Initialize signal for the barrier
-  hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
-  hsa_signal_t      active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
-  uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;

-  // Find the requested agent for the transfer
-  hsa_agent_t agent = (cmd.cpu_access() ||
-      (dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
-      dev().getCpuAgent() : gpu_device();
+  if (dev().info().hmmSupported_) {
+    // Initialize signal for the barrier
+    hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
+    hsa_signal_t      active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
+    uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;

-  // Initiate a prefetch command
-  hsa_status_t status = hsa_amd_svm_prefetch_async(
-      const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
+    // Find the requested agent for the transfer
+    hsa_agent_t agent = (cmd.cpu_access() ||
+        (dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
+        dev().getCpuAgent() : gpu_device();

-  // Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
-  if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
-    Barriers().ResetCurrentSignal();
-    LogError("hsa_amd_svm_prefetch_async failed");
-    cmd.setStatus(CL_INVALID_OPERATION);
+    // Initiate a prefetch command
+    hsa_status_t status = hsa_amd_svm_prefetch_async(
+        const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
+
+    // Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
+    if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
+      Barriers().ResetCurrentSignal();
+      LogError("hsa_amd_svm_prefetch_async failed");
+      cmd.setStatus(CL_INVALID_OPERATION);
+    }
+
+    // Add system scope, since the prefetch scope is unclear
+    addSystemScope();
+  } else {
+    LogWarning("hsa_amd_svm_prefetch_async is ignored, because no HMM support");
  }
-
-  // Add system scope, since the prefetch scope is unclear
-  addSystemScope();
-
  profilingEnd(cmd);
-#endif // AMD_HMM_SUPPORT
 }

 // ================================================================================================