SWDEV-240804 - Remove AMD_HMM_SUPPORT define

Use dynamic (runtime) logic for HMM based on its availability

Change-Id: I63751d94571d5af6eb57bef2cb0e071120bfa103


[ROCm/clr commit: 3f7a6b01e3]
Этот коммит содержится в:
German Andryeyev
2021-05-13 16:03:36 -04:00
родитель 6b6b4c0d57
Коммит 2b409b7e57
4 изменённых файлов: 251 добавлений и 233 удалений
-5
Просмотреть файл
@@ -52,11 +52,6 @@ target_include_directories(oclrocm
${ROCM_OCL_INCLUDES}
$<TARGET_PROPERTY:hsa-runtime64::hsa-runtime64,INTERFACE_INCLUDE_DIRECTORIES>)
option(BUILD_HMM "Build HMM support" ON)
if (BUILD_HMM)
target_compile_definitions(oclrocm
PRIVATE AMD_HMM_SUPPORT)
endif()
if(USE_COMGR_LIBRARY)
if(${BUILD_SHARED_LIBS})
+206 -188
Просмотреть файл
@@ -1547,7 +1547,6 @@ bool Device::populateOCLDeviceConstants() {
: 0;
}
#if AMD_HMM_SUPPORT
// Generic support for HMM interfaces
if (HSA_STATUS_SUCCESS != hsa_system_get_info(HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED,
&info_.hmmSupported_)) {
@@ -1559,7 +1558,8 @@ bool Device::populateOCLDeviceConstants() {
&info_.hmmCpuMemoryAccessible_)) {
LogError("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT query failed.");
}
#endif // AMD_HMM_SUPPORT
LogPrintfInfo("HMM support: %d, xnack: %d\n",
info_.hmmSupported_, info_.hmmCpuMemoryAccessible_);
info_.globalCUMask_ = {};
@@ -2216,63 +2216,66 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count,
return false;
}
}
#if AMD_HMM_SUPPORT
std::vector<hsa_amd_svm_attribute_pair_t> attr;
if (first_alloc) {
attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
}
if (info().hmmSupported_) {
std::vector<hsa_amd_svm_attribute_pair_t> attr;
if (first_alloc) {
attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
}
switch (advice) {
case amd::MemoryAdvice::SetReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
break;
case amd::MemoryAdvice::UnsetReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
break;
case amd::MemoryAdvice::SetPreferredLocation:
if (use_cpu) {
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
} else {
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
}
break;
case amd::MemoryAdvice::UnsetPreferredLocation:
// @note: 0 may cause a failure on old runtimes
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
break;
case amd::MemoryAdvice::SetAccessedBy:
if (use_cpu) {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
} else {
if (first_alloc) {
// Provide access to all possible devices.
//! @note: HMM should support automatic page table update with xnack enabled,
//! but currently it doesn't and runtime explicitly enables access from all devices
for (const auto dev : devices()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
static_cast<Device*>(dev)->getBackendDevice().handle});
}
switch (advice) {
case amd::MemoryAdvice::SetReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
break;
case amd::MemoryAdvice::UnsetReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
break;
case amd::MemoryAdvice::SetPreferredLocation:
if (use_cpu) {
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
} else {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getBackendDevice().handle});
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
}
}
break;
case amd::MemoryAdvice::UnsetPreferredLocation:
// @note: 0 may cause a failure on old runtimes
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
break;
case amd::MemoryAdvice::SetAccessedBy:
if (use_cpu) {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
} else {
if (first_alloc) {
// Provide access to all possible devices.
//! @note: HMM should support automatic page table update with xnack enabled,
//! but currently it doesn't and runtime explicitly enables access from all devices
for (const auto dev : devices()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
static_cast<Device*>(dev)->getBackendDevice().handle});
}
} else {
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
getBackendDevice().handle});
}
}
break;
case amd::MemoryAdvice::UnsetAccessedBy:
// @note: 0 may cause a failure on old runtimes
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
break;
default:
return false;
break;
case amd::MemoryAdvice::UnsetAccessedBy:
// @note: 0 may cause a failure on old runtimes
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
break;
default:
return false;
break;
}
}
hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
attr.data(), attr.size());
if (status != HSA_STATUS_SUCCESS) {
LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
return false;
hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
attr.data(), attr.size());
if (status != HSA_STATUS_SUCCESS) {
LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
return false;
}
} else {
LogWarning("hsa_amd_svm_attributes_set() is ignored, because no HMM support");
}
#endif // AMD_HMM_SUPPORT
return true;
}
@@ -2296,131 +2299,139 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
return false;
}
}
#if AMD_HMM_SUPPORT
uint32_t accessed_by = 0;
std::vector<hsa_amd_svm_attribute_pair_t> attr;
if (info().hmmSupported_) {
uint32_t accessed_by = 0;
std::vector<hsa_amd_svm_attribute_pair_t> attr;
for (size_t i = 0; i < num_attributes; ++i) {
switch (attributes[i]) {
case amd::MemRangeAttribute::ReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
for (size_t i = 0; i < num_attributes; ++i) {
switch (attributes[i]) {
case amd::MemRangeAttribute::ReadMostly:
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
break;
case amd::MemRangeAttribute::PreferredLocation:
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
break;
case amd::MemRangeAttribute::AccessedBy:
accessed_by = attr.size();
// Add all GPU devices into the query
for (const auto agent : getGpuAgents()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
}
// Add CPU devices
for (const auto agent_info : getCpuAgents()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
}
accessed_by = attr.size() - accessed_by;
break;
case amd::MemRangeAttribute::LastPrefetchLocation:
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
break;
default:
return false;
break;
case amd::MemRangeAttribute::PreferredLocation:
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
break;
case amd::MemRangeAttribute::AccessedBy:
accessed_by = attr.size();
// Add all GPU devices into the query
for (const auto agent : getGpuAgents()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
}
// Add CPU devices
for (const auto agent_info : getCpuAgents()) {
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
}
accessed_by = attr.size() - accessed_by;
break;
case amd::MemRangeAttribute::LastPrefetchLocation:
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
break;
default:
return false;
break;
}
}
}
hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
attr.data(), attr.size());
if (status != HSA_STATUS_SUCCESS) {
LogError("hsa_amd_svm_attributes_get() failed");
hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
attr.data(), attr.size());
if (status != HSA_STATUS_SUCCESS) {
LogError("hsa_amd_svm_attributes_get() failed");
return false;
}
uint32_t idx = 0;
uint32_t rocr_attr = 0;
for (size_t i = 0; i < num_attributes; ++i) {
const auto& it = attr[rocr_attr];
switch (attributes[i]) {
case amd::MemRangeAttribute::ReadMostly:
if (data_sizes[idx] != sizeof(uint32_t)) {
return false;
}
// Cast ROCr value into the hip format
*reinterpret_cast<uint32_t*>(data[idx]) =
(static_cast<uint32_t>(it.value) > 0) ? true : false;
break;
// The logic should be identical for the both queries
case amd::MemRangeAttribute::PreferredLocation:
case amd::MemRangeAttribute::LastPrefetchLocation:
if (data_sizes[idx] != sizeof(uint32_t)) {
return false;
}
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
// Find device agent returned by ROCr
for (auto& device : devices()) {
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
*reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
}
}
// Find CPU agent returned by ROCr
for (auto& agent_info : getCpuAgents()) {
if (agent_info.agent.handle == it.value) {
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
}
}
break;
case amd::MemRangeAttribute::AccessedBy: {
uint32_t entry = 0;
uint32_t device_count = data_sizes[idx] / 4;
// Make sure it's multiple of 4
if (data_sizes[idx] % 4 != 0) {
return false;
}
for (uint32_t att = 0; att < accessed_by; ++att) {
const auto& it = attr[rocr_attr + att];
if (entry >= device_count) {
// The size of the array is less than the amount of available devices
break;
}
switch (it.attribute) {
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
break;
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
reinterpret_cast<int32_t*>(data[idx])[entry] =
static_cast<int32_t>(amd::InvalidDeviceId);
// Find device agent returned by ROCr
for (auto& device : devices()) {
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
reinterpret_cast<uint32_t*>(data[idx])[entry] =
static_cast<uint32_t>(device->index());
}
}
// Find CPU agent returned by ROCr
for (auto& agent_info : getCpuAgents()) {
if (agent_info.agent.handle == it.value) {
reinterpret_cast<int32_t*>(data[idx])[entry] =
static_cast<int32_t>(amd::CpuDeviceId);
}
}
++entry;
break;
default:
LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
break;
}
}
rocr_attr += accessed_by;
for (uint32_t idx = entry; idx < device_count; ++idx) {
reinterpret_cast<int32_t*>(data[idx])[idx] =
static_cast<int32_t>(amd::InvalidDeviceId);
}
break;
}
default:
return false;
break;
}
// Find the next location in the query
++idx;
}
} else {
LogError("GetSvmAttributes() failed, because no HMM support");
return false;
}
uint32_t idx = 0;
uint32_t rocr_attr = 0;
for (size_t i = 0; i < num_attributes; ++i) {
const auto& it = attr[rocr_attr];
switch (attributes[i]) {
case amd::MemRangeAttribute::ReadMostly:
if (data_sizes[idx] != sizeof(uint32_t)) {
return false;
}
// Cast ROCr value into the hip format
*reinterpret_cast<uint32_t*>(data[idx]) =
(static_cast<uint32_t>(it.value) > 0) ? true : false;
break;
// The logic should be identical for the both queries
case amd::MemRangeAttribute::PreferredLocation:
case amd::MemRangeAttribute::LastPrefetchLocation:
if (data_sizes[idx] != sizeof(uint32_t)) {
return false;
}
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
// Find device agent returned by ROCr
for (auto& device : devices()) {
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
*reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
}
}
// Find CPU agent returned by ROCr
for (auto& agent_info : getCpuAgents()) {
if (agent_info.agent.handle == it.value) {
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
}
}
break;
case amd::MemRangeAttribute::AccessedBy: {
uint32_t entry = 0;
uint32_t device_count = data_sizes[idx] / 4;
// Make sure it's multiple of 4
if (data_sizes[idx] % 4 != 0) {
return false;
}
for (uint32_t att = 0; att < accessed_by; ++att) {
const auto& it = attr[rocr_attr + att];
if (entry >= device_count) {
// The size of the array is less than the amount of available devices
break;
}
switch (it.attribute) {
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
break;
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::InvalidDeviceId);
// Find device agent returned by ROCr
for (auto& device : devices()) {
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
reinterpret_cast<uint32_t*>(data[idx])[entry] = static_cast<uint32_t>(device->index());
}
}
// Find CPU agent returned by ROCr
for (auto& agent_info : getCpuAgents()) {
if (agent_info.agent.handle == it.value) {
reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::CpuDeviceId);
}
}
++entry;
break;
default:
LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
break;
}
}
rocr_attr += accessed_by;
for (uint32_t idx = entry; idx < device_count; ++idx) {
reinterpret_cast<int32_t*>(data[idx])[idx] = static_cast<int32_t>(amd::InvalidDeviceId);
}
break;
}
default:
return false;
break;
}
// Find the next location in the query
++idx;
}
#endif // AMD_HMM_SUPPORT
return true;
}
@@ -2428,35 +2439,42 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
bool Device::SvmAllocInit(void* memory, size_t size) const {
amd::MemoryAdvice advice = amd::MemoryAdvice::SetAccessedBy;
constexpr bool kFirstAlloc = true;
SetSvmAttributesInt(memory, size, advice, kFirstAlloc);
if (!SetSvmAttributesInt(memory, size, advice, kFirstAlloc)) {
return false;
}
if (settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory) {
advice = amd::MemoryAdvice::UnsetPreferredLocation;
SetSvmAttributesInt(memory, size, advice);
if (!SetSvmAttributesInt(memory, size, advice)) {
return false;
}
}
if ((settings().hmmFlags_ & Settings::Hmm::EnableMallocPrefetch) == 0) {
return true;
}
#if AMD_HMM_SUPPORT
// Initialize signal for the barrier
hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);
if (info().hmmSupported_) {
// Initialize signal for the barrier
hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);
// Initiate a prefetch command which should force memory update in HMM
hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
0, nullptr, prefetch_signal_);
if (status != HSA_STATUS_SUCCESS) {
LogError("hsa_amd_svm_attributes_get() failed");
return false;
// Initiate a prefetch command which should force memory update in HMM
hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
0, nullptr, prefetch_signal_);
if (status != HSA_STATUS_SUCCESS) {
LogError("hsa_amd_svm_prefetch_async() failed");
return false;
}
// Wait for the prefetch
if (!WaitForSignal(prefetch_signal_)) {
LogError("Barrier packet submission failed");
return false;
}
} else {
LogWarning("Early prefetch failed, because no HMM support");
}
// Wait for the prefetch
if (!WaitForSignal(prefetch_signal_)) {
LogError("Barrier packet submission failed");
return false;
}
#endif // AMD_HMM_SUPPORT
return true;
}
+21 -18
Просмотреть файл
@@ -632,13 +632,13 @@ void Buffer::destroy() {
if (kind_ != MEMORY_KIND_PTRGIVEN) {
if (isFineGrain) {
if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
#if AMD_HMM_SUPPORT
// AMD HMM path. Destroy system memory
amd::Os::uncommitMemory(deviceMemory_, size());
amd::Os::releaseMemory(deviceMemory_, size());
#else
dev().hostFree(deviceMemory_, size());;
#endif // AMD_HMM_SUPPORT
if (dev().info().hmmSupported_) {
// AMD HMM path. Destroy system memory
amd::Os::uncommitMemory(deviceMemory_, size());
amd::Os::releaseMemory(deviceMemory_, size());
} else {
dev().hostFree(deviceMemory_, size());
}
} else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) {
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
@@ -727,17 +727,20 @@ bool Buffer::create() {
if (owner()->getSvmPtr() == reinterpret_cast<void*>(1)) {
if (isFineGrain) {
if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
#if AMD_HMM_SUPPORT
// AMD HMM path. Just allocate system memory and KFD will manage it
deviceMemory_ = amd::Os::reserveMemory(
0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
// Currently HMM requires cirtain initial calls to mark sysmem allocation as
// GPU accessible or prefetch memory into GPU
dev().SvmAllocInit(deviceMemory_, size());
#else
deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
#endif // AMD_HMM_SUPPORT
if (dev().info().hmmSupported_) {
// AMD HMM path. Just allocate system memory and KFD will manage it
deviceMemory_ = amd::Os::reserveMemory(
0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
// Currently HMM requires cirtain initial calls to mark sysmem allocation as
// GPU accessible or prefetch memory into GPU
if (!dev().SvmAllocInit(deviceMemory_, size())) {
ClPrint(amd::LOG_ERROR, amd::LOG_MEM, "SVM init in ROCr failed!");
return false;
}
} else {
deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
}
} else if (memFlags & CL_MEM_FOLLOW_USER_NUMA_POLICY) {
deviceMemory_ = dev().hostNumaAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
} else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
+24 -22
Просмотреть файл
@@ -1463,34 +1463,36 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd) {
void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
#if AMD_HMM_SUPPORT
profilingBegin(cmd);
// Initialize signal for the barrier
hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
hsa_signal_t active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;
// Find the requested agent for the transfer
hsa_agent_t agent = (cmd.cpu_access() ||
(dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
dev().getCpuAgent() : gpu_device();
if (dev().info().hmmSupported_) {
// Initialize signal for the barrier
hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
hsa_signal_t active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;
// Initiate a prefetch command
hsa_status_t status = hsa_amd_svm_prefetch_async(
const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
// Find the requested agent for the transfer
hsa_agent_t agent = (cmd.cpu_access() ||
(dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
dev().getCpuAgent() : gpu_device();
// Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
Barriers().ResetCurrentSignal();
LogError("hsa_amd_svm_prefetch_async failed");
cmd.setStatus(CL_INVALID_OPERATION);
// Initiate a prefetch command
hsa_status_t status = hsa_amd_svm_prefetch_async(
const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
// Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
Barriers().ResetCurrentSignal();
LogError("hsa_amd_svm_prefetch_async failed");
cmd.setStatus(CL_INVALID_OPERATION);
}
// Add system scope, since the prefetch scope is unclear
addSystemScope();
} else {
LogWarning("hsa_amd_svm_prefetch_async is ignored, because no HMM support");
}
// Add system scope, since the prefetch scope is unclear
addSystemScope();
profilingEnd(cmd);
#endif // AMD_HMM_SUPPORT
}
// ================================================================================================