SWDEV-240804 - Remove AMD_HMM_SUPPORT define
Use dynamic logic for HMM based on its availability
Change-Id: I63751d94571d5af6eb57bef2cb0e071120bfa103
[ROCm/clr commit: 3f7a6b01e3]
Этот коммит содержится в:
@@ -52,11 +52,6 @@ target_include_directories(oclrocm
|
||||
${ROCM_OCL_INCLUDES}
|
||||
$<TARGET_PROPERTY:hsa-runtime64::hsa-runtime64,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
option(BUILD_HMM "Build HMM support" ON)
|
||||
if (BUILD_HMM)
|
||||
target_compile_definitions(oclrocm
|
||||
PRIVATE AMD_HMM_SUPPORT)
|
||||
endif()
|
||||
|
||||
if(USE_COMGR_LIBRARY)
|
||||
if(${BUILD_SHARED_LIBS})
|
||||
|
||||
@@ -1547,7 +1547,6 @@ bool Device::populateOCLDeviceConstants() {
|
||||
: 0;
|
||||
}
|
||||
|
||||
#if AMD_HMM_SUPPORT
|
||||
// Generic support for HMM interfaces
|
||||
if (HSA_STATUS_SUCCESS != hsa_system_get_info(HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED,
|
||||
&info_.hmmSupported_)) {
|
||||
@@ -1559,7 +1558,8 @@ bool Device::populateOCLDeviceConstants() {
|
||||
&info_.hmmCpuMemoryAccessible_)) {
|
||||
LogError("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT query failed.");
|
||||
}
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
LogPrintfInfo("HMM support: %d, xnack: %d\n",
|
||||
info_.hmmSupported_, info_.hmmCpuMemoryAccessible_);
|
||||
|
||||
info_.globalCUMask_ = {};
|
||||
|
||||
@@ -2216,63 +2216,66 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#if AMD_HMM_SUPPORT
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> attr;
|
||||
if (first_alloc) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
|
||||
}
|
||||
if (info().hmmSupported_) {
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> attr;
|
||||
if (first_alloc) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED});
|
||||
}
|
||||
|
||||
switch (advice) {
|
||||
case amd::MemoryAdvice::SetReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
|
||||
break;
|
||||
case amd::MemoryAdvice::SetPreferredLocation:
|
||||
if (use_cpu) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
|
||||
} else {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
|
||||
}
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetPreferredLocation:
|
||||
// @note: 0 may cause a failure on old runtimes
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
|
||||
break;
|
||||
case amd::MemoryAdvice::SetAccessedBy:
|
||||
if (use_cpu) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
|
||||
} else {
|
||||
if (first_alloc) {
|
||||
// Provide access to all possible devices.
|
||||
//! @note: HMM should support automatic page table update with xnack enabled,
|
||||
//! but currently it doesn't and runtime explicitly enables access from all devices
|
||||
for (const auto dev : devices()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
|
||||
static_cast<Device*>(dev)->getBackendDevice().handle});
|
||||
}
|
||||
switch (advice) {
|
||||
case amd::MemoryAdvice::SetReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true});
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false});
|
||||
break;
|
||||
case amd::MemoryAdvice::SetPreferredLocation:
|
||||
if (use_cpu) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getCpuAgent().handle});
|
||||
} else {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getBackendDevice().handle});
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, getBackendDevice().handle});
|
||||
}
|
||||
}
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetPreferredLocation:
|
||||
// @note: 0 may cause a failure on old runtimes
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
|
||||
break;
|
||||
case amd::MemoryAdvice::SetAccessedBy:
|
||||
if (use_cpu) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, getCpuAgent().handle});
|
||||
} else {
|
||||
if (first_alloc) {
|
||||
// Provide access to all possible devices.
|
||||
//! @note: HMM should support automatic page table update with xnack enabled,
|
||||
//! but currently it doesn't and runtime explicitly enables access from all devices
|
||||
for (const auto dev : devices()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
|
||||
static_cast<Device*>(dev)->getBackendDevice().handle});
|
||||
}
|
||||
} else {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE,
|
||||
getBackendDevice().handle});
|
||||
}
|
||||
}
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetAccessedBy:
|
||||
// @note: 0 may cause a failure on old runtimes
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
case amd::MemoryAdvice::UnsetAccessedBy:
|
||||
// @note: 0 may cause a failure on old runtimes
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0});
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
|
||||
attr.data(), attr.size());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
|
||||
return false;
|
||||
hsa_status_t status = hsa_amd_svm_attributes_set(const_cast<void*>(dev_ptr), count,
|
||||
attr.data(), attr.size());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LogWarning("hsa_amd_svm_attributes_set() is ignored, because no HMM support");
|
||||
}
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2296,131 +2299,139 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#if AMD_HMM_SUPPORT
|
||||
uint32_t accessed_by = 0;
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> attr;
|
||||
if (info().hmmSupported_) {
|
||||
uint32_t accessed_by = 0;
|
||||
std::vector<hsa_amd_svm_attribute_pair_t> attr;
|
||||
|
||||
for (size_t i = 0; i < num_attributes; ++i) {
|
||||
switch (attributes[i]) {
|
||||
case amd::MemRangeAttribute::ReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
|
||||
for (size_t i = 0; i < num_attributes; ++i) {
|
||||
switch (attributes[i]) {
|
||||
case amd::MemRangeAttribute::ReadMostly:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, 0});
|
||||
break;
|
||||
case amd::MemRangeAttribute::PreferredLocation:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
|
||||
break;
|
||||
case amd::MemRangeAttribute::AccessedBy:
|
||||
accessed_by = attr.size();
|
||||
// Add all GPU devices into the query
|
||||
for (const auto agent : getGpuAgents()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
|
||||
}
|
||||
// Add CPU devices
|
||||
for (const auto agent_info : getCpuAgents()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
|
||||
}
|
||||
accessed_by = attr.size() - accessed_by;
|
||||
break;
|
||||
case amd::MemRangeAttribute::LastPrefetchLocation:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
case amd::MemRangeAttribute::PreferredLocation:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION, 0});
|
||||
break;
|
||||
case amd::MemRangeAttribute::AccessedBy:
|
||||
accessed_by = attr.size();
|
||||
// Add all GPU devices into the query
|
||||
for (const auto agent : getGpuAgents()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent.handle});
|
||||
}
|
||||
// Add CPU devices
|
||||
for (const auto agent_info : getCpuAgents()) {
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, agent_info.agent.handle});
|
||||
}
|
||||
accessed_by = attr.size() - accessed_by;
|
||||
break;
|
||||
case amd::MemRangeAttribute::LastPrefetchLocation:
|
||||
attr.push_back({HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION, 0});
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
|
||||
attr.data(), attr.size());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("hsa_amd_svm_attributes_get() failed");
|
||||
hsa_status_t status = hsa_amd_svm_attributes_get(const_cast<void*>(dev_ptr), count,
|
||||
attr.data(), attr.size());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("hsa_amd_svm_attributes_get() failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t idx = 0;
|
||||
uint32_t rocr_attr = 0;
|
||||
for (size_t i = 0; i < num_attributes; ++i) {
|
||||
const auto& it = attr[rocr_attr];
|
||||
switch (attributes[i]) {
|
||||
case amd::MemRangeAttribute::ReadMostly:
|
||||
if (data_sizes[idx] != sizeof(uint32_t)) {
|
||||
return false;
|
||||
}
|
||||
// Cast ROCr value into the hip format
|
||||
*reinterpret_cast<uint32_t*>(data[idx]) =
|
||||
(static_cast<uint32_t>(it.value) > 0) ? true : false;
|
||||
break;
|
||||
// The logic should be identical for the both queries
|
||||
case amd::MemRangeAttribute::PreferredLocation:
|
||||
case amd::MemRangeAttribute::LastPrefetchLocation:
|
||||
if (data_sizes[idx] != sizeof(uint32_t)) {
|
||||
return false;
|
||||
}
|
||||
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
// Find device agent returned by ROCr
|
||||
for (auto& device : devices()) {
|
||||
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
|
||||
*reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
|
||||
}
|
||||
}
|
||||
// Find CPU agent returned by ROCr
|
||||
for (auto& agent_info : getCpuAgents()) {
|
||||
if (agent_info.agent.handle == it.value) {
|
||||
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case amd::MemRangeAttribute::AccessedBy: {
|
||||
uint32_t entry = 0;
|
||||
uint32_t device_count = data_sizes[idx] / 4;
|
||||
// Make sure it's multiple of 4
|
||||
if (data_sizes[idx] % 4 != 0) {
|
||||
return false;
|
||||
}
|
||||
for (uint32_t att = 0; att < accessed_by; ++att) {
|
||||
const auto& it = attr[rocr_attr + att];
|
||||
if (entry >= device_count) {
|
||||
// The size of the array is less than the amount of available devices
|
||||
break;
|
||||
}
|
||||
switch (it.attribute) {
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
|
||||
break;
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
|
||||
reinterpret_cast<int32_t*>(data[idx])[entry] =
|
||||
static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
// Find device agent returned by ROCr
|
||||
for (auto& device : devices()) {
|
||||
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
|
||||
reinterpret_cast<uint32_t*>(data[idx])[entry] =
|
||||
static_cast<uint32_t>(device->index());
|
||||
}
|
||||
}
|
||||
// Find CPU agent returned by ROCr
|
||||
for (auto& agent_info : getCpuAgents()) {
|
||||
if (agent_info.agent.handle == it.value) {
|
||||
reinterpret_cast<int32_t*>(data[idx])[entry] =
|
||||
static_cast<int32_t>(amd::CpuDeviceId);
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
break;
|
||||
default:
|
||||
LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
|
||||
break;
|
||||
}
|
||||
}
|
||||
rocr_attr += accessed_by;
|
||||
for (uint32_t idx = entry; idx < device_count; ++idx) {
|
||||
reinterpret_cast<int32_t*>(data[idx])[idx] =
|
||||
static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
// Find the next location in the query
|
||||
++idx;
|
||||
}
|
||||
} else {
|
||||
LogError("GetSvmAttributes() failed, because no HMM support");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t idx = 0;
|
||||
uint32_t rocr_attr = 0;
|
||||
for (size_t i = 0; i < num_attributes; ++i) {
|
||||
const auto& it = attr[rocr_attr];
|
||||
switch (attributes[i]) {
|
||||
case amd::MemRangeAttribute::ReadMostly:
|
||||
if (data_sizes[idx] != sizeof(uint32_t)) {
|
||||
return false;
|
||||
}
|
||||
// Cast ROCr value into the hip format
|
||||
*reinterpret_cast<uint32_t*>(data[idx]) =
|
||||
(static_cast<uint32_t>(it.value) > 0) ? true : false;
|
||||
break;
|
||||
// The logic should be identical for the both queries
|
||||
case amd::MemRangeAttribute::PreferredLocation:
|
||||
case amd::MemRangeAttribute::LastPrefetchLocation:
|
||||
if (data_sizes[idx] != sizeof(uint32_t)) {
|
||||
return false;
|
||||
}
|
||||
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
// Find device agent returned by ROCr
|
||||
for (auto& device : devices()) {
|
||||
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
|
||||
*reinterpret_cast<uint32_t*>(data[idx]) = static_cast<uint32_t>(device->index());
|
||||
}
|
||||
}
|
||||
// Find CPU agent returned by ROCr
|
||||
for (auto& agent_info : getCpuAgents()) {
|
||||
if (agent_info.agent.handle == it.value) {
|
||||
*reinterpret_cast<int32_t*>(data[idx]) = static_cast<int32_t>(amd::CpuDeviceId);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case amd::MemRangeAttribute::AccessedBy: {
|
||||
uint32_t entry = 0;
|
||||
uint32_t device_count = data_sizes[idx] / 4;
|
||||
// Make sure it's multiple of 4
|
||||
if (data_sizes[idx] % 4 != 0) {
|
||||
return false;
|
||||
}
|
||||
for (uint32_t att = 0; att < accessed_by; ++att) {
|
||||
const auto& it = attr[rocr_attr + att];
|
||||
if (entry >= device_count) {
|
||||
// The size of the array is less than the amount of available devices
|
||||
break;
|
||||
}
|
||||
switch (it.attribute) {
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE:
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS:
|
||||
break;
|
||||
case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE:
|
||||
reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
// Find device agent returned by ROCr
|
||||
for (auto& device : devices()) {
|
||||
if (static_cast<Device*>(device)->getBackendDevice().handle == it.value) {
|
||||
reinterpret_cast<uint32_t*>(data[idx])[entry] = static_cast<uint32_t>(device->index());
|
||||
}
|
||||
}
|
||||
// Find CPU agent returned by ROCr
|
||||
for (auto& agent_info : getCpuAgents()) {
|
||||
if (agent_info.agent.handle == it.value) {
|
||||
reinterpret_cast<int32_t*>(data[idx])[entry] = static_cast<int32_t>(amd::CpuDeviceId);
|
||||
}
|
||||
}
|
||||
++entry;
|
||||
break;
|
||||
default:
|
||||
LogWarning("Unexpected result from HSA_AMD_SVM_ATTRIB_ACCESS_QUERY");
|
||||
break;
|
||||
}
|
||||
}
|
||||
rocr_attr += accessed_by;
|
||||
for (uint32_t idx = entry; idx < device_count; ++idx) {
|
||||
reinterpret_cast<int32_t*>(data[idx])[idx] = static_cast<int32_t>(amd::InvalidDeviceId);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
// Find the next location in the query
|
||||
++idx;
|
||||
}
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2428,35 +2439,42 @@ bool Device::GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
|
||||
bool Device::SvmAllocInit(void* memory, size_t size) const {
|
||||
amd::MemoryAdvice advice = amd::MemoryAdvice::SetAccessedBy;
|
||||
constexpr bool kFirstAlloc = true;
|
||||
SetSvmAttributesInt(memory, size, advice, kFirstAlloc);
|
||||
if (!SetSvmAttributesInt(memory, size, advice, kFirstAlloc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory) {
|
||||
advice = amd::MemoryAdvice::UnsetPreferredLocation;
|
||||
SetSvmAttributesInt(memory, size, advice);
|
||||
if (!SetSvmAttributesInt(memory, size, advice)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if ((settings().hmmFlags_ & Settings::Hmm::EnableMallocPrefetch) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#if AMD_HMM_SUPPORT
|
||||
// Initialize signal for the barrier
|
||||
hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);
|
||||
if (info().hmmSupported_) {
|
||||
// Initialize signal for the barrier
|
||||
hsa_signal_store_relaxed(prefetch_signal_, kInitSignalValueOne);
|
||||
|
||||
// Initiate a prefetch command which should force memory update in HMM
|
||||
hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
|
||||
0, nullptr, prefetch_signal_);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("hsa_amd_svm_attributes_get() failed");
|
||||
return false;
|
||||
// Initiate a prefetch command which should force memory update in HMM
|
||||
hsa_status_t status = hsa_amd_svm_prefetch_async(memory, size, getBackendDevice(),
|
||||
0, nullptr, prefetch_signal_);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("hsa_amd_svm_prefetch_async() failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Wait for the prefetch
|
||||
if (!WaitForSignal(prefetch_signal_)) {
|
||||
LogError("Barrier packet submission failed");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LogWarning("Early prefetch failed, because no HMM support");
|
||||
}
|
||||
|
||||
// Wait for the prefetch
|
||||
if (!WaitForSignal(prefetch_signal_)) {
|
||||
LogError("Barrier packet submission failed");
|
||||
return false;
|
||||
}
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -632,13 +632,13 @@ void Buffer::destroy() {
|
||||
if (kind_ != MEMORY_KIND_PTRGIVEN) {
|
||||
if (isFineGrain) {
|
||||
if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
|
||||
#if AMD_HMM_SUPPORT
|
||||
// AMD HMM path. Destroy system memory
|
||||
amd::Os::uncommitMemory(deviceMemory_, size());
|
||||
amd::Os::releaseMemory(deviceMemory_, size());
|
||||
#else
|
||||
dev().hostFree(deviceMemory_, size());;
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
if (dev().info().hmmSupported_) {
|
||||
// AMD HMM path. Destroy system memory
|
||||
amd::Os::uncommitMemory(deviceMemory_, size());
|
||||
amd::Os::releaseMemory(deviceMemory_, size());
|
||||
} else {
|
||||
dev().hostFree(deviceMemory_, size());
|
||||
}
|
||||
} else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
|
||||
if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) {
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
|
||||
@@ -727,17 +727,20 @@ bool Buffer::create() {
|
||||
if (owner()->getSvmPtr() == reinterpret_cast<void*>(1)) {
|
||||
if (isFineGrain) {
|
||||
if (memFlags & CL_MEM_ALLOC_HOST_PTR) {
|
||||
#if AMD_HMM_SUPPORT
|
||||
// AMD HMM path. Just allocate system memory and KFD will manage it
|
||||
deviceMemory_ = amd::Os::reserveMemory(
|
||||
0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
|
||||
amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
|
||||
// Currently HMM requires cirtain initial calls to mark sysmem allocation as
|
||||
// GPU accessible or prefetch memory into GPU
|
||||
dev().SvmAllocInit(deviceMemory_, size());
|
||||
#else
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
if (dev().info().hmmSupported_) {
|
||||
// AMD HMM path. Just allocate system memory and KFD will manage it
|
||||
deviceMemory_ = amd::Os::reserveMemory(
|
||||
0, size(), amd::Os::pageSize(), amd::Os::MEM_PROT_RW);
|
||||
amd::Os::commitMemory(deviceMemory_, size(), amd::Os::MEM_PROT_RW);
|
||||
// Currently HMM requires cirtain initial calls to mark sysmem allocation as
|
||||
// GPU accessible or prefetch memory into GPU
|
||||
if (!dev().SvmAllocInit(deviceMemory_, size())) {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_MEM, "SVM init in ROCr failed!");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
deviceMemory_ = dev().hostAlloc(size(), 1, Device::MemorySegment::kNoAtomics);
|
||||
}
|
||||
} else if (memFlags & CL_MEM_FOLLOW_USER_NUMA_POLICY) {
|
||||
deviceMemory_ = dev().hostNumaAlloc(size(), 1, (memFlags & CL_MEM_SVM_ATOMICS) != 0);
|
||||
} else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
|
||||
|
||||
@@ -1463,34 +1463,36 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd) {
|
||||
void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
amd::ScopedLock lock(execution());
|
||||
#if AMD_HMM_SUPPORT
|
||||
profilingBegin(cmd);
|
||||
// Initialize signal for the barrier
|
||||
hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
|
||||
hsa_signal_t active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
|
||||
uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;
|
||||
|
||||
// Find the requested agent for the transfer
|
||||
hsa_agent_t agent = (cmd.cpu_access() ||
|
||||
(dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
|
||||
dev().getCpuAgent() : gpu_device();
|
||||
if (dev().info().hmmSupported_) {
|
||||
// Initialize signal for the barrier
|
||||
hsa_signal_t* wait_event = Barriers().WaitingSignal(HwQueueEngine::Unknown);
|
||||
hsa_signal_t active = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
|
||||
uint32_t num_wait_events = (wait_event == nullptr) ? 0 : 1;
|
||||
|
||||
// Initiate a prefetch command
|
||||
hsa_status_t status = hsa_amd_svm_prefetch_async(
|
||||
const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
|
||||
// Find the requested agent for the transfer
|
||||
hsa_agent_t agent = (cmd.cpu_access() ||
|
||||
(dev().settings().hmmFlags_ & Settings::Hmm::EnableSystemMemory)) ?
|
||||
dev().getCpuAgent() : gpu_device();
|
||||
|
||||
// Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
|
||||
if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
|
||||
Barriers().ResetCurrentSignal();
|
||||
LogError("hsa_amd_svm_prefetch_async failed");
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
// Initiate a prefetch command
|
||||
hsa_status_t status = hsa_amd_svm_prefetch_async(
|
||||
const_cast<void*>(cmd.dev_ptr()), cmd.count(), agent, num_wait_events, wait_event, active);
|
||||
|
||||
// Wait for the prefetch. Should skip wait, but may require extra tracking for kernel execution
|
||||
if ((status != HSA_STATUS_SUCCESS) || !Barriers().WaitCurrent()) {
|
||||
Barriers().ResetCurrentSignal();
|
||||
LogError("hsa_amd_svm_prefetch_async failed");
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
// Add system scope, since the prefetch scope is unclear
|
||||
addSystemScope();
|
||||
} else {
|
||||
LogWarning("hsa_amd_svm_prefetch_async is ignored, because no HMM support");
|
||||
}
|
||||
|
||||
// Add system scope, since the prefetch scope is unclear
|
||||
addSystemScope();
|
||||
|
||||
profilingEnd(cmd);
|
||||
#endif // AMD_HMM_SUPPORT
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
Ссылка в новой задаче
Block a user