Revert SVM and XNACK support.
KFD is not ready yet.
Change-Id: I61deb292ddb92185d33504c2115169888d56e211
[ROCm/ROCR-Runtime commit: 5bd153974d]
Этот коммит содержится в:
@@ -1151,27 +1151,6 @@ hsa_status_t HSA_API hsa_amd_signal_value_pointer(hsa_signal_t signal,
|
||||
return amdExtTable->hsa_amd_signal_value_pointer_fn(signal, value_ptr);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count) {
|
||||
return amdExtTable->hsa_amd_svm_attributes_set_fn(ptr, size, attribute_list, attribute_count);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count) {
|
||||
return amdExtTable->hsa_amd_svm_attributes_get_fn(ptr, size, attribute_list, attribute_count);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
|
||||
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal) {
|
||||
return amdExtTable->hsa_amd_svm_prefetch_async_fn(ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
|
||||
}
|
||||
|
||||
// Tools only table interfaces.
|
||||
namespace rocr {
|
||||
|
||||
|
||||
@@ -173,8 +173,7 @@ class GpuAgent : public GpuAgentInt {
|
||||
// @param [in] node Node id. Each CPU in different socket will get distinct
|
||||
// id.
|
||||
// @param [in] node_props Node property.
|
||||
// @param [in] xnack_mode XNACK mode of device.
|
||||
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode);
|
||||
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
|
||||
|
||||
// @brief GPU agent destructor.
|
||||
~GpuAgent();
|
||||
|
||||
@@ -246,21 +246,6 @@ hsa_status_t hsa_amd_deregister_deallocation_callback(
|
||||
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t signal,
|
||||
volatile hsa_signal_value_t** value_ptr);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
|
||||
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal);
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
|
||||
@@ -292,15 +292,6 @@ class Runtime {
|
||||
|
||||
hsa_status_t IPCDetach(void* ptr);
|
||||
|
||||
/// @brief Apply a list of SVM attributes to the memory range [ptr, ptr + size).
/// @param [in] ptr Start of the range.
/// @param [in] size Size of the range in bytes.
/// @param [in] attribute_list Attribute/value pairs to apply.
/// @param [in] attribute_count Number of entries in @p attribute_list.
hsa_status_t SetSvmAttrib(void* ptr, size_t size,
                          hsa_amd_svm_attribute_pair_t* attribute_list,
                          size_t attribute_count);
|
||||
|
||||
/// @brief Query SVM attributes of the memory range [ptr, ptr + size).
/// @param [in] ptr Start of the range.
/// @param [in] size Size of the range in bytes.
/// @param [in,out] attribute_list Attributes to query; results are written
/// back into the same entries.
/// @param [in] attribute_count Number of entries in @p attribute_list.
hsa_status_t GetSvmAttrib(void* ptr, size_t size,
                          hsa_amd_svm_attribute_pair_t* attribute_list,
                          size_t attribute_count);
|
||||
|
||||
/// @brief Queue an asynchronous prefetch of [ptr, ptr + size) to @p agent.
/// @param [in] ptr Start of the range.
/// @param [in] size Size of the range in bytes.
/// @param [in] agent Destination agent of the prefetch.
/// @param [in] num_dep_signals Number of entries in @p dep_signals.
/// @param [in] dep_signals Signals that must reach zero before the prefetch
/// is issued.
/// @param [in] completion_signal Signal decremented once the prefetch has
/// been issued; ignored when its handle is zero.
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent,
                         uint32_t num_dep_signals,
                         const hsa_signal_t* dep_signals,
                         hsa_signal_t completion_signal);
|
||||
|
||||
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
|
||||
|
||||
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
|
||||
@@ -404,28 +395,6 @@ class Runtime {
|
||||
std::vector<void*> arg_;
|
||||
};
|
||||
|
||||
struct PrefetchRange;
|
||||
typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;
|
||||
|
||||
struct PrefetchOp {
|
||||
void* base;
|
||||
size_t size;
|
||||
uint32_t node_id;
|
||||
int remaining_deps;
|
||||
hsa_signal_t completion;
|
||||
std::vector<hsa_signal_t> dep_signals;
|
||||
prefetch_map_t::iterator prefetch_map_entry;
|
||||
};
|
||||
|
||||
struct PrefetchRange {
|
||||
PrefetchRange() {}
|
||||
PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}
|
||||
size_t bytes;
|
||||
PrefetchOp* op;
|
||||
prefetch_map_t::iterator prev;
|
||||
prefetch_map_t::iterator next;
|
||||
};
|
||||
|
||||
// Will be created before any user could call hsa_init but also could be
|
||||
// destroyed before incorrectly written programs call hsa_shutdown.
|
||||
static KernelMutex bootstrap_lock_;
|
||||
@@ -475,9 +444,6 @@ class Runtime {
|
||||
/// @retval Index in ::link_matrix_.
|
||||
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
||||
|
||||
/// @brief Get most recently issued SVM prefetch agent for the range in question.
|
||||
Agent* GetSVMPrefetchAgent(void* ptr, size_t size);
|
||||
|
||||
// Mutex object to protect multithreaded access to ::allocation_map_,
|
||||
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
|
||||
// registered & mapped arrays.
|
||||
@@ -519,10 +485,6 @@ class Runtime {
|
||||
// Contains the region, address, and size of previously allocated memory.
|
||||
std::map<const void*, AllocationRegion> allocation_map_;
|
||||
|
||||
// Pending prefetch containers.
|
||||
KernelMutex prefetch_lock_;
|
||||
prefetch_map_t prefetch_map_;
|
||||
|
||||
// Allocator using ::system_region_
|
||||
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)> system_allocator_;
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ extern HsaApiTable hsa_internal_api_table_;
|
||||
} // namespace core
|
||||
|
||||
namespace AMD {
|
||||
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode)
|
||||
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
: GpuAgentInt(node),
|
||||
properties_(node_props),
|
||||
current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
|
||||
@@ -125,8 +125,8 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna
|
||||
rocr::core::IsaFeature xnack = rocr::core::IsaFeature::Unsupported;
|
||||
if (isa_base->IsXnackSupported()) {
|
||||
// TODO: This needs to be obtained form KFD once HMM implemented.
|
||||
xnack = xnack_mode ? core::IsaFeature::Enabled
|
||||
: core::IsaFeature::Disabled;
|
||||
xnack = profile_ == HSA_PROFILE_FULL ? core::IsaFeature::Enabled
|
||||
: core::IsaFeature::Disabled;
|
||||
}
|
||||
|
||||
// Set instruction set architecture via node property, only on GPU device.
|
||||
|
||||
@@ -68,38 +68,6 @@ namespace AMD {
|
||||
static const uint kKfdVersionMajor = 0;
|
||||
static const uint kKfdVersionMinor = 99;
|
||||
|
||||
// Query for user preference and use that to determine Xnack mode of ROCm system.
|
||||
// Return true if Xnack mode is ON or false if OFF. Xnack mode of a system is
|
||||
// orthogonal to devices that do not support Xnack mode. It is legal for a
|
||||
// system with Xnack ON to have devices that do not support Xnack functionality.
|
||||
bool BindXnackMode() {
|
||||
// Get users' preference for Xnack mode of ROCm platform
|
||||
HSAint32 mode;
|
||||
mode = core::Runtime::runtime_singleton_->flag().xnack();
|
||||
bool config_xnack =
|
||||
(core::Runtime::runtime_singleton_->flag().xnack() != Flag::XNACK_REQUEST::XNACK_UNCHANGED);
|
||||
|
||||
// Indicate to driver users' preference for Xnack mode
|
||||
// Call to driver can fail and is a supported feature
|
||||
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
|
||||
if (config_xnack) {
|
||||
status = hsaKmtSetXNACKMode(mode);
|
||||
if (status == HSAKMT_STATUS_SUCCESS) {
|
||||
return mode;
|
||||
}
|
||||
}
|
||||
|
||||
// Get Xnack mode of devices bound by driver. This could happen
|
||||
// when a call to SET Xnack mode fails or user has no particular
|
||||
// preference
|
||||
status = hsaKmtGetXNACKMode((HSAint32*)&mode);
|
||||
if(status != HSAKMT_STATUS_SUCCESS) {
|
||||
debug_print("KFD does not support xnack mode query.\nROCr must assume xnack is disabled.\n");
|
||||
return false;
|
||||
}
|
||||
return mode;
|
||||
}
|
||||
|
||||
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
if (node_prop.NumCPUCores == 0) {
|
||||
return nullptr;
|
||||
@@ -111,14 +79,14 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
return cpu;
|
||||
}
|
||||
|
||||
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode) {
|
||||
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
|
||||
GpuAgent* gpu = nullptr;
|
||||
if (node_prop.NumFComputeCores == 0) {
|
||||
// Ignore non GPUs.
|
||||
return nullptr;
|
||||
}
|
||||
try {
|
||||
gpu = new GpuAgent(node_id, node_prop, xnack_mode);
|
||||
gpu = new GpuAgent(node_id, node_prop);
|
||||
|
||||
const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion();
|
||||
|
||||
@@ -244,7 +212,7 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
|
||||
/**
|
||||
* Process the list of Gpus that are surfaced to user
|
||||
*/
|
||||
static void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode) {
|
||||
static void SurfaceGpuList(std::vector<int32_t>& gpu_list) {
|
||||
// Process user visible Gpu devices
|
||||
int32_t invalidIdx = -1;
|
||||
int32_t list_sz = gpu_list.size();
|
||||
@@ -261,7 +229,7 @@ static void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode) {
|
||||
// Instantiate a Gpu device. The IO links
|
||||
// of this node have already been registered
|
||||
assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery.");
|
||||
DiscoverGpu(gpu_list[idx], node_prop, xnack_mode);
|
||||
DiscoverGpu(gpu_list[idx], node_prop);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -345,11 +313,8 @@ void BuildTopology() {
|
||||
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
|
||||
}
|
||||
|
||||
// Determine the Xnack mode to be bound for system
|
||||
bool xnack_mode = BindXnackMode();
|
||||
|
||||
// Instantiate ROCr objects to encapsulate Gpu devices
|
||||
SurfaceGpuList(gpu_usr_list, xnack_mode);
|
||||
SurfaceGpuList(gpu_usr_list);
|
||||
}
|
||||
|
||||
bool Load() {
|
||||
|
||||
@@ -391,9 +391,6 @@ void HsaApiTable::UpdateAmdExts() {
|
||||
amd_ext_api.hsa_amd_register_deallocation_callback_fn = AMD::hsa_amd_register_deallocation_callback;
|
||||
amd_ext_api.hsa_amd_deregister_deallocation_callback_fn = AMD::hsa_amd_deregister_deallocation_callback;
|
||||
amd_ext_api.hsa_amd_signal_value_pointer_fn = AMD::hsa_amd_signal_value_pointer;
|
||||
amd_ext_api.hsa_amd_svm_attributes_set_fn = AMD::hsa_amd_svm_attributes_set;
|
||||
amd_ext_api.hsa_amd_svm_attributes_get_fn = AMD::hsa_amd_svm_attributes_get;
|
||||
amd_ext_api.hsa_amd_svm_prefetch_async_fn = AMD::hsa_amd_svm_prefetch_async;
|
||||
}
|
||||
|
||||
void LoadInitialHsaApiTable() {
|
||||
|
||||
@@ -1009,37 +1009,5 @@ hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifie
|
||||
CATCH;
|
||||
}
|
||||
|
||||
// Implementation of the hsa_amd_svm_attributes_set extension API: checks that
// the runtime is open and hands the request to Runtime::SetSvmAttrib.
// Exceptions are translated to status codes by the TRY/CATCH macros.
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count) {
  TRY;
  IS_OPEN();
  auto& runtime = *core::Runtime::runtime_singleton_;
  return runtime.SetSvmAttrib(ptr, size, attribute_list, attribute_count);
  CATCH;
}
|
||||
|
||||
// Implementation of the hsa_amd_svm_attributes_get extension API: checks that
// the runtime is open and hands the request to Runtime::GetSvmAttrib.
// Exceptions are translated to status codes by the TRY/CATCH macros.
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count) {
  TRY;
  IS_OPEN();
  auto& runtime = *core::Runtime::runtime_singleton_;
  return runtime.GetSvmAttrib(ptr, size, attribute_list, attribute_count);
  CATCH;
}
|
||||
|
||||
// Implementation of the hsa_amd_svm_prefetch_async extension API: checks that
// the runtime is open and hands the request to Runtime::SvmPrefetch.
// Exceptions are translated to status codes by the TRY/CATCH macros.
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                        uint32_t num_dep_signals,
                                        const hsa_signal_t* dep_signals,
                                        hsa_signal_t completion_signal) {
  TRY;
  IS_OPEN();
  // Input validation is currently disabled:
  // if (core::g_use_interrupt_wait && (!core::InterruptSignal::IsType(signal)))
  auto& runtime = *core::Runtime::runtime_singleton_;
  return runtime.SvmPrefetch(ptr, size, agent, num_dep_signals, dep_signals,
                             completion_signal);
  CATCH;
}
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
@@ -644,18 +644,6 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
|
||||
*(const char**)value = STRING(ROCR_BUILD_ID);
|
||||
break;
|
||||
}
|
||||
case HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: {
|
||||
// todo: Get HMM kernel support info.
|
||||
*(bool*)value = true;
|
||||
break;
|
||||
}
|
||||
case HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: {
|
||||
bool ret = true;
|
||||
for(auto agent : gpu_agents_)
|
||||
ret &= (agent->isa()->GetXnack() == IsaFeature::Enabled);
|
||||
*(bool*)value = ret;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -1598,499 +1586,5 @@ void Runtime::InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t ag
|
||||
internal_queue_create_notifier_(queue, agent, internal_queue_create_notifier_user_data_);
|
||||
}
|
||||
|
||||
// Applies a list of SVM attributes to the 4 KiB aligned range that covers
// [ptr, ptr + size).
//
// Each HSA attribute is translated into either a KFD attribute pair or a bit
// in the set/clear flag masks; everything is then submitted in one
// hsaKmtSVMSetAttr call.
//
// @param ptr             Start of the range.
// @param size            Size of the range in bytes.
// @param attribute_list  Attribute/value pairs to apply.
// @param attribute_count Number of entries in attribute_list.
// @return HSA_STATUS_SUCCESS on success.
// @throws AMD::hsa_exception for invalid agents or attributes, for an
//         attribute or agent that is named more than once, and when the KFD
//         call fails.
hsa_status_t Runtime::SetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  // An empty request has nothing to apply. Returning here also guarantees the
  // attribute vector handed to KFD below is never empty.
  if (attribute_count == 0) return HSA_STATUS_SUCCESS;
  if (attribute_list == nullptr)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null attribute list in Runtime::SetSvmAttrib.");

  // Bitmask of the single-instance attributes seen so far.
  uint32_t set_attribs = 0;
  // Tracks which agents (by node id) already received an access attribute.
  std::vector<bool> agent_seen(agents_by_node_.size(), false);

  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  // Room for one entry per attribute plus the two optional flag updates.
  attribs.reserve(attribute_count + 2);
  uint32_t set_flags = 0;
  uint32_t clear_flags = 0;

  // Translate a handle into a valid agent; a null agent is an error.
  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Same as Convert, but a null agent is passed through to the caller.
  auto ConvertAllowNull = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent != nullptr) && (!agent->IsValid()))
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Reject a second access attribute for the same agent.
  auto ConfirmNew = [&](Agent* agent) {
    const size_t node = agent->node_id();
    // Node ids are used as indices; grow the table rather than assume every id
    // is below the number of known nodes.
    if (node >= agent_seen.size()) agent_seen.resize(node + 1, false);
    if (agent_seen[node])
      throw AMD::hsa_exception(
          HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
          "Multiple attributes given for the same agent in Runtime::SetSvmAttrib.");
    agent_seen[node] = true;
  };

  // Reject a single-instance attribute that is given more than once.
  auto Check = [&](uint64_t attrib) {
    const uint32_t bit = 1u << attrib;
    if (set_attribs & bit)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
                               "Attribute given multiple times in Runtime::SetSvmAttrib.");
    set_attribs |= bit;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  for (size_t i = 0; i < attribute_count; i++) {
    auto attrib = attribute_list[i].attribute;
    auto value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        Check(attrib);
        switch (value) {
          case HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED:
            set_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          case HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED:
            clear_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          default:
            throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                     "Invalid HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG value.");
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_GPU_RO;
        else
          clear_flags |= HSA_SVM_FLAG_GPU_RO;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        else
          clear_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        Check(attrib);
        // Max migration size is 1GB.
        if (value > 18) value = 18;
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, value));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        Check(attrib);
        // A null agent clears the preferred location.
        Agent* agent = ConvertAllowNull(value);
        if (agent == nullptr)
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, INVALID_NODEID));
        else
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, agent->node_id()));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        // CPU access is expressed through the host access flag, GPU access
        // through a per-node attribute.
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS_IN_PLACE, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          clear_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_NO_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::SetSvmAttrib");
    }
  }

  // Merge CPU access properties - grant access if any CPU needs access.
  // Probably wrong.
  if (set_flags & HSA_SVM_FLAG_HOST_ACCESS) clear_flags &= ~HSA_SVM_FLAG_HOST_ACCESS;

  // Add flag updates
  if (clear_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_CLR_FLAGS, clear_flags));
  if (set_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, set_flags));

  // KFD is given the 4 KiB aligned range that covers the request.
  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;
  HSAKMT_STATUS error = hsaKmtSVMSetAttr(base, len, attribs.size(), attribs.data());
  if (error != HSAKMT_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMSetAttr failed.");

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Queries SVM attributes for the 4 KiB aligned range that covers
// [ptr, ptr + size) and writes the results back into attribute_list.
//
// The first pass translates each requested HSA attribute into a KFD query
// (kmtIndices[i] holds the index of the matching KFD entry, or -1 when the
// answer comes from the flag word or is computed locally). After the single
// hsaKmtSVMGetAttr call, the second pass converts the answers back.
//
// @param ptr             Start of the range.
// @param size            Size of the range in bytes.
// @param attribute_list  In: attributes to query (ACCESS_QUERY carries the
//                        agent handle in value). Out: queried values; for
//                        ACCESS_QUERY the attribute field is replaced by the
//                        resulting access attribute.
// @param attribute_count Number of entries in attribute_list.
// @return HSA_STATUS_SUCCESS on success.
// @throws AMD::hsa_exception for invalid agents or attributes and when the
//         KFD call fails.
hsa_status_t Runtime::GetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  if ((attribute_count != 0) && (attribute_list == nullptr))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null attribute list in Runtime::GetSvmAttrib.");

  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  // One entry per attribute plus the optional trailing flag query.
  attribs.reserve(attribute_count + 1);

  std::vector<int> kmtIndices(attribute_count);

  bool getFlags = false;

  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::GetSvmAttrib.");
    return agent;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  // Pass 1: build the KFD query list.
  for (size_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG:
      case HSA_AMD_SVM_ATTRIB_READ_ONLY:
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        getFlags = true;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        kmtIndices[i] = attribs.size();
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        kmtIndices[i] = attribs.size();
        attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Answered here because pending prefetches are tracked by the runtime.
        value = Agent::Convert(GetSVMPrefetchAgent(ptr, size)).handle;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        Agent* agent = Convert(value);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          // CPU accessibility is reported through the host access flag.
          getFlags = true;
          kmtIndices[i] = -1;
        } else {
          kmtIndices[i] = attribs.size();
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  // The flag word, when needed, is always the last entry of the query.
  if (getFlags) attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, 0));

  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;
  if (attribs.size() != 0) {
    HSAKMT_STATUS error = hsaKmtSVMGetAttr(base, len, attribs.size(), &attribs[0]);
    if (error != HSAKMT_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMGetAttr failed.");
  }

  // Flag word returned by KFD; only meaningful when it was requested.
  const uint32_t flags = getFlags ? attribs.back().value : 0;

  // Pass 2: translate the KFD answers back into HSA attribute values.
  for (size_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        if (flags & HSA_SVM_FLAG_COHERENT)
          value = HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED;
        else
          value = HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        value = (flags & HSA_SVM_FLAG_GPU_RO);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        value = (flags & HSA_SVM_FLAG_HIVE_LOCAL);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        value = attribs[kmtIndices[i]].value;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        uint64_t node = attribs[kmtIndices[i]].value;
        Agent* agent = nullptr;
        // NOTE(review): assumes every node id reported by KFD has at least one
        // agent registered in agents_by_node_ - confirm.
        if (node != INVALID_NODEID) agent = agents_by_node_[node][0];
        value = Agent::Convert(agent).handle;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Already filled in during pass 1.
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        if (kmtIndices[i] == -1) {
          // CPU agent: report both outcomes so the caller never sees the
          // query attribute echoed back unchanged.
          if (flags & HSA_SVM_FLAG_HOST_ACCESS)
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
          else
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
        } else {
          // GPU agent: KFD reports the access kind through the entry's type.
          switch (attribs[kmtIndices[i]].type) {
            case HSA_SVM_ATTR_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
              break;
            case HSA_SVM_ATTR_ACCESS_IN_PLACE:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE;
              break;
            case HSA_SVM_ATTR_NO_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
              break;
            default:
              assert(false && "Bad agent accessibility from KFD.");
          }
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Queues an asynchronous prefetch of the 4 KiB aligned range covering
// [ptr, ptr + size) to the given agent.
//
// The request is recorded in prefetch_map_ (replacing or trimming any pending
// ranges it overlaps) so that GetSVMPrefetchAgent can report it before KFD has
// seen it. The actual KFD call is made from a signal handler once every
// dependency signal has reached zero, or from an async function when there
// are no dependencies.
//
// @param ptr               Start of the range.
// @param size              Size of the range in bytes.
// @param agent             Destination agent.
// @param num_dep_signals   Number of entries in dep_signals.
// @param dep_signals       Signals that must reach zero before prefetching.
// @param completion_signal Decremented by one after the prefetch was issued;
//                          ignored when its handle is zero.
// @return HSA_STATUS_SUCCESS once the request has been queued.
// @throws AMD::hsa_exception for an invalid agent, a missing dependency list,
//         or when the handler cannot be registered.
hsa_status_t Runtime::SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent,
                                  uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                  hsa_signal_t completion_signal) {
  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);
  size_t len = end - base;

  // Validate inputs before anything is allocated or recorded.
  Agent* dest = Agent::Convert(agent);
  if ((dest == nullptr) || !dest->IsValid())
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                             "Invalid agent handle in Runtime::SvmPrefetch.");
  if ((num_dep_signals != 0) && (dep_signals == nullptr))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null dependency signal list in Runtime::SvmPrefetch.");

  PrefetchOp* op = new PrefetchOp();
  MAKE_NAMED_SCOPE_GUARD(OpGuard, [&]() { delete op; });

  // CPU destinations are always reported to KFD as node 0.
  if (dest->device_type() == Agent::kAmdCpuDevice)
    op->node_id = 0;
  else
    op->node_id = dest->node_id();

  op->base = reinterpret_cast<void*>(base);
  op->size = len;
  op->completion = completion_signal;
  // The last dependency is waited on first (registered below); the remaining
  // ones are stored and consumed back to front by the handler.
  if (num_dep_signals > 1) {
    op->remaining_deps = num_dep_signals - 1;
    op->dep_signals.assign(dep_signals, dep_signals + (num_dep_signals - 1));
  } else {
    op->remaining_deps = 0;
  }

  {
    ScopedAcquire<KernelMutex> lock(&prefetch_lock_);
    // Remove all fully overlapped and trim partially overlapped ranges.
    // Get iteration bounds
    auto start = prefetch_map_.upper_bound(base);
    if (start != prefetch_map_.begin()) start--;
    auto stop = prefetch_map_.lower_bound(end);

    auto isEndNode = [&](decltype(start) node) { return node->second.next == prefetch_map_.end(); };
    auto isFirstNode = [&](decltype(start) node) {
      return node->second.prev == prefetch_map_.end();
    };

    // Trim and remove old ranges.
    while (start != stop) {
      uintptr_t startBase = start->first;
      uintptr_t startEnd = startBase + start->second.bytes;

      auto ibase = Max(startBase, base);
      auto iend = Min(startEnd, end);
      // Check for overlap
      if (ibase < iend) {
        // Second range check: the old range extends past the new one, so its
        // tail survives as a separate entry linked after the old entry.
        if (iend < startEnd) {
          auto ret = prefetch_map_.insert(
              std::make_pair(iend, PrefetchRange(startEnd - iend, start->second.op)));
          assert(ret.second && "Prefetch map insert failed during range split.");

          auto it = ret.first;
          it->second.prev = start;
          it->second.next = start->second.next;
          start->second.next = it;
          if (!isEndNode(it)) it->second.next->second.prev = it;
        }

        // Is the first interval of the old range valid
        if (startBase < ibase) {
          start->second.bytes = ibase - startBase;
        } else {
          // The head of the old range is covered: unlink the entry from its
          // operation's chain and drop it.
          if (isFirstNode(start)) {
            start->second.op->prefetch_map_entry = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = prefetch_map_.end();
          } else {
            start->second.prev->second.next = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = start->second.prev;
          }
          // erase() invalidates the erased iterator; continue from the
          // successor it returns instead of incrementing a dead iterator.
          start = prefetch_map_.erase(start);
          continue;
        }
      }
      ++start;
    }

    // Insert new range.
    auto ret = prefetch_map_.insert(std::make_pair(base, PrefetchRange(len, op)));
    assert(ret.second && "Prefetch map insert failed.");

    auto it = ret.first;
    op->prefetch_map_entry = it;
    it->second.next = it->second.prev = prefetch_map_.end();
  }

  // Remove the prefetch's ranges from the map.
  static auto removePrefetchRanges = [](PrefetchOp* op) {
    ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
    auto it = op->prefetch_map_entry;
    while (it != Runtime::runtime_singleton_->prefetch_map_.end()) {
      auto next = it->second.next;
      Runtime::runtime_singleton_->prefetch_map_.erase(it);
      it = next;
    }
  };

  // Prefetch Signal handler for synchronization. Re-arms itself on the next
  // outstanding dependency; once none remain it issues the prefetch, drops the
  // bookkeeping and signals completion.
  static hsa_amd_signal_handler signal_handler = [](hsa_signal_value_t value, void* arg) {
    PrefetchOp* op = reinterpret_cast<PrefetchOp*>(arg);

    if (op->remaining_deps > 0) {
      op->remaining_deps--;
      Runtime::runtime_singleton_->SetAsyncSignalHandler(
          op->dep_signals[op->remaining_deps], HSA_SIGNAL_CONDITION_EQ, 0, signal_handler, arg);
      return false;
    }

    HSA_SVM_ATTRIBUTE attrib;
    attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
    attrib.value = op->node_id;
    HSAKMT_STATUS error = hsaKmtSVMSetAttr(op->base, op->size, 1, &attrib);
    assert(error == HSAKMT_STATUS_SUCCESS && "KFD Prefetch failed.");

    removePrefetchRanges(op);

    if (op->completion.handle != 0) Signal::Convert(op->completion)->SubRelaxed(1);
    delete op;

    return false;
  };

  auto no_dependencies = [](void* arg) { signal_handler(0, arg); };

  // Declared after OpGuard so that, on failure, the map entries are removed
  // before the operation itself is deleted.
  MAKE_NAMED_SCOPE_GUARD(RangeGuard, [&]() { removePrefetchRanges(op); });

  hsa_status_t err;
  if (num_dep_signals == 0)
    err = AMD::hsa_amd_async_function(no_dependencies, op);
  else
    err = SetAsyncSignalHandler(dep_signals[num_dep_signals - 1], HSA_SIGNAL_CONDITION_EQ, 0,
                                signal_handler, op);
  if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "Signal handler unable to be set.");

  // Ownership of op now belongs to the handler.
  RangeGuard.Dismiss();
  OpGuard.Dismiss();
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Returns the agent that the 4 KiB aligned range covering [ptr, ptr + size)
// was most recently prefetched to.
//
// Pending prefetches recorded in prefetch_map_ take precedence; the parts of
// the range that no pending prefetch covers ("holes") are looked up in KFD.
//
// @param ptr  Start of the range.
// @param size Size of the range in bytes.
// @return The common prefetch agent, or nullptr when the range has no
//         prefetch location, has mixed locations, or is empty.
// @throws AMD::hsa_exception when the KFD query fails.
Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  const uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);

  // KFD returns -1 for no or mixed destinations; -2 marks "nothing seen yet".
  const uint32_t kNoneOrMixed = static_cast<uint32_t>(-1);
  const uint32_t kUnset = static_cast<uint32_t>(-2);

  // (start address, byte count) of the sub-ranges without a pending prefetch.
  std::vector<std::pair<uintptr_t, uintptr_t>> holes;

  ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
  auto start = prefetch_map_.upper_bound(base);
  if (start != prefetch_map_.begin()) start--;
  auto stop = prefetch_map_.lower_bound(end);

  uint32_t prefetch_node = kUnset;

  while (start != stop) {
    uintptr_t startBase = start->first;
    uintptr_t startEnd = startBase + start->second.bytes;

    auto ibase = Max(base, startBase);
    auto iend = Min(end, startEnd);
    // Check for intersection with the query
    if (ibase < iend) {
      // Only ranges that really intersect the query may decide the node; the
      // first candidate can lie entirely in front of the query.
      const uint32_t node = start->second.op->node_id;
      if (prefetch_node == kUnset) prefetch_node = node;

      // If prefetch locations are different then we report null agent.
      if (prefetch_node != node) return nullptr;

      // Push leading gap to an array for checking KFD.
      if (base < ibase) holes.push_back(std::make_pair(base, ibase - base));

      // Trim query range.
      base = iend;
    }
    ++start;
  }
  if (base < end) holes.push_back(std::make_pair(base, end - base));

  for (auto& range : holes) {
    HSA_SVM_ATTRIBUTE attrib;
    attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
    attrib.value = kNoneOrMixed;
    HSAKMT_STATUS error =
        hsaKmtSVMGetAttr(reinterpret_cast<void*>(range.first), range.second, 1, &attrib);
    // Report the failure instead of consuming a value KFD never wrote.
    if (error != HSAKMT_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR, "KFD prefetch query failed.");

    if (attrib.value == kNoneOrMixed) return nullptr;
    if (prefetch_node == kUnset) prefetch_node = attrib.value;
    if (prefetch_node != attrib.value) return nullptr;
  }

  // Empty query: neither a pending range nor a hole supplied a node.
  if (prefetch_node == kUnset) return nullptr;

  assert(prefetch_node != kNoneOrMixed && "Should have already returned.");
  return agents_by_node_[prefetch_node][0];
}
|
||||
|
||||
} // namespace core
|
||||
} // namespace rocr
|
||||
|
||||
@@ -56,11 +56,6 @@ class Flag {
|
||||
public:
|
||||
enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
|
||||
|
||||
// User request for the Xnack mode. XNACK_DISABLE and XNACK_ENABLE are passed
// to the thunk API as-is, so their numeric values must not change.
enum XNACK_REQUEST {
  XNACK_DISABLE = 0,
  XNACK_ENABLE = 1,
  XNACK_UNCHANGED = 2
};
static_assert(XNACK_DISABLE == 0, "XNACK_REQUEST enum values improperly changed.");
static_assert(XNACK_ENABLE == 1, "XNACK_REQUEST enum values improperly changed.");
|
||||
|
||||
enum FLAG_TRI_STATE { FLAG_DISABLE = 0, FLAG_ENABLE = 1, FLAG_DEFAULT = 2 };
|
||||
|
||||
explicit Flag() { Refresh(); }
|
||||
@@ -144,11 +139,6 @@ class Flag {
|
||||
var = os::GetEnvVar("HSA_IGNORE_SRAMECC_MISREPORT");
|
||||
check_sramecc_validity_ = (var == "1") ? false : true;
|
||||
|
||||
// Legal values are zero "0" or one "1". Any other value will
|
||||
// be interpreted as not defining the env variable
|
||||
var = os::GetEnvVar("HSA_XNACK");
|
||||
xnack_ = (var == "0") ? XNACK_DISABLE : ((var == "1") ? XNACK_ENABLE : XNACK_UNCHANGED);
|
||||
|
||||
// Legal values are zero "0" or one "1". Any other value will
|
||||
// be interpreted as not defining the env variable.
|
||||
var = os::GetEnvVar("HSA_FORCE_SRAMECC");
|
||||
@@ -206,8 +196,6 @@ class Flag {
|
||||
|
||||
bool check_sramecc_validity() const { return check_sramecc_validity_; }
|
||||
|
||||
XNACK_REQUEST xnack() const { return xnack_; }
|
||||
|
||||
FLAG_TRI_STATE sramecc() const { return sramecc_; }
|
||||
|
||||
private:
|
||||
@@ -243,9 +231,6 @@ class Flag {
|
||||
|
||||
size_t force_sdma_size_;
|
||||
|
||||
// Indicates user preference for Xnack state.
|
||||
XNACK_REQUEST xnack_;
|
||||
|
||||
// Indicates user preference for SramECC state.
|
||||
FLAG_TRI_STATE sramecc_;
|
||||
|
||||
|
||||
@@ -222,9 +222,6 @@ global:
|
||||
hsa_amd_deregister_deallocation_callback;
|
||||
hsa_amd_signal_value_pointer;
|
||||
_amdgpu_r_debug;
|
||||
hsa_amd_svm_attributes_set;
|
||||
hsa_amd_svm_attributes_get;
|
||||
hsa_amd_svm_prefetch_async;
|
||||
|
||||
local:
|
||||
*;
|
||||
|
||||
@@ -480,21 +480,7 @@ typedef enum {
|
||||
/**
|
||||
* String containing the ROCr build identifier.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
|
||||
/**
|
||||
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
|
||||
* this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201,
|
||||
// TODO: Should this be per Agent?
|
||||
/**
|
||||
* Returns true if all Agents have access to system allocated memory (such as
|
||||
* that allocated by mmap, malloc, or new) by default.
|
||||
* If false then system allocated memory may only be made SVM accessible to
|
||||
* an Agent by declaration of accessibility with hsa_amd_svm_attributes_set.
|
||||
* The type of this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
|
||||
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
|
||||
} hsa_system_info_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -183,9 +183,6 @@ struct AmdExtTable {
|
||||
decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
|
||||
decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
|
||||
decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
|
||||
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
|
||||
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
|
||||
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
|
||||
@@ -2116,162 +2116,6 @@ hsa_status_t HSA_API hsa_amd_register_deallocation_callback(void* ptr,
|
||||
hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(void* ptr,
|
||||
hsa_amd_deallocation_callback_t callback);
|
||||
|
||||
typedef enum hsa_amd_svm_model_s {
|
||||
/**
|
||||
* Updates to memory with this attribute conform to HSA memory consistency model.
|
||||
*/
|
||||
HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED = 0,
|
||||
/**
|
||||
* Writes to memory with this attribute can be performed by a single agent at a time.
|
||||
*/
|
||||
HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1
|
||||
} hsa_amd_svm_model_t;
|
||||
|
||||
typedef enum hsa_amd_svm_attribute_s {
|
||||
// Memory model attribute.
|
||||
// Type of this attribute is hsa_amd_svm_model_t.
|
||||
HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0,
|
||||
// Marks the range read only. This allows multiple physical copies to be
|
||||
// placed local to each accessing device.
|
||||
// Type of this attribute is bool.
|
||||
HSA_AMD_SVM_ATTRIB_READ_ONLY = 1,
|
||||
// Automatic migrations should attempt to keep the memory within the xgmi hive
|
||||
// containing accessible agents.
|
||||
// Type of this attribute is bool.
|
||||
HSA_AMD_SVM_ATTRIB_HIVE_LOCAL = 2,
|
||||
// Page granularity to migrate at once. Page granularity is specified as
|
||||
// log2(page_count).
|
||||
// Type of this attribute is uint64_t.
|
||||
HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY = 3,
|
||||
// Physical location to prefer when automatic migration occurs.
|
||||
// Set to the null agent handle (handle == 0) to indicate there
|
||||
// is no preferred location.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION = 4,
|
||||
// This attribute can not be used in ::hsa_amd_svm_attributes_set (see
|
||||
// ::hsa_amd_svm_prefetch_async).
|
||||
// Physical location of most recent prefetch command.
|
||||
// If the prefetch location has not been set or is not uniform across the
|
||||
// address range then returned hsa_agent_t::handle will be 0.
|
||||
// Querying this attribute will return the destination agent of the most
|
||||
// recent ::hsa_amd_svm_prefetch_async targeting the address range. If
|
||||
// multiple async prefetches have been issued targeting the region and the
|
||||
// most recently issued prefetch has completed then the query will return
|
||||
// the location of the most recently completed prefetch.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION = 5,
|
||||
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
|
||||
// Enables an agent for access to the range. Access may incur a page fault
|
||||
// and associated memory migration. Either this or
|
||||
// HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE is required prior to SVM
|
||||
// access if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE = 0x200,
|
||||
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
|
||||
// Enables an agent for access to the range without page faults. Access
|
||||
// will not incur a page fault and will not cause access based migration.
|
||||
// Either this or
|
||||
// HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE is required prior to SVM access if
|
||||
// HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE = 0x201,
|
||||
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
|
||||
// Denies an agent access to the memory range. Access will cause a terminal
|
||||
// segfault.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS = 0x202,
|
||||
// This attribute can not be used in ::hsa_amd_svm_attributes_set.
|
||||
// Returns the access attribute associated with the agent.
|
||||
// The agent to query must be set in the attribute value field.
|
||||
// The attribute enum will be replaced with the agent's current access
|
||||
// attribute for the address range.
|
||||
// TODO: Clarify KFD return value for non-uniform access attribute.
|
||||
// Type of this attribute is hsa_agent_t.
|
||||
HSA_AMD_SVM_ATTRIB_ACCESS_QUERY = 0x203,
|
||||
} hsa_amd_svm_attribute_t;
|
||||
|
||||
// List type for hsa_amd_svm_attributes_set/get.
|
||||
typedef struct hsa_amd_svm_attribute_pair_s {
|
||||
// hsa_amd_svm_attribute_t value.
|
||||
uint64_t attribute;
|
||||
// Attribute value. Bit values should be interpreted according to the type
|
||||
// given in the associated attribute description.
|
||||
uint64_t value;
|
||||
} hsa_amd_svm_attribute_pair_t;
|
||||
|
||||
/**
|
||||
* @brief Sets SVM memory attributes.
|
||||
*
|
||||
* If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT returns false then enabling
|
||||
* access to an Agent via this API (setting HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE
|
||||
* or HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE) is required prior to SVM
|
||||
* memory access by that Agent.
|
||||
*
|
||||
* Attributes HSA_AMD_SVM_ATTRIB_ACCESS_QUERY and HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION
|
||||
* may not be used with this API.
|
||||
*
|
||||
* @param[in] ptr Will be aligned down to nearest page boundary.
|
||||
*
|
||||
* @param[in] size Will be aligned up to nearest page boundary.
|
||||
*
|
||||
* @param[in] attribute_list List of attributes to set for the address range.
|
||||
*
|
||||
* @param[in] attribute_count Length of @p attribute_list.
|
||||
*/
|
||||
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
|
||||
/**
|
||||
* @brief Gets SVM memory attributes.
|
||||
*
|
||||
* Attributes HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE,
|
||||
* HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE and
|
||||
* HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS may not be used with this API.
|
||||
*
|
||||
* Note that attribute HSA_AMD_SVM_ATTRIB_ACCESS_QUERY takes as input an
|
||||
* hsa_agent_t and returns the current access type through its attribute field.
|
||||
*
|
||||
* @param[in] ptr Will be aligned down to nearest page boundary.
|
||||
*
|
||||
* @param[in] size Will be aligned up to nearest page boundary.
|
||||
*
|
||||
* @param[in,out] attribute_list List of attributes to query for the address range.
|
||||
*
|
||||
* @param[in] attribute_count Length of @p attribute_list.
|
||||
*/
|
||||
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
|
||||
hsa_amd_svm_attribute_pair_t* attribute_list,
|
||||
size_t attribute_count);
|
||||
|
||||
/**
|
||||
* @brief Asynchronously migrates memory to an agent.
|
||||
*
|
||||
* Schedules memory migration to @p agent when @p dep_signals have been observed equal to zero.
|
||||
* @p completion_signal will decrement when the migration is complete.
|
||||
*
|
||||
* @param[in] ptr Will be aligned down to nearest page boundary.
|
||||
*
|
||||
* @param[in] size Will be aligned up to nearest page boundary.
|
||||
*
|
||||
* @param[in] agent Agent to migrate to.
|
||||
*
|
||||
* @param[in] num_dep_signals Number of dependent signals. Can be 0.
|
||||
*
|
||||
* @param[in] dep_signals List of signals that must be waited on before the migration
|
||||
* operation starts. The migration will start after every signal has been observed with
|
||||
* the value 0. If @p num_dep_signals is 0, this argument is ignored.
|
||||
*
|
||||
* @param[in] completion_signal Signal used to indicate completion of the migration
|
||||
* operation. When the migration operation is finished, the value of the signal is
|
||||
* decremented. The runtime indicates that an error has occurred during the migration
|
||||
* operation by setting the value of the completion signal to a negative
|
||||
* number. If no completion signal is required this handle may be null.
|
||||
*/
|
||||
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
|
||||
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // end extern "C" block
|
||||
#endif
|
||||
|
||||
Ссылка в новой задаче
Block a user