KFD is not ready yet.

Change-Id: I61deb292ddb92185d33504c2115169888d56e211


[ROCm/ROCR-Runtime commit: 5bd153974d]
Этот коммит содержится в:
Sean Keely
2021-04-01 22:35:44 -05:00
родитель 29fa097a82
Коммит da41352a93
14 изменённых файлов: 10 добавлений и 852 удалений
-21
Просмотреть файл
@@ -1151,27 +1151,6 @@ hsa_status_t HSA_API hsa_amd_signal_value_pointer(hsa_signal_t signal,
return amdExtTable->hsa_amd_signal_value_pointer_fn(signal, value_ptr);
}
// Exported C entry point for hsa_amd_svm_attributes_set.
// Forwards every argument, unchanged, to the implementation currently
// installed in the AMD extension API table.
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                                hsa_amd_svm_attribute_pair_t* attribute_list,
                                                size_t attribute_count) {
  const auto set_fn = amdExtTable->hsa_amd_svm_attributes_set_fn;
  return set_fn(ptr, size, attribute_list, attribute_count);
}
// Exported C entry point for hsa_amd_svm_attributes_get.
// Forwards every argument, unchanged, to the implementation currently
// installed in the AMD extension API table.
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                                hsa_amd_svm_attribute_pair_t* attribute_list,
                                                size_t attribute_count) {
  const auto get_fn = amdExtTable->hsa_amd_svm_attributes_get_fn;
  return get_fn(ptr, size, attribute_list, attribute_count);
}
// Exported C entry point for hsa_amd_svm_prefetch_async.
// Forwards every argument, unchanged, to the implementation currently
// installed in the AMD extension API table.
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                                uint32_t num_dep_signals,
                                                const hsa_signal_t* dep_signals,
                                                hsa_signal_t completion_signal) {
  const auto prefetch_fn = amdExtTable->hsa_amd_svm_prefetch_async_fn;
  return prefetch_fn(ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
}
// Tools only table interfaces.
namespace rocr {
+1 -2
Просмотреть файл
@@ -173,8 +173,7 @@ class GpuAgent : public GpuAgentInt {
// @param [in] node Node id. Each CPU in different socket will get distinct
// id.
// @param [in] node_props Node property.
// @param [in] xnack_mode XNACK mode of device.
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode);
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
// @brief GPU agent destructor.
~GpuAgent();
-15
Просмотреть файл
@@ -246,21 +246,6 @@ hsa_status_t hsa_amd_deregister_deallocation_callback(
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t signal,
volatile hsa_signal_value_t** value_ptr);
// Mirrors the AMD extension API hsa_amd_svm_attributes_set.
// Sets the attributes in attribute_list (attribute_count entries) on the
// address range described by ptr and size.
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
// Mirrors the AMD extension API hsa_amd_svm_attributes_get.
// Queries the attributes named in attribute_list (attribute_count entries)
// for the address range described by ptr and size; results are written back
// into attribute_list.
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
// Mirrors the AMD extension API hsa_amd_svm_prefetch_async.
// Requests an asynchronous prefetch of the address range to agent once the
// num_dep_signals signals in dep_signals are satisfied; completion_signal is
// used to report completion.
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);
} // namespace amd
} // namespace rocr
-38
Просмотреть файл
@@ -292,15 +292,6 @@ class Runtime {
hsa_status_t IPCDetach(void* ptr);
/// @brief Sets SVM attributes on the page-aligned range covering
/// [ptr, ptr + size).
/// @param [in] attribute_list Attribute/value pairs to apply.
/// @param [in] attribute_count Number of entries in @p attribute_list.
hsa_status_t SetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
/// @brief Queries SVM attributes of the page-aligned range covering
/// [ptr, ptr + size).
/// @param [in, out] attribute_list Attributes to query; results are written
/// back into the same entries.
/// @param [in] attribute_count Number of entries in @p attribute_list.
hsa_status_t GetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
/// @brief Queues an asynchronous prefetch of the page-aligned range covering
/// [ptr, ptr + size) to @p agent.
/// @param [in] num_dep_signals Number of entries in @p dep_signals.
/// @param [in] dep_signals Signals that must reach zero before the prefetch
/// is issued.
/// @param [in] completion_signal Decremented by one when the prefetch has
/// been issued, if its handle is non-zero.
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
@@ -404,28 +395,6 @@ class Runtime {
std::vector<void*> arg_;
};
// Forward declaration: PrefetchOp and PrefetchRange refer to each other.
struct PrefetchRange;
// Pending prefetch ranges keyed by their start address.
typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;
// Book-keeping for one in-flight SvmPrefetch request.
struct PrefetchOp {
// Page-aligned start address of the prefetched range.
void* base;
// Page-aligned length of the prefetched range in bytes.
size_t size;
// KFD node id of the destination; SvmPrefetch stores 0 for CPU agents.
uint32_t node_id;
// Number of entries in dep_signals that still have to be waited on.
int remaining_deps;
// Signal decremented once the prefetch has been issued (ignored when the
// handle is 0).
hsa_signal_t completion;
// Dependency signals not yet waited on; the last user-supplied dependency is
// waited on first and is not stored here.
std::vector<hsa_signal_t> dep_signals;
// First entry of this operation's chain of ranges in the prefetch map, or
// the map's end() when no range remains.
prefetch_map_t::iterator prefetch_map_entry;
};
// One contiguous piece of a pending prefetch, stored in prefetch_map_t keyed
// by its start address. Pieces that belong to the same PrefetchOp are chained
// through prev/next; the map's end() iterator marks either end of the chain.
struct PrefetchRange {
  // Default construction yields an empty range that belongs to no operation.
  // prev/next are left default-constructed and must be assigned before use.
  PrefetchRange() {}
  PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}
  // Length of this piece in bytes.
  size_t bytes = 0;
  // Owning prefetch operation.
  PrefetchOp* op = nullptr;
  // Previous piece of the same operation.
  prefetch_map_t::iterator prev;
  // Next piece of the same operation.
  prefetch_map_t::iterator next;
};
// Will be created before any user could call hsa_init but also could be
// destroyed before incorrectly written programs call hsa_shutdown.
static KernelMutex bootstrap_lock_;
@@ -475,9 +444,6 @@ class Runtime {
/// @retval Index in ::link_matrix_.
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
/// @brief Get most recently issued SVM prefetch agent for the range in question.
Agent* GetSVMPrefetchAgent(void* ptr, size_t size);
// Mutex object to protect multithreaded access to ::allocation_map_,
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
// registered & mapped arrays.
@@ -519,10 +485,6 @@ class Runtime {
// Contains the region, address, and size of previously allocated memory.
std::map<const void*, AllocationRegion> allocation_map_;
// Pending prefetch containers.
KernelMutex prefetch_lock_;
prefetch_map_t prefetch_map_;
// Allocator using ::system_region_
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)> system_allocator_;
+3 -3
Просмотреть файл
@@ -77,7 +77,7 @@ extern HsaApiTable hsa_internal_api_table_;
} // namespace core
namespace AMD {
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode)
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
: GpuAgentInt(node),
properties_(node_props),
current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
@@ -125,8 +125,8 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna
rocr::core::IsaFeature xnack = rocr::core::IsaFeature::Unsupported;
if (isa_base->IsXnackSupported()) {
// TODO: This needs to be obtained from KFD once HMM is implemented.
xnack = xnack_mode ? core::IsaFeature::Enabled
: core::IsaFeature::Disabled;
xnack = profile_ == HSA_PROFILE_FULL ? core::IsaFeature::Enabled
: core::IsaFeature::Disabled;
}
// Set instruction set architecture via node property, only on GPU device.
+5 -40
Просмотреть файл
@@ -68,38 +68,6 @@ namespace AMD {
static const uint kKfdVersionMajor = 0;
static const uint kKfdVersionMinor = 99;
// Query for user preference and use that to determine Xnack mode of ROCm system.
// Return true if Xnack mode is ON or false if OFF. Xnack mode of a system is
// orthogonal to devices that do not support Xnack mode. It is legal for a
// system with Xnack ON to have devices that do not support Xnack functionality.
bool BindXnackMode() {
// Get users' preference for Xnack mode of ROCm platform
HSAint32 mode;
mode = core::Runtime::runtime_singleton_->flag().xnack();
bool config_xnack =
(core::Runtime::runtime_singleton_->flag().xnack() != Flag::XNACK_REQUEST::XNACK_UNCHANGED);
// Indicate to driver users' preference for Xnack mode
// Call to driver can fail and is a supported feature
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
if (config_xnack) {
status = hsaKmtSetXNACKMode(mode);
if (status == HSAKMT_STATUS_SUCCESS) {
return mode;
}
}
// Get Xnack mode of devices bound by driver. This could happen
// when a call to SET Xnack mode fails or user has no particular
// preference
status = hsaKmtGetXNACKMode((HSAint32*)&mode);
if(status != HSAKMT_STATUS_SUCCESS) {
debug_print("KFD does not support xnack mode query.\nROCr must assume xnack is disabled.\n");
return false;
}
return mode;
}
CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
if (node_prop.NumCPUCores == 0) {
return nullptr;
@@ -111,14 +79,14 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
return cpu;
}
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode) {
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
GpuAgent* gpu = nullptr;
if (node_prop.NumFComputeCores == 0) {
// Ignore non GPUs.
return nullptr;
}
try {
gpu = new GpuAgent(node_id, node_prop, xnack_mode);
gpu = new GpuAgent(node_id, node_prop);
const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion();
@@ -244,7 +212,7 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
/**
* Process the list of Gpus that are surfaced to user
*/
static void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode) {
static void SurfaceGpuList(std::vector<int32_t>& gpu_list) {
// Process user visible Gpu devices
int32_t invalidIdx = -1;
int32_t list_sz = gpu_list.size();
@@ -261,7 +229,7 @@ static void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode) {
// Instantiate a Gpu device. The IO links
// of this node have already been registered
assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery.");
DiscoverGpu(gpu_list[idx], node_prop, xnack_mode);
DiscoverGpu(gpu_list[idx], node_prop);
}
}
@@ -345,11 +313,8 @@ void BuildTopology() {
RegisterLinkInfo(node_id, node_prop.NumIOLinks);
}
// Determine the Xnack mode to be bound for system
bool xnack_mode = BindXnackMode();
// Instantiate ROCr objects to encapsulate Gpu devices
SurfaceGpuList(gpu_usr_list, xnack_mode);
SurfaceGpuList(gpu_usr_list);
}
bool Load() {
-3
Просмотреть файл
@@ -391,9 +391,6 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_register_deallocation_callback_fn = AMD::hsa_amd_register_deallocation_callback;
amd_ext_api.hsa_amd_deregister_deallocation_callback_fn = AMD::hsa_amd_deregister_deallocation_callback;
amd_ext_api.hsa_amd_signal_value_pointer_fn = AMD::hsa_amd_signal_value_pointer;
amd_ext_api.hsa_amd_svm_attributes_set_fn = AMD::hsa_amd_svm_attributes_set;
amd_ext_api.hsa_amd_svm_attributes_get_fn = AMD::hsa_amd_svm_attributes_get;
amd_ext_api.hsa_amd_svm_prefetch_async_fn = AMD::hsa_amd_svm_prefetch_async;
}
void LoadInitialHsaApiTable() {
-32
Просмотреть файл
@@ -1009,37 +1009,5 @@ hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifie
CATCH;
}
// Runtime-side implementation of hsa_amd_svm_attributes_set: delegates to
// Runtime::SetSvmAttrib on the runtime singleton.
// NOTE(review): TRY/CATCH presumably translate exceptions into hsa_status_t
// and IS_OPEN() presumably rejects calls made while the runtime is not
// initialized - the macros are defined elsewhere, confirm there.
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count) {
TRY;
IS_OPEN();
return core::Runtime::runtime_singleton_->SetSvmAttrib(ptr, size, attribute_list,
attribute_count);
CATCH;
}
// Runtime-side implementation of hsa_amd_svm_attributes_get: delegates to
// Runtime::GetSvmAttrib on the runtime singleton.
// NOTE(review): TRY/CATCH presumably translate exceptions into hsa_status_t
// and IS_OPEN() presumably rejects calls made while the runtime is not
// initialized - the macros are defined elsewhere, confirm there.
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count) {
TRY;
IS_OPEN();
return core::Runtime::runtime_singleton_->GetSvmAttrib(ptr, size, attribute_list,
attribute_count);
CATCH;
}
// Runtime-side implementation of hsa_amd_svm_prefetch_async: delegates to
// Runtime::SvmPrefetch on the runtime singleton.
// NOTE(review): TRY/CATCH presumably translate exceptions into hsa_status_t
// and IS_OPEN() presumably rejects calls made while the runtime is not
// initialized - the macros are defined elsewhere, confirm there.
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal) {
TRY;
IS_OPEN();
// Validate inputs.
// NOTE(review): no validation is performed here yet; the check below is
// disabled and refers to a name (signal) that is not a parameter.
// if (core::g_use_interrupt_wait && (!core::InterruptSignal::IsType(signal)))
return core::Runtime::runtime_singleton_->SvmPrefetch(ptr, size, agent, num_dep_signals,
dep_signals, completion_signal);
CATCH;
}
} // namespace amd
} // namespace rocr
-506
Просмотреть файл
@@ -644,18 +644,6 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
*(const char**)value = STRING(ROCR_BUILD_ID);
break;
}
case HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: {
// todo: Get HMM kernel support info.
*(bool*)value = true;
break;
}
case HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: {
bool ret = true;
for(auto agent : gpu_agents_)
ret &= (agent->isa()->GetXnack() == IsaFeature::Enabled);
*(bool*)value = ret;
break;
}
default:
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
@@ -1598,499 +1586,5 @@ void Runtime::InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t ag
internal_queue_create_notifier_(queue, agent, internal_queue_create_notifier_user_data_);
}
/// @brief Applies SVM attributes to the pages covering [ptr, ptr + size).
///
/// Each HSA attribute is translated into its KFD counterpart: boolean style
/// attributes are folded into a set-flags / clear-flags pair, per-agent access
/// attributes become per-node KFD attributes (CPU agents are expressed through
/// the host-access flag instead). The resulting list is handed to
/// hsaKmtSVMSetAttr in a single call.
///
/// @param [in] ptr Start of the range; aligned down to a 4096 byte boundary.
/// @param [in] size Length of the range; its end is aligned up to 4096 bytes.
/// @param [in] attribute_list Attribute/value pairs to apply.
/// @param [in] attribute_count Number of entries in @p attribute_list.
///
/// @retval HSA_STATUS_SUCCESS The attributes were applied.
///
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT for a null
/// list, an unknown attribute or an invalid value,
/// HSA_STATUS_ERROR_INVALID_AGENT for a bad agent handle,
/// HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS when an attribute or agent is given
/// more than once, and HSA_STATUS_ERROR when the KFD call fails.
hsa_status_t Runtime::SetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  if ((attribute_count != 0) && (attribute_list == nullptr))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null attribute list in Runtime::SetSvmAttrib.");

  // Bitmask of single-instance attributes already encountered.
  uint32_t set_attribs = 0;
  // Tracks which nodes already received an access attribute.
  std::vector<bool> agent_seen(agents_by_node_.size(), false);

  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  // Up to two flag updates (clear + set) are appended after the loop.
  attribs.reserve(attribute_count + 2);
  uint32_t set_flags = 0;
  uint32_t clear_flags = 0;

  // Converts a handle to an agent; the null agent is rejected.
  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Converts a handle to an agent; the null agent is passed through.
  auto ConvertAllowNull = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent != nullptr) && (!agent->IsValid()))
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Rejects a second access attribute for the same node.
  auto ConfirmNew = [&](Agent* agent) {
    const size_t node = agent->node_id();
    // Node ids are not guaranteed to be smaller than the number of known
    // nodes, so grow the table rather than index out of bounds.
    if (node >= agent_seen.size()) agent_seen.resize(node + 1, false);
    if (agent_seen[node])
      throw AMD::hsa_exception(
          HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
          "Multiple attributes given for the same agent in Runtime::SetSvmAttrib.");
    agent_seen[node] = true;
  };

  // Rejects a single-instance attribute that is given more than once.
  // Only called for the small attribute ids (0..4), so the shift is in range.
  auto Check = [&](uint64_t attrib) {
    const uint32_t bit = 1u << attrib;
    if (set_attribs & bit)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
                               "Attribute given multiple times in Runtime::SetSvmAttrib.");
    set_attribs |= bit;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  for (size_t i = 0; i < attribute_count; i++) {
    auto attrib = attribute_list[i].attribute;
    auto value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        Check(attrib);
        switch (value) {
          case HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED:
            set_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          case HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED:
            clear_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          default:
            throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                     "Invalid HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG value.");
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_GPU_RO;
        else
          clear_flags |= HSA_SVM_FLAG_GPU_RO;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        else
          clear_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        Check(attrib);
        // Granularity is log2(page count). Max migration size is 1GB.
        if (value > 18) value = 18;
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, value));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        Check(attrib);
        // The null agent means "no preferred location".
        Agent* agent = ConvertAllowNull(value);
        if (agent == nullptr)
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, INVALID_NODEID));
        else
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, agent->node_id()));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS_IN_PLACE, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          clear_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_NO_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::SetSvmAttrib");
    }
  }

  // Merge CPU access properties - grant access if any CPU needs access.
  // Probably wrong.
  if (set_flags & HSA_SVM_FLAG_HOST_ACCESS) clear_flags &= ~HSA_SVM_FLAG_HOST_ACCESS;

  // Add flag updates
  if (clear_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_CLR_FLAGS, clear_flags));
  if (set_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, set_flags));

  // KFD operates on whole pages.
  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;

  // data() is well defined for an empty list, unlike &attribs[0].
  HSAKMT_STATUS error = hsaKmtSVMSetAttr(base, len, attribs.size(), attribs.data());
  if (error != HSAKMT_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMSetAttr failed.");

  return HSA_STATUS_SUCCESS;
}
/// @brief Queries SVM attributes of the pages covering [ptr, ptr + size).
///
/// Works in two passes over @p attribute_list. The first pass builds the KFD
/// query: flag style attributes share one trailing flags entry, the others get
/// an entry of their own whose index is remembered in kmtIndices (-1 means
/// "answered from the flags entry or without KFD"). After hsaKmtSVMGetAttr the
/// second pass translates the KFD answers back into HSA values.
///
/// @param [in] ptr Start of the range; aligned down to a 4096 byte boundary.
/// @param [in] size Length of the range; its end is aligned up to 4096 bytes.
/// @param [in, out] attribute_list Attributes to query. The value field is
/// overwritten with the result; for HSA_AMD_SVM_ATTRIB_ACCESS_QUERY the value
/// holds the agent to query and the attribute field is replaced by that
/// agent's access attribute.
/// @param [in] attribute_count Number of entries in @p attribute_list.
///
/// @retval HSA_STATUS_SUCCESS The query succeeded.
///
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT for a null
/// list or an unknown attribute, HSA_STATUS_ERROR_INVALID_AGENT for a bad agent
/// handle and HSA_STATUS_ERROR when the KFD call fails.
hsa_status_t Runtime::GetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  if ((attribute_count != 0) && (attribute_list == nullptr))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null attribute list in Runtime::GetSvmAttrib.");

  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  // One extra entry for the shared flags query appended after the loop.
  attribs.reserve(attribute_count + 1);

  // Index into attribs for each user attribute, or -1 when there is none.
  std::vector<int> kmtIndices(attribute_count);

  bool getFlags = false;

  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::GetSvmAttrib.");
    return agent;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  // Pass 1: build the KFD query.
  for (size_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG:
      case HSA_AMD_SVM_ATTRIB_READ_ONLY:
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        getFlags = true;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        kmtIndices[i] = static_cast<int>(attribs.size());
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        kmtIndices[i] = static_cast<int>(attribs.size());
        attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Answered from the runtime's own prefetch tracking.
        value = Agent::Convert(GetSVMPrefetchAgent(ptr, size)).handle;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        Agent* agent = Convert(value);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          // CPU access is expressed through the host-access flag.
          getFlags = true;
          kmtIndices[i] = -1;
        } else {
          kmtIndices[i] = static_cast<int>(attribs.size());
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  // The flags entry, when requested, is always the last one.
  if (getFlags) attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, 0));

  // KFD operates on whole pages.
  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;

  if (attribs.size() != 0) {
    HSAKMT_STATUS error = hsaKmtSVMGetAttr(base, len, attribs.size(), attribs.data());
    if (error != HSAKMT_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMGetAttr failed.");
  }

  // Pass 2: translate the KFD answers back into HSA values.
  for (size_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        if (attribs.back().value & HSA_SVM_FLAG_COHERENT)
          value = HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED;
        else
          value = HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        // Non-zero (the raw flag bit) when the range is read only.
        value = (attribs.back().value & HSA_SVM_FLAG_GPU_RO);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        // Non-zero (the raw flag bit) when the range is hive local.
        value = (attribs.back().value & HSA_SVM_FLAG_HIVE_LOCAL);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        value = attribs[kmtIndices[i]].value;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        uint64_t node = attribs[kmtIndices[i]].value;
        Agent* agent = nullptr;
        if (node != INVALID_NODEID) agent = agents_by_node_[node][0];
        value = Agent::Convert(agent).handle;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Already filled in during pass 1.
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        if (kmtIndices[i] == -1) {
          // CPU agent: the host-access flag is either set or cleared, so the
          // query always resolves to a definite access attribute.
          if (attribs.back().value & HSA_SVM_FLAG_HOST_ACCESS)
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
          else
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
        } else {
          switch (attribs[kmtIndices[i]].type) {
            case HSA_SVM_ATTR_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
              break;
            case HSA_SVM_ATTR_ACCESS_IN_PLACE:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE;
              break;
            case HSA_SVM_ATTR_NO_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
              break;
            default:
              assert(false && "Bad agent accessibility from KFD.");
          }
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Queues an asynchronous prefetch of [ptr, ptr + size) to @p agent.
///
/// The range is page aligned and recorded in prefetch_map_ so that the most
/// recently issued prefetch destination can be reported while the operation is
/// still pending; older pending ranges that overlap the new one are trimmed,
/// split or removed. The dependency signals are then waited on one at a time
/// (last one first) through async signal handlers; once all are satisfied the
/// prefetch is issued to KFD, the range is removed from the map and the
/// completion signal is decremented.
///
/// @param [in] ptr Start of the range; aligned down to a 4096 byte boundary.
/// @param [in] size Length of the range; its end is aligned up to 4096 bytes.
/// @param [in] agent Destination agent. CPU agents map to node 0.
/// @param [in] num_dep_signals Number of entries in @p dep_signals.
/// @param [in] dep_signals Signals that must reach 0 before prefetching.
/// @param [in] completion_signal Decremented by one when done, if non-null.
///
/// @retval HSA_STATUS_SUCCESS The prefetch was queued.
///
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_AGENT for a bad
/// agent handle, HSA_STATUS_ERROR_INVALID_ARGUMENT for a missing dependency
/// list, or the failing status when the first handler can not be installed.
hsa_status_t Runtime::SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent,
                                  uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                  hsa_signal_t completion_signal) {
  // Validate before allocating anything; the agent is dereferenced below.
  Agent* dest = Agent::Convert(agent);
  if ((dest == nullptr) || !dest->IsValid())
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                             "Invalid agent handle in Runtime::SvmPrefetch.");
  if ((num_dep_signals != 0) && (dep_signals == nullptr))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Null dependency list in Runtime::SvmPrefetch.");

  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);
  size_t len = end - base;

  PrefetchOp* op = new PrefetchOp();
  MAKE_NAMED_SCOPE_GUARD(OpGuard, [&]() { delete op; });

  if (dest->device_type() == Agent::kAmdCpuDevice)
    op->node_id = 0;
  else
    op->node_id = dest->node_id();

  op->base = reinterpret_cast<void*>(base);
  op->size = len;
  op->completion = completion_signal;
  if (num_dep_signals > 1) {
    // The last dependency is waited on directly below; the rest are stored
    // and consumed back to front by the signal handler.
    op->remaining_deps = num_dep_signals - 1;
    op->dep_signals.assign(dep_signals, dep_signals + (num_dep_signals - 1));
  } else {
    op->remaining_deps = 0;
  }

  {
    ScopedAcquire<KernelMutex> lock(&prefetch_lock_);
    // Remove all fully overlapped and trim partially overlapped ranges.
    // Get iteration bounds
    auto start = prefetch_map_.upper_bound(base);
    if (start != prefetch_map_.begin()) start--;
    auto stop = prefetch_map_.lower_bound(end);

    auto isEndNode = [&](decltype(start) node) { return node->second.next == prefetch_map_.end(); };
    auto isFirstNode = [&](decltype(start) node) {
      return node->second.prev == prefetch_map_.end();
    };

    // Trim and remove old ranges.
    while (start != stop) {
      uintptr_t startBase = start->first;
      uintptr_t startEnd = startBase + start->second.bytes;

      auto ibase = Max(startBase, base);
      auto iend = Min(startEnd, end);
      // Check for overlap
      if (ibase < iend) {
        // Second range check: the old range extends past the new one, so keep
        // its tail as a separate entry linked after the old entry.
        if (iend < startEnd) {
          auto ret = prefetch_map_.insert(
              std::make_pair(iend, PrefetchRange(startEnd - iend, start->second.op)));
          assert(ret.second && "Prefetch map insert failed during range split.");

          auto it = ret.first;
          it->second.prev = start;
          it->second.next = start->second.next;
          start->second.next = it;
          if (!isEndNode(it)) it->second.next->second.prev = it;
        }

        // Is the first interval of the old range valid
        if (startBase < ibase) {
          start->second.bytes = ibase - startBase;
        } else {
          // Nothing of the old entry remains in front of the new range:
          // unlink it from its operation's chain and drop it.
          if (isFirstNode(start)) {
            start->second.op->prefetch_map_entry = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = prefetch_map_.end();
          } else {
            start->second.prev->second.next = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = start->second.prev;
          }
          // erase() invalidates start; continue from the iterator it returns
          // instead of incrementing the dead one.
          start = prefetch_map_.erase(start);
          continue;
        }
      }
      ++start;
    }

    // Insert new range.
    auto ret = prefetch_map_.insert(std::make_pair(base, PrefetchRange(len, op)));
    assert(ret.second && "Prefetch map insert failed.");

    auto it = ret.first;
    op->prefetch_map_entry = it;
    it->second.next = it->second.prev = prefetch_map_.end();
  }

  // Remove the prefetch's ranges from the map.
  static auto removePrefetchRanges = [](PrefetchOp* op) {
    ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
    auto it = op->prefetch_map_entry;
    while (it != Runtime::runtime_singleton_->prefetch_map_.end()) {
      auto next = it->second.next;
      Runtime::runtime_singleton_->prefetch_map_.erase(it);
      it = next;
    }
  };

  // Prefetch Signal handler for synchronization.
  // Re-arms itself on the next outstanding dependency; once none remain it
  // issues the prefetch, cleans up and releases the operation.
  static hsa_amd_signal_handler signal_handler = [](hsa_signal_value_t value, void* arg) {
    PrefetchOp* op = reinterpret_cast<PrefetchOp*>(arg);

    if (op->remaining_deps > 0) {
      op->remaining_deps--;
      Runtime::runtime_singleton_->SetAsyncSignalHandler(
          op->dep_signals[op->remaining_deps], HSA_SIGNAL_CONDITION_EQ, 0, signal_handler, arg);
      return false;
    }

    HSA_SVM_ATTRIBUTE attrib;
    attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
    attrib.value = op->node_id;
    HSAKMT_STATUS error = hsaKmtSVMSetAttr(op->base, op->size, 1, &attrib);
    assert(error == HSAKMT_STATUS_SUCCESS && "KFD Prefetch failed.");

    removePrefetchRanges(op);
    if (op->completion.handle != 0) Signal::Convert(op->completion)->SubRelaxed(1);
    delete op;
    return false;
  };

  auto no_dependencies = [](void* arg) { signal_handler(0, arg); };

  // Undo the map insertion if the handler can not be installed.
  MAKE_NAMED_SCOPE_GUARD(RangeGuard, [&]() { removePrefetchRanges(op); });

  hsa_status_t err;
  if (num_dep_signals == 0)
    err = AMD::hsa_amd_async_function(no_dependencies, op);
  else
    err = SetAsyncSignalHandler(dep_signals[num_dep_signals - 1], HSA_SIGNAL_CONDITION_EQ, 0,
                                signal_handler, op);
  if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "Signal handler unable to be set.");

  // Ownership of op and its map entries now belongs to the handler.
  RangeGuard.Dismiss();
  OpGuard.Dismiss();
  return HSA_STATUS_SUCCESS;
}
/// @brief Returns the agent most recently targeted by a prefetch of the range.
///
/// Pending prefetches recorded in prefetch_map_ take precedence; the parts of
/// the query range not covered by a pending prefetch ("holes") are looked up
/// in KFD. All parts must agree on one destination node.
///
/// @param [in] ptr Start of the range; aligned down to a 4096 byte boundary.
/// @param [in] size Length of the range; its end is aligned up to 4096 bytes.
///
/// @retval The first agent registered for the common destination node, or
/// nullptr when the destinations differ, when KFD reports none/mixed, when the
/// KFD query fails or when the aligned range is empty.
Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
  // KFD returns -1 for no or mixed destinations.
  const uint32_t kNoneOrMixed = static_cast<uint32_t>(-1);
  // Local marker: no destination has been observed yet.
  const uint32_t kUnset = static_cast<uint32_t>(-2);

  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);

  // (start, length) of sub-ranges that have no pending prefetch.
  std::vector<std::pair<uintptr_t, uintptr_t>> holes;

  ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
  auto start = prefetch_map_.upper_bound(base);
  if (start != prefetch_map_.begin()) start--;
  auto stop = prefetch_map_.lower_bound(end);

  uint32_t prefetch_node = kUnset;

  while (start != stop) {
    uintptr_t startBase = start->first;
    uintptr_t startEnd = startBase + start->second.bytes;

    auto ibase = Max(base, startBase);
    auto iend = Min(end, startEnd);
    // Check for intersection with the query
    if (ibase < iend) {
      // Only ranges that really intersect the query contribute a destination;
      // the first candidate may end before the query starts.
      const uint32_t node = start->second.op->node_id;
      if (prefetch_node == kUnset) prefetch_node = node;
      // If prefetch locations are different then we report null agent.
      if (prefetch_node != node) return nullptr;
      // Push leading gap to an array for checking KFD.
      if (base < ibase) holes.push_back(std::make_pair(base, ibase - base));
      // Trim query range.
      base = iend;
    }
    ++start;
  }
  // Trailing gap after the last pending range.
  if (base < end) holes.push_back(std::make_pair(base, end - base));

  HSA_SVM_ATTRIBUTE attrib;
  attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
  for (auto& range : holes) {
    HSAKMT_STATUS error =
        hsaKmtSVMGetAttr(reinterpret_cast<void*>(range.first), range.second, 1, &attrib);
    assert(error == HSAKMT_STATUS_SUCCESS && "KFD prefetch query failed.");
    // Without asserts attrib.value would be read uninitialized on failure.
    if (error != HSAKMT_STATUS_SUCCESS) return nullptr;
    if (attrib.value == kNoneOrMixed) return nullptr;
    if (prefetch_node == kUnset) prefetch_node = attrib.value;
    if (prefetch_node != attrib.value) return nullptr;
  }

  // An empty query range yields neither pending ranges nor holes.
  if (prefetch_node == kUnset) return nullptr;
  assert(prefetch_node != kNoneOrMixed && "Should have already returned.");
  return agents_by_node_[prefetch_node][0];
}
} // namespace core
} // namespace rocr
-15
Просмотреть файл
@@ -56,11 +56,6 @@ class Flag {
public:
enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
// The values are meaningful and chosen to satisfy the thunk API.
enum XNACK_REQUEST { XNACK_DISABLE = 0, XNACK_ENABLE = 1, XNACK_UNCHANGED = 2 };
static_assert(XNACK_DISABLE == 0, "XNACK_REQUEST enum values improperly changed.");
static_assert(XNACK_ENABLE == 1, "XNACK_REQUEST enum values improperly changed.");
enum FLAG_TRI_STATE { FLAG_DISABLE = 0, FLAG_ENABLE = 1, FLAG_DEFAULT = 2 };
explicit Flag() { Refresh(); }
@@ -144,11 +139,6 @@ class Flag {
var = os::GetEnvVar("HSA_IGNORE_SRAMECC_MISREPORT");
check_sramecc_validity_ = (var == "1") ? false : true;
// Legal values are zero "0" or one "1". Any other value will
// be interpreted as not defining the env variable
var = os::GetEnvVar("HSA_XNACK");
xnack_ = (var == "0") ? XNACK_DISABLE : ((var == "1") ? XNACK_ENABLE : XNACK_UNCHANGED);
// Legal values are zero "0" or one "1". Any other value will
// be interpreted as not defining the env variable.
var = os::GetEnvVar("HSA_FORCE_SRAMECC");
@@ -206,8 +196,6 @@ class Flag {
bool check_sramecc_validity() const { return check_sramecc_validity_; }
XNACK_REQUEST xnack() const { return xnack_; }
FLAG_TRI_STATE sramecc() const { return sramecc_; }
private:
@@ -243,9 +231,6 @@ class Flag {
size_t force_sdma_size_;
// Indicates user preference for Xnack state.
XNACK_REQUEST xnack_;
// Indicates user preference for SramECC state.
FLAG_TRI_STATE sramecc_;
-3
Просмотреть файл
@@ -222,9 +222,6 @@ global:
hsa_amd_deregister_deallocation_callback;
hsa_amd_signal_value_pointer;
_amdgpu_r_debug;
hsa_amd_svm_attributes_set;
hsa_amd_svm_attributes_get;
hsa_amd_svm_prefetch_async;
local:
*;
+1 -15
Просмотреть файл
@@ -480,21 +480,7 @@ typedef enum {
/**
* String containing the ROCr build identifier.
*/
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
/**
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
* this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201,
// TODO: Should this be per Agent?
/**
* Returns true if all Agents have access to system allocated memory (such as
* that allocated by mmap, malloc, or new) by default.
* If false then system allocated memory may only be made SVM accessible to
* an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
* The type of this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
} hsa_system_info_t;
/**
-3
Просмотреть файл
@@ -183,9 +183,6 @@ struct AmdExtTable {
decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
};
// Table to export HSA Core Runtime Apis
-156
Просмотреть файл
@@ -2116,162 +2116,6 @@ hsa_status_t HSA_API hsa_amd_register_deallocation_callback(void* ptr,
hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(void* ptr,
hsa_amd_deallocation_callback_t callback);
/**
 * @brief Memory models selectable through HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG.
 */
typedef enum hsa_amd_svm_model_s {
/**
* Updates to memory with this attribute conform to the HSA memory consistency
* model.
*/
HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED = 0,
/**
* Writes to memory with this attribute can be performed by a single agent at
* a time.
*/
HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1
} hsa_amd_svm_model_t;
/**
 * @brief Attributes accepted by ::hsa_amd_svm_attributes_set and
 * ::hsa_amd_svm_attributes_get. Each entry states which of the two calls it
 * may be used with and the type of its value.
 */
typedef enum hsa_amd_svm_attribute_s {
// Memory model attribute.
// Type of this attribute is hsa_amd_svm_model_t.
HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0,
// Marks the range read only. This allows multiple physical copies to be
// placed local to each accessing device.
// Type of this attribute is bool.
HSA_AMD_SVM_ATTRIB_READ_ONLY = 1,
// Automatic migrations should attempt to keep the memory within the xgmi hive
// containing accessible agents.
// Type of this attribute is bool.
HSA_AMD_SVM_ATTRIB_HIVE_LOCAL = 2,
// Page granularity to migrate at once. Page granularity is specified as
// log2(page_count).
// Type of this attribute is uint64_t.
HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY = 3,
// Physical location to prefer when automatic migration occurs.
// Set to the null agent handle (handle == 0) to indicate there
// is no preferred location.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION = 4,
// This attribute can not be used in ::hsa_amd_svm_attributes_set (see
// ::hsa_amd_svm_prefetch_async).
// Physical location of the most recent prefetch command.
// If the prefetch location has not been set or is not uniform across the
// address range then the returned hsa_agent_t::handle will be 0.
// Querying this attribute will return the destination agent of the most
// recent ::hsa_amd_svm_prefetch_async targeting the address range. If
// multiple async prefetches have been issued targeting the region and the
// most recently issued prefetch has completed then the query will return
// the location of the most recently completed prefetch.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION = 5,
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
// Enables an agent for access to the range. Access may incur a page fault
// and associated memory migration. Either this or
// HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE is required prior to SVM
// access if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE = 0x200,
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
// Enables an agent for access to the range without page faults. Access
// will not incur a page fault and will not cause access based migration.
// Either this or HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE is required prior to
// SVM access if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE = 0x201,
// This attribute can not be used in ::hsa_amd_svm_attributes_get.
// Denies an agent access to the memory range. Access will cause a terminal
// segfault.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS = 0x202,
// This attribute can not be used in ::hsa_amd_svm_attributes_set.
// Returns the access attribute associated with the agent.
// The agent to query must be set in the attribute value field.
// The attribute enum will be replaced with the agent's current access
// attribute for the address range.
// TODO: Clarify KFD return value for non-uniform access attribute.
// Type of this attribute is hsa_agent_t.
HSA_AMD_SVM_ATTRIB_ACCESS_QUERY = 0x203,
} hsa_amd_svm_attribute_t;
// Attribute/value pair. Element type of the attribute lists passed to
// ::hsa_amd_svm_attributes_set and ::hsa_amd_svm_attributes_get.
typedef struct hsa_amd_svm_attribute_pair_s {
// Attribute selector: an hsa_amd_svm_attribute_t enumerator stored as a
// uint64_t.
uint64_t attribute;
// Attribute value. Bit values should be interpreted according to the type
// (e.g. bool, uint64_t or hsa_agent_t) given in the associated attribute
// description.
uint64_t value;
} hsa_amd_svm_attribute_pair_t;
/**
 * @brief Sets SVM memory attributes.
 *
 * Sets the attributes given in @p attribute_list for the address range
 * described by @p ptr and @p size.
 *
 * If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT returns false then enabling
 * access to an Agent via this API (setting HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE
 * or HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE) is required prior to SVM
 * memory access by that Agent.
 *
 * Attributes HSA_AMD_SVM_ATTRIB_ACCESS_QUERY and HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION
 * may not be used with this API.
 *
 * @param[in] ptr Start of the address range. Will be aligned down to nearest
 * page boundary.
 *
 * @param[in] size Size of the address range. Will be aligned up to nearest
 * page boundary.
 *
 * @param[in] attribute_list List of attribute/value pairs to set for the
 * address range. Each value is interpreted according to the type given in the
 * description of its attribute.
 *
 * @param[in] attribute_count Length of @p attribute_list.
 */
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
/**
 * @brief Gets SVM memory attributes.
 *
 * Queries the attributes named in @p attribute_list for the address range
 * described by @p ptr and @p size.
 *
 * Attributes HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE,
 * HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE and
 * HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS may not be used with this API (see the
 * attribute descriptions in hsa_amd_svm_attribute_t); use
 * HSA_AMD_SVM_ATTRIB_ACCESS_QUERY to read an agent's access attribute instead.
 * HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION can not be set but may be queried here.
 *
 * Note that attribute HSA_AMD_SVM_ATTRIB_ACCESS_QUERY takes as input an
 * hsa_agent_t and returns the current access type through its attribute field.
 *
 * @param[in] ptr Start of the address range. Will be aligned down to nearest
 * page boundary.
 *
 * @param[in] size Size of the address range. Will be aligned up to nearest
 * page boundary.
 *
 * @param[in,out] attribute_list List of attributes to query for the address
 * range. On input each entry names the attribute to query (and, for
 * HSA_AMD_SVM_ATTRIB_ACCESS_QUERY, carries the agent in its value field).
 * Results are returned through the same entries; presumably in the value
 * field for all attributes other than HSA_AMD_SVM_ATTRIB_ACCESS_QUERY
 * (TODO: confirm against the implementation).
 *
 * @param[in] attribute_count Length of @p attribute_list.
 */
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
hsa_amd_svm_attribute_pair_t* attribute_list,
size_t attribute_count);
/**
 * @brief Asynchronously migrates memory to an agent.
 *
 * Schedules memory migration to @p agent when @p dep_signals have been observed equal to zero.
 * @p completion_signal will decrement when the migration is complete.
 *
 * @param[in] ptr Start of the address range to migrate. Will be aligned down
 * to nearest page boundary.
 *
 * @param[in] size Size of the address range to migrate. Will be aligned up to
 * nearest page boundary.
 *
 * @param[in] agent Agent to migrate to.
 *
 * @param[in] num_dep_signals Number of dependent signals. Can be 0.
 *
 * @param[in] dep_signals List of signals that must be waited on before the migration
 * operation starts. The migration will start after every signal has been observed with
 * the value 0. If @p num_dep_signals is 0, this argument is ignored.
 *
 * @param[in] completion_signal Signal used to indicate completion of the migration
 * operation. When the migration operation is finished, the value of the signal is
 * decremented. The runtime indicates that an error has occurred during the migration
 * operation by setting the value of the completion signal to a negative
 * number. If no completion signal is required this handle may be null.
 */
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);
#ifdef __cplusplus
} // end extern "C" block
#endif