Add pointer info support.

Change-Id: I3edcc0bfddbf12465065c9bc3b6565288faff1b8
This commit is contained in:
Sean Keely
2016-11-09 22:21:24 -06:00
rodzic e01c43578c
commit 9dd76dbeda
10 zmienionych plików z 279 dodań i 26 usunięć
@@ -1035,7 +1035,7 @@ hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr) {
return amdExtTable->hsa_amd_interop_unmap_buffer_fn(ptr);
}
// Use the function pointer from local instance Image Extension
// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_image_create(
hsa_agent_t agent,
const hsa_ext_image_descriptor_t *image_descriptor,
@@ -1047,3 +1047,13 @@ hsa_status_t HSA_API hsa_amd_image_create(
image_layout, image_data, access_permission, image);
}
// Mirrors Amd Extension Apis
// Public entry point for hsa_amd_pointer_info: forwards every argument,
// unchanged, to the implementation currently installed in the Amd extension
// table and returns that implementation's status.
// Declared with HSA_API so the definition carries the same linkage/calling
// convention decoration as the public declaration and the sibling wrappers in
// this file.
hsa_status_t HSA_API hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info,
                                          void* (*alloc)(size_t),
                                          uint32_t* num_agents_accessible,
                                          hsa_agent_t** accessible) {
  return amdExtTable->hsa_amd_pointer_info_fn(ptr, info, alloc, num_agents_accessible, accessible);
}
// Mirrors Amd Extension Apis
// Public entry point for hsa_amd_pointer_info_set_userdata: forwards both
// arguments, unchanged, to the implementation currently installed in the Amd
// extension table and returns that implementation's status.
// Declared with HSA_API and with the parameter named `userdata` so the
// definition matches the public declaration and the sibling wrappers in this
// file.
hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(void* ptr, void* userdata) {
  return amdExtTable->hsa_amd_pointer_info_set_userdata_fn(ptr, userdata);
}
@@ -181,6 +181,13 @@ hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents,
// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr);
// Mirrors Amd Extension Apis
// Runtime-side implementation of the public hsa_amd_pointer_info entry point;
// this is the function installed into the Amd extension API table.
// Parameters have the same meaning as in the public API declaration.
hsa_status_t hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
uint32_t* num_agents_accessible, hsa_agent_t** accessible);
// Mirrors Amd Extension Apis
// Runtime-side implementation of the public hsa_amd_pointer_info_set_userdata
// entry point; this is the function installed into the Amd extension API table.
// Parameters have the same meaning as in the public API declaration.
hsa_status_t hsa_amd_pointer_info_set_userdata(void* ptr, void* userdata);
} // end of AMD namespace
#endif // header guard
+17 -2
Wyświetl plik
@@ -70,6 +70,11 @@
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
// Forward declaration of amd::MemoryRegion. Only the class name is needed in
// this header (it is named in a friend declaration of core::Runtime), so
// declaring it here avoids including the header that defines it.
namespace amd {
class MemoryRegion;
}
namespace core {
extern bool g_use_interrupt_wait;
@@ -84,6 +89,7 @@ extern bool g_use_interrupt_wait;
/// - maintain loader state.
/// - monitor asynchronous event from agent.
class Runtime {
friend class amd::MemoryRegion;
public:
/// @brief Structure to describe connectivity between agents.
struct LinkInfo {
@@ -258,11 +264,15 @@ class Runtime {
hsa_status_t InteropUnmap(void* ptr);
hsa_status_t PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
uint32_t* num_agents_accessible, hsa_agent_t** accessible);
hsa_status_t SetPtrInfoData(void* ptr, void* userptr);
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }
Agent* blit_agent() { return blit_agent_; }
@@ -399,7 +409,9 @@ class Runtime {
// Mutex object to protect multithreaded access to ::Acquire and ::Release.
KernelMutex kernel_lock_;
// Mutex object to protect multithreaded access to ::allocation_map_.
// Mutex object to protect multithreaded access to ::allocation_map_,
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
// registered & mapped arrays.
KernelMutex memory_lock_;
// Array containing tools library handles.
@@ -411,6 +423,9 @@ class Runtime {
// Agent list containing all compatible GPU agents in the platform.
std::vector<Agent*> gpu_agents_;
// Agent map containing all agents indexed by their KFD node IDs.
std::map<uint32_t, std::vector<Agent*> > agents_by_node_;
// Agent list containing all compatible gpu agent ids in the platform.
std::vector<uint32_t> gpu_ids_;
@@ -469,11 +469,15 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
HsaMemMapFlags map_flag = map_flag_;
map_flag.ui32.HostAccess |= (cpu_in_list) ? 1 : 0;
uint64_t alternate_va = 0;
if (!amd::MemoryRegion::MakeKfdMemoryResident(
whitelist_nodes.size(), &whitelist_nodes[0], const_cast<void*>(ptr),
size, &alternate_va, map_flag)) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
{
ScopedAcquire<KernelMutex> lock(&core::Runtime::runtime_singleton_->memory_lock_);
uint64_t alternate_va = 0;
if (!amd::MemoryRegion::MakeKfdMemoryResident(
whitelist_nodes.size(), &whitelist_nodes[0], const_cast<void*>(ptr),
size, &alternate_va, map_flag)) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
}
for (GpuAgentInt* gpu : whitelist_gpus) {
@@ -358,6 +358,8 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_memory_fill_fn = AMD::hsa_amd_memory_fill;
amd_ext_api.hsa_amd_interop_map_buffer_fn = AMD::hsa_amd_interop_map_buffer;
amd_ext_api.hsa_amd_interop_unmap_buffer_fn = AMD::hsa_amd_interop_unmap_buffer;
amd_ext_api.hsa_amd_pointer_info_fn = AMD::hsa_amd_pointer_info;
amd_ext_api.hsa_amd_pointer_info_set_userdata_fn = AMD::hsa_amd_pointer_info_set_userdata;
}
class Init {
@@ -597,4 +597,18 @@ hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr) {
return HSA_STATUS_SUCCESS;
}
// AMD extension implementation of hsa_amd_pointer_info.
// Runs the entry checks and then delegates the query to the runtime singleton.
// IS_OPEN / IS_BAD_PTR are macros defined elsewhere; presumably they return
// early with an error status when the runtime is not initialized or when the
// given pointer is NULL (confirm against their definitions). Only `ptr` and
// `info` are checked here; `alloc`, `num_accessible` and `accessible` are
// passed through as-is.
hsa_status_t hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
                                  uint32_t* num_accessible, hsa_agent_t** accessible) {
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(info);

  auto& runtime = *core::Runtime::runtime_singleton_;
  const hsa_status_t status = runtime.PtrInfo(ptr, info, alloc, num_accessible, accessible);
  return status;
}
// AMD extension implementation of hsa_amd_pointer_info_set_userdata.
// Runs the entry checks and then asks the runtime singleton to attach
// `userdata` to the allocation referenced by `ptr`.
// IS_OPEN / IS_BAD_PTR are macros defined elsewhere; presumably they return
// early with an error status when the runtime is not initialized or `ptr` is
// NULL (confirm against their definitions). `userdata` itself is not checked.
hsa_status_t hsa_amd_pointer_info_set_userdata(void* ptr, void* userdata) {
  IS_OPEN();
  IS_BAD_PTR(ptr);

  auto& runtime = *core::Runtime::runtime_singleton_;
  const hsa_status_t status = runtime.SetPtrInfoData(ptr, userdata);
  return status;
}
} // end of AMD namespace
@@ -142,6 +142,10 @@ bool Runtime::IsOpen() {
}
void Runtime::RegisterAgent(Agent* agent) {
// Record the agent in the node-to-agent reverse lookup table.
agents_by_node_[agent->node_id()].push_back(agent);
// Process agent as a cpu or gpu device.
if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) {
cpu_agents_.push_back(agent);
@@ -230,6 +234,8 @@ void Runtime::RegisterAgent(Agent* agent) {
}
void Runtime::DestroyAgents() {
agents_by_node_.clear();
std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject());
gpu_agents_.clear();
@@ -302,11 +308,11 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size,
MemoryRegion::AllocateFlags alloc_flags,
void** address) {
ScopedAcquire<KernelMutex> lock(&memory_lock_);
hsa_status_t status = region->Allocate(size, alloc_flags, address);
// Track the allocation result so that it could be freed properly.
if (status == HSA_STATUS_SUCCESS) {
ScopedAcquire<KernelMutex> lock(&memory_lock_);
allocation_map_[*address] = AllocationRegion(region, size);
}
@@ -320,22 +326,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
const MemoryRegion* region = NULL;
size_t size = 0;
{
ScopedAcquire<KernelMutex> lock(&memory_lock_);
ScopedAcquire<KernelMutex> lock(&memory_lock_);
std::map<const void*, AllocationRegion>::const_iterator it =
allocation_map_.find(ptr);
std::map<const void*, AllocationRegion>::const_iterator it = allocation_map_.find(ptr);
if (it == allocation_map_.end()) {
assert(false && "Can't find address in allocation map");
return HSA_STATUS_ERROR;
}
region = it->second.region;
size = it->second.size;
allocation_map_.erase(it);
if (it == allocation_map_.end()) {
assert(false && "Can't find address in allocation map");
return HSA_STATUS_ERROR;
}
region = it->second.region;
size = it->second.size;
allocation_map_.erase(it);
return region->Free(ptr, size);
}
@@ -463,8 +465,7 @@ hsa_status_t Runtime::AllowAccess(uint32_t num_agents,
{
ScopedAcquire<KernelMutex> lock(&memory_lock_);
std::map<const void*, AllocationRegion>::const_iterator it =
allocation_map_.find(ptr);
std::map<const void*, AllocationRegion>::const_iterator it = allocation_map_.find(ptr);
if (it == allocation_map_.end()) {
return HSA_STATUS_ERROR;
@@ -634,8 +635,7 @@ hsa_status_t Runtime::InteropMap(uint32_t num_agents, Agent** agents,
return HSA_STATUS_SUCCESS;
}
hsa_status_t Runtime::InteropUnmap(void* ptr)
{
hsa_status_t Runtime::InteropUnmap(void* ptr) {
if(hsaKmtUnmapMemoryToGPU(ptr)!=HSAKMT_STATUS_SUCCESS)
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if(hsaKmtDeregisterMemory(ptr)!=HSAKMT_STATUS_SUCCESS)
@@ -643,6 +643,78 @@ hsa_status_t Runtime::InteropUnmap(void* ptr)
return HSA_STATUS_SUCCESS;
}
/// @brief Queries the thunk for the allocation referenced by @p ptr and
/// reports the result through @p info; optionally also returns the agents
/// registered under the nodes the allocation is mapped to.
///
/// @param[in] ptr Pointer to query.
/// @param[in,out] info Receives the allocation description. info->size must be
/// at least sizeof(hsa_amd_pointer_info_v1_s) on entry; on return it holds the
/// number of bytes this runtime revision fills in. Must not be NULL (checked by
/// the API layer, not here).
/// @param[in] alloc Allocator used for the @p accessible array.
/// @param[out] num_agents_accessible Receives the number of entries written to
/// @p accessible.
/// @param[out] accessible Receives the array obtained from @p alloc.
/// The agent list is produced only when @p alloc, @p num_agents_accessible and
/// @p accessible are all non-NULL.
///
/// @retval HSA_STATUS_SUCCESS @p info (and the agent list, if requested) was
/// filled in.
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT info->size is smaller than the
/// first revision of the info structure.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES @p alloc returned NULL for a
/// non-empty agent list.
hsa_status_t Runtime::PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
                              uint32_t* num_agents_accessible, hsa_agent_t** accessible) {
  // Zero-initialized so that no indeterminate values are read below should the
  // thunk query return without filling the structure.
  HsaPointerInfo thunkInfo = {};
  uint32_t* mappedNodes = nullptr;
  // Number of valid entries copied into mappedNodes.
  uint32_t mappedCount = 0;

  // check output struct is at least as large as the first info revision.
  if (info->size < sizeof(struct hsa_amd_pointer_info_v1_s)) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  const bool returnListData =
      ((alloc != nullptr) && (num_agents_accessible != nullptr) && (accessible != nullptr));

  // NOTE(review): the status returned by hsaKmtQueryPointerInfo is not
  // propagated, matching the existing behavior; confirm whether failures
  // should be surfaced to the caller.
  if (returnListData) {
    const size_t max_agents = cpu_agents_.size() + gpu_agents_.size();
    // Scratch buffer for a private copy of the mapped node list; alloca storage
    // lives until this function returns.
    mappedNodes = (uint32_t*)alloca(max_agents * sizeof(uint32_t));

    // memory_lock protects access to the NMappedNodes array since this changes with calls to memory
    // APIs.
    ScopedAcquire<KernelMutex> lock(&memory_lock_);
    hsaKmtQueryPointerInfo(ptr, &thunkInfo);
    assert(thunkInfo.NMappedNodes <= max_agents &&
           "PointerInfo: Thunk returned more than all agents in NMappedNodes.");
    // Never copy more entries than the scratch buffer can hold, even when
    // asserts are compiled out.
    mappedCount = (thunkInfo.NMappedNodes <= max_agents) ? (uint32_t)thunkInfo.NMappedNodes
                                                         : (uint32_t)max_agents;
    if (mappedCount != 0) {
      memcpy(mappedNodes, thunkInfo.MappedNodes, mappedCount * sizeof(uint32_t));
    }
  } else {
    hsaKmtQueryPointerInfo(ptr, &thunkInfo);
  }

  // The thunk pointer type is cast directly to the public enum below, so the
  // numeric values of the two enums must agree.
  static_assert((int)HSA_POINTER_UNKNOWN == (int)HSA_EXT_POINTER_TYPE_UNKNOWN,
                "Thunk pointer info mismatch");
  static_assert((int)HSA_POINTER_ALLOCATED == (int)HSA_EXT_POINTER_TYPE_HSA,
                "Thunk pointer info mismatch");
  static_assert((int)HSA_POINTER_REGISTERED_USER == (int)HSA_EXT_POINTER_TYPE_LOCKED,
                "Thunk pointer info mismatch");
  static_assert((int)HSA_POINTER_REGISTERED_GRAPHICS == (int)HSA_EXT_POINTER_TYPE_GRAPHICS,
                "Thunk pointer info mismatch");

  // Report how much of the caller's structure this revision fills in.
  info->size = Min(info->size, sizeof(struct hsa_amd_pointer_info_v1_s));
  info->type = (hsa_amd_pointer_type_t)thunkInfo.Type;
  info->agentBaseAddress = (void*)thunkInfo.GPUAddress;
  info->hostBaseAddress = thunkInfo.CPUAddress;
  info->sizeInBytes = thunkInfo.SizeInBytes;
  info->userData = thunkInfo.UserData;

  if (returnListData) {
    // First pass: count the agents registered under every mapped node.
    // find() is used instead of operator[] so that an unexpected node ID
    // cannot insert a new entry into agents_by_node_ from this query path.
    uint32_t count = 0;
    for (uint32_t i = 0; i < mappedCount; i++) {
      const auto node = agents_by_node_.find(mappedNodes[i]);
      assert(node != agents_by_node_.end() &&
             "PointerInfo: Invalid node ID returned from thunk.");
      if (node != agents_by_node_.end()) count += node->second.size();
    }

    *accessible = (hsa_agent_t*)alloc(sizeof(hsa_agent_t) * count);
    // A NULL result is an allocation failure only when there is something to
    // store; an allocator may legitimately return NULL for a zero-byte request.
    if ((*accessible) == nullptr && count != 0) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    *num_agents_accessible = count;

    // Second pass: emit the public handle of every counted agent.
    uint32_t index = 0;
    for (uint32_t i = 0; i < mappedCount; i++) {
      const auto node = agents_by_node_.find(mappedNodes[i]);
      if (node == agents_by_node_.end()) continue;
      for (Agent* agent : node->second) {
        (*accessible)[index] = agent->public_handle();
        index++;
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Attaches the application value @p userptr to the allocation
/// referenced by @p ptr by forwarding both to hsaKmtSetMemoryUserData.
///
/// @retval HSA_STATUS_SUCCESS the thunk call reported HSAKMT_STATUS_SUCCESS.
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT the thunk call reported any other
/// status.
hsa_status_t Runtime::SetPtrInfoData(void* ptr, void* userptr) {
  const bool stored = (hsaKmtSetMemoryUserData(ptr, userptr) == HSAKMT_STATUS_SUCCESS);
  return stored ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
void Runtime::AsyncEventsLoop(void*) {
auto& async_events_control_ = runtime_singleton_->async_events_control_;
auto& async_events_ = runtime_singleton_->async_events_;
+2
Wyświetl plik
@@ -203,6 +203,8 @@ global:
hsa_ext_image_destroy;
hsa_ext_sampler_create;
hsa_ext_sampler_destroy;
hsa_amd_pointer_info;
hsa_amd_pointer_info_set_userdata;
local:
*;
@@ -140,6 +140,8 @@ struct AmdExtTable {
decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
};
// Table to export HSA Core Runtime Apis
+125
Wyświetl plik
@@ -1268,6 +1268,131 @@ hsa_status_t HSA_API hsa_amd_image_create(
hsa_ext_image_t *image
);
/**
 * @brief Denotes the type of memory in a pointer info query.
 *
 * The runtime statically asserts that the UNKNOWN, HSA, LOCKED and GRAPHICS
 * values equal the corresponding thunk pointer type values and converts
 * between the two by cast, so these enumerators must not be renumbered.
 */
typedef enum {
  /*
  Memory is not known to the HSA driver. Unallocated or unlocked system memory.
  */
  HSA_EXT_POINTER_TYPE_UNKNOWN = 0,
  /*
  Memory was allocated with an HSA memory allocator.
  */
  HSA_EXT_POINTER_TYPE_HSA = 1,
  /*
  System memory which has been locked for use with an HSA agent.
  Memory of this type is normal malloc'd memory and is always accessible to
  the CPU. Pointer info queries may not include CPU agents in the accessible
  agents list as the CPU has implicit access.
  */
  HSA_EXT_POINTER_TYPE_LOCKED = 2,
  /*
  Memory originated in a graphics component and is shared with ROCr.
  */
  HSA_EXT_POINTER_TYPE_GRAPHICS = 3,
  /*
  Memory has been shared with the local process via ROCr IPC APIs.
  */
  HSA_EXT_POINTER_TYPE_IPC = 4
} hsa_amd_pointer_type_t;
/**
 * @brief Describes a memory allocation known to ROCr.
 * Within a ROCr major version this structure can only grow.
 */
typedef struct hsa_amd_pointer_info_v1_s {
  /*
  Size in bytes of this structure. Used for version control within a major ROCr
  revision. Set to sizeof(hsa_amd_pointer_info_t) prior to calling
  hsa_amd_pointer_info. If the runtime supports an older version of pointer
  info then size will be smaller on return. Members starting after the return
  value of size will not be updated by hsa_amd_pointer_info.
  A value smaller than the first revision of this structure is rejected with
  HSA_STATUS_ERROR_INVALID_ARGUMENT.
  */
  uint32_t size;
  /*
  The type of allocation referenced.
  */
  hsa_amd_pointer_type_t type;
  /*
  Base address at which non-host agents may access the allocation.
  */
  void* agentBaseAddress;
  /*
  Base address at which the host agent may access the allocation.
  */
  void* hostBaseAddress;
  /*
  Size of the allocation in bytes.
  */
  size_t sizeInBytes;
  /*
  Application provided value, as set with hsa_amd_pointer_info_set_userdata.
  */
  void* userData;
} hsa_amd_pointer_info_t;
/**
 * @brief Retrieves information about the allocation referenced by the given
 * pointer. Optionally returns the number and list of agents which can
 * directly access the allocation.
 *
 * The agent list is returned only when @p alloc, @p num_agents_accessible and
 * @p accessible are all non-NULL; if any of them is NULL only @p info is
 * filled in.
 *
 * @param[in] ptr Pointer which references the allocation to retrieve info for.
 *
 * @param[in, out] info Pointer to structure to be filled with allocation info.
 * Data member size must be set to the size of the structure prior to calling
 * hsa_amd_pointer_info. On return size will be set to the size of the
 * pointer info structure supported by the runtime, if smaller. Members
 * beyond the returned value of size will not be updated by the API.
 * Must not be NULL.
 *
 * @param[in] alloc Function pointer to an allocator used to allocate the
 * @p accessible array. If NULL, @p accessible will not be returned.
 *
 * @param[out] num_agents_accessible Receives the count of agents in
 * @p accessible. If NULL, @p accessible will not be returned.
 *
 * @param[out] accessible Receives a pointer to the array, allocated by @p alloc,
 * holding the list of agents which may directly access the allocation.
 * May be NULL, in which case no agent list is returned.
 *
 * @retval HSA_STATUS_SUCCESS Info retrieved successfully
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources, including @p alloc failing to provide the
 * @p accessible array
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT NULL in @p ptr or @p info, or the
 * size member of @p info is smaller than the first revision of the pointer
 * info structure.
 */
hsa_status_t HSA_API hsa_amd_pointer_info(void* ptr,
                                          hsa_amd_pointer_info_t* info,
                                          void* (*alloc)(size_t),
                                          uint32_t* num_agents_accessible,
                                          hsa_agent_t** accessible);
/**
 * @brief Associates an arbitrary pointer with an allocation known to ROCr.
 * The pointer can be fetched by hsa_amd_pointer_info in the userData field.
 *
 * @param[in] ptr Pointer to the first byte of an allocation known to ROCr
 * with which to associate @p userdata.
 *
 * @param[in] userdata Arbitrary pointer to associate with the allocation.
 *
 * @retval HSA_STATUS_SUCCESS @p userdata successfully stored.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is not known to ROCr.
 */
hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(void* ptr,
                                                       void* userdata);
#ifdef __cplusplus
} // end extern "C" block
#endif