From 9dd76dbeda23525cdef5babd326afe19c7e23bd2 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Wed, 9 Nov 2016 22:21:24 -0600 Subject: [PATCH] Add pointer info support. Change-Id: I3edcc0bfddbf12465065c9bc3b6565288faff1b8 --- .../core/common/hsa_table_interface.cpp | 12 +- .../hsa-runtime/core/inc/hsa_ext_amd_impl.h | 7 + runtime/hsa-runtime/core/inc/runtime.h | 19 ++- .../core/runtime/amd_memory_region.cpp | 14 +- .../core/runtime/hsa_api_trace.cpp | 2 + .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 14 ++ runtime/hsa-runtime/core/runtime/runtime.cpp | 108 ++++++++++++--- runtime/hsa-runtime/hsacore.so.def | 2 + runtime/hsa-runtime/inc/hsa_api_trace.h | 2 + runtime/hsa-runtime/inc/hsa_ext_amd.h | 125 ++++++++++++++++++ 10 files changed, 279 insertions(+), 26 deletions(-) diff --git a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 8df54eebfa..ed736fb282 100644 --- a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1035,7 +1035,7 @@ hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr) { return amdExtTable->hsa_amd_interop_unmap_buffer_fn(ptr); } -// Use the function pointer from local instance Image Extension +// Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_image_create( hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor, @@ -1047,3 +1047,13 @@ hsa_status_t HSA_API hsa_amd_image_create( image_layout, image_data, access_permission, image); } +// Mirrors Amd Extension Apis +hsa_status_t hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible) { + return amdExtTable->hsa_amd_pointer_info_fn(ptr, info, alloc, num_agents_accessible, accessible); +} + +// Mirrors Amd Extension Apis +hsa_status_t hsa_amd_pointer_info_set_userdata(void* ptr, void* userptr) { + return 
amdExtTable->hsa_amd_pointer_info_set_userdata_fn(ptr, userptr); +} diff --git a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 54f8e34588..61be099665 100644 --- a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -181,6 +181,13 @@ hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, // Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr); +// Mirrors Amd Extension Apis +hsa_status_t hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible); + +// Mirrors Amd Extension Apis +hsa_status_t hsa_amd_pointer_info_set_userdata(void* ptr, void* userdata); + } // end of AMD namespace #endif // header guard diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index 8ad74bd1b4..c06b7f1ff4 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -70,6 +70,11 @@ #define HSA_QUEUE_ALIGN_BYTES 64 #define HSA_PACKET_ALIGN_BYTES 64 +//Avoids include +namespace amd { + class MemoryRegion; +} + namespace core { extern bool g_use_interrupt_wait; @@ -84,6 +89,7 @@ extern bool g_use_interrupt_wait; /// - maintain loader state. /// - monitor asynchronous event from agent. class Runtime { + friend class amd::MemoryRegion; public: /// @brief Structure to describe connectivity between agents. 
struct LinkInfo { @@ -258,11 +264,15 @@ class Runtime { hsa_status_t InteropUnmap(void* ptr); + hsa_status_t PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible); + + hsa_status_t SetPtrInfoData(void* ptr, void* userptr); + const std::vector& cpu_agents() { return cpu_agents_; } const std::vector& gpu_agents() { return gpu_agents_; } - const std::vector& gpu_ids() { return gpu_ids_; } Agent* blit_agent() { return blit_agent_; } @@ -399,7 +409,9 @@ class Runtime { // Mutex object to protect multithreaded access to ::Acquire and ::Release. KernelMutex kernel_lock_; - // Mutex object to protect multithreaded access to ::allocation_map_. + // Mutex object to protect multithreaded access to ::allocation_map_, + // KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo + // registered & mapped arrays. KernelMutex memory_lock_; // Array containing tools library handles. @@ -411,6 +423,9 @@ class Runtime { // Agent list containing all compatible GPU agents in the platform. std::vector gpu_agents_; + // Agent map containing all agents indexed by their KFD node IDs. + std::map > agents_by_node_; + // Agent list containing all compatible gpu agent ids in the platform. std::vector gpu_ids_; diff --git a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 77338cf816..95b4f259ab 100644 --- a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -469,11 +469,15 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents, HsaMemMapFlags map_flag = map_flag_; map_flag.ui32.HostAccess |= (cpu_in_list) ? 
1 : 0; - uint64_t alternate_va = 0; - if (!amd::MemoryRegion::MakeKfdMemoryResident( - whitelist_nodes.size(), &whitelist_nodes[0], const_cast(ptr), - size, &alternate_va, map_flag)) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + { + ScopedAcquire lock(&core::Runtime::runtime_singleton_->memory_lock_); + uint64_t alternate_va = 0; + if (!amd::MemoryRegion::MakeKfdMemoryResident( + whitelist_nodes.size(), &whitelist_nodes[0], const_cast(ptr), + size, &alternate_va, map_flag)) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } } for (GpuAgentInt* gpu : whitelist_gpus) { diff --git a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index a2d56d0326..346553ee39 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -358,6 +358,8 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_memory_fill_fn = AMD::hsa_amd_memory_fill; amd_ext_api.hsa_amd_interop_map_buffer_fn = AMD::hsa_amd_interop_map_buffer; amd_ext_api.hsa_amd_interop_unmap_buffer_fn = AMD::hsa_amd_interop_unmap_buffer; + amd_ext_api.hsa_amd_pointer_info_fn = AMD::hsa_amd_pointer_info; + amd_ext_api.hsa_amd_pointer_info_set_userdata_fn = AMD::hsa_amd_pointer_info_set_userdata; } class Init { diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 4e4b2111e2..d450ab814b 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -597,4 +597,18 @@ hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr) { return HSA_STATUS_SUCCESS; } +hsa_status_t hsa_amd_pointer_info(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_accessible, hsa_agent_t** accessible) { + IS_OPEN(); + IS_BAD_PTR(ptr); + IS_BAD_PTR(info); + return core::Runtime::runtime_singleton_->PtrInfo(ptr, info, alloc, num_accessible, accessible); +} + +hsa_status_t 
hsa_amd_pointer_info_set_userdata(void* ptr, void* userdata) { + IS_OPEN(); + IS_BAD_PTR(ptr); + return core::Runtime::runtime_singleton_->SetPtrInfoData(ptr, userdata); +} + } // end of AMD namespace diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 1384bd8d5b..59ba2fd895 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -142,6 +142,10 @@ bool Runtime::IsOpen() { } void Runtime::RegisterAgent(Agent* agent) { + // Record the agent in the node-to-agent reverse lookup table. + agents_by_node_[agent->node_id()].push_back(agent); + + // Process agent as a cpu or gpu device. if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) { cpu_agents_.push_back(agent); @@ -230,6 +234,8 @@ void Runtime::RegisterAgent(Agent* agent) { } void Runtime::DestroyAgents() { + agents_by_node_.clear(); + std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject()); gpu_agents_.clear(); @@ -302,11 +308,11 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size, MemoryRegion::AllocateFlags alloc_flags, void** address) { + ScopedAcquire lock(&memory_lock_); hsa_status_t status = region->Allocate(size, alloc_flags, address); // Track the allocation result so that it could be freed properly. 
if (status == HSA_STATUS_SUCCESS) { - ScopedAcquire lock(&memory_lock_); allocation_map_[*address] = AllocationRegion(region, size); } @@ -320,22 +326,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) { const MemoryRegion* region = NULL; size_t size = 0; - { - ScopedAcquire lock(&memory_lock_); + ScopedAcquire lock(&memory_lock_); - std::map::const_iterator it = - allocation_map_.find(ptr); + std::map::const_iterator it = allocation_map_.find(ptr); - if (it == allocation_map_.end()) { - assert(false && "Can't find address in allocation map"); - return HSA_STATUS_ERROR; - } - - region = it->second.region; - size = it->second.size; - - allocation_map_.erase(it); + if (it == allocation_map_.end()) { + assert(false && "Can't find address in allocation map"); + return HSA_STATUS_ERROR; } + region = it->second.region; + size = it->second.size; + + allocation_map_.erase(it); return region->Free(ptr, size); } @@ -463,8 +465,7 @@ hsa_status_t Runtime::AllowAccess(uint32_t num_agents, { ScopedAcquire lock(&memory_lock_); - std::map::const_iterator it = - allocation_map_.find(ptr); + std::map::const_iterator it = allocation_map_.find(ptr); if (it == allocation_map_.end()) { return HSA_STATUS_ERROR; @@ -634,8 +635,7 @@ hsa_status_t Runtime::InteropMap(uint32_t num_agents, Agent** agents, return HSA_STATUS_SUCCESS; } -hsa_status_t Runtime::InteropUnmap(void* ptr) -{ +hsa_status_t Runtime::InteropUnmap(void* ptr) { if(hsaKmtUnmapMemoryToGPU(ptr)!=HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT; if(hsaKmtDeregisterMemory(ptr)!=HSAKMT_STATUS_SUCCESS) @@ -643,6 +643,78 @@ hsa_status_t Runtime::InteropUnmap(void* ptr) return HSA_STATUS_SUCCESS; } +hsa_status_t Runtime::PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible) { + HsaPointerInfo thunkInfo; + uint32_t* mappedNodes; + + // check output struct is at least as large as the first info revision. 
+ if (info->size < sizeof(struct hsa_amd_pointer_info_v1_s)) return HSA_STATUS_ERROR_INVALID_ARGUMENT; + + bool returnListData = + ((alloc != nullptr) && (num_agents_accessible != nullptr) && (accessible != nullptr)); + if (returnListData) { + size_t max_agents = cpu_agents_.size() + gpu_agents_.size(); + mappedNodes = (uint32_t*)alloca(max_agents * sizeof(uint32_t)); + // memory_lock protects access to the NMappedNodes array since this changes with calls to memory + // APIs. + ScopedAcquire lock(&memory_lock_); + hsaKmtQueryPointerInfo(ptr, &thunkInfo); + assert(thunkInfo.NMappedNodes <= max_agents && + "PointerInfo: Thunk returned more than all agents in NMappedNodes."); + memcpy(mappedNodes, thunkInfo.MappedNodes, thunkInfo.NMappedNodes * sizeof(uint32_t)); + } else { + hsaKmtQueryPointerInfo(ptr, &thunkInfo); + } + + static_assert((int)HSA_POINTER_UNKNOWN == (int)HSA_EXT_POINTER_TYPE_UNKNOWN, + "Thunk pointer info mismatch"); + static_assert((int)HSA_POINTER_ALLOCATED == (int)HSA_EXT_POINTER_TYPE_HSA, + "Thunk pointer info mismatch"); + static_assert((int)HSA_POINTER_REGISTERED_USER == (int)HSA_EXT_POINTER_TYPE_LOCKED, + "Thunk pointer info mismatch"); + static_assert((int)HSA_POINTER_REGISTERED_GRAPHICS == (int)HSA_EXT_POINTER_TYPE_GRAPHICS, + "Thunk pointer info mismatch"); + + info->size = Min(info->size, sizeof(struct hsa_amd_pointer_info_v1_s)); + info->type = (hsa_amd_pointer_type_t)thunkInfo.Type; + info->agentBaseAddress = (void*)thunkInfo.GPUAddress; + info->hostBaseAddress = thunkInfo.CPUAddress; + info->sizeInBytes = thunkInfo.SizeInBytes; + info->userData = thunkInfo.UserData; + + if (returnListData) { + uint32_t count = 0; + for (int i = 0; i < thunkInfo.NMappedNodes; i++) { + assert(mappedNodes[i] < agents_by_node_.size() && + "PointerInfo: Invalid node ID returned from thunk."); + count += agents_by_node_[mappedNodes[i]].size(); + } + + *accessible = (hsa_agent_t*)alloc(sizeof(hsa_agent_t) * count); + if ((*accessible) == nullptr) return 
HSA_STATUS_ERROR_OUT_OF_RESOURCES; + *num_agents_accessible = count; + + uint32_t index = 0; + for (int i = 0; i < thunkInfo.NMappedNodes; i++) { + auto& list = agents_by_node_[mappedNodes[i]]; + for (int j = 0; j < list.size(); j++) { + (*accessible)[index] = list[j]->public_handle(); + index++; + } + } + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t Runtime::SetPtrInfoData(void* ptr, void* userptr) { + if (hsaKmtSetMemoryUserData(ptr, userptr) == HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_SUCCESS; + else + return HSA_STATUS_ERROR_INVALID_ARGUMENT; +} + void Runtime::AsyncEventsLoop(void*) { auto& async_events_control_ = runtime_singleton_->async_events_control_; auto& async_events_ = runtime_singleton_->async_events_; diff --git a/runtime/hsa-runtime/hsacore.so.def b/runtime/hsa-runtime/hsacore.so.def index b8d76c517f..d2271e7b4e 100644 --- a/runtime/hsa-runtime/hsacore.so.def +++ b/runtime/hsa-runtime/hsacore.so.def @@ -203,6 +203,8 @@ global: hsa_ext_image_destroy; hsa_ext_sampler_create; hsa_ext_sampler_destroy; + hsa_amd_pointer_info; + hsa_amd_pointer_info_set_userdata; local: *; diff --git a/runtime/hsa-runtime/inc/hsa_api_trace.h b/runtime/hsa-runtime/inc/hsa_api_trace.h index 9a8f455d26..7b73ef4000 100644 --- a/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -140,6 +140,8 @@ struct AmdExtTable { decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn; decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn; decltype(hsa_amd_image_create)* hsa_amd_image_create_fn; + decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn; + decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn; }; // Table to export HSA Core Runtime Apis diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h index ab7757389b..c6ade88901 100755 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -1268,6 +1268,131 @@ 
hsa_status_t HSA_API hsa_amd_image_create( hsa_ext_image_t *image ); +/** + * @brief Denotes the type of memory in a pointer info query. + */ +typedef enum { + /* + Memory is not known to the HSA driver. Unallocated or unlocked system memory. + */ + HSA_EXT_POINTER_TYPE_UNKNOWN = 0, + /* + Memory was allocated with an HSA memory allocator. + */ + HSA_EXT_POINTER_TYPE_HSA = 1, + /* + System memory which has been locked for use with an HSA agent. + + Memory of this type is normal malloc'd memory and is always accessible to + the CPU. Pointer info queries may not include CPU agents in the accessible + agents list as the CPU has implicit access. + */ + HSA_EXT_POINTER_TYPE_LOCKED = 2, + /* + Memory originated in a graphics component and is shared with ROCr. + */ + HSA_EXT_POINTER_TYPE_GRAPHICS = 3, + /* + Memory has been shared with the local process via ROCr IPC APIs. + */ + HSA_EXT_POINTER_TYPE_IPC = 4 +} hsa_amd_pointer_type_t; + +/** + * @brief Describes a memory allocation known to ROCr. + * Within a ROCr major version this structure can only grow. + */ +typedef struct hsa_amd_pointer_info_v1_s { + /* + size in bytes of this structure. Used for version control within a major ROCr + revision. Set to sizeof(hsa_amd_pointer_info_t) prior to calling + hsa_amd_pointer_info. If the runtime supports an older version of pointer + info then size will be smaller on return. Members starting after the return + value of size will not be updated by hsa_amd_pointer_info. + */ + uint32_t size; + /* + The type of allocation referenced. + */ + hsa_amd_pointer_type_t type; + /* + Base address at which non-host agents may access the allocation. + */ + void* agentBaseAddress; + /* + Base address at which the host agent may access the allocation. + */ + void* hostBaseAddress; + /* + Size of the allocation + */ + size_t sizeInBytes; + /* + Application provided value.
+ */ + void* userData; +} hsa_amd_pointer_info_t; + +/** + * @brief Retrieves information about the allocation referenced by the given + * pointer. Optionally returns the number and list of agents which can + * directly access the allocation. + * + * @param[in] ptr Pointer which references the allocation to retrieve info for. + * + * @param[in, out] info Pointer to structure to be filled with allocation info. + * Data member size must be set to the size of the structure prior to calling + * hsa_amd_pointer_info. On return size will be set to the size of the + * pointer info structure supported by the runtime, if smaller. Members + * beyond the returned value of size will not be updated by the API. + * Must not be NULL. + * + * @param[in] alloc Function pointer to an allocator used to allocate the + * @p accessible array. If NULL @p accessible will not be returned. + * + * @param[out] num_agents_accessible Receives the count of agents in + * @p accessible. If NULL @p accessible will not be returned. + * + * @param[out] accessible Receives a pointer to the array, allocated by @p alloc, + * holding the list of agents which may directly access the allocation. + * May be NULL. + * + * @retval HSA_STATUS_SUCCESS Info retrieved successfully + * + * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized + * + * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating + * necessary resources + * + * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT NULL in @p ptr or @p info. + */ +hsa_status_t HSA_API hsa_amd_pointer_info(void* ptr, + hsa_amd_pointer_info_t* info, + void* (*alloc)(size_t), + uint32_t* num_agents_accessible, + hsa_agent_t** accessible); + +/** + * @brief Associates an arbitrary pointer with an allocation known to ROCr. + * The pointer can be fetched by hsa_amd_pointer_info in the userData field. + * + * @param[in] ptr Pointer to the first byte of an allocation known to ROCr + * with which to associate @p userdata.
+ * + * @param[in] userdata Arbitrary pointer to associate with the allocation. + * + * @retval HSA_STATUS_SUCCESS @p userdata successfully stored. + * + * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized + * + * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating + * necessary resources + * + * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is not known to ROCr. + */ +hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(void* ptr, + void* userdata); + #ifdef __cplusplus } // end extern "C" block #endif